1 /* Copyright (C) 2008-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 2 * 3 * This program is free software; you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License version 2 as 5 * published by the Free Software Foundation. 6 */ 7 8 /* Kernel module implementing an IP set type: the list:set type */ 9 10 #include <linux/module.h> 11 #include <linux/ip.h> 12 #include <linux/rculist.h> 13 #include <linux/skbuff.h> 14 #include <linux/errno.h> 15 16 #include <linux/netfilter/ipset/ip_set.h> 17 #include <linux/netfilter/ipset/ip_set_list.h> 18 19 #define IPSET_TYPE_REV_MIN 0 20 /* 1 Counters support added */ 21 /* 2 Comments support added */ 22 #define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */ 23 24 MODULE_LICENSE("GPL"); 25 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); 26 IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); 27 MODULE_ALIAS("ip_set_list:set"); 28 29 /* Member elements */ 30 struct set_elem { 31 struct rcu_head rcu; 32 struct list_head list; 33 ip_set_id_t id; 34 } __aligned(__alignof__(u64)); 35 36 struct set_adt_elem { 37 ip_set_id_t id; 38 ip_set_id_t refid; 39 int before; 40 }; 41 42 /* Type structure */ 43 struct list_set { 44 u32 size; /* size of set list array */ 45 struct timer_list gc; /* garbage collection */ 46 struct net *net; /* namespace */ 47 struct list_head members; /* the set members */ 48 }; 49 50 static int 51 list_set_ktest(struct ip_set *set, const struct sk_buff *skb, 52 const struct xt_action_param *par, 53 struct ip_set_adt_opt *opt, const struct ip_set_ext *ext) 54 { 55 struct list_set *map = set->data; 56 struct set_elem *e; 57 u32 cmdflags = opt->cmdflags; 58 int ret; 59 60 /* Don't lookup sub-counters at all */ 61 opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS; 62 if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) 63 opt->cmdflags |= IPSET_FLAG_SKIP_COUNTER_UPDATE; 64 list_for_each_entry_rcu(e, &map->members, list) { 65 if (SET_WITH_TIMEOUT(set) && 66 ip_set_timeout_expired(ext_timeout(e, set))) 67 continue; 68 ret = ip_set_test(e->id, skb, par, opt); 69 if (ret > 0) { 70 if (SET_WITH_COUNTER(set)) 71 ip_set_update_counter(ext_counter(e, set), 72 ext, &opt->ext, 73 cmdflags); 74 if (SET_WITH_SKBINFO(set)) 75 ip_set_get_skbinfo(ext_skbinfo(e, set), 76 ext, &opt->ext, 77 cmdflags); 78 return ret; 79 } 80 } 81 return 0; 82 } 83 84 static int 85 list_set_kadd(struct ip_set *set, const struct sk_buff *skb, 86 const struct xt_action_param *par, 87 struct ip_set_adt_opt *opt, const struct ip_set_ext *ext) 88 { 89 struct list_set *map = set->data; 90 struct set_elem *e; 91 int ret; 92 93 list_for_each_entry(e, &map->members, list) { 94 if (SET_WITH_TIMEOUT(set) && 95 ip_set_timeout_expired(ext_timeout(e, set))) 96 continue; 97 ret = ip_set_add(e->id, skb, par, opt); 98 if (ret == 0) 99 return ret; 100 } 101 return 0; 102 } 103 104 static int 105 list_set_kdel(struct ip_set *set, const struct sk_buff *skb, 106 const struct xt_action_param *par, 107 struct ip_set_adt_opt *opt, const struct ip_set_ext *ext) 108 { 109 struct list_set *map = set->data; 110 struct set_elem *e; 111 int ret; 112 113 list_for_each_entry(e, &map->members, list) { 114 if (SET_WITH_TIMEOUT(set) && 115 ip_set_timeout_expired(ext_timeout(e, set))) 116 continue; 117 ret = ip_set_del(e->id, skb, par, opt); 118 if (ret == 0) 119 return ret; 120 } 121 return 0; 122 } 123 124 static int 125 list_set_kadt(struct ip_set *set, const struct sk_buff *skb, 126 const struct xt_action_param *par, 127 enum ipset_adt adt, struct ip_set_adt_opt *opt) 128 { 129 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 130 int ret = -EINVAL; 131 132 rcu_read_lock(); 133 switch (adt) { 134 case IPSET_TEST: 135 ret = list_set_ktest(set, skb, par, opt, &ext); 136 break; 137 case IPSET_ADD: 138 ret = list_set_kadd(set, skb, par, opt, &ext); 139 break; 140 case IPSET_DEL: 141 ret = list_set_kdel(set, skb, par, opt, &ext); 142 break; 143 default: 144 break; 145 } 146 rcu_read_unlock(); 147 148 return ret; 149 } 150 151 /* Userspace interfaces: we are protected by the nfnl mutex */ 152 153 static void 154 __list_set_del(struct ip_set *set, struct set_elem *e) 155 { 156 struct list_set *map = set->data; 157 158 ip_set_put_byindex(map->net, e->id); 159 /* We may call it, because we don't have a to be destroyed 160 * extension which is used by the kernel. 161 */ 162 ip_set_ext_destroy(set, e); 163 kfree_rcu(e, rcu); 164 } 165 166 static inline void 167 list_set_del(struct ip_set *set, struct set_elem *e) 168 { 169 list_del_rcu(&e->list); 170 __list_set_del(set, e); 171 } 172 173 static inline void 174 list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old) 175 { 176 list_replace_rcu(&old->list, &e->list); 177 __list_set_del(set, old); 178 } 179 180 static void 181 set_cleanup_entries(struct ip_set *set) 182 { 183 struct list_set *map = set->data; 184 struct set_elem *e, *n; 185 186 list_for_each_entry_safe(e, n, &map->members, list) 187 if (ip_set_timeout_expired(ext_timeout(e, set))) 188 list_set_del(set, e); 189 } 190 191 static int 192 list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, 193 struct ip_set_ext *mext, u32 flags) 194 { 195 struct list_set *map = set->data; 196 struct set_adt_elem *d = value; 197 struct set_elem *e, *next, *prev = NULL; 198 int ret; 199 200 list_for_each_entry(e, &map->members, list) { 201 if (SET_WITH_TIMEOUT(set) && 202 ip_set_timeout_expired(ext_timeout(e, set))) 203 continue; 204 else if (e->id != d->id) { 205 prev = e; 206 continue; 207 } 208 209 if (d->before == 0) { 210 ret = 1; 211 } else if (d->before > 0) { 212 next = list_next_entry(e, list); 213 ret = !list_is_last(&e->list, &map->members) && 214 next->id == d->refid; 215 } else { 216 ret = prev && prev->id == d->refid; 217 } 218 return ret; 219 } 220 return 0; 221 } 222 223 static void 224 list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext, 225 struct set_elem *e) 226 { 227 if (SET_WITH_COUNTER(set)) 228 ip_set_init_counter(ext_counter(e, set), ext); 229 if (SET_WITH_COMMENT(set)) 230 ip_set_init_comment(ext_comment(e, set), ext); 231 if (SET_WITH_SKBINFO(set)) 232 ip_set_init_skbinfo(ext_skbinfo(e, set), ext); 233 /* Update timeout last */ 234 if (SET_WITH_TIMEOUT(set)) 235 ip_set_timeout_set(ext_timeout(e, set), ext->timeout); 236 } 237 238 static int 239 list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, 240 struct ip_set_ext *mext, u32 flags) 241 { 242 struct list_set *map = set->data; 243 struct set_adt_elem *d = value; 244 struct set_elem *e, *n, *prev, *next; 245 bool flag_exist = flags & IPSET_FLAG_EXIST; 246 247 if (SET_WITH_TIMEOUT(set)) 248 set_cleanup_entries(set); 249 250 /* Find where to add the new entry */ 251 n = prev = next = NULL; 252 list_for_each_entry(e, &map->members, list) { 253 if (SET_WITH_TIMEOUT(set) && 254 ip_set_timeout_expired(ext_timeout(e, set))) 255 continue; 256 else if (d->id == e->id) 257 n = e; 258 else if (d->before == 0 || e->id != d->refid) 259 continue; 260 else if (d->before > 0) 261 next = e; 262 else 263 prev = e; 264 } 265 /* Re-add already existing element */ 266 if (n) { 267 if ((d->before > 0 && !next) || 268 (d->before < 0 && !prev)) 269 return -IPSET_ERR_REF_EXIST; 270 if (!flag_exist) 271 return -IPSET_ERR_EXIST; 272 /* Update extensions */ 273 ip_set_ext_destroy(set, n); 274 list_set_init_extensions(set, ext, n); 275 276 /* Set is already added to the list */ 277 ip_set_put_byindex(map->net, d->id); 278 return 0; 279 } 280 /* Add new entry */ 281 if (d->before == 0) { 282 /* Append */ 283 n = list_empty(&map->members) ? NULL : 284 list_last_entry(&map->members, struct set_elem, list); 285 } else if (d->before > 0) { 286 /* Insert after next element */ 287 if (!list_is_last(&next->list, &map->members)) 288 n = list_next_entry(next, list); 289 } else { 290 /* Insert before prev element */ 291 if (prev->list.prev != &map->members) 292 n = list_prev_entry(prev, list); 293 } 294 /* Can we replace a timed out entry? */ 295 if (n && 296 !(SET_WITH_TIMEOUT(set) && 297 ip_set_timeout_expired(ext_timeout(n, set)))) 298 n = NULL; 299 300 e = kzalloc(set->dsize, GFP_ATOMIC); 301 if (!e) 302 return -ENOMEM; 303 e->id = d->id; 304 INIT_LIST_HEAD(&e->list); 305 list_set_init_extensions(set, ext, e); 306 if (n) 307 list_set_replace(set, e, n); 308 else if (next) 309 list_add_tail_rcu(&e->list, &next->list); 310 else if (prev) 311 list_add_rcu(&e->list, &prev->list); 312 else 313 list_add_tail_rcu(&e->list, &map->members); 314 315 return 0; 316 } 317 318 static int 319 list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, 320 struct ip_set_ext *mext, u32 flags) 321 { 322 struct list_set *map = set->data; 323 struct set_adt_elem *d = value; 324 struct set_elem *e, *next, *prev = NULL; 325 326 list_for_each_entry(e, &map->members, list) { 327 if (SET_WITH_TIMEOUT(set) && 328 ip_set_timeout_expired(ext_timeout(e, set))) 329 continue; 330 else if (e->id != d->id) { 331 prev = e; 332 continue; 333 } 334 335 if (d->before > 0) { 336 next = list_next_entry(e, list); 337 if (list_is_last(&e->list, &map->members) || 338 next->id != d->refid) 339 return -IPSET_ERR_REF_EXIST; 340 } else if (d->before < 0) { 341 if (!prev || prev->id != d->refid) 342 return -IPSET_ERR_REF_EXIST; 343 } 344 list_set_del(set, e); 345 return 0; 346 } 347 return d->before != 0 ? -IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST; 348 } 349 350 static int 351 list_set_uadt(struct ip_set *set, struct nlattr *tb[], 352 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 353 { 354 struct list_set *map = set->data; 355 ipset_adtfn adtfn = set->variant->adt[adt]; 356 struct set_adt_elem e = { .refid = IPSET_INVALID_ID }; 357 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 358 struct ip_set *s; 359 int ret = 0; 360 361 if (tb[IPSET_ATTR_LINENO]) 362 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 363 364 if (unlikely(!tb[IPSET_ATTR_NAME] || 365 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) 366 return -IPSET_ERR_PROTOCOL; 367 368 ret = ip_set_get_extensions(set, tb, &ext); 369 if (ret) 370 return ret; 371 e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s); 372 if (e.id == IPSET_INVALID_ID) 373 return -IPSET_ERR_NAME; 374 /* "Loop detection" */ 375 if (s->type->features & IPSET_TYPE_NAME) { 376 ret = -IPSET_ERR_LOOP; 377 goto finish; 378 } 379 380 if (tb[IPSET_ATTR_CADT_FLAGS]) { 381 u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); 382 383 e.before = f & IPSET_FLAG_BEFORE; 384 } 385 386 if (e.before && !tb[IPSET_ATTR_NAMEREF]) { 387 ret = -IPSET_ERR_BEFORE; 388 goto finish; 389 } 390 391 if (tb[IPSET_ATTR_NAMEREF]) { 392 e.refid = ip_set_get_byname(map->net, 393 nla_data(tb[IPSET_ATTR_NAMEREF]), 394 &s); 395 if (e.refid == IPSET_INVALID_ID) { 396 ret = -IPSET_ERR_NAMEREF; 397 goto finish; 398 } 399 if (!e.before) 400 e.before = -1; 401 } 402 if (adt != IPSET_TEST && SET_WITH_TIMEOUT(set)) 403 set_cleanup_entries(set); 404 405 ret = adtfn(set, &e, &ext, &ext, flags); 406 407 finish: 408 if (e.refid != IPSET_INVALID_ID) 409 ip_set_put_byindex(map->net, e.refid); 410 if (adt != IPSET_ADD || ret) 411 ip_set_put_byindex(map->net, e.id); 412 413 return ip_set_eexist(ret, flags) ? 0 : ret; 414 } 415 416 static void 417 list_set_flush(struct ip_set *set) 418 { 419 struct list_set *map = set->data; 420 struct set_elem *e, *n; 421 422 list_for_each_entry_safe(e, n, &map->members, list) 423 list_set_del(set, e); 424 } 425 426 static void 427 list_set_destroy(struct ip_set *set) 428 { 429 struct list_set *map = set->data; 430 struct set_elem *e, *n; 431 432 if (SET_WITH_TIMEOUT(set)) 433 del_timer_sync(&map->gc); 434 list_for_each_entry_safe(e, n, &map->members, list) { 435 list_del(&e->list); 436 ip_set_put_byindex(map->net, e->id); 437 ip_set_ext_destroy(set, e); 438 kfree(e); 439 } 440 kfree(map); 441 442 set->data = NULL; 443 } 444 445 static int 446 list_set_head(struct ip_set *set, struct sk_buff *skb) 447 { 448 const struct list_set *map = set->data; 449 struct nlattr *nested; 450 struct set_elem *e; 451 u32 n = 0; 452 453 list_for_each_entry(e, &map->members, list) 454 n++; 455 456 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 457 if (!nested) 458 goto nla_put_failure; 459 if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || 460 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || 461 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, 462 htonl(sizeof(*map) + n * set->dsize))) 463 goto nla_put_failure; 464 if (unlikely(ip_set_put_flags(skb, set))) 465 goto nla_put_failure; 466 ipset_nest_end(skb, nested); 467 468 return 0; 469 nla_put_failure: 470 return -EMSGSIZE; 471 } 472 473 static int 474 list_set_list(const struct ip_set *set, 475 struct sk_buff *skb, struct netlink_callback *cb) 476 { 477 const struct list_set *map = set->data; 478 struct nlattr *atd, *nested; 479 u32 i = 0, first = cb->args[IPSET_CB_ARG0]; 480 struct set_elem *e; 481 int ret = 0; 482 483 atd = ipset_nest_start(skb, IPSET_ATTR_ADT); 484 if (!atd) 485 return -EMSGSIZE; 486 list_for_each_entry(e, &map->members, list) { 487 if (i == first) 488 break; 489 i++; 490 } 491 492 rcu_read_lock(); 493 list_for_each_entry_from(e, &map->members, list) { 494 i++; 495 if (SET_WITH_TIMEOUT(set) && 496 ip_set_timeout_expired(ext_timeout(e, set))) 497 continue; 498 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 499 if (!nested) { 500 if (i == first) { 501 nla_nest_cancel(skb, atd); 502 ret = -EMSGSIZE; 503 goto out; 504 } 505 goto nla_put_failure; 506 } 507 if (nla_put_string(skb, IPSET_ATTR_NAME, 508 ip_set_name_byindex(map->net, e->id))) 509 goto nla_put_failure; 510 if (ip_set_put_extensions(skb, set, e, true)) 511 goto nla_put_failure; 512 ipset_nest_end(skb, nested); 513 } 514 515 ipset_nest_end(skb, atd); 516 /* Set listing finished */ 517 cb->args[IPSET_CB_ARG0] = 0; 518 goto out; 519 520 nla_put_failure: 521 nla_nest_cancel(skb, nested); 522 if (unlikely(i == first)) { 523 cb->args[IPSET_CB_ARG0] = 0; 524 ret = -EMSGSIZE; 525 } 526 cb->args[IPSET_CB_ARG0] = i - 1; 527 ipset_nest_end(skb, atd); 528 out: 529 rcu_read_unlock(); 530 return ret; 531 } 532 533 static bool 534 list_set_same_set(const struct ip_set *a, const struct ip_set *b) 535 { 536 const struct list_set *x = a->data; 537 const struct list_set *y = b->data; 538 539 return x->size == y->size && 540 a->timeout == b->timeout && 541 a->extensions == b->extensions; 542 } 543 544 static const struct ip_set_type_variant set_variant = { 545 .kadt = list_set_kadt, 546 .uadt = list_set_uadt, 547 .adt = { 548 [IPSET_ADD] = list_set_uadd, 549 [IPSET_DEL] = list_set_udel, 550 [IPSET_TEST] = list_set_utest, 551 }, 552 .destroy = list_set_destroy, 553 .flush = list_set_flush, 554 .head = list_set_head, 555 .list = list_set_list, 556 .same_set = list_set_same_set, 557 }; 558 559 static void 560 list_set_gc(unsigned long ul_set) 561 { 562 struct ip_set *set = (struct ip_set *)ul_set; 563 struct list_set *map = set->data; 564 565 spin_lock_bh(&set->lock); 566 set_cleanup_entries(set); 567 spin_unlock_bh(&set->lock); 568 569 map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; 570 add_timer(&map->gc); 571 } 572 573 static void 574 list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) 575 { 576 struct list_set *map = set->data; 577 578 init_timer(&map->gc); 579 map->gc.data = (unsigned long)set; 580 map->gc.function = gc; 581 map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; 582 add_timer(&map->gc); 583 } 584 585 /* Create list:set type of sets */ 586 587 static bool 588 init_list_set(struct net *net, struct ip_set *set, u32 size) 589 { 590 struct list_set *map; 591 592 map = kzalloc(sizeof(*map), GFP_KERNEL); 593 if (!map) 594 return false; 595 596 map->size = size; 597 map->net = net; 598 INIT_LIST_HEAD(&map->members); 599 set->data = map; 600 601 return true; 602 } 603 604 static int 605 list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[], 606 u32 flags) 607 { 608 u32 size = IP_SET_LIST_DEFAULT_SIZE; 609 610 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) || 611 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || 612 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) 613 return -IPSET_ERR_PROTOCOL; 614 615 if (tb[IPSET_ATTR_SIZE]) 616 size = ip_set_get_h32(tb[IPSET_ATTR_SIZE]); 617 if (size < IP_SET_LIST_MIN_SIZE) 618 size = IP_SET_LIST_MIN_SIZE; 619 620 set->variant = &set_variant; 621 set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem), 622 __alignof__(struct set_elem)); 623 if (!init_list_set(net, set, size)) 624 return -ENOMEM; 625 if (tb[IPSET_ATTR_TIMEOUT]) { 626 set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); 627 list_set_gc_init(set, list_set_gc); 628 } 629 return 0; 630 } 631 632 static struct ip_set_type list_set_type __read_mostly = { 633 .name = "list:set", 634 .protocol = IPSET_PROTOCOL, 635 .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST, 636 .dimension = IPSET_DIM_ONE, 637 .family = NFPROTO_UNSPEC, 638 .revision_min = IPSET_TYPE_REV_MIN, 639 .revision_max = IPSET_TYPE_REV_MAX, 640 .create = list_set_create, 641 .create_policy = { 642 [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, 643 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 644 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, 645 }, 646 .adt_policy = { 647 [IPSET_ATTR_NAME] = { .type = NLA_STRING, 648 .len = IPSET_MAXNAMELEN }, 649 [IPSET_ATTR_NAMEREF] = { .type = NLA_STRING, 650 .len = IPSET_MAXNAMELEN }, 651 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 652 [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, 653 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, 654 [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, 655 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, 656 [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, 657 .len = IPSET_MAX_COMMENT_SIZE }, 658 [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, 659 [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, 660 [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, 661 }, 662 .me = THIS_MODULE, 663 }; 664 665 static int __init 666 list_set_init(void) 667 { 668 return ip_set_type_register(&list_set_type); 669 } 670 671 static void __exit 672 list_set_fini(void) 673 { 674 rcu_barrier(); 675 ip_set_type_unregister(&list_set_type); 676 } 677 678 module_init(list_set_init); 679 module_exit(list_set_fini); 680
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.