/*
 * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * The filters are packed to hash tables of key nodes
 * with a set of 32bit key/mask pairs at every node.
 * Nodes reference next level hash tables etc.
 *
 * This scheme is the best universal classifier I managed to
 * invent; it is not super-fast, but it is not slow (provided you
 * program it correctly), and general enough.  And its relative
 * speed grows as the number of rules becomes larger.
 *
 * It seems that it represents the best middle point between
 * speed and manageability both by human and by machine.
 *
 * It is especially useful for link sharing combined with QoS;
 * pure RSVP doesn't need such a general approach and can use
 * much simpler (and faster) schemes, sort of cls_rsvp.c.
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/rtnetlink.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/pkt_sched.h>


struct tc_u_knode
{
        struct tc_u_knode *next;
        u32 handle;
        struct tc_u_hnode *ht_up;
#ifdef CONFIG_NET_CLS_POLICE
        struct tcf_police *police;
#endif
        struct tcf_result res;
        struct tc_u_hnode *ht_down;
        struct tc_u32_sel sel;
};

struct tc_u_hnode
{
        struct tc_u_hnode *next;
        u32 handle;
        struct tc_u_common *tp_c;
        int refcnt;
        unsigned divisor;
        u32 hgenerator;
        struct tc_u_knode *ht[1];
};

struct tc_u_common
{
        struct tc_u_common *next;
        struct tc_u_hnode *hlist;
        struct Qdisc *q;
        int refcnt;
        u32 hgenerator;
};

static struct tc_u_common *u32_list;

static __inline__ unsigned u32_hash_fold(u32 key, struct tc_u32_sel *sel)
{
        unsigned h = key & sel->hmask;

        h ^= h>>16;
        h ^= h>>8;
        return h;
}
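/*
 * Informal summary of the lookup below (u32_classify() is authoritative):
 * a handle names a hash table, a bucket and a key node (see the TC_U32_*
 * macros).  Classification starts in the root table and tries the key
 * nodes of the selected bucket in order.  A node matches iff, for each of
 * its keys, ((packet_word ^ key->val) & key->mask) == 0.  A matching node
 * either terminates with a result (TC_U32_TERMINAL) or links down to the
 * next table via ht_down, in which case the current position is pushed on
 * a small stack so a failed sub-lookup can resume at the upper level.
 *
 * u32_hash_fold() xors the bytes of the masked key word down into the low
 * byte; e.g. with hmask 0x000000ff and key 0x0a0b0c05 it returns 0x05,
 * which the caller then masks with ht->divisor to pick a bucket.
 */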
static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res)
{
        struct {
                struct tc_u_knode *knode;
                u8 *ptr;
        } stack[TC_U32_MAXDEPTH];

        struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
        u8 *ptr = skb->nh.raw;
        struct tc_u_knode *n;
        int sdepth = 0;
        int off2 = 0;
        int sel = 0;
        int i;

next_ht:
        n = ht->ht[sel];

next_knode:
        if (n) {
                struct tc_u32_key *key = n->sel.keys;

                for (i = n->sel.nkeys; i>0; i--, key++) {
                        if ((*(u32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) {
                                n = n->next;
                                goto next_knode;
                        }
                }
                if (n->ht_down == NULL) {
check_terminal:
                        if (n->sel.flags&TC_U32_TERMINAL) {
                                *res = n->res;
#ifdef CONFIG_NET_CLS_POLICE
                                if (n->police) {
                                        int pol_res = tcf_police(skb, n->police);
                                        if (pol_res >= 0)
                                                return pol_res;
                                } else
#endif
                                        return 0;
                        }
                        n = n->next;
                        goto next_knode;
                }

                /* PUSH */
                if (sdepth >= TC_U32_MAXDEPTH)
                        goto deadloop;
                stack[sdepth].knode = n;
                stack[sdepth].ptr = ptr;
                sdepth++;

                ht = n->ht_down;
                sel = 0;
                if (ht->divisor)
                        sel = ht->divisor&u32_hash_fold(*(u32*)(ptr+n->sel.hoff), &n->sel);

                if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
                        goto next_ht;

                if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
                        off2 = n->sel.off + 3;
                        if (n->sel.flags&TC_U32_VAROFFSET)
                                off2 += ntohs(n->sel.offmask & *(u16*)(ptr+n->sel.offoff)) >> n->sel.offshift;
                        off2 &= ~3;
                }
                if (n->sel.flags&TC_U32_EAT) {
                        ptr += off2;
                        off2 = 0;
                }

                if (ptr < skb->tail)
                        goto next_ht;
        }

        /* POP */
        if (sdepth--) {
                n = stack[sdepth].knode;
                ht = n->ht_up;
                ptr = stack[sdepth].ptr;
                goto check_terminal;
        }
        return -1;

deadloop:
        if (net_ratelimit())
                printk("cls_u32: dead loop\n");
        return -1;
}

static __inline__ struct tc_u_hnode *
u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
        struct tc_u_hnode *ht;

        for (ht = tp_c->hlist; ht; ht = ht->next)
                if (ht->handle == handle)
                        break;

        return ht;
}

static __inline__ struct tc_u_knode *
u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
{
        unsigned sel;
        struct tc_u_knode *n = NULL;

        sel = TC_U32_HASH(handle);
        if (sel > ht->divisor)
                goto out;

        for (n = ht->ht[sel]; n; n = n->next)
                if (n->handle == handle)
                        break;
out:
        return n;
}


static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
{
        struct tc_u_hnode *ht;
        struct tc_u_common *tp_c = tp->data;

        if (TC_U32_HTID(handle) == TC_U32_ROOT)
                ht = tp->root;
        else
                ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));

        if (!ht)
                return 0;

        if (TC_U32_KEY(handle) == 0)
                return (unsigned long)ht;

        return (unsigned long)u32_lookup_key(ht, handle);
}

static void u32_put(struct tcf_proto *tp, unsigned long f)
{
}
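/*
 * Handle layout, as encoded by the TC_U32_* macros in <linux/pkt_cls.h>:
 * the top 12 bits are the hash table id (TC_U32_HTID), the next 8 bits
 * select a bucket (TC_U32_HASH) and the low 12 bits are the node id
 * within the bucket (TC_U32_NODE).  TC_U32_KEY() covers the low 20 bits,
 * so a handle whose key part is zero names the hash table itself.
 *
 * gen_new_htid() below hands out table ids of the form
 * (hgenerator|0x800)<<20 with hgenerator in 1..0x7FE, so the generated
 * htids 0x801..0xFFE never collide with the root table's fixed handle
 * 0x80000000 (htid 0x800).
 */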
static u32 gen_new_htid(struct tc_u_common *tp_c)
{
        int i = 0x800;

        do {
                if (++tp_c->hgenerator == 0x7FF)
                        tp_c->hgenerator = 1;
        } while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));

        return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
}

static int u32_init(struct tcf_proto *tp)
{
        struct tc_u_hnode *root_ht;
        struct tc_u_common *tp_c;

        for (tp_c = u32_list; tp_c; tp_c = tp_c->next)
                if (tp_c->q == tp->q)
                        break;

        root_ht = kmalloc(sizeof(*root_ht), GFP_KERNEL);
        if (root_ht == NULL)
                return -ENOBUFS;

        memset(root_ht, 0, sizeof(*root_ht));
        root_ht->divisor = 0;
        root_ht->refcnt++;
        root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;

        if (tp_c == NULL) {
                tp_c = kmalloc(sizeof(*tp_c), GFP_KERNEL);
                if (tp_c == NULL) {
                        kfree(root_ht);
                        return -ENOBUFS;
                }
                memset(tp_c, 0, sizeof(*tp_c));
                tp_c->q = tp->q;
                tp_c->next = u32_list;
                u32_list = tp_c;
        }

        tp_c->refcnt++;
        root_ht->next = tp_c->hlist;
        tp_c->hlist = root_ht;
        root_ht->tp_c = tp_c;

        tp->root = root_ht;
        tp->data = tp_c;
        return 0;
}

static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
{
        unsigned long cl;

        if ((cl = __cls_set_class(&n->res.class, 0)) != 0)
                tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
#ifdef CONFIG_NET_CLS_POLICE
        tcf_police_release(n->police);
#endif
        if (n->ht_down)
                n->ht_down->refcnt--;
        kfree(n);
        return 0;
}

static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
{
        struct tc_u_knode **kp;
        struct tc_u_hnode *ht = key->ht_up;

        if (ht) {
                for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) {
                        if (*kp == key) {
                                tcf_tree_lock(tp);
                                *kp = key->next;
                                tcf_tree_unlock(tp);

                                u32_destroy_key(tp, key);
                                return 0;
                        }
                }
        }
        BUG_TRAP(0);
        return 0;
}

static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
        struct tc_u_knode *n;
        unsigned h;

        for (h=0; h<=ht->divisor; h++) {
                while ((n = ht->ht[h]) != NULL) {
                        ht->ht[h] = n->next;

                        u32_destroy_key(tp, n);
                }
        }
}

static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
        struct tc_u_common *tp_c = tp->data;
        struct tc_u_hnode **hn;

        BUG_TRAP(!ht->refcnt);

        u32_clear_hnode(tp, ht);

        for (hn = &tp_c->hlist; *hn; hn = &(*hn)->next) {
                if (*hn == ht) {
                        *hn = ht->next;
                        kfree(ht);
                        return 0;
                }
        }

        BUG_TRAP(0);
        return -ENOENT;
}

static void u32_destroy(struct tcf_proto *tp)
{
        struct tc_u_common *tp_c = tp->data;
        struct tc_u_hnode *root_ht = xchg(&tp->root, NULL);

        BUG_TRAP(root_ht != NULL);

        if (root_ht && --root_ht->refcnt == 0)
                u32_destroy_hnode(tp, root_ht);

        if (--tp_c->refcnt == 0) {
                struct tc_u_hnode *ht;
                struct tc_u_common **tp_cp;

                for (tp_cp = &u32_list; *tp_cp; tp_cp = &(*tp_cp)->next) {
                        if (*tp_cp == tp_c) {
                                *tp_cp = tp_c->next;
                                break;
                        }
                }

                for (ht=tp_c->hlist; ht; ht = ht->next)
                        u32_clear_hnode(tp, ht);

                while ((ht = tp_c->hlist) != NULL) {
                        tp_c->hlist = ht->next;

                        BUG_TRAP(ht->refcnt == 0);

                        kfree(ht);
                }

                kfree(tp_c);
        }

        tp->data = NULL;
}

static int u32_delete(struct tcf_proto *tp, unsigned long arg)
{
        struct tc_u_hnode *ht = (struct tc_u_hnode*)arg;

        if (ht == NULL)
                return 0;

        if (TC_U32_KEY(ht->handle))
                return u32_delete_key(tp, (struct tc_u_knode*)ht);

        if (tp->root == ht)
                return -EINVAL;

        if (--ht->refcnt == 0)
                u32_destroy_hnode(tp, ht);

        return 0;
}
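/*
 * Automatic node ids: gen_new_kid() returns one more than the largest
 * node id already present in the target bucket, but never less than
 * 0x800, so ids below 0x800 remain available for explicit user choice.
 * The result saturates at 0xFFF; once a bucket has reached that id,
 * further automatic allocations return a duplicate handle, which the
 * caller does not check for.
 */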
static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
{
        struct tc_u_knode *n;
        unsigned i = 0x7FF;

        for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
                if (i < TC_U32_NODE(n->handle))
                        i = TC_U32_NODE(n->handle);
        i++;

        return handle|(i>0xFFF ? 0xFFF : i);
}

static int u32_set_parms(struct Qdisc *q, unsigned long base,
                         struct tc_u_hnode *ht,
                         struct tc_u_knode *n, struct rtattr **tb,
                         struct rtattr *est)
{
        if (tb[TCA_U32_LINK-1]) {
                u32 handle = *(u32*)RTA_DATA(tb[TCA_U32_LINK-1]);
                struct tc_u_hnode *ht_down = NULL;

                if (TC_U32_KEY(handle))
                        return -EINVAL;

                if (handle) {
                        ht_down = u32_lookup_ht(ht->tp_c, handle);

                        if (ht_down == NULL)
                                return -EINVAL;
                        ht_down->refcnt++;
                }

                sch_tree_lock(q);
                ht_down = xchg(&n->ht_down, ht_down);
                sch_tree_unlock(q);

                if (ht_down)
                        ht_down->refcnt--;
        }
        if (tb[TCA_U32_CLASSID-1]) {
                unsigned long cl;

                n->res.classid = *(u32*)RTA_DATA(tb[TCA_U32_CLASSID-1]);
                sch_tree_lock(q);
                cl = __cls_set_class(&n->res.class, q->ops->cl_ops->bind_tcf(q, base, n->res.classid));
                sch_tree_unlock(q);
                if (cl)
                        q->ops->cl_ops->unbind_tcf(q, cl);
        }
#ifdef CONFIG_NET_CLS_POLICE
        if (tb[TCA_U32_POLICE-1]) {
                struct tcf_police *police = tcf_police_locate(tb[TCA_U32_POLICE-1], est);

                sch_tree_lock(q);
                police = xchg(&n->police, police);
                sch_tree_unlock(q);

                tcf_police_release(police);
        }
#endif
        return 0;
}

static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
                      struct rtattr **tca,
                      unsigned long *arg)
{
        struct tc_u_common *tp_c = tp->data;
        struct tc_u_hnode *ht;
        struct tc_u_knode *n;
        struct tc_u32_sel *s;
        struct rtattr *opt = tca[TCA_OPTIONS-1];
        struct rtattr *tb[TCA_U32_MAX];
        u32 htid;
        int err;

        if (opt == NULL)
                return handle ? -EINVAL : 0;

        if (rtattr_parse(tb, TCA_U32_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0)
                return -EINVAL;

        if ((n = (struct tc_u_knode*)*arg) != NULL) {
                if (TC_U32_KEY(n->handle) == 0)
                        return -EINVAL;

                return u32_set_parms(tp->q, base, n->ht_up, n, tb, tca[TCA_RATE-1]);
        }

        if (tb[TCA_U32_DIVISOR-1]) {
                unsigned divisor = *(unsigned*)RTA_DATA(tb[TCA_U32_DIVISOR-1]);

                if (--divisor > 0x100)
                        return -EINVAL;
                if (TC_U32_KEY(handle))
                        return -EINVAL;
                if (handle == 0) {
                        handle = gen_new_htid(tp->data);
                        if (handle == 0)
                                return -ENOMEM;
                }
                ht = kmalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
                if (ht == NULL)
                        return -ENOBUFS;
                memset(ht, 0, sizeof(*ht) + divisor*sizeof(void*));
                ht->tp_c = tp_c;
                ht->refcnt = 0;
                ht->divisor = divisor;
                ht->handle = handle;
                ht->next = tp_c->hlist;
                tp_c->hlist = ht;
                *arg = (unsigned long)ht;
                return 0;
        }

        if (tb[TCA_U32_HASH-1]) {
                htid = *(unsigned*)RTA_DATA(tb[TCA_U32_HASH-1]);
                if (TC_U32_HTID(htid) == TC_U32_ROOT) {
                        ht = tp->root;
                        htid = ht->handle;
                } else {
                        ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
                        if (ht == NULL)
                                return -EINVAL;
                }
        } else {
                ht = tp->root;
                htid = ht->handle;
        }

        if (ht->divisor < TC_U32_HASH(htid))
                return -EINVAL;

        if (handle) {
                if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
                        return -EINVAL;
                handle = htid | TC_U32_NODE(handle);
        } else
                handle = gen_new_kid(ht, htid);

        if (tb[TCA_U32_SEL-1] == 0 ||
            RTA_PAYLOAD(tb[TCA_U32_SEL-1]) < sizeof(struct tc_u32_sel))
                return -EINVAL;

        s = RTA_DATA(tb[TCA_U32_SEL-1]);
        n = kmalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
        if (n == NULL)
                return -ENOBUFS;
        memset(n, 0, sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key));
        memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
        n->ht_up = ht;
        n->handle = handle;
        err = u32_set_parms(tp->q, base, ht, n, tb, tca[TCA_RATE-1]);
        if (err == 0) {
                struct tc_u_knode **ins;
                for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
                        if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle))
                                break;

                n->next = *ins;
                wmb();
                *ins = n;

                *arg = (unsigned long)n;
                return 0;
        }
        kfree(n);
        return err;
}
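/*
 * Note on the insert at the end of u32_change() above: the new node is
 * published with "n->next = *ins; wmb(); *ins = n;".  The write barrier
 * orders the initialization of the node (including its next pointer)
 * before the store that makes it reachable, so a concurrent
 * u32_classify() walking the bucket never sees a half-built node.
 */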
static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
        struct tc_u_common *tp_c = tp->data;
        struct tc_u_hnode *ht;
        struct tc_u_knode *n;
        unsigned h;

        if (arg->stop)
                return;

        for (ht = tp_c->hlist; ht; ht = ht->next) {
                if (arg->count >= arg->skip) {
                        if (arg->fn(tp, (unsigned long)ht, arg) < 0) {
                                arg->stop = 1;
                                return;
                        }
                }
                arg->count++;
                for (h = 0; h <= ht->divisor; h++) {
                        for (n = ht->ht[h]; n; n = n->next) {
                                if (arg->count < arg->skip) {
                                        arg->count++;
                                        continue;
                                }
                                if (arg->fn(tp, (unsigned long)n, arg) < 0) {
                                        arg->stop = 1;
                                        return;
                                }
                                arg->count++;
                        }
                }
        }
}

static int u32_dump(struct tcf_proto *tp, unsigned long fh,
                    struct sk_buff *skb, struct tcmsg *t)
{
        struct tc_u_knode *n = (struct tc_u_knode*)fh;
        unsigned char *b = skb->tail;
        struct rtattr *rta;

        if (n == NULL)
                return skb->len;

        t->tcm_handle = n->handle;

        rta = (struct rtattr*)b;
        RTA_PUT(skb, TCA_OPTIONS, 0, NULL);

        if (TC_U32_KEY(n->handle) == 0) {
                struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
                u32 divisor = ht->divisor+1;
                RTA_PUT(skb, TCA_U32_DIVISOR, 4, &divisor);
        } else {
                RTA_PUT(skb, TCA_U32_SEL,
                        sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
                        &n->sel);
                if (n->ht_up) {
                        u32 htid = n->handle & 0xFFFFF000;
                        RTA_PUT(skb, TCA_U32_HASH, 4, &htid);
                }
                if (n->res.classid)
                        RTA_PUT(skb, TCA_U32_CLASSID, 4, &n->res.classid);
                if (n->ht_down)
                        RTA_PUT(skb, TCA_U32_LINK, 4, &n->ht_down->handle);
#ifdef CONFIG_NET_CLS_POLICE
                if (n->police) {
                        struct rtattr *p_rta = (struct rtattr*)skb->tail;

                        RTA_PUT(skb, TCA_U32_POLICE, 0, NULL);

                        if (tcf_police_dump(skb, n->police) < 0)
                                goto rtattr_failure;

                        p_rta->rta_len = skb->tail - (u8*)p_rta;
                }
#endif
        }

        rta->rta_len = skb->tail - b;
#ifdef CONFIG_NET_CLS_POLICE
        if (TC_U32_KEY(n->handle) && n->police) {
                if (qdisc_copy_stats(skb, &n->police->stats))
                        goto rtattr_failure;
        }
#endif
        return skb->len;

rtattr_failure:
        skb_trim(skb, b - skb->data);
        return -1;
}

struct tcf_proto_ops cls_u32_ops = {
        .next           = NULL,
        .kind           = "u32",
        .classify       = u32_classify,
        .init           = u32_init,
        .destroy        = u32_destroy,
        .get            = u32_get,
        .put            = u32_put,
        .change         = u32_change,
        .delete         = u32_delete,
        .walk           = u32_walk,
        .dump           = u32_dump,
        .owner          = THIS_MODULE,
};

#ifdef MODULE
int init_module(void)
{
        return register_tcf_proto_ops(&cls_u32_ops);
}

void cleanup_module(void)
{
        unregister_tcf_proto_ops(&cls_u32_ops);
}
#endif
MODULE_LICENSE("GPL");
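/*
 * Illustrative userspace usage, for orientation only (device name,
 * priorities and class ids are arbitrary examples, not part of this
 * module):
 *
 *   # single terminal match on the root table
 *   tc filter add dev eth0 parent 1:0 protocol ip prio 10 u32 \
 *           match ip dst 10.0.0.0/8 flowid 1:1
 *
 *   # create a 256-bucket hash table, then link to it, hashing on the
 *   # last byte of the IP destination address (offset 16 in the header)
 *   tc filter add dev eth0 parent 1:0 protocol ip prio 20 \
 *           handle 2: u32 divisor 256
 *   tc filter add dev eth0 parent 1:0 protocol ip prio 20 u32 \
 *           link 2: hashkey mask 0x000000ff at 16 \
 *           match ip src 0.0.0.0/0
 */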