/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be made under this lock: no callbacks
     to protocol backends, no attempts to send something to network.
     It will result in deadlocks, if backend/driver wants to use neighbour
     cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
   - timer
   - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */

static DEFINE_RWLOCK(neigh_tbl_lock);

static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}
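/* Illustrative sketch (not part of the original file): the pattern the
 * locking rules above prescribe when an entry found during a bucket scan
 * needs non-trivial work -- pin it under tbl->lock, drop the lock, then
 * act on it:
 *
 *	write_lock_bh(&tbl->lock);
 *	n = <entry found in some hash bucket>;
 *	if (n)
 *		neigh_hold(n);		// take a reference under the lock
 *	write_unlock_bh(&tbl->lock);
 *	if (n) {
 *		<send to network, call protocol backends, ...>;
 *		neigh_release(n);	// drop the pin when done
 *	}
 */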
/*
 * It is a uniform distribution over the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
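/* Worked example: with BASE_REACHABLE_TIME = 30*HZ the result is
 * (prandom_u32() % (30*HZ)) + 15*HZ, i.e. uniform over [15*HZ, 45*HZ).
 */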
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				 * We must destroy the neighbour entry,
				 * but someone still uses it.
				 *
				 * The destroy will be delayed until
				 * the last user releases us, but
				 * we must kill timers etc. and move
				 * it to a safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}
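/* Note on the thresholds used above (describing the code as written):
 * below gc_thresh2 new entries are allocated freely; between gc_thresh2
 * and gc_thresh3 a forced GC runs first, but only if the last flush was
 * more than 5 seconds ago; at or above gc_thresh3 the allocation fails
 * unless the forced GC manages to discard something.
 */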
static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
	*x |= 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
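/* Illustrative only (not from the original file): a typical
 * lookup-or-create sequence built on the helpers above.  Both calls
 * return the entry with a reference held (want_ref == true here), and
 * __neigh_create() returns an ERR_PTR() on failure.
 */
#if 0
static struct neighbour *example_lookup_or_create(struct neigh_table *tbl,
						  const void *pkey,
						  struct net_device *dev)
{
	struct neighbour *n = neigh_lookup(tbl, pkey, dev);

	if (!n)
		n = __neigh_create(tbl, pkey, dev, true);
	return n;	/* caller must neigh_release() when done */
}
#endif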
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
					    lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);

static u32 pneigh_hash(const void *pkey, int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
				   struct net *net, const void *pkey,
				   struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
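/* Illustrative (assuming iproute2 syntax): proxy entries created through
 * the RTM_NEWNEIGH handler later in this file, e.g.
 *
 *	ip -6 neigh add proxy 2001:db8::1 dev eth0
 *
 * end up in pneigh_lookup(tbl, net, key, dev, 1) with NTF_PROXY set.
 */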
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	neighbour must already be out of the table;
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}
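/* Note on the output switch done by neigh_suspect()/neigh_connect()
 * (the concrete ops here are an assumption based on the ARP code):
 * only neigh->output is toggled.  For ARP, ops->connected_output is
 * typically neigh_connected_output() (copy the cached lladdr and send),
 * while ops->output is neigh_resolve_output() (revalidate first).
 */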
/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return (n->nud_state & NUD_PROBE) ?
		NEIGH_VAR(p, UCAST_PROBES) :
		NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
		NEIGH_VAR(p, MCAST_PROBES);
}
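/* Worked example (the defaults are an assumption; ARP traditionally uses
 * ucast_solicit = 3, mcast_solicit = 3, app_solicit = 0): an INCOMPLETE
 * entry then gets 3 + 0 + 3 = 6 probes in total before failing, while a
 * PROBE-state entry gets only the 3 unicast probes.
 */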
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a very thin place. report_unreachable is a very
	   complicated routine. In particular, it can hit the same
	   neighbour entry!

	   So we try to be careful here and avoid an endless loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}
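/* Summary of the transitions implemented above (derived from the code,
 * for orientation only):
 *
 *	REACHABLE --(reachable_time elapsed, recently used)--> DELAY
 *	REACHABLE --(reachable_time elapsed, idle)-----------> STALE
 *	DELAY     --(confirmed within delay_probe_time)------> REACHABLE
 *	DELAY     --(not confirmed)--------------------------> PROBE
 *	INCOMPLETE/PROBE --(one probe per retrans_time)------> FAILED
 *	                   once neigh_max_probes() is exceeded
 */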
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (hh->hh_len) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}
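/* Note on the queueing in __neigh_event_send() (describing the code as
 * written): packets waiting for resolution are capped by QUEUE_LEN_BYTES
 * of skb->truesize, not by packet count; when a new packet would exceed
 * the budget, the oldest queued packets are dropped first and each drop
 * is counted as unres_discards.
 */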
/* Generic update routine.
   -- lladdr is new lladdr or NULL, if it is not supplied.
   -- new    is new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows retaining the current state
				if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Caller MUST hold reference count on the entry.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED)))
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();
			/* Why not just use 'neigh' as-is?  The problem is
			 * that things such as shaper, eql, and sch_teql can
			 * end up using alternative, different, neigh objects
			 * to output the packet in the output path.  So what
			 * we need to do here is re-lookup the top-level neigh
			 * in the path so we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);

/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	__be16 prot = dst->ops->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* This function can be used in contexts where only the old dev_queue_xmit
 * worked, e.g. if you want to override the normal output path (eql, shaper),
 * but resolution is not done yet.
 */

int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	__skb_pull(skb, skb_network_offset(skb));

	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
			    skb->len) < 0 &&
	    dev_rebuild_header(skb))
		return 0;

	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_compat_output);
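/* Illustrative use of neigh_update() (mirrors what the RTM_DELNEIGH
 * handler later in this file actually does): an administrative delete is
 * expressed as a forced transition to NUD_FAILED:
 *
 *	neigh_update(neigh, NULL, NUD_FAILED,
 *		     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN);
 */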
/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
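/* Note on the retry loops above (describing the code as written): the
 * link-layer address may be rewritten concurrently by neigh_update(),
 * which takes neigh->ha_lock as a seqlock writer; re-reading until
 * read_seqretry() returns false guarantees the header was built from a
 * consistent snapshot of neigh->ha without locking the output path.
 */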
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	for (p = &tbl->parms; p; p = p->next) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			release_net(net);
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	neigh_dbg(1, "%s: not found\n", __func__);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;
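/* Lifetime note (describing the code as written): neigh_parms_release()
 * unlinks the parms under tbl->lock and marks them dead, but the final
 * neigh_parms_destroy() only runs after an RCU grace period and once the
 * refcount held by cloned neighbours drops to zero.
 */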
static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
		neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
				  &neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
}

void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		pr_err("Registering multiple tables for family %d\n",
		       tbl->family);
		dump_stack();
	}
}
EXPORT_SYMBOL(neigh_table_init);

int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}
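/* Illustrative mapping (assuming iproute2 syntax): a command such as
 *
 *	ip neigh del 192.0.2.1 dev eth0
 *
 * arrives here as RTM_DELNEIGH and ends in the neigh_update(NUD_FAILED,
 * OVERRIDE | ADMIN) call above.
 */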
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
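/* Illustrative mapping (assuming iproute2 syntax): a command such as
 *
 *	ip neigh replace 192.0.2.1 lladdr 00:11:22:33:44:55 dev eth0 nud permanent
 *
 * arrives as RTM_NEWNEIGH with NLM_F_CREATE|NLM_F_REPLACE set, so the
 * NEIGH_UPDATE_F_OVERRIDE flag is retained in the neigh_update() call
 * above.
 */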
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximate value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME)) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME)) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY)) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY)) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME)))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
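/* Illustrative (assuming iproute2's "ip ntable" frontend): the handler
 * below is what serves e.g.
 *
 *	ip ntable change name arp_cache thresh1 256
 *
 * which arrives as an RTM_SETNEIGHTBL message carrying an NDTA_THRESH1
 * attribute validated against the policies above.
 */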
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2170 if (!net_eq(neigh_parms_net(p), net)) 2171 continue; 2172 2173 if (nidx < neigh_skip) 2174 goto next; 2175 2176 if (neightbl_fill_param_info(skb, tbl, p, 2177 NETLINK_CB(cb->skb).portid, 2178 cb->nlh->nlmsg_seq, 2179 RTM_NEWNEIGHTBL, 2180 NLM_F_MULTI) <= 0) 2181 goto out; 2182 next: 2183 nidx++; 2184 } 2185 2186 neigh_skip = 0; 2187 } 2188 out: 2189 read_unlock(&neigh_tbl_lock); 2190 cb->args[0] = tidx; 2191 cb->args[1] = nidx; 2192 2193 return skb->len; 2194 } 2195 2196 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, 2197 u32 pid, u32 seq, int type, unsigned int flags) 2198 { 2199 unsigned long now = jiffies; 2200 struct nda_cacheinfo ci; 2201 struct nlmsghdr *nlh; 2202 struct ndmsg *ndm; 2203 2204 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); 2205 if (nlh == NULL) 2206 return -EMSGSIZE; 2207 2208 ndm = nlmsg_data(nlh); 2209 ndm->ndm_family = neigh->ops->family; 2210 ndm->ndm_pad1 = 0; 2211 ndm->ndm_pad2 = 0; 2212 ndm->ndm_flags = neigh->flags; 2213 ndm->ndm_type = neigh->type; 2214 ndm->ndm_ifindex = neigh->dev->ifindex; 2215 2216 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key)) 2217 goto nla_put_failure; 2218 2219 read_lock_bh(&neigh->lock); 2220 ndm->ndm_state = neigh->nud_state; 2221 if (neigh->nud_state & NUD_VALID) { 2222 char haddr[MAX_ADDR_LEN]; 2223 2224 neigh_ha_snapshot(haddr, neigh, neigh->dev); 2225 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) { 2226 read_unlock_bh(&neigh->lock); 2227 goto nla_put_failure; 2228 } 2229 } 2230 2231 ci.ndm_used = jiffies_to_clock_t(now - neigh->used); 2232 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); 2233 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated); 2234 ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; 2235 read_unlock_bh(&neigh->lock); 2236 2237 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) || 2238 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) 2239 goto nla_put_failure; 2240 2241 return nlmsg_end(skb, nlh); 2242 2243 nla_put_failure: 2244 nlmsg_cancel(skb, nlh); 2245 return -EMSGSIZE; 2246 } 2247 2248 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, 2249 u32 pid, u32 seq, int type, unsigned int flags, 2250 struct neigh_table *tbl) 2251 { 2252 struct nlmsghdr *nlh; 2253 struct ndmsg *ndm; 2254 2255 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); 2256 if (nlh == NULL) 2257 return -EMSGSIZE; 2258 2259 ndm = nlmsg_data(nlh); 2260 ndm->ndm_family = tbl->family; 2261 ndm->ndm_pad1 = 0; 2262 ndm->ndm_pad2 = 0; 2263 ndm->ndm_flags = pn->flags | NTF_PROXY; 2264 ndm->ndm_type = NDA_DST; 2265 ndm->ndm_ifindex = pn->dev->ifindex; 2266 ndm->ndm_state = NUD_NONE; 2267 2268 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) 2269 goto nla_put_failure; 2270 2271 return nlmsg_end(skb, nlh); 2272 2273 nla_put_failure: 2274 nlmsg_cancel(skb, nlh); 2275 return -EMSGSIZE; 2276 } 2277 2278 static void neigh_update_notify(struct neighbour *neigh) 2279 { 2280 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); 2281 __neigh_notify(neigh, RTM_NEWNEIGH, 0); 2282 } 2283 2284 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2285 struct netlink_callback *cb) 2286 { 2287 struct net *net = sock_net(skb->sk); 2288 struct neighbour *n; 2289 int rc, h, s_h = cb->args[1]; 2290 int idx, s_idx = idx = cb->args[2]; 2291 struct neigh_hash_table *nht; 2292 2293 rcu_read_lock_bh(); 2294 nht = rcu_dereference_bh(tbl->nht); 2295 2296 for (h = s_h; h < (1 << nht->hash_shift); h++) { 2297 if (h > s_h) 
2298 s_idx = 0; 2299 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; 2300 n != NULL; 2301 n = rcu_dereference_bh(n->next)) { 2302 if (!net_eq(dev_net(n->dev), net)) 2303 continue; 2304 if (idx < s_idx) 2305 goto next; 2306 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2307 cb->nlh->nlmsg_seq, 2308 RTM_NEWNEIGH, 2309 NLM_F_MULTI) <= 0) { 2310 rc = -1; 2311 goto out; 2312 } 2313 next: 2314 idx++; 2315 } 2316 } 2317 rc = skb->len; 2318 out: 2319 rcu_read_unlock_bh(); 2320 cb->args[1] = h; 2321 cb->args[2] = idx; 2322 return rc; 2323 } 2324 2325 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2326 struct netlink_callback *cb) 2327 { 2328 struct pneigh_entry *n; 2329 struct net *net = sock_net(skb->sk); 2330 int rc, h, s_h = cb->args[3]; 2331 int idx, s_idx = idx = cb->args[4]; 2332 2333 read_lock_bh(&tbl->lock); 2334 2335 for (h = s_h; h <= PNEIGH_HASHMASK; h++) { 2336 if (h > s_h) 2337 s_idx = 0; 2338 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { 2339 if (dev_net(n->dev) != net) 2340 continue; 2341 if (idx < s_idx) 2342 goto next; 2343 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2344 cb->nlh->nlmsg_seq, 2345 RTM_NEWNEIGH, 2346 NLM_F_MULTI, tbl) <= 0) { 2347 read_unlock_bh(&tbl->lock); 2348 rc = -1; 2349 goto out; 2350 } 2351 next: 2352 idx++; 2353 } 2354 } 2355 2356 read_unlock_bh(&tbl->lock); 2357 rc = skb->len; 2358 out: 2359 cb->args[3] = h; 2360 cb->args[4] = idx; 2361 return rc; 2362 2363 } 2364 2365 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) 2366 { 2367 struct neigh_table *tbl; 2368 int t, family, s_t; 2369 int proxy = 0; 2370 int err; 2371 2372 read_lock(&neigh_tbl_lock); 2373 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; 2374 2375 /* check for full ndmsg structure presence, family member is 2376 * the same for both structures 2377 */ 2378 if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) && 2379 ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY) 2380 proxy = 1; 2381 2382 s_t = cb->args[0]; 2383 2384 for (tbl = neigh_tables, t = 0; tbl; 2385 tbl = tbl->next, t++) { 2386 if (t < s_t || (family && tbl->family != family)) 2387 continue; 2388 if (t > s_t) 2389 memset(&cb->args[1], 0, sizeof(cb->args) - 2390 sizeof(cb->args[0])); 2391 if (proxy) 2392 err = pneigh_dump_table(tbl, skb, cb); 2393 else 2394 err = neigh_dump_table(tbl, skb, cb); 2395 if (err < 0) 2396 break; 2397 } 2398 read_unlock(&neigh_tbl_lock); 2399 2400 cb->args[0] = t; 2401 return skb->len; 2402 } 2403 2404 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) 2405 { 2406 int chain; 2407 struct neigh_hash_table *nht; 2408 2409 rcu_read_lock_bh(); 2410 nht = rcu_dereference_bh(tbl->nht); 2411 2412 read_lock(&tbl->lock); /* avoid resizes */ 2413 for (chain = 0; chain < (1 << nht->hash_shift); chain++) { 2414 struct neighbour *n; 2415 2416 for (n = rcu_dereference_bh(nht->hash_buckets[chain]); 2417 n != NULL; 2418 n = rcu_dereference_bh(n->next)) 2419 cb(n, cookie); 2420 } 2421 read_unlock(&tbl->lock); 2422 rcu_read_unlock_bh(); 2423 } 2424 EXPORT_SYMBOL(neigh_for_each); 2425 2426 /* The tbl->lock must be held as a writer and BH disabled. 
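   Unlike neigh_for_each() above, the callback here may ask (by returning
   nonzero) for the entry to be unlinked and released, so shared/RCU access
   to the hash chains is not enough.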
*/ 2427 void __neigh_for_each_release(struct neigh_table *tbl, 2428 int (*cb)(struct neighbour *)) 2429 { 2430 int chain; 2431 struct neigh_hash_table *nht; 2432 2433 nht = rcu_dereference_protected(tbl->nht, 2434 lockdep_is_held(&tbl->lock)); 2435 for (chain = 0; chain < (1 << nht->hash_shift); chain++) { 2436 struct neighbour *n; 2437 struct neighbour __rcu **np; 2438 2439 np = &nht->hash_buckets[chain]; 2440 while ((n = rcu_dereference_protected(*np, 2441 lockdep_is_held(&tbl->lock))) != NULL) { 2442 int release; 2443 2444 write_lock(&n->lock); 2445 release = cb(n); 2446 if (release) { 2447 rcu_assign_pointer(*np, 2448 rcu_dereference_protected(n->next, 2449 lockdep_is_held(&tbl->lock))); 2450 n->dead = 1; 2451 } else 2452 np = &n->next; 2453 write_unlock(&n->lock); 2454 if (release) 2455 neigh_cleanup_and_release(n); 2456 } 2457 } 2458 } 2459 EXPORT_SYMBOL(__neigh_for_each_release); 2460 2461 #ifdef CONFIG_PROC_FS 2462 2463 static struct neighbour *neigh_get_first(struct seq_file *seq) 2464 { 2465 struct neigh_seq_state *state = seq->private; 2466 struct net *net = seq_file_net(seq); 2467 struct neigh_hash_table *nht = state->nht; 2468 struct neighbour *n = NULL; 2469 int bucket = state->bucket; 2470 2471 state->flags &= ~NEIGH_SEQ_IS_PNEIGH; 2472 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { 2473 n = rcu_dereference_bh(nht->hash_buckets[bucket]); 2474 2475 while (n) { 2476 if (!net_eq(dev_net(n->dev), net)) 2477 goto next; 2478 if (state->neigh_sub_iter) { 2479 loff_t fakep = 0; 2480 void *v; 2481 2482 v = state->neigh_sub_iter(state, n, &fakep); 2483 if (!v) 2484 goto next; 2485 } 2486 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) 2487 break; 2488 if (n->nud_state & ~NUD_NOARP) 2489 break; 2490 next: 2491 n = rcu_dereference_bh(n->next); 2492 } 2493 2494 if (n) 2495 break; 2496 } 2497 state->bucket = bucket; 2498 2499 return n; 2500 } 2501 2502 static struct neighbour *neigh_get_next(struct seq_file *seq, 2503 struct neighbour *n, 2504 loff_t *pos) 2505 { 2506 struct neigh_seq_state *state = seq->private; 2507 struct net *net = seq_file_net(seq); 2508 struct neigh_hash_table *nht = state->nht; 2509 2510 if (state->neigh_sub_iter) { 2511 void *v = state->neigh_sub_iter(state, n, pos); 2512 if (v) 2513 return n; 2514 } 2515 n = rcu_dereference_bh(n->next); 2516 2517 while (1) { 2518 while (n) { 2519 if (!net_eq(dev_net(n->dev), net)) 2520 goto next; 2521 if (state->neigh_sub_iter) { 2522 void *v = state->neigh_sub_iter(state, n, pos); 2523 if (v) 2524 return n; 2525 goto next; 2526 } 2527 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) 2528 break; 2529 2530 if (n->nud_state & ~NUD_NOARP) 2531 break; 2532 next: 2533 n = rcu_dereference_bh(n->next); 2534 } 2535 2536 if (n) 2537 break; 2538 2539 if (++state->bucket >= (1 << nht->hash_shift)) 2540 break; 2541 2542 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]); 2543 } 2544 2545 if (n && pos) 2546 --(*pos); 2547 return n; 2548 } 2549 2550 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) 2551 { 2552 struct neighbour *n = neigh_get_first(seq); 2553 2554 if (n) { 2555 --(*pos); 2556 while (*pos) { 2557 n = neigh_get_next(seq, n, pos); 2558 if (!n) 2559 break; 2560 } 2561 } 2562 return *pos ? 
NULL : n; 2563 } 2564 2565 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) 2566 { 2567 struct neigh_seq_state *state = seq->private; 2568 struct net *net = seq_file_net(seq); 2569 struct neigh_table *tbl = state->tbl; 2570 struct pneigh_entry *pn = NULL; 2571 int bucket = state->bucket; 2572 2573 state->flags |= NEIGH_SEQ_IS_PNEIGH; 2574 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { 2575 pn = tbl->phash_buckets[bucket]; 2576 while (pn && !net_eq(pneigh_net(pn), net)) 2577 pn = pn->next; 2578 if (pn) 2579 break; 2580 } 2581 state->bucket = bucket; 2582 2583 return pn; 2584 } 2585 2586 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, 2587 struct pneigh_entry *pn, 2588 loff_t *pos) 2589 { 2590 struct neigh_seq_state *state = seq->private; 2591 struct net *net = seq_file_net(seq); 2592 struct neigh_table *tbl = state->tbl; 2593 2594 do { 2595 pn = pn->next; 2596 } while (pn && !net_eq(pneigh_net(pn), net)); 2597 2598 while (!pn) { 2599 if (++state->bucket > PNEIGH_HASHMASK) 2600 break; 2601 pn = tbl->phash_buckets[state->bucket]; 2602 while (pn && !net_eq(pneigh_net(pn), net)) 2603 pn = pn->next; 2604 if (pn) 2605 break; 2606 } 2607 2608 if (pn && pos) 2609 --(*pos); 2610 2611 return pn; 2612 } 2613 2614 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos) 2615 { 2616 struct pneigh_entry *pn = pneigh_get_first(seq); 2617 2618 if (pn) { 2619 --(*pos); 2620 while (*pos) { 2621 pn = pneigh_get_next(seq, pn, pos); 2622 if (!pn) 2623 break; 2624 } 2625 } 2626 return *pos ? NULL : pn; 2627 } 2628 2629 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) 2630 { 2631 struct neigh_seq_state *state = seq->private; 2632 void *rc; 2633 loff_t idxpos = *pos; 2634 2635 rc = neigh_get_idx(seq, &idxpos); 2636 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY)) 2637 rc = pneigh_get_idx(seq, &idxpos); 2638 2639 return rc; 2640 } 2641 2642 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) 2643 __acquires(rcu_bh) 2644 { 2645 struct neigh_seq_state *state = seq->private; 2646 2647 state->tbl = tbl; 2648 state->bucket = 0; 2649 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); 2650 2651 rcu_read_lock_bh(); 2652 state->nht = rcu_dereference_bh(tbl->nht); 2653 2654 return *pos ? 
neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; 2655 } 2656 EXPORT_SYMBOL(neigh_seq_start); 2657 2658 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2659 { 2660 struct neigh_seq_state *state; 2661 void *rc; 2662 2663 if (v == SEQ_START_TOKEN) { 2664 rc = neigh_get_first(seq); 2665 goto out; 2666 } 2667 2668 state = seq->private; 2669 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) { 2670 rc = neigh_get_next(seq, v, NULL); 2671 if (rc) 2672 goto out; 2673 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY)) 2674 rc = pneigh_get_first(seq); 2675 } else { 2676 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY); 2677 rc = pneigh_get_next(seq, v, NULL); 2678 } 2679 out: 2680 ++(*pos); 2681 return rc; 2682 } 2683 EXPORT_SYMBOL(neigh_seq_next); 2684 2685 void neigh_seq_stop(struct seq_file *seq, void *v) 2686 __releases(rcu_bh) 2687 { 2688 rcu_read_unlock_bh(); 2689 } 2690 EXPORT_SYMBOL(neigh_seq_stop); 2691 2692 /* statistics via seq_file */ 2693 2694 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) 2695 { 2696 struct neigh_table *tbl = seq->private; 2697 int cpu; 2698 2699 if (*pos == 0) 2700 return SEQ_START_TOKEN; 2701 2702 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { 2703 if (!cpu_possible(cpu)) 2704 continue; 2705 *pos = cpu+1; 2706 return per_cpu_ptr(tbl->stats, cpu); 2707 } 2708 return NULL; 2709 } 2710 2711 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2712 { 2713 struct neigh_table *tbl = seq->private; 2714 int cpu; 2715 2716 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { 2717 if (!cpu_possible(cpu)) 2718 continue; 2719 *pos = cpu+1; 2720 return per_cpu_ptr(tbl->stats, cpu); 2721 } 2722 return NULL; 2723 } 2724 2725 static void neigh_stat_seq_stop(struct seq_file *seq, void *v) 2726 { 2727 2728 } 2729 2730 static int neigh_stat_seq_show(struct seq_file *seq, void *v) 2731 { 2732 struct neigh_table *tbl = seq->private; 2733 struct neigh_statistics *st = v; 2734 2735 if (v == SEQ_START_TOKEN) { 2736 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n"); 2737 return 0; 2738 } 2739 2740 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " 2741 "%08lx %08lx %08lx %08lx %08lx\n", 2742 atomic_read(&tbl->entries), 2743 2744 st->allocs, 2745 st->destroys, 2746 st->hash_grows, 2747 2748 st->lookups, 2749 st->hits, 2750 2751 st->res_failed, 2752 2753 st->rcv_probes_mcast, 2754 st->rcv_probes_ucast, 2755 2756 st->periodic_gc_runs, 2757 st->forced_gc_runs, 2758 st->unres_discards 2759 ); 2760 2761 return 0; 2762 } 2763 2764 static const struct seq_operations neigh_stat_seq_ops = { 2765 .start = neigh_stat_seq_start, 2766 .next = neigh_stat_seq_next, 2767 .stop = neigh_stat_seq_stop, 2768 .show = neigh_stat_seq_show, 2769 }; 2770 2771 static int neigh_stat_seq_open(struct inode *inode, struct file *file) 2772 { 2773 int ret = seq_open(file, &neigh_stat_seq_ops); 2774 2775 if (!ret) { 2776 struct seq_file *sf = file->private_data; 2777 sf->private = PDE_DATA(inode); 2778 } 2779 return ret; 2780 }; 2781 2782 static const struct file_operations neigh_stat_seq_fops = { 2783 .owner = THIS_MODULE, 2784 .open = neigh_stat_seq_open, 2785 .read = seq_read, 2786 .llseek = seq_lseek, 2787 .release = seq_release, 2788 }; 2789 2790 #endif /* CONFIG_PROC_FS */ 2791 2792 static inline size_t neigh_nlmsg_size(void) 2793 { 2794 return NLMSG_ALIGN(sizeof(struct ndmsg)) 2795 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ 2796 + nla_total_size(MAX_ADDR_LEN) /* 
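link-layer address,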
NDA_LLADDR */ 2797 + nla_total_size(sizeof(struct nda_cacheinfo)) 2798 + nla_total_size(4); /* NDA_PROBES */ 2799 } 2800 2801 static void __neigh_notify(struct neighbour *n, int type, int flags) 2802 { 2803 struct net *net = dev_net(n->dev); 2804 struct sk_buff *skb; 2805 int err = -ENOBUFS; 2806 2807 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC); 2808 if (skb == NULL) 2809 goto errout; 2810 2811 err = neigh_fill_info(skb, n, 0, 0, type, flags); 2812 if (err < 0) { 2813 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */ 2814 WARN_ON(err == -EMSGSIZE); 2815 kfree_skb(skb); 2816 goto errout; 2817 } 2818 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); 2819 return; 2820 errout: 2821 if (err < 0) 2822 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); 2823 } 2824 2825 void neigh_app_ns(struct neighbour *n) 2826 { 2827 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); 2828 } 2829 EXPORT_SYMBOL(neigh_app_ns); 2830 2831 #ifdef CONFIG_SYSCTL 2832 static int zero; 2833 static int int_max = INT_MAX; 2834 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); 2835 2836 static int proc_unres_qlen(struct ctl_table *ctl, int write, 2837 void __user *buffer, size_t *lenp, loff_t *ppos) 2838 { 2839 int size, ret; 2840 struct ctl_table tmp = *ctl; 2841 2842 tmp.extra1 = &zero; 2843 tmp.extra2 = &unres_qlen_max; 2844 tmp.data = &size; 2845 2846 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN); 2847 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 2848 2849 if (write && !ret) 2850 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); 2851 return ret; 2852 } 2853 2854 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, 2855 int family) 2856 { 2857 switch (family) { 2858 case AF_INET: 2859 return __in_dev_arp_parms_get_rcu(dev); 2860 case AF_INET6: 2861 return __in6_dev_nd_parms_get_rcu(dev); 2862 } 2863 return NULL; 2864 } 2865 2866 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, 2867 int index) 2868 { 2869 struct net_device *dev; 2870 int family = neigh_parms_family(p); 2871 2872 rcu_read_lock(); 2873 for_each_netdev_rcu(net, dev) { 2874 struct neigh_parms *dst_p = 2875 neigh_get_dev_parms_rcu(dev, family); 2876 2877 if (dst_p && !test_bit(index, dst_p->data_state)) 2878 dst_p->data[index] = p->data[index]; 2879 } 2880 rcu_read_unlock(); 2881 } 2882 2883 static void neigh_proc_update(struct ctl_table *ctl, int write) 2884 { 2885 struct net_device *dev = ctl->extra1; 2886 struct neigh_parms *p = ctl->extra2; 2887 struct net *net = neigh_parms_net(p); 2888 int index = (int *) ctl->data - p->data; 2889 2890 if (!write) 2891 return; 2892 2893 set_bit(index, p->data_state); 2894 if (!dev) /* NULL dev means this is default value */ 2895 neigh_copy_dflt_parms(net, p, index); 2896 } 2897 2898 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, 2899 void __user *buffer, 2900 size_t *lenp, loff_t *ppos) 2901 { 2902 struct ctl_table tmp = *ctl; 2903 int ret; 2904 2905 tmp.extra1 = &zero; 2906 tmp.extra2 = &int_max; 2907 2908 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 2909 neigh_proc_update(ctl, write); 2910 return ret; 2911 } 2912 2913 int neigh_proc_dointvec(struct ctl_table *ctl, int write, 2914 void __user *buffer, size_t *lenp, loff_t *ppos) 2915 { 2916 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 2917 2918 neigh_proc_update(ctl, write); 2919 return ret; 2920 } 2921 EXPORT_SYMBOL(neigh_proc_dointvec); 2922 2923 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, 2924 
void __user *buffer, 2925 size_t *lenp, loff_t *ppos) 2926 { 2927 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); 2928 2929 neigh_proc_update(ctl, write); 2930 return ret; 2931 } 2932 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies); 2933 2934 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write, 2935 void __user *buffer, 2936 size_t *lenp, loff_t *ppos) 2937 { 2938 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos); 2939 2940 neigh_proc_update(ctl, write); 2941 return ret; 2942 } 2943 2944 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, 2945 void __user *buffer, 2946 size_t *lenp, loff_t *ppos) 2947 { 2948 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); 2949 2950 neigh_proc_update(ctl, write); 2951 return ret; 2952 } 2953 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies); 2954 2955 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, 2956 void __user *buffer, 2957 size_t *lenp, loff_t *ppos) 2958 { 2959 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos); 2960 2961 neigh_proc_update(ctl, write); 2962 return ret; 2963 } 2964 2965 #define NEIGH_PARMS_DATA_OFFSET(index) \ 2966 (&((struct neigh_parms *) 0)->data[index]) 2967 2968 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \ 2969 [NEIGH_VAR_ ## attr] = { \ 2970 .procname = name, \ 2971 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \ 2972 .maxlen = sizeof(int), \ 2973 .mode = mval, \ 2974 .proc_handler = proc, \ 2975 } 2976 2977 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \ 2978 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax) 2979 2980 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \ 2981 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies) 2982 2983 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \ 2984 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies) 2985 2986 #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \ 2987 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies) 2988 2989 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \ 2990 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies) 2991 2992 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \ 2993 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen) 2994 2995 static struct neigh_sysctl_table { 2996 struct ctl_table_header *sysctl_header; 2997 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; 2998 } neigh_sysctl_template __read_mostly = { 2999 .neigh_vars = { 3000 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), 3001 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"), 3002 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"), 3003 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), 3004 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"), 3005 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), 3006 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"), 3007 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"), 3008 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"), 3009 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"), 3010 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"), 3011 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"), 3012 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, 
"unres_qlen"), 3013 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"), 3014 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"), 3015 [NEIGH_VAR_GC_INTERVAL] = { 3016 .procname = "gc_interval", 3017 .maxlen = sizeof(int), 3018 .mode = 0644, 3019 .proc_handler = proc_dointvec_jiffies, 3020 }, 3021 [NEIGH_VAR_GC_THRESH1] = { 3022 .procname = "gc_thresh1", 3023 .maxlen = sizeof(int), 3024 .mode = 0644, 3025 .extra1 = &zero, 3026 .extra2 = &int_max, 3027 .proc_handler = proc_dointvec_minmax, 3028 }, 3029 [NEIGH_VAR_GC_THRESH2] = { 3030 .procname = "gc_thresh2", 3031 .maxlen = sizeof(int), 3032 .mode = 0644, 3033 .extra1 = &zero, 3034 .extra2 = &int_max, 3035 .proc_handler = proc_dointvec_minmax, 3036 }, 3037 [NEIGH_VAR_GC_THRESH3] = { 3038 .procname = "gc_thresh3", 3039 .maxlen = sizeof(int), 3040 .mode = 0644, 3041 .extra1 = &zero, 3042 .extra2 = &int_max, 3043 .proc_handler = proc_dointvec_minmax, 3044 }, 3045 {}, 3046 }, 3047 }; 3048 3049 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, 3050 proc_handler *handler) 3051 { 3052 int i; 3053 struct neigh_sysctl_table *t; 3054 const char *dev_name_source; 3055 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; 3056 char *p_name; 3057 3058 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); 3059 if (!t) 3060 goto err; 3061 3062 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) { 3063 t->neigh_vars[i].data += (long) p; 3064 t->neigh_vars[i].extra1 = dev; 3065 t->neigh_vars[i].extra2 = p; 3066 } 3067 3068 if (dev) { 3069 dev_name_source = dev->name; 3070 /* Terminate the table early */ 3071 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, 3072 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); 3073 } else { 3074 dev_name_source = "default"; 3075 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1); 3076 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1; 3077 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2; 3078 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3; 3079 } 3080 3081 if (handler) { 3082 /* RetransTime */ 3083 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; 3084 /* ReachableTime */ 3085 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; 3086 /* RetransTime (in milliseconds)*/ 3087 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; 3088 /* ReachableTime (in milliseconds) */ 3089 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; 3090 } 3091 3092 /* Don't export sysctls to unprivileged users */ 3093 if (neigh_parms_net(p)->user_ns != &init_user_ns) 3094 t->neigh_vars[0].procname = NULL; 3095 3096 switch (neigh_parms_family(p)) { 3097 case AF_INET: 3098 p_name = "ipv4"; 3099 break; 3100 case AF_INET6: 3101 p_name = "ipv6"; 3102 break; 3103 default: 3104 BUG(); 3105 } 3106 3107 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", 3108 p_name, dev_name_source); 3109 t->sysctl_header = 3110 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars); 3111 if (!t->sysctl_header) 3112 goto free; 3113 3114 p->sysctl_table = t; 3115 return 0; 3116 3117 free: 3118 kfree(t); 3119 err: 3120 return -ENOBUFS; 3121 } 3122 EXPORT_SYMBOL(neigh_sysctl_register); 3123 3124 void neigh_sysctl_unregister(struct neigh_parms *p) 3125 { 3126 if (p->sysctl_table) { 3127 struct neigh_sysctl_table *t = p->sysctl_table; 3128 p->sysctl_table = NULL; 3129 unregister_net_sysctl_table(t->sysctl_header); 3130 kfree(t); 3131 
} 3132 } 3133 EXPORT_SYMBOL(neigh_sysctl_unregister); 3134 3135 #endif /* CONFIG_SYSCTL */ 3136 3137 static int __init neigh_init(void) 3138 { 3139 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL); 3140 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL); 3141 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL); 3142 3143 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, 3144 NULL); 3145 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL); 3146 3147 return 0; 3148 } 3149 3150 subsys_initcall(neigh_init); 3151 3152
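/*
 * Illustrative sketch (not part of the kernel build): a minimal userspace
 * dump request against the RTM_GETNEIGH handler registered above, using
 * only the UAPI netlink/rtnetlink headers. Error handling and the recv()
 * parsing loop are elided, and names like dump_neigh() are ours, not
 * kernel or libc API.
 *
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/socket.h>
 *	#include <linux/netlink.h>
 *	#include <linux/rtnetlink.h>
 *
 *	static int dump_neigh(unsigned char family)
 *	{
 *		struct {
 *			struct nlmsghdr	nlh;
 *			struct ndmsg	ndm;
 *		} req;
 *		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *
 *		if (fd < 0)
 *			return -1;
 *		memset(&req, 0, sizeof(req));
 *		req.nlh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct ndmsg));
 *		req.nlh.nlmsg_type  = RTM_GETNEIGH;
 *		req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
 *		// ndm_family lines up with rtgen_family, as neigh_dump_info()
 *		// expects; AF_UNSPEC (0) dumps every table.
 *		req.ndm.ndm_family  = family;
 *		// Setting req.ndm.ndm_flags = NTF_PROXY instead selects the
 *		// proxy (pneigh) dump path.
 *
 *		if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0) {
 *			close(fd);
 *			return -1;
 *		}
 *		// ... recv() NLM_F_MULTI messages until NLMSG_DONE ...
 *		close(fd);
 *		return 0;
 *	}
 */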