TOMOYO Linux Cross Reference
Linux/net/core/dev.c


  1 /*
  2  *      NET3    Protocol independent device support routines.
  3  *
  4  *              This program is free software; you can redistribute it and/or
  5  *              modify it under the terms of the GNU General Public License
  6  *              as published by the Free Software Foundation; either version
  7  *              2 of the License, or (at your option) any later version.
  8  *
  9  *      Derived from the non IP parts of dev.c 1.0.19
 10  *              Authors:        Ross Biro, <bir7@leland.Stanford.Edu>
 11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
 13  *
 14  *      Additional Authors:
 15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
 16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
 17  *              David Hinds <dahinds@users.sourceforge.net>
 18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 19  *              Adam Sulmicki <adam@cfar.umd.edu>
 20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
 21  *
 22  *      Changes:
 23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
 24  *                                      to 2 if register_netdev gets called
 25  *                                      before net_dev_init & also removed a
 26  *                                      few lines of code in the process.
 27  *              Alan Cox        :       device private ioctl copies fields back.
 28  *              Alan Cox        :       Transmit queue code does relevant
 29  *                                      stunts to keep the queue safe.
 30  *              Alan Cox        :       Fixed double lock.
 31  *              Alan Cox        :       Fixed promisc NULL pointer trap
 32  *              ????????        :       Support the full private ioctl range
 33  *              Alan Cox        :       Moved ioctl permission check into
 34  *                                      drivers
 35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
 36  *              Alan Cox        :       100 backlog just doesn't cut it when
 37  *                                      you start doing multicast video 8)
 38  *              Alan Cox        :       Rewrote net_bh and list manager.
 39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
 40  *              Alan Cox        :       Took out transmit every packet pass
 41  *                                      Saved a few bytes in the ioctl handler
 42  *              Alan Cox        :       Network driver sets packet type before
 43  *                                      calling netif_rx. Saves a function
 44  *                                      call a packet.
 45  *              Alan Cox        :       Hashed net_bh()
 46  *              Richard Kooijman:       Timestamp fixes.
 47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
 48  *              Alan Cox        :       Device lock protection.
 49  *              Alan Cox        :       Fixed nasty side effect of device close
 50  *                                      changes.
 51  *              Rudi Cilibrasi  :       Pass the right thing to
 52  *                                      set_mac_address()
 53  *              Dave Miller     :       32bit quantity for the device lock to
 54  *                                      make it work out on a Sparc.
 55  *              Bjorn Ekwall    :       Added KERNELD hack.
 56  *              Alan Cox        :       Cleaned up the backlog initialise.
 57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
 58  *                                      1 device.
 59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
 60  *                                      is no device open function.
 61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
 62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
 63  *              Cyrus Durgin    :       Cleaned for KMOD
 64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
 65  *                                      A network device unload needs to purge
 66  *                                      the backlog queue.
 67  *      Paul Rusty Russell      :       SIOCSIFNAME
 68  *              Pekka Riikonen  :       Netdev boot-time settings code
 69  *              Andrew Morton   :       Make unregister_netdevice wait
 70  *                                      indefinitely on dev->refcnt
 71  *              J Hadi Salim    :       - Backlog queue sampling
 72  *                                      - netif_rx() feedback
 73  */
 74 
 75 #include <asm/uaccess.h>
 76 #include <asm/system.h>
 77 #include <asm/bitops.h>
 78 #include <linux/config.h>
 79 #include <linux/types.h>
 80 #include <linux/kernel.h>
 81 #include <linux/sched.h>
 82 #include <linux/string.h>
 83 #include <linux/mm.h>
 84 #include <linux/socket.h>
 85 #include <linux/sockios.h>
 86 #include <linux/errno.h>
 87 #include <linux/interrupt.h>
 88 #include <linux/if_ether.h>
 89 #include <linux/netdevice.h>
 90 #include <linux/etherdevice.h>
 91 #include <linux/notifier.h>
 92 #include <linux/skbuff.h>
 93 #include <net/sock.h>
 94 #include <linux/rtnetlink.h>
 95 #include <linux/proc_fs.h>
 96 #include <linux/seq_file.h>
 97 #include <linux/stat.h>
 98 #include <linux/if_bridge.h>
 99 #include <linux/divert.h>
100 #include <net/dst.h>
101 #include <net/pkt_sched.h>
102 #include <net/checksum.h>
103 #include <linux/highmem.h>
104 #include <linux/init.h>
105 #include <linux/kmod.h>
106 #include <linux/module.h>
107 #include <linux/kallsyms.h>
108 #ifdef CONFIG_NET_RADIO
109 #include <linux/wireless.h>             /* Note : will define WIRELESS_EXT */
110 #include <net/iw_handler.h>
111 #endif  /* CONFIG_NET_RADIO */
112 #include <asm/current.h>
113 
114 /* This define, if set, will randomly drop a packet when congestion
115  * is more than moderate.  It helps fairness in the multi-interface
116  * case when one of them is a hog, but it kills performance for the
117  * single interface case so it is off now by default.
118  */
119 #undef RAND_LIE
120 
121 /* Setting this will sample the queue lengths and thus congestion
122  * via a timer instead of as each packet is received.
123  */
124 #undef OFFLINE_SAMPLE
125 
126 /*
127  *      The list of packet types we will receive (as opposed to discard)
128  *      and the routines to invoke.
129  *
 130  *      Why 16? Because with 16 the only overlap we get on a hash of the
131  *      low nibble of the protocol value is RARP/SNAP/X.25.
132  *
133  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
134  *             sure which should go first, but I bet it won't make much
135  *             difference if we are running VLANs.  The good news is that
136  *             this protocol won't be in the list unless compiled in, so
 137  *             the average user (w/out VLANs) will not be adversely affected.
138  *             --BLG
139  *
140  *              0800    IP
141  *              8100    802.1Q VLAN
142  *              0001    802.3
143  *              0002    AX.25
144  *              0004    802.2
145  *              8035    RARP
146  *              0005    SNAP
147  *              0805    X.25
148  *              0806    ARP
149  *              8137    IPX
150  *              0009    Localtalk
151  *              86DD    IPv6
152  */
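
/*
 * Editor's note -- a worked check of the nibble hash above (not part of
 * the original dev.c): the buckets are chosen by ntohs(type) & 15, so
 *      ETH_P_IP    0x0800 & 15 = 0
 *      RARP        0x8035 & 15 = 5
 *      SNAP        0x0005 & 15 = 5
 *      X.25        0x0805 & 15 = 5
 * i.e. RARP, SNAP and X.25 all land in bucket 5, the only collision
 * among the types listed.
 */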
153 
154 static spinlock_t ptype_lock = SPIN_LOCK_UNLOCKED;
155 static struct list_head ptype_base[16]; /* 16 way hashed list */
156 static struct list_head ptype_all;              /* Taps */
157 
158 #ifdef OFFLINE_SAMPLE
159 static void sample_queue(unsigned long dummy);
160 static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
161 #endif
162 
163 /*
164  *      Our notifier list
165  */
166 
167 static struct notifier_block *netdev_chain;
168 
169 /*
170  *      Device drivers call our routines to queue packets here. We empty the
171  *      queue in the local softnet handler.
172  */
173 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
174 
175 #ifdef CONFIG_NET_FASTROUTE
176 int netdev_fastroute;
177 int netdev_fastroute_obstacles;
178 #endif
179 
180 extern int netdev_sysfs_init(void);
181 extern int netdev_register_sysfs(struct net_device *);
182 extern int netdev_unregister_sysfs(struct net_device *);
183 
184 
185 /*******************************************************************************
186 
187                 Protocol management and registration routines
188 
189 *******************************************************************************/
190 
191 /*
192  *      For efficiency
193  */
194 
195 int netdev_nit;
196 
197 /*
198  *      Add a protocol ID to the list. Now that the input handler is
199  *      smarter we can dispense with all the messy stuff that used to be
200  *      here.
201  *
 202  *      BEWARE!!! Protocol handlers that mangle input packets
 203  *      MUST BE last in the hash buckets, and checking protocol handlers
 204  *      MUST start from the promiscuous ptype_all chain in net_bh.
 205  *      This is true now; do not change it.
 206  *      Explanation: if a packet-mangling protocol handler were first
 207  *      on the list, it could not sense that the packet is cloned and
 208  *      should be copied-on-write, so it would change it and subsequent
 209  *      readers would get a broken packet.
210  *                                                      --ANK (980803)
211  */
212 
213 /**
214  *      dev_add_pack - add packet handler
215  *      @pt: packet type declaration
216  *
217  *      Add a protocol handler to the networking stack. The passed &packet_type
218  *      is linked into kernel lists and may not be freed until it has been
219  *      removed from the kernel lists.
220  *
 221  *      This call does not sleep, therefore it cannot guarantee that
 222  *      all CPUs that are in the middle of receiving packets
223  *      will see the new packet type (until the next received packet).
224  */
225 
226 void dev_add_pack(struct packet_type *pt)
227 {
228         int hash;
229 
230         spin_lock_bh(&ptype_lock);
231 #ifdef CONFIG_NET_FASTROUTE
232         if (pt->af_packet_priv) {
233                 netdev_fastroute_obstacles++;
234                 dev_clear_fastroute(pt->dev);
235         }
236 #endif
237         if (pt->type == htons(ETH_P_ALL)) {
238                 netdev_nit++;
239                 list_add_rcu(&pt->list, &ptype_all);
240         } else {
241                 hash = ntohs(pt->type) & 15;
242                 list_add_rcu(&pt->list, &ptype_base[hash]);
243         }
244         spin_unlock_bh(&ptype_lock);
245 }
246 
247 extern void linkwatch_run_queue(void);
248 
249 
250 
251 /**
252  *      __dev_remove_pack        - remove packet handler
253  *      @pt: packet type declaration
254  *
255  *      Remove a protocol handler that was previously added to the kernel
256  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
257  *      from the kernel lists and can be freed or reused once this function
258  *      returns. 
259  *
260  *      The packet type might still be in use by receivers
 261  *      and must not be freed until after all the CPUs have gone
262  *      through a quiescent state.
263  */
264 void __dev_remove_pack(struct packet_type *pt)
265 {
266         struct list_head *head;
267         struct packet_type *pt1;
268 
269         spin_lock_bh(&ptype_lock);
270 
271         if (pt->type == htons(ETH_P_ALL)) {
272                 netdev_nit--;
273                 head = &ptype_all;
274         } else
275                 head = &ptype_base[ntohs(pt->type) & 15];
276 
277         list_for_each_entry(pt1, head, list) {
278                 if (pt == pt1) {
279 #ifdef CONFIG_NET_FASTROUTE
280                         if (pt->af_packet_priv)
281                                 netdev_fastroute_obstacles--;
282 #endif
283                         list_del_rcu(&pt->list);
284                         goto out;
285                 }
286         }
287 
288         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
289 out:
290         spin_unlock_bh(&ptype_lock);
291 }
292 /**
293  *      dev_remove_pack  - remove packet handler
294  *      @pt: packet type declaration
295  *
296  *      Remove a protocol handler that was previously added to the kernel
297  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
298  *      from the kernel lists and can be freed or reused once this function
299  *      returns.
300  *
301  *      This call sleeps to guarantee that no CPU is looking at the packet
302  *      type after return.
303  */
304 void dev_remove_pack(struct packet_type *pt)
305 {
306         __dev_remove_pack(pt);
307         
308         synchronize_net();
309 }
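
/*
 * Editor's example (not part of the original dev.c): a minimal sketch of
 * how a module of this era might register and unregister a packet handler
 * with dev_add_pack()/dev_remove_pack(). The handler and module names are
 * hypothetical; the handler receives a cloned skb and must free it.
 */
static int my_pkt_rcv(struct sk_buff *skb, struct net_device *dev,
                      struct packet_type *pt)
{
        /* ... inspect skb ... */
        kfree_skb(skb);                 /* we consumed our clone */
        return 0;
}

static struct packet_type my_pkt_type = {
        .type = __constant_htons(ETH_P_ALL),  /* tap: goes on ptype_all */
        .func = my_pkt_rcv,                   /* .dev == NULL: all devices */
};

static int __init my_tap_init(void)
{
        dev_add_pack(&my_pkt_type);
        return 0;
}

static void __exit my_tap_exit(void)
{
        dev_remove_pack(&my_pkt_type);  /* sleeps: synchronize_net() */
}

module_init(my_tap_init);
module_exit(my_tap_exit);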
310 
311 /******************************************************************************
312 
313                       Device Boot-time Settings Routines
314 
315 *******************************************************************************/
316 
317 /* Boot time configuration table */
318 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
319 
320 /**
321  *      netdev_boot_setup_add   - add new setup entry
322  *      @name: name of the device
323  *      @map: configured settings for the device
324  *
 325  *      Adds a new setup entry to the dev_boot_setup list.  The function
 326  *      returns 0 on error and 1 on success.  This is a generic routine
 327  *      for all netdevices.
328  */
329 int netdev_boot_setup_add(char *name, struct ifmap *map)
330 {
331         struct netdev_boot_setup *s;
332         int i;
333 
334         s = dev_boot_setup;
335         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
336                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
337                         memset(s[i].name, 0, sizeof(s[i].name));
338                         strcpy(s[i].name, name);
339                         memcpy(&s[i].map, map, sizeof(s[i].map));
340                         break;
341                 }
342         }
343 
344         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
345 }
346 
347 /**
348  *      netdev_boot_setup_check - check boot time settings
349  *      @dev: the netdevice
350  *
 351  *      Check the boot-time settings for the device.
 352  *      Any settings found are applied to the device for use
 353  *      later during device probing.
 354  *      Returns 0 if no settings were found, 1 if they were.
355  */
356 int netdev_boot_setup_check(struct net_device *dev)
357 {
358         struct netdev_boot_setup *s = dev_boot_setup;
359         int i;
360 
361         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
362                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
363                     !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
364                         dev->irq        = s[i].map.irq;
365                         dev->base_addr  = s[i].map.base_addr;
366                         dev->mem_start  = s[i].map.mem_start;
367                         dev->mem_end    = s[i].map.mem_end;
368                         return 1;
369                 }
370         }
371         return 0;
372 }
373 
374 /*
 375  * Saves the settings configured at boot time for any netdevice.
376  */
377 int __init netdev_boot_setup(char *str)
378 {
379         int ints[5];
380         struct ifmap map;
381 
382         str = get_options(str, ARRAY_SIZE(ints), ints);
383         if (!str || !*str)
384                 return 0;
385 
386         /* Save settings */
387         memset(&map, 0, sizeof(map));
388         if (ints[0] > 0)
389                 map.irq = ints[1];
390         if (ints[0] > 1)
391                 map.base_addr = ints[2];
392         if (ints[0] > 2)
393                 map.mem_start = ints[3];
394         if (ints[0] > 3)
395                 map.mem_end = ints[4];
396 
397         /* Add new entry to the list */
398         return netdev_boot_setup_add(str, &map);
399 }
400 
401 __setup("netdev=", netdev_boot_setup);
402 
403 /*******************************************************************************
404 
405                             Device Interface Subroutines
406 
407 *******************************************************************************/
408 
409 /**
410  *      __dev_get_by_name       - find a device by its name
411  *      @name: name to find
412  *
413  *      Find an interface by name. Must be called under RTNL semaphore
414  *      or @dev_base_lock. If the name is found a pointer to the device
415  *      is returned. If the name is not found then %NULL is returned. The
416  *      reference counters are not incremented so the caller must be
417  *      careful with locks.
418  */
419 
420 struct net_device *__dev_get_by_name(const char *name)
421 {
422         struct net_device *dev;
423 
424         for (dev = dev_base; dev; dev = dev->next)
425                 if (!strncmp(dev->name, name, IFNAMSIZ))
426                         break;
427         return dev;
428 }
429 
430 /**
431  *      dev_get_by_name         - find a device by its name
432  *      @name: name to find
433  *
434  *      Find an interface by name. This can be called from any
435  *      context and does its own locking. The returned handle has
436  *      the usage count incremented and the caller must use dev_put() to
437  *      release it when it is no longer needed. %NULL is returned if no
438  *      matching device is found.
439  */
440 
441 struct net_device *dev_get_by_name(const char *name)
442 {
443         struct net_device *dev;
444 
445         read_lock(&dev_base_lock);
446         dev = __dev_get_by_name(name);
447         if (dev)
448                 dev_hold(dev);
449         read_unlock(&dev_base_lock);
450         return dev;
451 }
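
/*
 * Editor's example (not part of the original dev.c): the usual hold/put
 * pattern around dev_get_by_name(). The device name is hypothetical.
 */
static int example_use_dev(void)
{
        struct net_device *dev = dev_get_by_name("eth0");

        if (!dev)
                return -ENODEV;
        /* ... use dev; the held reference keeps it from going away ... */
        dev_put(dev);                   /* release the reference */
        return 0;
}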
452 
 453 /*
 454    The return value was changed to int to prevent illegal usage in the
 455    future.  It is still legal to use it to check for device existence.
 456 
 457    The user should understand that the result returned by this function
 458    is meaningless if the call was not made under the rtnl semaphore.
 459  */
460 
461 /**
 462  *      __dev_get -     test if a device exists
463  *      @name:  name to test for
464  *
465  *      Test if a name exists. Returns true if the name is found. In order
466  *      to be sure the name is not allocated or removed during the test the
467  *      caller must hold the rtnl semaphore.
468  *
 469  *      This function exists only for backward compatibility with
 470  *      older drivers.
471  */
472 int __dev_get(const char *name)
473 {
474         struct net_device *dev;
475 
476         read_lock(&dev_base_lock);
477         dev = __dev_get_by_name(name);
478         read_unlock(&dev_base_lock);
479         return dev != NULL;
480 }
481 
482 /**
483  *      __dev_get_by_index - find a device by its ifindex
484  *      @ifindex: index of device
485  *
 486  *      Search for an interface by index. Returns a pointer to the
 487  *      device, or %NULL if it is not found. The device has not
488  *      had its reference counter increased so the caller must be careful
489  *      about locking. The caller must hold either the RTNL semaphore
490  *      or @dev_base_lock.
491  */
492 
493 struct net_device *__dev_get_by_index(int ifindex)
494 {
495         struct net_device *dev;
496 
497         for (dev = dev_base; dev; dev = dev->next)
498                 if (dev->ifindex == ifindex)
499                         break;
500         return dev;
501 }
502 
503 
504 /**
505  *      dev_get_by_index - find a device by its ifindex
506  *      @ifindex: index of device
507  *
 508  *      Search for an interface by index. Returns a pointer to the
 509  *      device, or %NULL if it is not found. The device returned has
510  *      had a reference added and the pointer is safe until the user calls
511  *      dev_put to indicate they have finished with it.
512  */
513 
514 struct net_device *dev_get_by_index(int ifindex)
515 {
516         struct net_device *dev;
517 
518         read_lock(&dev_base_lock);
519         dev = __dev_get_by_index(ifindex);
520         if (dev)
521                 dev_hold(dev);
522         read_unlock(&dev_base_lock);
523         return dev;
524 }
525 
526 /**
527  *      dev_getbyhwaddr - find a device by its hardware address
528  *      @type: media type of device
529  *      @ha: hardware address
530  *
 531  *      Search for an interface by MAC address. Returns a pointer to the
 532  *      device, or %NULL if it is not found. The caller must hold the
 533  *      rtnl semaphore. The returned device has not had its ref count
 534  *      increased, and the caller must therefore be careful about locking.
535  *
536  *      BUGS:
537  *      If the API was consistent this would be __dev_get_by_hwaddr
538  */
539 
540 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
541 {
542         struct net_device *dev;
543 
544         ASSERT_RTNL();
545 
546         for (dev = dev_base; dev; dev = dev->next)
547                 if (dev->type == type &&
548                     !memcmp(dev->dev_addr, ha, dev->addr_len))
549                         break;
550         return dev;
551 }
552 
553 struct net_device *__dev_getfirstbyhwtype(unsigned short type)
554 {
555         struct net_device *dev;
556 
557         for (dev = dev_base; dev; dev = dev->next)
558                 if (dev->type == type)
559                         break;
560         return dev;
561 }
562 
563 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
564 
565 struct net_device *dev_getfirstbyhwtype(unsigned short type)
566 {
567         struct net_device *dev;
568 
569         rtnl_lock();
570         dev = __dev_getfirstbyhwtype(type);
571         if (dev)
572                 dev_hold(dev);
573         rtnl_unlock();
574         return dev;
575 }
576 
577 EXPORT_SYMBOL(dev_getfirstbyhwtype);
578 
579 /**
580  *      dev_get_by_flags - find any device with given flags
581  *      @if_flags: IFF_* values
582  *      @mask: bitmask of bits in if_flags to check
583  *
 584  *      Search for any interface with the given flags. Returns a pointer
 585  *      to the device, or %NULL if none is found. The device returned has
586  *      had a reference added and the pointer is safe until the user calls
587  *      dev_put to indicate they have finished with it.
588  */
589 
590 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
591 {
592         struct net_device *dev;
593 
594         read_lock(&dev_base_lock);
595         dev = __dev_get_by_flags(if_flags, mask);
596         if (dev)
597                 dev_hold(dev);
598         read_unlock(&dev_base_lock);
599         return dev;
600 }
601 
602 /**
603  *      __dev_get_by_flags - find any device with given flags
604  *      @if_flags: IFF_* values
605  *      @mask: bitmask of bits in if_flags to check
606  *
 607  *      Search for any interface with the given flags. Returns a pointer
 608  *      to the device, or %NULL if none is found. The caller must hold either
609  *      the RTNL semaphore or @dev_base_lock.
610  */
611 
612 struct net_device *__dev_get_by_flags(unsigned short if_flags, unsigned short mask)
613 {
614         struct net_device *dev;
615 
616         for (dev = dev_base; dev != NULL; dev = dev->next) {
617                 if (((dev->flags ^ if_flags) & mask) == 0)
618                         return dev;
619         }
620         return NULL;
621 }
622 
623 /**
624  *      dev_alloc_name - allocate a name for a device
625  *      @dev: device
626  *      @name: name format string
627  *
 628  *      Passed a format string, e.g. "lt%d", it will try to find a suitable
629  *      id. Not efficient for many devices, not called a lot. The caller
630  *      must hold the dev_base or rtnl lock while allocating the name and
631  *      adding the device in order to avoid duplicates. Returns the number
632  *      of the unit assigned or a negative errno code.
633  */
634 
635 int dev_alloc_name(struct net_device *dev, const char *name)
636 {
637         int i;
638         char buf[32];
639         char *p;
640 
641         /*
642          * Verify the string as this thing may have come from
643          * the user.  There must be either one "%d" and no other "%"
644          * characters, or no "%" characters at all.
645          */
646         p = strchr(name, '%');
647         if (p && (p[1] != 'd' || strchr(p + 2, '%')))
648                 return -EINVAL;
649 
650         /*
651          * If you need over 100 please also fix the algorithm...
652          */
653         for (i = 0; i < 100; i++) {
654                 snprintf(buf, sizeof(buf), name, i);
655                 if (!__dev_get_by_name(buf)) {
656                         strcpy(dev->name, buf);
657                         return i;
658                 }
659         }
660         return -ENFILE; /* Over 100 of the things .. bail out! */
661 }
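
/*
 * Editor's example (not part of the original dev.c): naming a freshly
 * allocated device with dev_alloc_name(). With the format "eth%d" and
 * eth0/eth1 already registered, the call below would set dev->name to
 * "eth2" and return 2. Per the comment above, the caller holds the
 * dev_base or rtnl lock.
 */
static int example_name_dev(struct net_device *dev)
{
        int unit = dev_alloc_name(dev, "eth%d");

        if (unit < 0)
                return unit;            /* -EINVAL or -ENFILE */
        return 0;
}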
662 
663 /**
 664  *      __dev_alloc - allocate a network device and name
 665  *      @name: name format string
 666  *      @err: error return pointer
 667  *
 668  *      Passed a format string, e.g. "lt%d", it will allocate a network
 669  *      device and space for the name. If the allocation succeeds, the name
 670  *      is assigned and the device pointer is returned. %NULL is returned
 671  *      if either the device or the name allocation failed, and the cause
 672  *      of the error is returned as a negative errno code in the variable
 673  *      @err points to.
674  *
675  *      This call is deprecated in favor of alloc_netdev because
676  *      the caller must hold the @dev_base or RTNL locks when doing this in
677  *      order to avoid duplicate name allocations.
678  */
679 
680 struct net_device *__dev_alloc(const char *name, int *err)
681 {
682         struct net_device *dev = kmalloc(sizeof(*dev), GFP_KERNEL);
683 
684         if (!dev)
685                 *err = -ENOBUFS;
686         else {
687                 memset(dev, 0, sizeof(*dev));
688                 *err = dev_alloc_name(dev, name);
689                 if (*err < 0) {
690                         kfree(dev);
691                         dev = NULL;
692                 }
693         }
694         return dev;
695 }
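
/*
 * Editor's example (not part of the original dev.c): the preferred
 * replacement mentioned in the comment above. A sketch, assuming the
 * alloc_netdev()/ether_setup() interfaces of this kernel generation;
 * "my_priv" and the "mydev%d" format are hypothetical.
 */
struct my_priv {
        int some_state;
};

static struct net_device *example_alloc(void)
{
        /* allocates the device plus private area and sets the name format */
        return alloc_netdev(sizeof(struct my_priv), "mydev%d", ether_setup);
}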
696 
697 /**
698  *      netdev_state_change - device changes state
699  *      @dev: device to cause notification
700  *
701  *      Called to indicate a device has changed state. This function calls
 702  *      the netdev_chain notifier chain and sends a NEWLINK message
703  *      to the routing socket.
704  */
705 void netdev_state_change(struct net_device *dev)
706 {
707         if (dev->flags & IFF_UP) {
708                 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
709                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
710         }
711 }
712 
713 /**
714  *      dev_load        - load a network module
715  *      @name: name of interface
716  *
717  *      If a network interface is not present and the process has suitable
718  *      privileges this function loads the module. If module loading is not
719  *      available in this kernel then it becomes a nop.
720  */
721 
722 void dev_load(const char *name)
723 {
724         struct net_device *dev;  
725 
726         read_lock(&dev_base_lock);
727         dev = __dev_get_by_name(name);
728         read_unlock(&dev_base_lock);
729 
730         if (!dev && capable(CAP_SYS_MODULE))
731                 request_module("%s", name);
732 }
733 
734 static int default_rebuild_header(struct sk_buff *skb)
735 {
736         printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
737                skb->dev ? skb->dev->name : "NULL!!!");
738         kfree_skb(skb);
739         return 1;
740 }
741 
742 
743 /*
744  * Some old buggy device drivers change get_stats after registering
 745  * the device.  Try to trap them here.
 746  * This can be eliminated when all devices are known to be fixed.
747  */
748 static inline int get_stats_changed(struct net_device *dev)
749 {
750         int changed = dev->last_stats != dev->get_stats;
751         dev->last_stats = dev->get_stats;
752         return changed;
753 }
754 
755 /**
756  *      dev_open        - prepare an interface for use.
757  *      @dev:   device to open
758  *
759  *      Takes a device from down to up state. The device's private open
760  *      function is invoked and then the multicast lists are loaded. Finally
761  *      the device is moved into the up state and a %NETDEV_UP message is
762  *      sent to the netdev notifier chain.
763  *
764  *      Calling this function on an active interface is a nop. On a failure
765  *      a negative errno code is returned.
766  */
767 int dev_open(struct net_device *dev)
768 {
769         int ret = 0;
770 
771         /*
772          *      Is it already up?
773          */
774 
775         if (dev->flags & IFF_UP)
776                 return 0;
777 
778         /*
779          *       Check for broken device drivers.
780          */
781         if (get_stats_changed(dev) && net_ratelimit()) {
782                 printk(KERN_ERR "%s: driver changed get_stats after register\n",
783                        dev->name);
784         }
785 
786         /*
787          *      Is it even present?
788          */
789         if (!netif_device_present(dev))
790                 return -ENODEV;
791 
792         /*
793          *      Call device private open method
794          */
795         set_bit(__LINK_STATE_START, &dev->state);
796         if (dev->open) {
797                 ret = dev->open(dev);
798                 if (ret)
799                         clear_bit(__LINK_STATE_START, &dev->state);
800         }
801 
802         /*
803          *      Check for more broken device drivers.
804          */
805         if (get_stats_changed(dev) && net_ratelimit()) {
806                 printk(KERN_ERR "%s: driver changed get_stats in open\n",
807                        dev->name);
808         }
809 
810         /*
811          *      If it went open OK then:
812          */
813 
814         if (!ret) {
815                 /*
816                  *      Set the flags.
817                  */
818                 dev->flags |= IFF_UP;
819 
820                 /*
821                  *      Initialize multicasting status
822                  */
823                 dev_mc_upload(dev);
824 
825                 /*
826                  *      Wakeup transmit queue engine
827                  */
828                 dev_activate(dev);
829 
830                 /*
831                  *      ... and announce new interface.
832                  */
833                 notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
834         }
835         return ret;
836 }
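
/*
 * Editor's example (not part of the original dev.c): bringing an
 * interface up from kernel code. dev_open() is called under the rtnl
 * semaphore; the device name is hypothetical.
 */
static int example_bring_up(const char *name)
{
        struct net_device *dev;
        int err = -ENODEV;

        rtnl_lock();
        dev = __dev_get_by_name(name);  /* no refcount needed: RTNL held */
        if (dev)
                err = dev_open(dev);
        rtnl_unlock();
        return err;
}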
837 
838 #ifdef CONFIG_NET_FASTROUTE
839 
840 static void dev_do_clear_fastroute(struct net_device *dev)
841 {
842         if (dev->accept_fastpath) {
843                 int i;
844 
845                 for (i = 0; i <= NETDEV_FASTROUTE_HMASK; i++) {
846                         struct dst_entry *dst;
847 
848                         write_lock_irq(&dev->fastpath_lock);
849                         dst = dev->fastpath[i];
850                         dev->fastpath[i] = NULL;
851                         write_unlock_irq(&dev->fastpath_lock);
852 
853                         dst_release(dst);
854                 }
855         }
856 }
857 
858 void dev_clear_fastroute(struct net_device *dev)
859 {
860         if (dev) {
861                 dev_do_clear_fastroute(dev);
862         } else {
863                 read_lock(&dev_base_lock);
864                 for (dev = dev_base; dev; dev = dev->next)
865                         dev_do_clear_fastroute(dev);
866                 read_unlock(&dev_base_lock);
867         }
868 }
869 #endif
870 
871 /**
872  *      dev_close - shutdown an interface.
873  *      @dev: device to shutdown
874  *
875  *      This function moves an active device into down state. A
876  *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
877  *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
878  *      chain.
879  */
880 int dev_close(struct net_device *dev)
881 {
882         if (!(dev->flags & IFF_UP))
883                 return 0;
884 
885         /*
886          *      Tell people we are going down, so that they can
 887          *      prepare for death while the device is still operating.
888          */
889         notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
890 
891         dev_deactivate(dev);
892 
893         clear_bit(__LINK_STATE_START, &dev->state);
894 
 895         /* Synchronize with any scheduled poll. We cannot touch the poll
 896          * list; it may even be on a different cpu. So just clear
 897          * netif_running() and wait until the poll really happens. The best
 898          * place for this is inside dev->stop() after the device has stopped
 899          * its irq engine, but that requires more changes in devices. */
900 
901         smp_mb__after_clear_bit(); /* Commit netif_running(). */
902         while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
903                 /* No hurry. */
904                 current->state = TASK_INTERRUPTIBLE;
905                 schedule_timeout(1);
906         }
907 
908         /*
 909          *      Call the device-specific close. This cannot fail and is
 910          *      only done if the device is UP.
911          *
912          *      We allow it to be called even after a DETACH hot-plug
913          *      event.
914          */
915         if (dev->stop)
916                 dev->stop(dev);
917 
918         /*
919          *      Device is now down.
920          */
921 
922         dev->flags &= ~IFF_UP;
923 #ifdef CONFIG_NET_FASTROUTE
924         dev_clear_fastroute(dev);
925 #endif
926 
927         /*
928          * Tell people we are down
929          */
930         notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
931 
932         return 0;
933 }
934 
935 
936 /*
937  *      Device change register/unregister. These are not inline or static
938  *      as we export them to the world.
939  */
940 
941 /**
942  *      register_netdevice_notifier - register a network notifier block
943  *      @nb: notifier
944  *
945  *      Register a notifier to be called when network device events occur.
946  *      The notifier passed is linked into the kernel structures and must
947  *      not be reused until it has been unregistered. A negative errno code
948  *      is returned on a failure.
949  */
950 
951 int register_netdevice_notifier(struct notifier_block *nb)
952 {
953         return notifier_chain_register(&netdev_chain, nb);
954 }
955 
956 /**
957  *      unregister_netdevice_notifier - unregister a network notifier block
958  *      @nb: notifier
959  *
960  *      Unregister a notifier previously registered by
 961  *      register_netdevice_notifier(). The notifier is unlinked from the
962  *      kernel structures and may then be reused. A negative errno code
963  *      is returned on a failure.
964  */
965 
966 int unregister_netdevice_notifier(struct notifier_block *nb)
967 {
968         return notifier_chain_unregister(&netdev_chain, nb);
969 }
970 
971 /**
972  *      call_netdevice_notifiers - call all network notifier blocks
973  *      @val: value passed unmodified to notifier function
974  *      @v:   pointer passed unmodified to notifier function
975  *
976  *      Call all network notifier blocks.  Parameters and return value
977  *      are as for notifier_call_chain().
978  */
979 
980 int call_netdevice_notifiers(unsigned long val, void *v)
981 {
982         return notifier_call_chain(&netdev_chain, val, v);
983 }
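
/*
 * Editor's example (not part of the original dev.c): a minimal sketch of
 * a netdev event notifier using the chain above. The callback and block
 * names are hypothetical.
 */
static int my_netdev_event(struct notifier_block *nb,
                           unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;

        switch (event) {
        case NETDEV_UP:
                printk(KERN_INFO "%s is up\n", dev->name);
                break;
        case NETDEV_DOWN:
                printk(KERN_INFO "%s is down\n", dev->name);
                break;
        }
        return NOTIFY_DONE;
}

static struct notifier_block my_netdev_nb = {
        .notifier_call = my_netdev_event,
};

static int __init my_notifier_init(void)
{
        return register_netdevice_notifier(&my_netdev_nb);
}

static void __exit my_notifier_exit(void)
{
        unregister_netdevice_notifier(&my_netdev_nb);
}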
984 
985 /*
986  *      Support routine. Sends outgoing frames to any network
987  *      taps currently in use.
988  */
989 
990 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
991 {
992         struct packet_type *ptype;
993         do_gettimeofday(&skb->stamp);
994 
995         rcu_read_lock();
996         list_for_each_entry_rcu(ptype, &ptype_all, list) {
997                 /* Never send packets back to the socket
998                  * they originated from - MvS (miquels@drinkel.ow.org)
999                  */
1000                 if ((ptype->dev == dev || !ptype->dev) &&
1001                     (ptype->af_packet_priv == NULL ||
1002                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
1003                         struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1004                         if (!skb2)
1005                                 break;
1006 
1007                         /* skb->nh should be correctly
1008                            set by the sender, so the check below is
1009                            just protection against buggy protocols.
1010                          */
1011                         skb2->mac.raw = skb2->data;
1012 
1013                         if (skb2->nh.raw < skb2->data ||
1014                             skb2->nh.raw > skb2->tail) {
1015                                 if (net_ratelimit())
1016                                         printk(KERN_CRIT "protocol %04x is "
1017                                                "buggy, dev %s\n",
1018                                                skb2->protocol, dev->name);
1019                                 skb2->nh.raw = skb2->data;
1020                         }
1021 
1022                         skb2->h.raw = skb2->nh.raw;
1023                         skb2->pkt_type = PACKET_OUTGOING;
1024                         ptype->func(skb2, skb->dev, ptype);
1025                 }
1026         }
1027         rcu_read_unlock();
1028 }
1029 
1030 /* Calculate the checksum when the packet is misrouted.
1031  * If that fails for some reason, ignore it and send the skb with a
1032  * wrong checksum.
1033  */
1034 struct sk_buff *skb_checksum_help(struct sk_buff *skb)
1035 {
1036         unsigned int csum;
1037         int offset = skb->h.raw - skb->data;
1038 
1039         if (offset > (int)skb->len)
1040                 BUG();
1041         csum = skb_checksum(skb, offset, skb->len-offset, 0);
1042 
1043         offset = skb->tail - skb->h.raw;
1044         if (offset <= 0)
1045                 BUG();
1046         if (skb->csum + 2 > offset)
1047                 BUG();
1048 
1049         *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1050         skb->ip_summed = CHECKSUM_NONE;
1051         return skb;
1052 }
1053 
1054 #ifdef CONFIG_HIGHMEM
1055 /* Actually, we should eliminate this check as soon as we know that:
1056  * 1. An IOMMU is present and allows mapping all the memory.
1057  * 2. No high memory really exists on this machine.
1058  */
1059 
1060 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1061 {
1062         int i;
1063 
1064         if (dev->features & NETIF_F_HIGHDMA)
1065                 return 0;
1066 
1067         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1068                 if (skb_shinfo(skb)->frags[i].page >= highmem_start_page)
1069                         return 1;
1070 
1071         return 0;
1072 }
1073 #else
1074 #define illegal_highdma(dev, skb)       (0)
1075 #endif
1076 
1077 extern void skb_release_data(struct sk_buff *);
1078 
1079 /* Keep head the same: replace data */
1080 int __skb_linearize(struct sk_buff *skb, int gfp_mask)
1081 {
1082         unsigned int size;
1083         u8 *data;
1084         long offset;
1085         struct skb_shared_info *ninfo;
1086         int headerlen = skb->data - skb->head;
1087         int expand = (skb->tail + skb->data_len) - skb->end;
1088 
1089         if (skb_shared(skb))
1090                 BUG();
1091 
1092         if (expand <= 0)
1093                 expand = 0;
1094 
1095         size = skb->end - skb->head + expand;
1096         size = SKB_DATA_ALIGN(size);
1097         data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
1098         if (!data)
1099                 return -ENOMEM;
1100 
1101         /* Copy entire thing */
1102         if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
1103                 BUG();
1104 
1105         /* Set up shinfo */
1106         ninfo = (struct skb_shared_info*)(data + size);
1107         atomic_set(&ninfo->dataref, 1);
1108         ninfo->tso_size = skb_shinfo(skb)->tso_size;
1109         ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
1110         ninfo->nr_frags = 0;
1111         ninfo->frag_list = NULL;
1112 
1113         /* Offset between the two in bytes */
1114         offset = data - skb->head;
1115 
1116         /* Free old data. */
1117         skb_release_data(skb);
1118 
1119         skb->head = data;
1120         skb->end  = data + size;
1121 
1122         /* Set up new pointers */
1123         skb->h.raw   += offset;
1124         skb->nh.raw  += offset;
1125         skb->mac.raw += offset;
1126         skb->tail    += offset;
1127         skb->data    += offset;
1128 
1129         /* We are no longer a clone, even if we were. */
1130         skb->cloned    = 0;
1131 
1132         skb->tail     += skb->data_len;
1133         skb->data_len  = 0;
1134         return 0;
1135 }
1136 
1137 /**
1138  *      dev_queue_xmit - transmit a buffer
1139  *      @skb: buffer to transmit
1140  *
1141  *      Queue a buffer for transmission to a network device. The caller must
1142  *      have set the device and priority and built the buffer before calling
1143  *      this function. The function can be called from an interrupt.
1144  *
1145  *      A negative errno code is returned on a failure. A success does not
1146  *      guarantee the frame will be transmitted as it may be dropped due
1147  *      to congestion or traffic shaping.
1148  */
1149 
1150 int dev_queue_xmit(struct sk_buff *skb)
1151 {
1152         struct net_device *dev = skb->dev;
1153         struct Qdisc *q;
1154         int rc = -ENOMEM;
1155 
1156         if (skb_shinfo(skb)->frag_list &&
1157             !(dev->features & NETIF_F_FRAGLIST) &&
1158             __skb_linearize(skb, GFP_ATOMIC))
1159                 goto out_kfree_skb;
1160 
1161         /* Fragmented skb is linearized if device does not support SG,
1162          * or if at least one of the fragments is in highmem and the device
1163          * does not support DMA from it.
1164          */
1165         if (skb_shinfo(skb)->nr_frags &&
1166             (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1167             __skb_linearize(skb, GFP_ATOMIC))
1168                 goto out_kfree_skb;
1169 
1170         /* If the packet is not checksummed and the device does not support
1171          * checksumming for this protocol, complete the checksum here.
1172          */
1173         if (skb->ip_summed == CHECKSUM_HW &&
1174             (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
1175              (!(dev->features & NETIF_F_IP_CSUM) ||
1176               skb->protocol != htons(ETH_P_IP)))) {
1177                 if ((skb = skb_checksum_help(skb)) == NULL)
1178                         goto out;
1179         }
1180 
1181         /* Grab device queue */
1182         spin_lock_bh(&dev->queue_lock);
1183         q = dev->qdisc;
1184         if (q->enqueue) {
1185                 rc = q->enqueue(skb, q);
1186 
1187                 qdisc_run(dev);
1188 
1189                 spin_unlock_bh(&dev->queue_lock);
1190                 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1191                 goto out;
1192         }
1193 
1194         /* The device has no queue. This is the common case for software
1195            devices: loopback, all sorts of tunnels...
1196 
1197            Really, it is unlikely that xmit_lock protection is necessary
1198            here (e.g. loopback and IP tunnels are clean, ignoring
1199            statistics counters).
1200            However, it is possible that they rely on the protection
1201            we provide here.
1202 
1203            Check this and take the lock. It is not prone to deadlocks.
1204            Or take the noqueue qdisc path; it is even simpler 8)
1205          */
1206         if (dev->flags & IFF_UP) {
1207                 int cpu = smp_processor_id();
1208 
1209                 if (dev->xmit_lock_owner != cpu) {
1210                         /*
1211                          * The spin_lock effectively does a preempt lock, but
1212                          * we are about to drop that...
1213                          */
1214                         preempt_disable();
1215                         spin_unlock(&dev->queue_lock);
1216                         spin_lock(&dev->xmit_lock);
1217                         dev->xmit_lock_owner = cpu;
1218                         preempt_enable();
1219 
1220                         if (!netif_queue_stopped(dev)) {
1221                                 if (netdev_nit)
1222                                         dev_queue_xmit_nit(skb, dev);
1223 
1224                                 rc = 0;
1225                                 if (!dev->hard_start_xmit(skb, dev)) {
1226                                         dev->xmit_lock_owner = -1;
1227                                         spin_unlock_bh(&dev->xmit_lock);
1228                                         goto out;
1229                                 }
1230                         }
1231                         dev->xmit_lock_owner = -1;
1232                         spin_unlock_bh(&dev->xmit_lock);
1233                         if (net_ratelimit())
1234                                 printk(KERN_CRIT "Virtual device %s asks to "
1235                                        "queue packet!\n", dev->name);
1236                         goto out_enetdown;
1237                 } else {
1238                         /* Recursion detected! It is possible,
1239                          * unfortunately */
1240                         if (net_ratelimit())
1241                                 printk(KERN_CRIT "Dead loop on virtual device "
1242                                        "%s, fix it urgently!\n", dev->name);
1243                 }
1244         }
1245         spin_unlock_bh(&dev->queue_lock);
1246 out_enetdown:
1247         rc = -ENETDOWN;
1248 out_kfree_skb:
1249         kfree_skb(skb);
1250 out:
1251         return rc;
1252 }
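
/*
 * Editor's example (not part of the original dev.c): a minimal transmit
 * sketch using dev_queue_xmit(). It sends a raw, zero-filled minimum-size
 * Ethernet frame on a device the caller already holds; the payload and
 * protocol value are placeholders.
 */
static int example_xmit(struct net_device *dev)
{
        struct sk_buff *skb = alloc_skb(ETH_ZLEN, GFP_ATOMIC);

        if (!skb)
                return -ENOMEM;
        memset(skb_put(skb, ETH_ZLEN), 0, ETH_ZLEN);
        skb->dev = dev;                         /* caller set per the doc above */
        skb->protocol = htons(ETH_P_802_3);     /* placeholder protocol */
        return dev_queue_xmit(skb);             /* consumes the skb */
}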
1253 
1254 
1255 /*=======================================================================
1256                         Receiver routines
1257   =======================================================================*/
1258 
1259 int netdev_max_backlog = 300;
1260 int weight_p = 64;            /* old backlog weight */
1261 /* These numbers are selected based on intuition and some
1262  * experimentation; if you have a more scientific way of doing this,
1263  * please go ahead and fix things.
1264  */
1265 int no_cong_thresh = 10;
1266 int no_cong = 20;
1267 int lo_cong = 100;
1268 int mod_cong = 290;
1269 
1270 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1271 
1272 
1273 #ifdef CONFIG_NET_HW_FLOWCONTROL
1274 atomic_t netdev_dropping = ATOMIC_INIT(0);
1275 static unsigned long netdev_fc_mask = 1;
1276 unsigned long netdev_fc_xoff;
1277 spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;
1278 
1279 static struct
1280 {
1281         void (*stimul)(struct net_device *);
1282         struct net_device *dev;
1283 } netdev_fc_slots[BITS_PER_LONG];
1284 
1285 int netdev_register_fc(struct net_device *dev,
1286                        void (*stimul)(struct net_device *dev))
1287 {
1288         int bit = 0;
1289         unsigned long flags;
1290 
1291         spin_lock_irqsave(&netdev_fc_lock, flags);
1292         if (netdev_fc_mask != ~0UL) {
1293                 bit = ffz(netdev_fc_mask);
1294                 netdev_fc_slots[bit].stimul = stimul;
1295                 netdev_fc_slots[bit].dev = dev;
1296                 set_bit(bit, &netdev_fc_mask);
1297                 clear_bit(bit, &netdev_fc_xoff);
1298         }
1299         spin_unlock_irqrestore(&netdev_fc_lock, flags);
1300         return bit;
1301 }
1302 
1303 void netdev_unregister_fc(int bit)
1304 {
1305         unsigned long flags;
1306 
1307         spin_lock_irqsave(&netdev_fc_lock, flags);
1308         if (bit > 0) {
1309                 netdev_fc_slots[bit].stimul = NULL;
1310                 netdev_fc_slots[bit].dev = NULL;
1311                 clear_bit(bit, &netdev_fc_mask);
1312                 clear_bit(bit, &netdev_fc_xoff);
1313         }
1314         spin_unlock_irqrestore(&netdev_fc_lock, flags);
1315 }
1316 
1317 static void netdev_wakeup(void)
1318 {
1319         unsigned long xoff;
1320 
1321         spin_lock(&netdev_fc_lock);
1322         xoff = netdev_fc_xoff;
1323         netdev_fc_xoff = 0;
1324         while (xoff) {
1325                 int i = ffz(~xoff);
1326                 xoff &= ~(1 << i);
1327                 netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
1328         }
1329         spin_unlock(&netdev_fc_lock);
1330 }
1331 #endif
1332 
1333 static void get_sample_stats(int cpu)
1334 {
1335 #ifdef RAND_LIE
1336         unsigned long rd;
1337         int rq;
1338 #endif
1339         struct softnet_data *sd = &per_cpu(softnet_data, cpu);
1340         int blog = sd->input_pkt_queue.qlen;
1341         int avg_blog = sd->avg_blog;
1342 
1343         avg_blog = (avg_blog >> 1) + (blog >> 1);
1344 
1345         if (avg_blog > mod_cong) {
1346                 /* Above moderate congestion levels. */
1347                 sd->cng_level = NET_RX_CN_HIGH;
1348 #ifdef RAND_LIE
1349                 rd = net_random();
1350                 rq = rd % netdev_max_backlog;
1351                 if (rq < avg_blog) /* unlucky bastard */
1352                         sd->cng_level = NET_RX_DROP;
1353 #endif
1354         } else if (avg_blog > lo_cong) {
1355                 sd->cng_level = NET_RX_CN_MOD;
1356 #ifdef RAND_LIE
1357                 rd = net_random();
1358                 rq = rd % netdev_max_backlog;
1359                 if (rq < avg_blog) /* unlucky bastard */
1360                         sd->cng_level = NET_RX_CN_HIGH;
1361 #endif
1362         } else if (avg_blog > no_cong)
1363                 sd->cng_level = NET_RX_CN_LOW;
1364         else  /* no congestion */
1365                 sd->cng_level = NET_RX_SUCCESS;
1366 
1367         sd->avg_blog = avg_blog;
1368 }
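
/*
 * Editor's note (not part of the original dev.c): avg_blog above is an
 * exponentially weighted moving average with weight 1/2:
 * avg' = avg/2 + qlen/2. For a queue holding steady at 100 packets and
 * avg starting at 0, successive samples give 50, 75, 87, 93, ...,
 * converging on 100, so short bursts move the congestion level only
 * gradually.
 */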
1369 
1370 #ifdef OFFLINE_SAMPLE
1371 static void sample_queue(unsigned long dummy)
1372 {
1373 /* 10 ms or 1 ms -- I don't care -- JHS */
1374         int next_tick = 1;
1375         int cpu = smp_processor_id();
1376 
1377         get_sample_stats(cpu);
1378         next_tick += jiffies;
1379         mod_timer(&samp_timer, next_tick);
1380 }
1381 #endif
1382 
1383 
1384 /**
1385  *      netif_rx        -       post buffer to the network code
1386  *      @skb: buffer to post
1387  *
1388  *      This function receives a packet from a device driver and queues it for
1389  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1390  *      may be dropped during processing for congestion control or by the
1391  *      protocol layers.
1392  *
1393  *      return values:
1394  *      NET_RX_SUCCESS  (no congestion)
1395  *      NET_RX_CN_LOW   (low congestion)
1396  *      NET_RX_CN_MOD   (moderate congestion)
1397  *      NET_RX_CN_HIGH  (high congestion)
1398  *      NET_RX_DROP     (packet was dropped)
1399  *
1400  */
1401 
1402 int netif_rx(struct sk_buff *skb)
1403 {
1404         int this_cpu;
1405         struct softnet_data *queue;
1406         unsigned long flags;
1407 
1408         if (!skb->stamp.tv_sec)
1409                 do_gettimeofday(&skb->stamp);
1410 
1411         /*
1412          * The code is rearranged so that the path is shortest
1413          * when the CPU is congested but still operating.
1414          */
1415         local_irq_save(flags);
1416         this_cpu = smp_processor_id();
1417         queue = &__get_cpu_var(softnet_data);
1418 
1419         __get_cpu_var(netdev_rx_stat).total++;
1420         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1421                 if (queue->input_pkt_queue.qlen) {
1422                         if (queue->throttle)
1423                                 goto drop;
1424 
1425 enqueue:
1426                         dev_hold(skb->dev);
1427                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1428 #ifndef OFFLINE_SAMPLE
1429                         get_sample_stats(this_cpu);
1430 #endif
1431                         local_irq_restore(flags);
1432                         return queue->cng_level;
1433                 }
1434 
1435                 if (queue->throttle) {
1436                         queue->throttle = 0;
1437 #ifdef CONFIG_NET_HW_FLOWCONTROL
1438                         if (atomic_dec_and_test(&netdev_dropping))
1439                                 netdev_wakeup();
1440 #endif
1441                 }
1442 
1443                 netif_rx_schedule(&queue->backlog_dev);
1444                 goto enqueue;
1445         }
1446 
1447         if (!queue->throttle) {
1448                 queue->throttle = 1;
1449                 __get_cpu_var(netdev_rx_stat).throttled++;
1450 #ifdef CONFIG_NET_HW_FLOWCONTROL
1451                 atomic_inc(&netdev_dropping);
1452 #endif
1453         }
1454 
1455 drop:
1456         __get_cpu_var(netdev_rx_stat).dropped++;
1457         local_irq_restore(flags);
1458 
1459         kfree_skb(skb);
1460         return NET_RX_DROP;
1461 }
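
/*
 * Editor's example (not part of the original dev.c): the classic driver
 * receive path feeding netif_rx(). "data" and "len" stand in for whatever
 * the hardware handed the driver.
 */
static void example_rx(struct net_device *dev, void *data, unsigned int len)
{
        struct sk_buff *skb = dev_alloc_skb(len + 2);

        if (!skb)
                return;                         /* drop on allocation failure */
        skb_reserve(skb, 2);                    /* align IP header to 16 bytes */
        memcpy(skb_put(skb, len), data, len);   /* copy frame into the skb */
        skb->dev = dev;
        skb->protocol = eth_type_trans(skb, dev);
        netif_rx(skb);                          /* hand off to the stack */
}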
1462 
1463 static __inline__ void skb_bond(struct sk_buff *skb)
1464 {
1465         struct net_device *dev = skb->dev;
1466 
1467         if (dev->master) {
1468                 skb->real_dev = skb->dev;
1469                 skb->dev = dev->master;
1470         }
1471 }
1472 
1473 static void net_tx_action(struct softirq_action *h)
1474 {
1475         struct softnet_data *sd = &__get_cpu_var(softnet_data);
1476 
1477         if (sd->completion_queue) {
1478                 struct sk_buff *clist;
1479 
1480                 local_irq_disable();
1481                 clist = sd->completion_queue;
1482                 sd->completion_queue = NULL;
1483                 local_irq_enable();
1484 
1485                 while (clist) {
1486                         struct sk_buff *skb = clist;
1487                         clist = clist->next;
1488 
1489                         BUG_TRAP(!atomic_read(&skb->users));
1490                         __kfree_skb(skb);
1491                 }
1492         }
1493 
1494         if (sd->output_queue) {
1495                 struct net_device *head;
1496 
1497                 local_irq_disable();
1498                 head = sd->output_queue;
1499                 sd->output_queue = NULL;
1500                 local_irq_enable();
1501 
1502                 while (head) {
1503                         struct net_device *dev = head;
1504                         head = head->next_sched;
1505 
1506                         smp_mb__before_clear_bit();
1507                         clear_bit(__LINK_STATE_SCHED, &dev->state);
1508 
1509                         if (spin_trylock(&dev->queue_lock)) {
1510                                 qdisc_run(dev);
1511                                 spin_unlock(&dev->queue_lock);
1512                         } else {
1513                                 netif_schedule(dev);
1514                         }
1515                 }
1516         }
1517 }
1518 
1519 static __inline__ int deliver_skb(struct sk_buff *skb,
1520                                   struct packet_type *pt_prev, int last)
1521 {
1522         atomic_inc(&skb->users);
1523         return pt_prev->func(skb, skb->dev, pt_prev);
1524 }
1525 
1526 
1527 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
1528 int (*br_handle_frame_hook)(struct sk_buff *skb);
1529 
1530 static __inline__ int handle_bridge(struct sk_buff *skb,
1531                                      struct packet_type *pt_prev)
1532 {
1533         int ret = NET_RX_DROP;
1534         if (pt_prev)
1535                 ret = deliver_skb(skb, pt_prev, 0);
1536 
1537         return ret;
1538 }
1539 
1540 #endif
1541 
1542 static inline int __handle_bridge(struct sk_buff *skb,
1543                         struct packet_type **pt_prev, int *ret)
1544 {
1545 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
1546         if (skb->dev->br_port) {
1547                 *ret = handle_bridge(skb, *pt_prev);
1548                 if (br_handle_frame_hook(skb) == 0)
1549                         return 1;
1550 
1551                 *pt_prev = NULL;
1552         }
1553 #endif
1554         return 0;
1555 }
1556 
1557 int netif_receive_skb(struct sk_buff *skb)
1558 {
1559         struct packet_type *ptype, *pt_prev;
1560         int ret = NET_RX_DROP;
1561         unsigned short type = skb->protocol;
1562 
1563         if (!skb->stamp.tv_sec)
1564                 do_gettimeofday(&skb->stamp);
1565 
1566         skb_bond(skb);
1567 
1568         __get_cpu_var(netdev_rx_stat).total++;
1569 
1570 #ifdef CONFIG_NET_FASTROUTE
1571         if (skb->pkt_type == PACKET_FASTROUTE) {
1572                 __get_cpu_var(netdev_rx_stat).fastroute_deferred_out++;
1573                 return dev_queue_xmit(skb);
1574         }
1575 #endif
1576 
1577         skb->h.raw = skb->nh.raw = skb->data;
1578 
1579         pt_prev = NULL;
1580         rcu_read_lock();
1581         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1582                 if (!ptype->dev || ptype->dev == skb->dev) {
1583                         if (pt_prev) 
1584                                 ret = deliver_skb(skb, pt_prev, 0);
1585                         pt_prev = ptype;
1586                 }
1587         }
1588 
1589         handle_diverter(skb);
1590 
1591         if (__handle_bridge(skb, &pt_prev, &ret))
1592                 goto out;
1593 
1594         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1595                 if (ptype->type == type &&
1596                     (!ptype->dev || ptype->dev == skb->dev)) {
1597                         if (pt_prev) 
1598                                 ret = deliver_skb(skb, pt_prev, 0);
1599                         pt_prev = ptype;
1600                 }
1601         }
1602 
1603         if (pt_prev) {
1604                 ret = pt_prev->func(skb, skb->dev, pt_prev);
1605         } else {
1606                 kfree_skb(skb);
1607                 /* Jamal, now you will not be able to escape explaining
1608                  * to me how you were going to use this. :-)
1609                  */
1610                 ret = NET_RX_DROP;
1611         }
1612 
1613 out:
1614         rcu_read_unlock();
1615         return ret;
1616 }
1617 
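/*
 * A minimal sketch of a packet_type handler as delivered by
 * netif_receive_skb() above.  The names example_rcv and example_ptype
 * are hypothetical; deliver_skb() bumps skb->users before calling
 * ->func, so the handler owns (and must release) that reference.
 */
static int example_rcv(struct sk_buff *skb, struct net_device *dev,
                       struct packet_type *pt)
{
        /* inspect skb here ... */
        kfree_skb(skb);                         /* release our reference */
        return 0;
}

static struct packet_type example_ptype = {
        .type = __constant_htons(ETH_P_IP),     /* match IPv4 frames */
        .func = example_rcv,
        /* .dev left NULL: match frames from any device */
};

/* module init:  dev_add_pack(&example_ptype);
 * module exit:  dev_remove_pack(&example_ptype);  */
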
1618 static int process_backlog(struct net_device *backlog_dev, int *budget)
1619 {
1620         int work = 0;
1621         int quota = min(backlog_dev->quota, *budget);
1622         struct softnet_data *queue = &__get_cpu_var(softnet_data);
1623         unsigned long start_time = jiffies;
1624 
1625         for (;;) {
1626                 struct sk_buff *skb;
1627                 struct net_device *dev;
1628 
1629                 local_irq_disable();
1630                 skb = __skb_dequeue(&queue->input_pkt_queue);
1631                 if (!skb)
1632                         goto job_done;
1633                 local_irq_enable();
1634 
1635                 dev = skb->dev;
1636 
1637                 netif_receive_skb(skb);
1638 
1639                 dev_put(dev);
1640 
1641                 work++;
1642 
1643                 if (work >= quota || jiffies - start_time > 1)
1644                         break;
1645 
1646 #ifdef CONFIG_NET_HW_FLOWCONTROL
1647                 if (queue->throttle &&
1648                     queue->input_pkt_queue.qlen < no_cong_thresh ) {
1649                         queue->throttle = 0;
1650                         if (atomic_dec_and_test(&netdev_dropping)) {
1651                                 netdev_wakeup();
1652                                 break;
1653                         }
1654                 }
1655 #endif
1656         }
1657 
1658         backlog_dev->quota -= work;
1659         *budget -= work;
1660         return -1;
1661 
1662 job_done:
1663         backlog_dev->quota -= work;
1664         *budget -= work;
1665 
1666         list_del(&backlog_dev->poll_list);
1667         smp_mb__before_clear_bit();
1668         netif_poll_enable(backlog_dev);
1669 
1670         if (queue->throttle) {
1671                 queue->throttle = 0;
1672 #ifdef CONFIG_NET_HW_FLOWCONTROL
1673                 if (atomic_dec_and_test(&netdev_dropping))
1674                         netdev_wakeup();
1675 #endif
1676         }
1677         local_irq_enable();
1678         return 0;
1679 }
1680 
1681 static void net_rx_action(struct softirq_action *h)
1682 {
1683         struct softnet_data *queue = &__get_cpu_var(softnet_data);
1684         unsigned long start_time = jiffies;
1685         int budget = netdev_max_backlog;
1686 
1687         
1688         preempt_disable();
1689         local_irq_disable();
1690 
1691         while (!list_empty(&queue->poll_list)) {
1692                 struct net_device *dev;
1693 
1694                 if (budget <= 0 || jiffies - start_time > 1)
1695                         goto softnet_break;
1696 
1697                 local_irq_enable();
1698 
1699                 dev = list_entry(queue->poll_list.next,
1700                                  struct net_device, poll_list);
1701 
1702                 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1703                         local_irq_disable();
1704                         list_del(&dev->poll_list);
1705                         list_add_tail(&dev->poll_list, &queue->poll_list);
1706                         if (dev->quota < 0)
1707                                 dev->quota += dev->weight;
1708                         else
1709                                 dev->quota = dev->weight;
1710                 } else {
1711                         dev_put(dev);
1712                         local_irq_disable();
1713                 }
1714         }
1715 out:
1716         local_irq_enable();
1717         preempt_enable();
1718         return;
1719 
1720 softnet_break:
1721         __get_cpu_var(netdev_rx_stat).time_squeeze++;
1722         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1723         goto out;
1724 }
1725 
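/*
 * A minimal sketch of the dev->poll() contract that net_rx_action()
 * above relies on, assuming a hypothetical per-packet helper
 * example_rx_one(): consume at most min(dev->quota, *budget) packets,
 * charge the work to both counters, and return zero only after
 * netif_rx_complete() has removed the device from the poll list.
 */
static int example_rx_one(struct net_device *dev); /* hypothetical helper */

static int example_poll(struct net_device *dev, int *budget)
{
        int work = 0;
        int quota = min(dev->quota, *budget);

        while (work < quota && example_rx_one(dev))
                work++;

        dev->quota -= work;
        *budget -= work;

        if (work < quota) {                     /* ring drained */
                netif_rx_complete(dev);         /* off the poll list */
                /* re-enable the device's RX interrupt here */
                return 0;
        }
        return 1;                               /* more work: stay scheduled */
}
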
1726 static gifconf_func_t * gifconf_list [NPROTO];
1727 
1728 /**
1729  *      register_gifconf        -       register a SIOCGIF handler
1730  *      @family: Address family
1731  *      @gifconf: Function handler
1732  *
1733  *      Register protocol dependent address dumping routines. The handler
1734  *      that is passed must not be freed or reused until it has been replaced
1735  *      by another handler.
1736  */
1737 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1738 {
1739         if (family >= NPROTO)
1740                 return -EINVAL;
1741         gifconf_list[family] = gifconf;
1742         return 0;
1743 }
1744 
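/*
 * A minimal sketch of a gifconf handler, assuming a hypothetical
 * address family with nothing to report.  Per the caller in
 * dev_ifconf() below, a NULL buffer means "return only the length
 * you would need".
 */
static int example_gifconf(struct net_device *dev, char *buf, int len)
{
        /* write struct ifreq records for dev into buf (at most len
         * bytes) and return the bytes used; if buf is NULL, return
         * the length that would have been written instead. */
        return 0;
}

/* protocol init:  register_gifconf(AF_INET, example_gifconf);  */
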
1745 
1746 /*
1747  *      Map an interface index to its name (SIOCGIFNAME)
1748  */
1749 
1750 /*
1751  *      We need this ioctl for efficient implementation of the
1752  *      if_indextoname() function required by the IPv6 API.  Without
1753  *      it, we would have to search all the interfaces to find a
1754  *      match.  --pb
1755  */
1756 
1757 static int dev_ifname(struct ifreq *arg)
1758 {
1759         struct net_device *dev;
1760         struct ifreq ifr;
1761 
1762         /*
1763          *      Fetch the caller's info block.
1764          */
1765 
1766         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1767                 return -EFAULT;
1768 
1769         read_lock(&dev_base_lock);
1770         dev = __dev_get_by_index(ifr.ifr_ifindex);
1771         if (!dev) {
1772                 read_unlock(&dev_base_lock);
1773                 return -ENODEV;
1774         }
1775 
1776         strcpy(ifr.ifr_name, dev->name);
1777         read_unlock(&dev_base_lock);
1778 
1779         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1780                 return -EFAULT;
1781         return 0;
1782 }
1783 
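/*
 * Seen from user space, the ioctl served by dev_ifname() above is the
 * kernel half of if_indextoname(3).  A minimal sketch (user-space C,
 * error handling elided):
 *
 *      struct ifreq ifr;
 *      int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *      memset(&ifr, 0, sizeof(ifr));
 *      ifr.ifr_ifindex = 1;
 *      if (ioctl(fd, SIOCGIFNAME, &ifr) == 0)
 *              printf("%s\n", ifr.ifr_name);
 */
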
1784 /*
1785  *      Perform a SIOCGIFCONF call. This structure will change
1786  *      size eventually, and there is nothing I can do about it.
1787  *      Thus we will need a 'compatibility mode'.
1788  */
1789 
1790 static int dev_ifconf(char *arg)
1791 {
1792         struct ifconf ifc;
1793         struct net_device *dev;
1794         char *pos;
1795         int len;
1796         int total;
1797         int i;
1798 
1799         /*
1800          *      Fetch the caller's info block.
1801          */
1802 
1803         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1804                 return -EFAULT;
1805 
1806         pos = ifc.ifc_buf;
1807         len = ifc.ifc_len;
1808 
1809         /*
1810          *      Loop over the interfaces, and write an info block for each.
1811          */
1812 
1813         total = 0;
1814         for (dev = dev_base; dev; dev = dev->next) {
1815                 for (i = 0; i < NPROTO; i++) {
1816                         if (gifconf_list[i]) {
1817                                 int done;
1818                                 if (!pos)
1819                                         done = gifconf_list[i](dev, NULL, 0);
1820                                 else
1821                                         done = gifconf_list[i](dev, pos + total,
1822                                                                len - total);
1823                                 if (done < 0)
1824                                         return -EFAULT;
1825                                 total += done;
1826                         }
1827                 }
1828         }
1829 
1830         /*
1831          *      All done.  Write the updated control block back to the caller.
1832          */
1833         ifc.ifc_len = total;
1834 
1835         /*
1836          *      Both BSD and Solaris return 0 here, so we do too.
1837          */
1838         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
1839 }
1840 
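/*
 * A minimal user-space sketch of the two-pass SIOCGIFCONF idiom that
 * dev_ifconf() above supports: probe the needed length with a NULL
 * buffer, then fetch the struct ifreq records (error handling elided):
 *
 *      struct ifconf ifc;
 *      int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *      ifc.ifc_buf = NULL;
 *      ifc.ifc_len = 0;
 *      ioctl(fd, SIOCGIFCONF, &ifc);           (sizing pass)
 *
 *      ifc.ifc_buf = malloc(ifc.ifc_len);
 *      ioctl(fd, SIOCGIFCONF, &ifc);           (fills the buffer)
 */
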
1841 #ifdef CONFIG_PROC_FS
1842 /*
1843  *      This is invoked by the /proc filesystem handler to display a device
1844  *      in detail.
1845  */
1846 static __inline__ struct net_device *dev_get_idx(loff_t pos)
1847 {
1848         struct net_device *dev;
1849         loff_t i;
1850 
1851         for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
1852 
1853         return i == pos ? dev : NULL;
1854 }
1855 
1856 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
1857 {
1858         read_lock(&dev_base_lock);
1859         return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
1860 }
1861 
1862 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1863 {
1864         ++*pos;
1865         return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
1866 }
1867 
1868 void dev_seq_stop(struct seq_file *seq, void *v)
1869 {
1870         read_unlock(&dev_base_lock);
1871 }
1872 
1873 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
1874 {
1875         struct net_device_stats *stats = dev->get_stats ? dev->get_stats(dev) :
1876                                                           NULL;
1877         if (stats)
1878                 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
1879                                 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1880                            dev->name, stats->rx_bytes, stats->rx_packets,
1881                            stats->rx_errors,
1882                            stats->rx_dropped + stats->rx_missed_errors,
1883                            stats->rx_fifo_errors,
1884                            stats->rx_length_errors + stats->rx_over_errors +
1885                              stats->rx_crc_errors + stats->rx_frame_errors,
1886                            stats->rx_compressed, stats->multicast,
1887                            stats->tx_bytes, stats->tx_packets,
1888                            stats->tx_errors, stats->tx_dropped,
1889                            stats->tx_fifo_errors, stats->collisions,
1890                            stats->tx_carrier_errors +
1891                              stats->tx_aborted_errors +
1892                              stats->tx_window_errors +
1893                              stats->tx_heartbeat_errors,
1894                            stats->tx_compressed);
1895         else
1896                 seq_printf(seq, "%6s: No statistics available.\n", dev->name);
1897 }
1898 
1899 /*
1900  *      Called from the PROCfs module. This now uses the new arbitrary sized
1901  *      /proc/net interface to create /proc/net/dev
1902  */
1903 static int dev_seq_show(struct seq_file *seq, void *v)
1904 {
1905         if (v == SEQ_START_TOKEN)
1906                 seq_puts(seq, "Inter-|   Receive                            "
1907                               "                    |  Transmit\n"
1908                               " face |bytes    packets errs drop fifo frame "
1909                               "compressed multicast|bytes    packets errs "
1910                               "drop fifo colls carrier compressed\n");
1911         else
1912                 dev_seq_printf_stats(seq, v);
1913         return 0;
1914 }
1915 
1916 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
1917 {
1918         struct netif_rx_stats *rc = NULL;
1919 
1920         while (*pos < NR_CPUS)
1921                 if (cpu_online(*pos)) {
1922                         rc = &per_cpu(netdev_rx_stat, *pos);
1923                         break;
1924                 } else
1925                         ++*pos;
1926         return rc;
1927 }
1928 
1929 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
1930 {
1931         return softnet_get_online(pos);
1932 }
1933 
1934 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1935 {
1936         ++*pos;
1937         return softnet_get_online(pos);
1938 }
1939 
1940 static void softnet_seq_stop(struct seq_file *seq, void *v)
1941 {
1942 }
1943 
1944 static int softnet_seq_show(struct seq_file *seq, void *v)
1945 {
1946         struct netif_rx_stats *s = v;
1947 
1948         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
1949                    s->total, s->dropped, s->time_squeeze, s->throttled,
1950                    s->fastroute_hit, s->fastroute_success, s->fastroute_defer,
1951                    s->fastroute_deferred_out,
1952 #if 0
1953                    s->fastroute_latency_reduction
1954 #else
1955                    s->cpu_collision
1956 #endif
1957                   );
1958         return 0;
1959 }
1960 
1961 static struct seq_operations dev_seq_ops = {
1962         .start = dev_seq_start,
1963         .next  = dev_seq_next,
1964         .stop  = dev_seq_stop,
1965         .show  = dev_seq_show,
1966 };
1967 
1968 static int dev_seq_open(struct inode *inode, struct file *file)
1969 {
1970         return seq_open(file, &dev_seq_ops);
1971 }
1972 
1973 static struct file_operations dev_seq_fops = {
1974         .owner   = THIS_MODULE,
1975         .open    = dev_seq_open,
1976         .read    = seq_read,
1977         .llseek  = seq_lseek,
1978         .release = seq_release,
1979 };
1980 
1981 static struct seq_operations softnet_seq_ops = {
1982         .start = softnet_seq_start,
1983         .next  = softnet_seq_next,
1984         .stop  = softnet_seq_stop,
1985         .show  = softnet_seq_show,
1986 };
1987 
1988 static int softnet_seq_open(struct inode *inode, struct file *file)
1989 {
1990         return seq_open(file, &softnet_seq_ops);
1991 }
1992 
1993 static struct file_operations softnet_seq_fops = {
1994         .owner   = THIS_MODULE,
1995         .open    = softnet_seq_open,
1996         .read    = seq_read,
1997         .llseek  = seq_lseek,
1998         .release = seq_release,
1999 };
2000 
2001 #ifdef WIRELESS_EXT
2002 extern int wireless_proc_init(void);
2003 #else
2004 #define wireless_proc_init() 0
2005 #endif
2006 
2007 static int __init dev_proc_init(void)
2008 {
2009         int rc = -ENOMEM;
2010 
2011         if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2012                 goto out;
2013         if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2014                 goto out_dev;
2015         if (wireless_proc_init())
2016                 goto out_softnet;
2017         rc = 0;
2018 out:
2019         return rc;
2020 out_softnet:
2021         proc_net_remove("softnet_stat");
2022 out_dev:
2023         proc_net_remove("dev");
2024         goto out;
2025 }
2026 #else
2027 #define dev_proc_init() 0
2028 #endif  /* CONFIG_PROC_FS */
2029 
2030 
2031 /**
2032  *      netdev_set_master       -       set up master/slave pair
2033  *      @slave: slave device
2034  *      @master: new master device
2035  *
2036  *      Changes the master device of the slave. Pass %NULL to break the
2037  *      bonding. The caller must hold the RTNL semaphore. On a failure
2038  *      a negative errno code is returned. On success the reference counts
2039  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2040  *      function returns zero.
2041  */
2042 int netdev_set_master(struct net_device *slave, struct net_device *master)
2043 {
2044         struct net_device *old = slave->master;
2045 
2046         ASSERT_RTNL();
2047 
2048         if (master) {
2049                 if (old)
2050                         return -EBUSY;
2051                 dev_hold(master);
2052         }
2053 
2054         slave->master = master;
2055         
2056         synchronize_net();
2057 
2058         if (old)
2059                 dev_put(old);
2060 
2061         if (master)
2062                 slave->flags |= IFF_SLAVE;
2063         else
2064                 slave->flags &= ~IFF_SLAVE;
2065 
2066         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2067         return 0;
2068 }
2069 
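/*
 * A minimal sketch of how a bonding-style driver might use
 * netdev_set_master() above; example_enslave and both device
 * arguments are hypothetical, and error handling is elided.
 */
static int example_enslave(struct net_device *bond_dev,
                           struct net_device *slave_dev)
{
        int err;

        rtnl_lock();            /* the API requires the RTNL semaphore */
        err = netdev_set_master(slave_dev, bond_dev);
        rtnl_unlock();
        return err;             /* -EBUSY if already enslaved */
}

/* breaking the pair later:  netdev_set_master(slave_dev, NULL);  */
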
2070 /**
2071  *      dev_set_promiscuity     - update promiscuity count on a device
2072  *      @dev: device
2073  *      @inc: modifier
2074  *
2075  *      Add or remove promiscuity from a device. While the count in the device
2076  *      remains above zero the interface remains promiscuous. Once it hits zero
2077  *      the device reverts to normal filtering operation. A negative @inc
2078  *      value is used to drop promiscuity on the device.
2079  */
2080 void dev_set_promiscuity(struct net_device *dev, int inc)
2081 {
2082         unsigned short old_flags = dev->flags;
2083 
2084         dev->flags |= IFF_PROMISC;
2085         if ((dev->promiscuity += inc) == 0)
2086                 dev->flags &= ~IFF_PROMISC;
2087         if (dev->flags ^ old_flags) {
2088 #ifdef CONFIG_NET_FASTROUTE
2089                 if (dev->flags & IFF_PROMISC) {
2090                         netdev_fastroute_obstacles++;
2091                         dev_clear_fastroute(dev);
2092                 } else
2093                         netdev_fastroute_obstacles--;
2094 #endif
2095                 dev_mc_upload(dev);
2096                 printk(KERN_INFO "device %s %s promiscuous mode\n",
2097                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2098                                                                "left");
2099         }
2100 }
2101 
2102 /**
2103  *      dev_set_allmulti        - update allmulti count on a device
2104  *      @dev: device
2105  *      @inc: modifier
2106  *
2107  *      Add or remove reception of all multicast frames on a device. While the
2108  *      count in the device remains above zero the interface stays listening
2109  *      to all multicast frames. Once it hits zero the device reverts to normal
2110  *      filtering operation. A negative @inc value is used to drop the counter
2111  *      when releasing a resource needing all multicasts.
2112  */
2113 
2114 void dev_set_allmulti(struct net_device *dev, int inc)
2115 {
2116         unsigned short old_flags = dev->flags;
2117 
2118         dev->flags |= IFF_ALLMULTI;
2119         if ((dev->allmulti += inc) == 0)
2120                 dev->flags &= ~IFF_ALLMULTI;
2121         if (dev->flags ^ old_flags)
2122                 dev_mc_upload(dev);
2123 }
2124 
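/*
 * A minimal sketch of the counting semantics of dev_set_promiscuity()
 * and dev_set_allmulti() above: paired +1/-1 calls let independent
 * users overlap safely.  The function names are hypothetical.
 */
static void example_capture_start(struct net_device *dev)
{
        dev_set_promiscuity(dev, 1);    /* count 0 -> 1 sets IFF_PROMISC */
        dev_set_allmulti(dev, 1);       /* likewise for IFF_ALLMULTI */
}

static void example_capture_stop(struct net_device *dev)
{
        dev_set_promiscuity(dev, -1);   /* count 1 -> 0 clears the flag */
        dev_set_allmulti(dev, -1);
}
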
2125 unsigned dev_get_flags(const struct net_device *dev)
2126 {
2127         unsigned flags;
2128 
2129         flags = (dev->flags & ~(IFF_PROMISC |
2130                                 IFF_ALLMULTI |
2131                                 IFF_RUNNING)) | 
2132                 (dev->gflags & (IFF_PROMISC |
2133                                 IFF_ALLMULTI));
2134 
2135         if (netif_running(dev) && netif_carrier_ok(dev))
2136                 flags |= IFF_RUNNING;
2137 
2138         return flags;
2139 }
2140 
2141 int dev_change_flags(struct net_device *dev, unsigned flags)
2142 {
2143         int ret;
2144         int old_flags = dev->flags;
2145 
2146         /*
2147          *      Set the flags on our device.
2148          */
2149 
2150         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2151                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2152                                IFF_AUTOMEDIA)) |
2153                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2154                                     IFF_ALLMULTI));
2155 
2156         /*
2157          *      Load in the correct multicast list now the flags have changed.
2158          */
2159 
2160         dev_mc_upload(dev);
2161 
2162         /*
2163          *      Have we downed the interface? We handle IFF_UP ourselves
2164          *      according to user attempts to set it, rather than blindly
2165          *      setting it.
2166          */
2167 
2168         ret = 0;
2169         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
2170                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2171 
2172                 if (!ret)
2173                         dev_mc_upload(dev);
2174         }
2175 
2176         if (dev->flags & IFF_UP &&
2177             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2178                                           IFF_VOLATILE)))
2179                 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
2180 
2181         if ((flags ^ dev->gflags) & IFF_PROMISC) {
2182                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2183                 dev->gflags ^= IFF_PROMISC;
2184                 dev_set_promiscuity(dev, inc);
2185         }
2186 
2187         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2188            is important. Some (broken) drivers set IFF_PROMISC when
2189            IFF_ALLMULTI is requested, without asking us and without reporting it.
2190          */
2191         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2192                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2193                 dev->gflags ^= IFF_ALLMULTI;
2194                 dev_set_allmulti(dev, inc);
2195         }
2196 
2197         if (old_flags ^ dev->flags)
2198                 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2199 
2200         return ret;
2201 }
2202 
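/*
 * A minimal sketch of bringing an interface up from inside the kernel
 * via dev_change_flags() above, mirroring what SIOCSIFFLAGS does for
 * user space; example_if_up is hypothetical.
 */
static int example_if_up(struct net_device *dev)
{
        int err;

        rtnl_lock();
        err = dev_change_flags(dev, dev_get_flags(dev) | IFF_UP);
        rtnl_unlock();
        return err;
}
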
2203 int dev_set_mtu(struct net_device *dev, int new_mtu)
2204 {
2205         int err;
2206 
2207         if (new_mtu == dev->mtu)
2208                 return 0;
2209 
2210         /*      MTU must not be negative.        */
2211         if (new_mtu < 0)
2212                 return -EINVAL;
2213 
2214         if (!netif_device_present(dev))
2215                 return -ENODEV;
2216 
2217         err = 0;
2218         if (dev->change_mtu)
2219                 err = dev->change_mtu(dev, new_mtu);
2220         else
2221                 dev->mtu = new_mtu;
2222         if (!err && dev->flags & IFF_UP)
2223                 notifier_call_chain(&netdev_chain,
2224                                     NETDEV_CHANGEMTU, dev);
2225         return err;
2226 }
2227 
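/*
 * A minimal sketch of a subsystem reacting to the NETDEV_CHANGEMTU
 * event raised by dev_set_mtu() above; example_netdev_event and
 * example_nb are hypothetical.
 */
static int example_netdev_event(struct notifier_block *nb,
                                unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;

        if (event == NETDEV_CHANGEMTU)
                printk(KERN_DEBUG "%s: MTU is now %u\n", dev->name, dev->mtu);
        return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
        .notifier_call = example_netdev_event,
};

/* subsystem init:  register_netdevice_notifier(&example_nb);  */
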
2228 
2229 /*
2230  *      Perform the SIOCxIFxxx calls.
2231  */
2232 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2233 {
2234         int err;
2235         struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2236 
2237         if (!dev)
2238                 return -ENODEV;
2239 
2240         switch (cmd) {
2241                 case SIOCGIFFLAGS:      /* Get interface flags */
2242                         ifr->ifr_flags = dev_get_flags(dev);
2243                         return 0;
2244 
2245                 case SIOCSIFFLAGS:      /* Set interface flags */
2246                         return dev_change_flags(dev, ifr->ifr_flags);
2247 
2248                 case SIOCGIFMETRIC:     /* Get the metric on the interface
2249                                            (currently unused) */
2250                         ifr->ifr_metric = 0;
2251                         return 0;
2252 
2253                 case SIOCSIFMETRIC:     /* Set the metric on the interface
2254                                            (currently unused) */
2255                         return -EOPNOTSUPP;
2256 
2257                 case SIOCGIFMTU:        /* Get the MTU of a device */
2258                         ifr->ifr_mtu = dev->mtu;
2259                         return 0;
2260 
2261                 case SIOCSIFMTU:        /* Set the MTU of a device */
2262                         return dev_set_mtu(dev, ifr->ifr_mtu);
2263 
2264                 case SIOCGIFHWADDR:
2265                         memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2266                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2267                         ifr->ifr_hwaddr.sa_family = dev->type;
2268                         return 0;
2269 
2270                 case SIOCSIFHWADDR:
2271                         if (!dev->set_mac_address)
2272                                 return -EOPNOTSUPP;
2273                         if (ifr->ifr_hwaddr.sa_family != dev->type)
2274                                 return -EINVAL;
2275                         if (!netif_device_present(dev))
2276                                 return -ENODEV;
2277                         err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
2278                         if (!err)
2279                                 notifier_call_chain(&netdev_chain,
2280                                                     NETDEV_CHANGEADDR, dev);
2281                         return err;
2282 
2283                 case SIOCSIFHWBROADCAST:
2284                         if (ifr->ifr_hwaddr.sa_family != dev->type)
2285                                 return -EINVAL;
2286                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2287                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2288                         notifier_call_chain(&netdev_chain,
2289                                             NETDEV_CHANGEADDR, dev);
2290                         return 0;
2291 
2292                 case SIOCGIFMAP:
2293                         ifr->ifr_map.mem_start = dev->mem_start;
2294                         ifr->ifr_map.mem_end   = dev->mem_end;
2295                         ifr->ifr_map.base_addr = dev->base_addr;
2296                         ifr->ifr_map.irq       = dev->irq;
2297                         ifr->ifr_map.dma       = dev->dma;
2298                         ifr->ifr_map.port      = dev->if_port;
2299                         return 0;
2300 
2301                 case SIOCSIFMAP:
2302                         if (dev->set_config) {
2303                                 if (!netif_device_present(dev))
2304                                         return -ENODEV;
2305                                 return dev->set_config(dev, &ifr->ifr_map);
2306                         }
2307                         return -EOPNOTSUPP;
2308 
2309                 case SIOCADDMULTI:
2310                         if (!dev->set_multicast_list ||
2311                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2312                                 return -EINVAL;
2313                         if (!netif_device_present(dev))
2314                                 return -ENODEV;
2315                         dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2316                                    dev->addr_len, 1);
2317                         return 0;
2318 
2319                 case SIOCDELMULTI:
2320                         if (!dev->set_multicast_list ||
2321                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2322                                 return -EINVAL;
2323                         if (!netif_device_present(dev))
2324                                 return -ENODEV;
2325                         dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2326                                       dev->addr_len, 1);
2327                         return 0;
2328 
2329                 case SIOCGIFINDEX:
2330                         ifr->ifr_ifindex = dev->ifindex;
2331                         return 0;
2332 
2333                 case SIOCGIFTXQLEN:
2334                         ifr->ifr_qlen = dev->tx_queue_len;
2335                         return 0;
2336 
2337                 case SIOCSIFTXQLEN:
2338                         if (ifr->ifr_qlen < 0)
2339                                 return -EINVAL;
2340                         dev->tx_queue_len = ifr->ifr_qlen;
2341                         return 0;
2342 
2343                 case SIOCSIFNAME:
2344                         if (dev->flags & IFF_UP)
2345                                 return -EBUSY;
2346                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2347                         if (__dev_get_by_name(ifr->ifr_newname))
2348                                 return -EEXIST;
2349                         err = class_device_rename(&dev->class_dev, 
2350                                                   ifr->ifr_newname);
2351                         if (!err) {
2352                                 strlcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
2353 
2354                                 notifier_call_chain(&netdev_chain,
2355                                                     NETDEV_CHANGENAME, dev);
2356                         }
2357                         return err;
2358 
2359                 /*
2360                  *      Unknown or private ioctl
2361                  */
2362 
2363                 default:
2364                         if ((cmd >= SIOCDEVPRIVATE &&
2365                             cmd <= SIOCDEVPRIVATE + 15) ||
2366                             cmd == SIOCBONDENSLAVE ||
2367                             cmd == SIOCBONDRELEASE ||
2368                             cmd == SIOCBONDSETHWADDR ||
2369                             cmd == SIOCBONDSLAVEINFOQUERY ||
2370                             cmd == SIOCBONDINFOQUERY ||
2371                             cmd == SIOCBONDCHANGEACTIVE ||
2372                             cmd == SIOCGMIIPHY ||
2373                             cmd == SIOCGMIIREG ||
2374                             cmd == SIOCSMIIREG ||
2375                             cmd == SIOCWANDEV) {
2376                                 err = -EOPNOTSUPP;
2377                                 if (dev->do_ioctl) {
2378                                         if (netif_device_present(dev))
2379                                                 err = dev->do_ioctl(dev, ifr,
2380                                                                     cmd);
2381                                         else
2382                                                 err = -ENODEV;
2383                                 }
2384                         } else
2385                                 err = -EINVAL;
2386 
2387         }
2388         return err;
2389 }
2390 
2391 /*
2392  *      This function handles all "interface"-type I/O control requests. The actual
2393  *      'doing' part of this is dev_ifsioc above.
2394  */
2395 
2396 /**
2397  *      dev_ioctl       -       network device ioctl
2398  *      @cmd: command to issue
2399  *      @arg: pointer to a struct ifreq in user space
2400  *
2401  *      Issue ioctl functions to devices. This is normally called by the
2402  *      user space syscall interfaces but can sometimes be useful for
2403  *      other purposes. The return value is the return from the syscall if
2404  *      positive, or a negative errno code on error.
2405  */
2406 
2407 int dev_ioctl(unsigned int cmd, void *arg)
2408 {
2409         struct ifreq ifr;
2410         int ret;
2411         char *colon;
2412 
2413         /* One special case: SIOCGIFCONF takes ifconf argument
2414            and requires shared lock, because it sleeps writing
2415            to user space.
2416          */
2417 
2418         if (cmd == SIOCGIFCONF) {
2419                 rtnl_shlock();
2420                 ret = dev_ifconf((char *) arg);
2421                 rtnl_shunlock();
2422                 return ret;
2423         }
2424         if (cmd == SIOCGIFNAME)
2425                 return dev_ifname((struct ifreq *)arg);
2426 
2427         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2428                 return -EFAULT;
2429 
2430         ifr.ifr_name[IFNAMSIZ-1] = 0;
2431 
2432         colon = strchr(ifr.ifr_name, ':');
2433         if (colon)
2434                 *colon = 0;
2435 
2436         /*
2437          *      See which interface the caller is talking about.
2438          */
2439 
2440         switch (cmd) {
2441                 /*
2442                  *      These ioctl calls:
2443                  *      - can be done by all.
2444                  *      - atomic and do not require locking.
2445                  *      - return a value
2446                  */
2447                 case SIOCGIFFLAGS:
2448                 case SIOCGIFMETRIC:
2449                 case SIOCGIFMTU:
2450                 case SIOCGIFHWADDR:
2451                 case SIOCGIFSLAVE:
2452                 case SIOCGIFMAP:
2453                 case SIOCGIFINDEX:
2454                 case SIOCGIFTXQLEN:
2455                         dev_load(ifr.ifr_name);
2456                         read_lock(&dev_base_lock);
2457                         ret = dev_ifsioc(&ifr, cmd);
2458                         read_unlock(&dev_base_lock);
2459                         if (!ret) {
2460                                 if (colon)
2461                                         *colon = ':';
2462                                 if (copy_to_user(arg, &ifr,
2463                                                  sizeof(struct ifreq)))
2464                                         ret = -EFAULT;
2465                         }
2466                         return ret;
2467 
2468                 case SIOCETHTOOL:
2469                         dev_load(ifr.ifr_name);
2470                         rtnl_lock();
2471                         ret = dev_ethtool(&ifr);
2472                         rtnl_unlock();
2473                         if (!ret) {
2474                                 if (colon)
2475                                         *colon = ':';
2476                                 if (copy_to_user(arg, &ifr,
2477                                                  sizeof(struct ifreq)))
2478                                         ret = -EFAULT;
2479                         }
2480                         return ret;
2481 
2482                 /*
2483                  *      These ioctl calls:
2484                  *      - require superuser power.
2485                  *      - require strict serialization.
2486                  *      - return a value
2487                  */
2488                 case SIOCGMIIPHY:
2489                 case SIOCGMIIREG:
2490                         if (!capable(CAP_NET_ADMIN))
2491                                 return -EPERM;
2492                         dev_load(ifr.ifr_name);
2493                         rtnl_lock();
2494                         ret = dev_ifsioc(&ifr, cmd);
2495                         rtnl_unlock();
2496                         if (!ret) {
2497                                 if (colon)
2498                                         *colon = ':';
2499                                 if (copy_to_user(arg, &ifr,
2500                                                  sizeof(struct ifreq)))
2501                                         ret = -EFAULT;
2502                         }
2503                         return ret;
2504 
2505                 /*
2506                  *      These ioctl calls:
2507                  *      - require superuser power.
2508                  *      - require strict serialization.
2509                  *      - do not return a value
2510                  */
2511                 case SIOCSIFFLAGS:
2512                 case SIOCSIFMETRIC:
2513                 case SIOCSIFMTU:
2514                 case SIOCSIFMAP:
2515                 case SIOCSIFHWADDR:
2516                 case SIOCSIFSLAVE:
2517                 case SIOCADDMULTI:
2518                 case SIOCDELMULTI:
2519                 case SIOCSIFHWBROADCAST:
2520                 case SIOCSIFTXQLEN:
2521                 case SIOCSIFNAME:
2522                 case SIOCSMIIREG:
2523                 case SIOCBONDENSLAVE:
2524                 case SIOCBONDRELEASE:
2525                 case SIOCBONDSETHWADDR:
2526                 case SIOCBONDSLAVEINFOQUERY:
2527                 case SIOCBONDINFOQUERY:
2528                 case SIOCBONDCHANGEACTIVE:
2529                         if (!capable(CAP_NET_ADMIN))
2530                                 return -EPERM;
2531                         dev_load(ifr.ifr_name);
2532                         rtnl_lock();
2533                         ret = dev_ifsioc(&ifr, cmd);
2534                         rtnl_unlock();
2535                         return ret;
2536 
2537                 case SIOCGIFMEM:
2538                         /* Get the per device memory space. We can add this but
2539                          * currently do not support it */
2540                 case SIOCSIFMEM:
2541                         /* Set the per device memory buffer space.
2542                          * Not applicable in our case */
2543                 case SIOCSIFLINK:
2544                         return -EINVAL;
2545 
2546                 /*
2547                  *      Unknown or private ioctl.
2548                  */
2549                 default:
2550                         if (cmd == SIOCWANDEV ||
2551                             (cmd >= SIOCDEVPRIVATE &&
2552                              cmd <= SIOCDEVPRIVATE + 15)) {
2553                                 dev_load(ifr.ifr_name);
2554                                 rtnl_lock();
2555                                 ret = dev_ifsioc(&ifr, cmd);
2556                                 rtnl_unlock();
2557                                 if (!ret && copy_to_user(arg, &ifr,
2558                                                          sizeof(struct ifreq)))
2559                                         ret = -EFAULT;
2560                                 return ret;
2561                         }
2562 #ifdef WIRELESS_EXT
2563                         /* Take care of Wireless Extensions */
2564                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2565                                 /* If command is `set a parameter', or
2566                                  * `get the encoding parameters', check if
2567                                  * the user has the right to do it */
2568                                 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
2569                                         if (!capable(CAP_NET_ADMIN))
2570                                                 return -EPERM;
2571                                 }
2572                                 dev_load(ifr.ifr_name);
2573                                 rtnl_lock();
2574                                 /* Follow me in net/core/wireless.c */
2575                                 ret = wireless_process_ioctl(&ifr, cmd);
2576                                 rtnl_unlock();
2577                                 if (!ret && IW_IS_GET(cmd) &&
2578                                     copy_to_user(arg, &ifr,
2579                                                  sizeof(struct ifreq)))
2580                                         ret = -EFAULT;
2581                                 return ret;
2582                         }
2583 #endif  /* WIRELESS_EXT */
2584                         return -EINVAL;
2585         }
2586 }
2587 
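/*
 * The permission split dispatched above, sketched from user space:
 * SIOCGIFFLAGS is available to any user, while SIOCSIFFLAGS fails
 * with EPERM unless the caller has CAP_NET_ADMIN (error handling
 * elided; "eth0" is just an example name):
 *
 *      struct ifreq ifr;
 *      int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *      strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *      ioctl(fd, SIOCGIFFLAGS, &ifr);          (anyone may read flags)
 *
 *      ifr.ifr_flags |= IFF_UP;
 *      ioctl(fd, SIOCSIFFLAGS, &ifr);          (needs CAP_NET_ADMIN)
 */
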
2588 
2589 /**
2590  *      dev_new_index   -       allocate an ifindex
2591  *
2592  *      Returns a suitable unique value for a new device interface
2593  *      number.  The caller must hold the rtnl semaphore or the
2594  *      dev_base_lock to be sure it remains unique.
2595  */
2596 int dev_new_index(void)
2597 {
2598         static int ifindex;
2599         for (;;) {
2600                 if (++ifindex <= 0)
2601                         ifindex = 1;
2602                 if (!__dev_get_by_index(ifindex))
2603                         return ifindex;
2604         }
2605 }
2606 
2607 static int dev_boot_phase = 1;
2608 
2609 /* Delayed registration/unregistration */
2610 static spinlock_t net_todo_list_lock = SPIN_LOCK_UNLOCKED;
2611 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2612 
2613 static inline void net_set_todo(struct net_device *dev)
2614 {
2615         spin_lock(&net_todo_list_lock);
2616         list_add_tail(&dev->todo_list, &net_todo_list);
2617         spin_unlock(&net_todo_list_lock);
2618 }
2619 
2620 /**
2621  *      register_netdevice      - register a network device
2622  *      @dev: device to register
2623  *
2624  *      Take a completed network device structure and add it to the kernel
2625  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2626  *      chain. 0 is returned on success. A negative errno code is returned
2627  *      on a failure to set up the device, or if the name is a duplicate.
2628  *
2629  *      Callers must hold the rtnl semaphore.  See the comment at the
2630  *      end of Space.c for details about the locking.  You may want
2631  *      register_netdev() instead of this.
2632  *
2633  *      BUGS:
2634  *      The locking appears insufficient to guarantee two parallel registers
2635  *      will not get the same name.
2636  */
2637 
2638 int register_netdevice(struct net_device *dev)
2639 {
2640         struct net_device *d, **dp;
2641         int ret;
2642 
2643         BUG_ON(dev_boot_phase);
2644         ASSERT_RTNL();
2645 
2646         /* When net_devices are persistent, this will be fatal. */
2647         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2648 
2649         spin_lock_init(&dev->queue_lock);
2650         spin_lock_init(&dev->xmit_lock);
2651         dev->xmit_lock_owner = -1;
2652 #ifdef CONFIG_NET_FASTROUTE
2653         dev->fastpath_lock = RW_LOCK_UNLOCKED;
2654 #endif
2655 
2656         ret = alloc_divert_blk(dev);
2657         if (ret)
2658                 goto out;
2659 
2660         dev->iflink = -1;
2661 
2662         /* Init, if this function is available */
2663         if (dev->init) {
2664                 ret = dev->init(dev);
2665                 if (ret) {
2666                         if (ret > 0)
2667                                 ret = -EIO;
2668                         goto out_err;
2669                 }
2670         }
2671 
2672         dev->ifindex = dev_new_index();
2673         if (dev->iflink == -1)
2674                 dev->iflink = dev->ifindex;
2675 
2676         /* Check for existence, and append to tail of chain */
2677         ret = -EEXIST;
2678         for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
2679                 if (d == dev || !strcmp(d->name, dev->name))
2680                         goto out_err;
2681         }
2682         
2683         /* Fix illegal SG+CSUM combinations. */
2684         if ((dev->features & NETIF_F_SG) &&
2685             !(dev->features & (NETIF_F_IP_CSUM |
2686                                NETIF_F_NO_CSUM |
2687                                NETIF_F_HW_CSUM))) {
2688                 printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2689                        dev->name);
2690                 dev->features &= ~NETIF_F_SG;
2691         }
2692 
2693         /*
2694          *      Install a nil rebuild_header routine; it should never be
2695          *      called and serves only as a bug trap.
2696          */
2697 
2698         if (!dev->rebuild_header)
2699                 dev->rebuild_header = default_rebuild_header;
2700 
2701         /*
2702          *      Default initial state at registration is that the
2703          *      device is present.
2704          */
2705 
2706         set_bit(__LINK_STATE_PRESENT, &dev->state);
2707 
2708         dev->next = NULL;
2709         dev_init_scheduler(dev);
2710         write_lock_bh(&dev_base_lock);
2711         *dp = dev;
2712         dev_hold(dev);
2713         dev->reg_state = NETREG_REGISTERING;
2714         write_unlock_bh(&dev_base_lock);
2715 
2716         /* Notify protocols that a new device appeared. */
2717         notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2718 
2719         /* Finish registration after unlock */
2720         net_set_todo(dev);
2721         ret = 0;
2722 
2723 out:
2724         return ret;
2725 out_err:
2726         free_divert_blk(dev);
2727         goto out;
2728 }
2729 
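/*
 * A minimal sketch of driver-side registration; example_probe is
 * hypothetical.  Most drivers use the register_netdev() wrapper,
 * which takes the RTNL semaphore around the call above.
 */
static int example_probe(void)
{
        struct net_device *dev;
        int err;

        dev = alloc_etherdev(0);        /* no private area in this sketch */
        if (!dev)
                return -ENOMEM;

        /* fill in dev->open, dev->stop, dev->hard_start_xmit, ... */

        rtnl_lock();
        err = register_netdevice(dev);  /* RTNL held, as required */
        rtnl_unlock();
        if (err)
                free_netdev(dev);       /* safe: reg_state is UNINITIALIZED */
        return err;
}
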
2730 /*
2731  * netdev_wait_allrefs - wait until all references are gone.
2732  *
2733  * This is called when unregistering network devices.
2734  *
2735  * Any protocol or device that holds a reference should register
2736  * for netdevice notification, and cleanup and put back the
2737  * reference if they receive an UNREGISTER event.
2738  * We can get stuck here if buggy protocols don't correctly
2739  * call dev_put. 
2740  */
2741 static void netdev_wait_allrefs(struct net_device *dev)
2742 {
2743         unsigned long rebroadcast_time, warning_time;
2744 
2745         rebroadcast_time = warning_time = jiffies;
2746         while (atomic_read(&dev->refcnt) != 0) {
2747                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
2748                         rtnl_shlock();
2749                         rtnl_exlock();
2750 
2751                         /* Rebroadcast unregister notification */
2752                         notifier_call_chain(&netdev_chain,
2753                                             NETDEV_UNREGISTER, dev);
2754 
2755                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2756                                      &dev->state)) {
2757                                 /* We must not have linkwatch events
2758                                  * pending on unregister. If this
2759                                  * happens, we simply run the queue
2760                                  * unscheduled, resulting in a noop
2761                                  * for this device.
2762                                  */
2763                                 linkwatch_run_queue();
2764                         }
2765 
2766                         rtnl_exunlock();
2767                         rtnl_shunlock();
2768 
2769                         rebroadcast_time = jiffies;
2770                 }
2771 
2772                 current->state = TASK_INTERRUPTIBLE;
2773                 schedule_timeout(HZ / 4);
2774 
2775                 if (time_after(jiffies, warning_time + 10 * HZ)) {
2776                         printk(KERN_EMERG "unregister_netdevice: "
2777                                "waiting for %s to become free. Usage "
2778                                "count = %d\n",
2779                                dev->name, atomic_read(&dev->refcnt));
2780                         warning_time = jiffies;
2781                 }
2782         }
2783 }
2784 
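/*
 * A minimal sketch of the discipline the comment above asks for: a
 * subsystem holding a long-lived reference drops it from its netdev
 * notifier so netdev_wait_allrefs() can finish.  example_held_dev and
 * example_unreg_event are hypothetical.
 */
static struct net_device *example_held_dev;     /* taken with dev_hold() */

static int example_unreg_event(struct notifier_block *nb,
                               unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;

        if (event == NETDEV_UNREGISTER && dev == example_held_dev) {
                example_held_dev = NULL;
                dev_put(dev);                   /* release our reference */
        }
        return NOTIFY_DONE;
}
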
2785 /* The sequence is:
2786  *
2787  *      rtnl_lock();
2788  *      ...
2789  *      register_netdevice(x1);
2790  *      register_netdevice(x2);
2791  *      ...
2792  *      unregister_netdevice(y1);
2793  *      unregister_netdevice(y2);
2794  *      ...
2795  *      rtnl_unlock();
2796  *      free_netdev(y1);
2797  *      free_netdev(y2);
2798  *
2799  * We are invoked by rtnl_unlock() after it drops the semaphore.
2800  * This allows us to deal with problems:
2801  * 1) We can create/delete sysfs objects which invoke hotplug
2802  *    without deadlocking with linkwatch via keventd.
2803  * 2) Since we run with the RTNL semaphore not held, we can sleep
2804  *    safely in order to wait for the netdev refcnt to drop to zero.
2805  */
2806 static DECLARE_MUTEX(net_todo_run_mutex);
2807 void netdev_run_todo(void)
2808 {
2809         struct list_head list = LIST_HEAD_INIT(list);
2810 
2811         /* Safe outside mutex since we only care about entries that
2812          * this cpu put into queue while under RTNL.
2813          */
2814         if (list_empty(&net_todo_list))
2815                 return;
2816 
2817         /* Need to guard against multiple cpu's getting out of order. */
2818         down(&net_todo_run_mutex);
2819 
2820         /* Snapshot list, allow later requests */
2821         spin_lock(&net_todo_list_lock);
2822         list_splice_init(&net_todo_list, &list);
2823         spin_unlock(&net_todo_list_lock);
2824                 
2825         while (!list_empty(&list)) {
2826                 struct net_device *dev
2827                         = list_entry(list.next, struct net_device, todo_list);
2828                 list_del(&dev->todo_list);
2829 
2830                 switch(dev->reg_state) {
2831                 case NETREG_REGISTERING:
2832                         netdev_register_sysfs(dev);
2833                         dev->reg_state = NETREG_REGISTERED;
2834                         break;
2835 
2836                 case NETREG_UNREGISTERING:
2837                         netdev_unregister_sysfs(dev);
2838                         dev->reg_state = NETREG_UNREGISTERED;
2839 
2840                         netdev_wait_allrefs(dev);
2841 
2842                         /* paranoia */
2843                         BUG_ON(atomic_read(&dev->refcnt));
2844                         BUG_TRAP(!dev->ip_ptr);
2845                         BUG_TRAP(!dev->ip6_ptr);
2846                         BUG_TRAP(!dev->dn_ptr);
2847 
2848 
2849                         /* This must be the very last action;
2850                          * after it, 'dev' may point to freed memory.
2851                          */
2852                         if (dev->destructor)
2853                                 dev->destructor(dev);
2854                         break;
2855 
2856                 default:
2857                         printk(KERN_ERR "network todo '%s' but state %d\n",
2858                                dev->name, dev->reg_state);
2859                         break;
2860                 }
2861         }
2862 
2863         up(&net_todo_run_mutex);
2864 }
2865 
2866 /**
2867  *      free_netdev - free network device
2868  *      @dev: device
2869  *
2870  *      This function does the last stage of destroying an allocated device 
2871  *      interface. The reference to the device object is released.  
2872  *      If this is the last reference then it will be freed.
2873  */
2874 void free_netdev(struct net_device *dev)
2875 {
2876         /*  Compatibility with error handling in drivers */
2877         if (dev->reg_state == NETREG_UNINITIALIZED) {
2878                 kfree(dev);
2879                 return;
2880         }
2881 
2882         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
2883         dev->reg_state = NETREG_RELEASED;
2884 
2885         /* will free via class release */
2886         class_device_put(&dev->class_dev);
2887 }
2888  
2889 /* Synchronize with packet receive processing. */
2890 void synchronize_net(void) 
2891 {
2892         might_sleep();
2893         synchronize_kernel();
2894 }
2895 
2896 /**
2897  *      unregister_netdevice - remove device from the kernel
2898  *      @dev: device
2899  *
2900  *      This function shuts down a device interface and removes it
2901  *      from the kernel tables. On success 0 is returned, on a failure
2902  *      a negative errno code is returned.
2903  *
2904  *      Callers must hold the rtnl semaphore.  See the comment at the
2905  *      end of Space.c for details about the locking.  You may want
2906  *      unregister_netdev() instead of this.
2907  */
2908 
2909 int unregister_netdevice(struct net_device *dev)
2910 {
2911         struct net_device *d, **dp;
2912 
2913         BUG_ON(dev_boot_phase);
2914         ASSERT_RTNL();
2915 
2916         /* Some devices call this without having registered, to unwind failed init. */
2917         if (dev->reg_state == NETREG_UNINITIALIZED) {
2918                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
2919                                   "was registered\n", dev->name, dev);
2920                 return -ENODEV;
2921         }
2922 
2923         BUG_ON(dev->reg_state != NETREG_REGISTERED);
2924 
2925         /* If device is running, close it first. */
2926         if (dev->flags & IFF_UP)
2927                 dev_close(dev);
2928 
2929         /* And unlink it from device chain. */
2930         for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
2931                 if (d == dev) {
2932                         write_lock_bh(&dev_base_lock);
2933                         *dp = d->next;
2934                         write_unlock_bh(&dev_base_lock);
2935                         break;
2936                 }
2937         }
2938         if (!d) {
2939                 printk(KERN_ERR "unregister net_device: '%s' not found\n",
2940                        dev->name);
2941                 return -ENODEV;
2942         }
2943 
2944         dev->reg_state = NETREG_UNREGISTERING;
2945 
2946         synchronize_net();
2947 
2948 #ifdef CONFIG_NET_FASTROUTE
2949         dev_clear_fastroute(dev);
2950 #endif
2951 
2952         /* Shutdown queueing discipline. */
2953         dev_shutdown(dev);
2954 
2955         
2956         /* Notify protocols that we are about to destroy
2957            this device. They should clean up all of their state.
2958         */
2959         notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
2960         
2961         /*
2962          *      Flush the multicast chain
2963          */
2964         dev_mc_discard(dev);
2965 
2966         if (dev->uninit)
2967                 dev->uninit(dev);
2968 
2969         /* Notifier chain MUST detach us from master device. */
2970         BUG_TRAP(!dev->master);
2971 
2972         free_divert_blk(dev);
2973 
2974         /* Finish processing unregister after unlock */
2975         net_set_todo(dev);
2976 
2977         dev_put(dev);
2978         return 0;
2979 }
2980 
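/*
 * A minimal sketch of driver-side teardown matching the sequence
 * comment above netdev_run_todo(); example_remove is hypothetical.
 */
static void example_remove(struct net_device *dev)
{
        rtnl_lock();
        unregister_netdevice(dev);      /* queues the todo entry */
        rtnl_unlock();                  /* runs netdev_run_todo(), waits for refs */
        free_netdev(dev);               /* reg_state is now NETREG_UNREGISTERED */
}
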
2981 
2982 /*
2983  *      Initialize the DEV module. At boot time this walks the device list and
2984  *      unhooks any devices that fail to initialise (normally hardware not
2985  *      present) and leaves us with a valid list of present and active devices.
2986  *
2987  */
2988 
2989 /*
2990  *       This is called single threaded during boot, so no need
2991  *       to take the rtnl semaphore.
2992  */
2993 static int __init net_dev_init(void)
2994 {
2995         int i, rc = -ENOMEM;
2996 
2997         BUG_ON(!dev_boot_phase);
2998 
2999         if (dev_proc_init())
3000                 goto out;
3001 
3002         if (netdev_sysfs_init())
3003                 goto out;
3004 
3005         INIT_LIST_HEAD(&ptype_all);
3006         for (i = 0; i < 16; i++) 
3007                 INIT_LIST_HEAD(&ptype_base[i]);
3008 
3009         /*
3010          *      Initialise the packet receive queues.
3011          */
3012 
3013         for (i = 0; i < NR_CPUS; i++) {
3014                 struct softnet_data *queue;
3015 
3016                 queue = &per_cpu(softnet_data, i);
3017                 skb_queue_head_init(&queue->input_pkt_queue);
3018                 queue->throttle = 0;
3019                 queue->cng_level = 0;
3020                 queue->avg_blog = 10; /* arbitrary non-zero */
3021                 queue->completion_queue = NULL;
3022                 INIT_LIST_HEAD(&queue->poll_list);
3023                 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3024                 queue->backlog_dev.weight = weight_p;
3025                 queue->backlog_dev.poll = process_backlog;
3026                 atomic_set(&queue->backlog_dev.refcnt, 1);
3027         }
3028 
3029 #ifdef OFFLINE_SAMPLE
3030         samp_timer.expires = jiffies + (10 * HZ);
3031         add_timer(&samp_timer);
3032 #endif
3033 
3034         dev_boot_phase = 0;
3035 
3036         open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3037         open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3038 
3039         dst_init();
3040         dev_mcast_init();
3041 
3042 #ifdef CONFIG_NET_SCHED
3043         pktsched_init();
3044 #endif
3045         rc = 0;
3046 out:
3047         return rc;
3048 }
3049 
3050 subsys_initcall(net_dev_init);
3051 
3052 EXPORT_SYMBOL(__dev_get);
3053 EXPORT_SYMBOL(__dev_get_by_flags);
3054 EXPORT_SYMBOL(__dev_get_by_index);
3055 EXPORT_SYMBOL(__dev_get_by_name);
3056 EXPORT_SYMBOL(__dev_remove_pack);
3057 EXPORT_SYMBOL(__skb_linearize);
3058 EXPORT_SYMBOL(call_netdevice_notifiers);
3059 EXPORT_SYMBOL(dev_add_pack);
3060 EXPORT_SYMBOL(__dev_alloc);
3061 EXPORT_SYMBOL(dev_alloc_name);
3062 EXPORT_SYMBOL(dev_close);
3063 EXPORT_SYMBOL(dev_get_by_flags);
3064 EXPORT_SYMBOL(dev_get_by_index);
3065 EXPORT_SYMBOL(dev_get_by_name);
3066 EXPORT_SYMBOL(dev_getbyhwaddr);
3067 EXPORT_SYMBOL(dev_ioctl);
3068 EXPORT_SYMBOL(dev_new_index);
3069 EXPORT_SYMBOL(dev_open);
3070 EXPORT_SYMBOL(dev_queue_xmit);
3071 EXPORT_SYMBOL(dev_queue_xmit_nit);
3072 EXPORT_SYMBOL(dev_remove_pack);
3073 EXPORT_SYMBOL(dev_set_allmulti);
3074 EXPORT_SYMBOL(dev_set_promiscuity);
3075 EXPORT_SYMBOL(free_netdev);
3076 EXPORT_SYMBOL(netdev_boot_setup_check);
3077 EXPORT_SYMBOL(netdev_set_master);
3078 EXPORT_SYMBOL(netdev_state_change);
3079 EXPORT_SYMBOL(netif_receive_skb);
3080 EXPORT_SYMBOL(netif_rx);
3081 EXPORT_SYMBOL(register_gifconf);
3082 EXPORT_SYMBOL(register_netdevice);
3083 EXPORT_SYMBOL(register_netdevice_notifier);
3084 EXPORT_SYMBOL(skb_checksum_help);
3085 EXPORT_SYMBOL(synchronize_net);
3086 EXPORT_SYMBOL(unregister_netdevice);
3087 EXPORT_SYMBOL(unregister_netdevice_notifier);
3088 
3089 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3090 EXPORT_SYMBOL(br_handle_frame_hook);
3091 #endif
3092 /* for 802.1q VLAN support */
3093 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
3094 EXPORT_SYMBOL(dev_change_flags);
3095 #endif
3096 #ifdef CONFIG_KMOD
3097 EXPORT_SYMBOL(dev_load);
3098 #endif
3099 #ifdef CONFIG_NET_HW_FLOWCONTROL
3100 EXPORT_SYMBOL(netdev_dropping);
3101 EXPORT_SYMBOL(netdev_fc_xoff);
3102 EXPORT_SYMBOL(netdev_register_fc);
3103 EXPORT_SYMBOL(netdev_unregister_fc);
3104 #endif
3105 #ifdef CONFIG_NET_FASTROUTE
3106 EXPORT_SYMBOL(netdev_fastroute);
3107 EXPORT_SYMBOL(netdev_fastroute_obstacles);
3108 #endif
3109 
3110 EXPORT_PER_CPU_SYMBOL(softnet_data);
3111 
