~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/packet/af_packet.c

Version: ~ [ linux-5.10-rc5 ] ~ [ linux-5.9.10 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.79 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.159 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.208 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.245 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.245 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.85 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  3  *              operating system.  INET is implemented using the  BSD Socket
  4  *              interface as the means of communication with the user level.
  5  *
  6  *              PACKET - implements raw packet sockets.
  7  *
  8  * Authors:     Ross Biro
  9  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 10  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 11  *
 12  * Fixes:
 13  *              Alan Cox        :       verify_area() now used correctly
 14  *              Alan Cox        :       new skbuff lists, look ma no backlogs!
 15  *              Alan Cox        :       tidied skbuff lists.
 16  *              Alan Cox        :       Now uses generic datagram routines I
 17  *                                      added. Also fixed the peek/read crash
 18  *                                      from all old Linux datagram code.
 19  *              Alan Cox        :       Uses the improved datagram code.
 20  *              Alan Cox        :       Added NULL's for socket options.
 21  *              Alan Cox        :       Re-commented the code.
 22  *              Alan Cox        :       Use new kernel side addressing
 23  *              Rob Janssen     :       Correct MTU usage.
 24  *              Dave Platt      :       Counter leaks caused by incorrect
 25  *                                      interrupt locking and some slightly
 26  *                                      dubious gcc output. Can you read
 27  *                                      compiler: it said _VOLATILE_
 28  *      Richard Kooijman        :       Timestamp fixes.
 29  *              Alan Cox        :       New buffers. Use sk->mac.raw.
 30  *              Alan Cox        :       sendmsg/recvmsg support.
 31  *              Alan Cox        :       Protocol setting support
 32  *      Alexey Kuznetsov        :       Untied from IPv4 stack.
 33  *      Cyrus Durgin            :       Fixed kerneld for kmod.
 34  *      Michal Ostrowski        :       Module initialization cleanup.
 35  *         Ulises Alonso        :       Frame number limit removal and
 36  *                                      packet_set_ring memory leak.
 37  *              Eric Biederman  :       Allow for > 8 byte hardware addresses.
 38  *                                      The convention is that longer addresses
 39  *                                      will simply extend the hardware address
 40  *                                      byte arrays at the end of sockaddr_ll
 41  *                                      and packet_mreq.
 42  *              Johann Baudy    :       Added TX RING.
 43  *
 44  *              This program is free software; you can redistribute it and/or
 45  *              modify it under the terms of the GNU General Public License
 46  *              as published by the Free Software Foundation; either version
 47  *              2 of the License, or (at your option) any later version.
 48  *
 49  */
 50 
 51 #include <linux/types.h>
 52 #include <linux/mm.h>
 53 #include <linux/capability.h>
 54 #include <linux/fcntl.h>
 55 #include <linux/socket.h>
 56 #include <linux/in.h>
 57 #include <linux/inet.h>
 58 #include <linux/netdevice.h>
 59 #include <linux/if_packet.h>
 60 #include <linux/wireless.h>
 61 #include <linux/kernel.h>
 62 #include <linux/kmod.h>
 63 #include <net/net_namespace.h>
 64 #include <net/ip.h>
 65 #include <net/protocol.h>
 66 #include <linux/skbuff.h>
 67 #include <net/sock.h>
 68 #include <linux/errno.h>
 69 #include <linux/timer.h>
 70 #include <asm/system.h>
 71 #include <asm/uaccess.h>
 72 #include <asm/ioctls.h>
 73 #include <asm/page.h>
 74 #include <asm/cacheflush.h>
 75 #include <asm/io.h>
 76 #include <linux/proc_fs.h>
 77 #include <linux/seq_file.h>
 78 #include <linux/poll.h>
 79 #include <linux/module.h>
 80 #include <linux/init.h>
 81 #include <linux/mutex.h>
 82 
 83 #ifdef CONFIG_INET
 84 #include <net/inet_common.h>
 85 #endif
 86 
 87 /*
 88    Assumptions:
 89    - if device has no dev->hard_header routine, it adds and removes ll header
 90      inside itself. In this case ll header is invisible outside of device,
 91      but higher levels still should reserve dev->hard_header_len.
 92      Some devices are clever enough to reallocate skb, when header
 93      will not fit to reserved space (tunnel), another ones are silly
 94      (PPP).
 95    - packet socket receives packets with pulled ll header,
 96      so that SOCK_RAW should push it back.
 97 
 98 On receive:
 99 -----------
100 
101 Incoming, dev->hard_header!=NULL
102    mac_header -> ll header
103    data       -> data
104 
105 Outgoing, dev->hard_header!=NULL
106    mac_header -> ll header
107    data       -> ll header
108 
109 Incoming, dev->hard_header==NULL
110    mac_header -> UNKNOWN position. It is very likely, that it points to ll
111                  header.  PPP does this, which is wrong because it introduces
112                  asymmetry between rx and tx paths.
113    data       -> data
114 
115 Outgoing, dev->hard_header==NULL
116    mac_header -> data. ll header is still not built!
117    data       -> data
118 
119 Resume
120   If dev->hard_header==NULL we are unlikely to restore sensible ll header.
121 
122 
123 On transmit:
124 ------------
125 
126 dev->hard_header != NULL
127    mac_header -> ll header
128    data       -> ll header
129 
130 dev->hard_header == NULL (ll header is added by device, we cannot control it)
131    mac_header -> data
132    data       -> data
133 
134    We should set nh.raw on output to correct position,
135    packet classifier depends on it.
136  */
137 
138 /* Private packet socket structures. */
139 
/*
 * Node of the singly linked list that hangs off packet_sock::mclist.
 * The ifindex/type/alen/addr fields parallel those of
 * struct packet_mreq_max; presumably one node is kept per membership
 * request -- confirm against the membership setsockopt handling, which
 * is outside this part of the file.
 */
140 struct packet_mclist {
        /* next node in the list */
141         struct packet_mclist    *next;
142         int                     ifindex;
        /* NOTE(review): looks like a reference count -- confirm */
143         int                     count;
144         unsigned short          type;
        /* presumably the number of valid bytes in addr[] -- confirm */
145         unsigned short          alen;
146         unsigned char           addr[MAX_ADDR_LEN];
147 };
148 /* Identical to struct packet_mreq except that it has
149  * a longer address field (MAX_ADDR_LEN bytes).
150  */
151 struct packet_mreq_max {
152         int             mr_ifindex;
153         unsigned short  mr_type;
154         unsigned short  mr_alen;
155         unsigned char   mr_address[MAX_ADDR_LEN];
156 };
157 
158 #ifdef CONFIG_PACKET_MMAP
159 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
160                 int closing, int tx_ring);
161 
/*
 * State of one memory-mapped frame ring (packet_sock keeps one for rx
 * and one for tx).
 *
 * Frames are addressed by a flat index: packet_lookup_frame() picks
 * pg_vec[index / frames_per_block] and then the frame at byte offset
 * (index % frames_per_block) * frame_size inside that block.
 */
162 struct packet_ring_buffer {
        /* array of block base addresses, indexed by block number */
163         char                    **pg_vec;
        /* index of the current frame; wraps to 0 after frame_max */
164         unsigned int            head;
165         unsigned int            frames_per_block;
        /* size in bytes of one frame slot */
166         unsigned int            frame_size;
        /* highest valid frame index (not a count) */
167         unsigned int            frame_max;
168 
        /* NOTE(review): block geometry, set up outside this excerpt -- confirm units */
169         unsigned int            pg_vec_order;
170         unsigned int            pg_vec_pages;
171         unsigned int            pg_vec_len;
172 
        /* decremented by tpacket_destruct_skb() when a tx skb is released */
173         atomic_t                pending;
174 };
175 
176 struct packet_sock;
177 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
178 #endif
179 
180 static void packet_flush_mclist(struct sock *sk);
181 
/*
 * Per-socket state of an AF_PACKET socket. pkt_sk() converts a
 * struct sock pointer to this type with a plain cast, which is why
 * 'sk' must stay the first member.
 */
182 struct packet_sock {
183         /* struct sock has to be the first member of packet_sock */
184         struct sock             sk;
        /* tp_packets / tp_drops are updated under sk_receive_queue.lock */
185         struct tpacket_stats    stats;
186 #ifdef CONFIG_PACKET_MMAP
187         struct packet_ring_buffer       rx_ring;
188         struct packet_ring_buffer       tx_ring;
        /* when non-zero, tpacket_rcv() may also queue a copy of packets
         * that do not fit into a ring frame */
189         int                     copy_thresh;
190 #endif
        /* hook whose af_packet_priv points back at the socket */
191         struct packet_type      prot_hook;
192         spinlock_t              bind_lock;
193         struct mutex            pg_vec_lock;
194         unsigned int            running:1,      /* prot_hook is attached*/
195                                 auxdata:1,
        /* origdev: report orig_dev->ifindex instead of dev->ifindex */
196                                 origdev:1;
197         int                     ifindex;        /* bound device         */
198         __be16                  num;
199         struct packet_mclist    *mclist;
200 #ifdef CONFIG_PACKET_MMAP
201         atomic_t                mapped;
        /* selects the tpacket_hdr / tpacket2_hdr frame header layout */
202         enum tpacket_versions   tp_version;
        /* tp_hdrlen and tp_reserve both enter the frame offset
         * computation in tpacket_rcv() */
203         unsigned int            tp_hdrlen;
204         unsigned int            tp_reserve;
205         unsigned int            tp_loss:1;
206 #endif
207 };
208 
/*
 * Layout of the per-skb control block (skb->cb) as used by packet
 * sockets on the non-mmap receive paths.
 */
209 struct packet_skb_cb {
        /* skb->len as seen by packet_rcv() before trimming to the snap length */
210         unsigned int origlen;
        /* source address: 'pkt' is filled by packet_rcv_spkt(),
         * 'll' by packet_rcv() */
211         union {
212                 struct sockaddr_pkt pkt;
213                 struct sockaddr_ll ll;
214         } sa;
215 };
216 
/* View the control block of @__skb as a struct packet_skb_cb. */
217 #define PACKET_SKB_CB(__skb)    ((struct packet_skb_cb *)((__skb)->cb))
218 
219 #ifdef CONFIG_PACKET_MMAP
220 
221 static void __packet_set_status(struct packet_sock *po, void *frame, int status)
222 {
223         union {
224                 struct tpacket_hdr *h1;
225                 struct tpacket2_hdr *h2;
226                 void *raw;
227         } h;
228 
229         h.raw = frame;
230         switch (po->tp_version) {
231         case TPACKET_V1:
232                 h.h1->tp_status = status;
233                 flush_dcache_page(virt_to_page(&h.h1->tp_status));
234                 break;
235         case TPACKET_V2:
236                 h.h2->tp_status = status;
237                 flush_dcache_page(virt_to_page(&h.h2->tp_status));
238                 break;
239         default:
240                 pr_err("TPACKET version not supported\n");
241                 BUG();
242         }
243 
244         smp_wmb();
245 }
246 
247 static int __packet_get_status(struct packet_sock *po, void *frame)
248 {
249         union {
250                 struct tpacket_hdr *h1;
251                 struct tpacket2_hdr *h2;
252                 void *raw;
253         } h;
254 
255         smp_rmb();
256 
257         h.raw = frame;
258         switch (po->tp_version) {
259         case TPACKET_V1:
260                 flush_dcache_page(virt_to_page(&h.h1->tp_status));
261                 return h.h1->tp_status;
262         case TPACKET_V2:
263                 flush_dcache_page(virt_to_page(&h.h2->tp_status));
264                 return h.h2->tp_status;
265         default:
266                 pr_err("TPACKET version not supported\n");
267                 BUG();
268                 return 0;
269         }
270 }
271 
272 static void *packet_lookup_frame(struct packet_sock *po,
273                 struct packet_ring_buffer *rb,
274                 unsigned int position,
275                 int status)
276 {
277         unsigned int pg_vec_pos, frame_offset;
278         union {
279                 struct tpacket_hdr *h1;
280                 struct tpacket2_hdr *h2;
281                 void *raw;
282         } h;
283 
284         pg_vec_pos = position / rb->frames_per_block;
285         frame_offset = position % rb->frames_per_block;
286 
287         h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
288 
289         if (status != __packet_get_status(po, h.raw))
290                 return NULL;
291 
292         return h.raw;
293 }
294 
295 static inline void *packet_current_frame(struct packet_sock *po,
296                 struct packet_ring_buffer *rb,
297                 int status)
298 {
299         return packet_lookup_frame(po, rb, rb->head, status);
300 }
301 
302 static inline void *packet_previous_frame(struct packet_sock *po,
303                 struct packet_ring_buffer *rb,
304                 int status)
305 {
306         unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
307         return packet_lookup_frame(po, rb, previous, status);
308 }
309 
310 static inline void packet_increment_head(struct packet_ring_buffer *buff)
311 {
312         buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
313 }
314 
315 #endif
316 
/*
 * Convert a generic socket pointer to its packet socket. This is a
 * plain cast; it relies on struct sock being the first member of
 * struct packet_sock.
 */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
        struct packet_sock *po = (struct packet_sock *)sk;

        return po;
}
321 
322 static void packet_sock_destruct(struct sock *sk)
323 {
324         WARN_ON(atomic_read(&sk->sk_rmem_alloc));
325         WARN_ON(atomic_read(&sk->sk_wmem_alloc));
326 
327         if (!sock_flag(sk, SOCK_DEAD)) {
328                 pr_err("Attempt to release alive packet socket: %p\n", sk);
329                 return;
330         }
331 
332         sk_refcnt_debug_dec(sk);
333 }
334 
335 
336 static const struct proto_ops packet_ops;
337 
338 static const struct proto_ops packet_ops_spkt;
339 
340 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
341                            struct packet_type *pt, struct net_device *orig_dev)
342 {
343         struct sock *sk;
344         struct sockaddr_pkt *spkt;
345 
346         /*
347          *      When we registered the protocol we saved the socket in the data
348          *      field for just this event.
349          */
350 
351         sk = pt->af_packet_priv;
352 
353         /*
354          *      Yank back the headers [hope the device set this
355          *      right or kerboom...]
356          *
357          *      Incoming packets have ll header pulled,
358          *      push it back.
359          *
360          *      For outgoing ones skb->data == skb_mac_header(skb)
361          *      so that this procedure is noop.
362          */
363 
364         if (skb->pkt_type == PACKET_LOOPBACK)
365                 goto out;
366 
367         if (dev_net(dev) != sock_net(sk))
368                 goto out;
369 
370         skb = skb_share_check(skb, GFP_ATOMIC);
371         if (skb == NULL)
372                 goto oom;
373 
374         /* drop any routing info */
375         skb_dst_drop(skb);
376 
377         /* drop conntrack reference */
378         nf_reset(skb);
379 
380         spkt = &PACKET_SKB_CB(skb)->sa.pkt;
381 
382         skb_push(skb, skb->data - skb_mac_header(skb));
383 
384         /*
385          *      The SOCK_PACKET socket receives _all_ frames.
386          */
387 
388         spkt->spkt_family = dev->type;
389         strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
390         spkt->spkt_protocol = skb->protocol;
391 
392         /*
393          *      Charge the memory to the socket. This is done specifically
394          *      to prevent sockets using all the memory up.
395          */
396 
397         if (sock_queue_rcv_skb(sk, skb) == 0)
398                 return 0;
399 
400 out:
401         kfree_skb(skb);
402 oom:
403         return 0;
404 }
405 
406 
407 /*
408  *      Output a raw packet to a device layer. This bypasses all the other
409  *      protocol layers and you must therefore supply it with a complete frame
410  */
411 
412 static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
413                                struct msghdr *msg, size_t len)
414 {
415         struct sock *sk = sock->sk;
416         struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
417         struct sk_buff *skb;
418         struct net_device *dev;
419         __be16 proto = 0;
420         int err;
421 
422         /*
423          *      Get and verify the address.
424          */
425 
426         if (saddr) {
427                 if (msg->msg_namelen < sizeof(struct sockaddr))
428                         return -EINVAL;
429                 if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
430                         proto = saddr->spkt_protocol;
431         } else
432                 return -ENOTCONN;       /* SOCK_PACKET must be sent giving an address */
433 
434         /*
435          *      Find the device first to size check it
436          */
437 
438         saddr->spkt_device[13] = 0;
439         dev = dev_get_by_name(sock_net(sk), saddr->spkt_device);
440         err = -ENODEV;
441         if (dev == NULL)
442                 goto out_unlock;
443 
444         err = -ENETDOWN;
445         if (!(dev->flags & IFF_UP))
446                 goto out_unlock;
447 
448         /*
449          * You may not queue a frame bigger than the mtu. This is the lowest level
450          * raw protocol and you must do your own fragmentation at this level.
451          */
452 
453         err = -EMSGSIZE;
454         if (len > dev->mtu + dev->hard_header_len)
455                 goto out_unlock;
456 
457         err = -ENOBUFS;
458         skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
459 
460         /*
461          * If the write buffer is full, then tough. At this level the user
462          * gets to deal with the problem - do your own algorithmic backoffs.
463          * That's far more flexible.
464          */
465 
466         if (skb == NULL)
467                 goto out_unlock;
468 
469         /*
470          *      Fill it in
471          */
472 
473         /* FIXME: Save some space for broken drivers that write a
474          * hard header at transmission time by themselves. PPP is the
475          * notable one here. This should really be fixed at the driver level.
476          */
477         skb_reserve(skb, LL_RESERVED_SPACE(dev));
478         skb_reset_network_header(skb);
479 
480         /* Try to align data part correctly */
481         if (dev->header_ops) {
482                 skb->data -= dev->hard_header_len;
483                 skb->tail -= dev->hard_header_len;
484                 if (len < dev->hard_header_len)
485                         skb_reset_network_header(skb);
486         }
487 
488         /* Returns -EFAULT on error */
489         err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
490         skb->protocol = proto;
491         skb->dev = dev;
492         skb->priority = sk->sk_priority;
493         if (err)
494                 goto out_free;
495 
496         /*
497          *      Now send it
498          */
499 
500         dev_queue_xmit(skb);
501         dev_put(dev);
502         return len;
503 
504 out_free:
505         kfree_skb(skb);
506 out_unlock:
507         if (dev)
508                 dev_put(dev);
509         return err;
510 }
511 
512 static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
513                                       unsigned int res)
514 {
515         struct sk_filter *filter;
516 
517         rcu_read_lock_bh();
518         filter = rcu_dereference(sk->sk_filter);
519         if (filter != NULL)
520                 res = sk_run_filter(skb, filter->insns, filter->len);
521         rcu_read_unlock_bh();
522 
523         return res;
524 }
525 
526 /*
527    This function makes lazy skb cloning in hope that most of packets
528    are discarded by BPF.
529 
530    Note tricky part: we DO mangle shared skb! skb->data, skb->len
531    and skb->cb are mangled. It works because (and until) packets
532    falling here are owned by current CPU. Output packets are cloned
533    by dev_queue_xmit_nit(), input packets are processed by net_bh
534    sequentially, so that if we return skb to original state on exit,
535    we will not harm anyone.
536  */
537 
/*
 * Receive hook for packet sockets that do not use the mmap rx ring.
 *
 * @skb:      packet; possibly shared, so data/len are restored before
 *            it is released on the drop paths
 * @dev:      device the packet is associated with
 * @pt:       hook; pt->af_packet_priv is the owning socket
 * @orig_dev: device reported in sll_ifindex when po->origdev is set
 *
 * Loopback packets and packets from another network namespace are
 * dropped. The head of the packet is adjusted according to the socket
 * type, the socket filter decides the snap length (0 drops the packet
 * without counting it in tp_drops), and the packet is then described
 * by a struct sockaddr_ll in skb->cb, trimmed and queued.
 *
 * Always returns 0.
 */
538 static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
539                       struct packet_type *pt, struct net_device *orig_dev)
540 {
541         struct sock *sk;
542         struct sockaddr_ll *sll;
543         struct packet_sock *po;
        /* remembered so the shared skb can be put back as it was */
544         u8 *skb_head = skb->data;
545         int skb_len = skb->len;
546         unsigned int snaplen, res;
547 
548         if (skb->pkt_type == PACKET_LOOPBACK)
549                 goto drop;
550 
551         sk = pt->af_packet_priv;
552         po = pkt_sk(sk);
553 
554         if (dev_net(dev) != sock_net(sk))
555                 goto drop;
556 
557         skb->dev = dev;
558 
559         if (dev->header_ops) {
560                 /* The device has an explicit notion of ll header,
561                    exported to higher levels.
562 
563                    Otherwise, the device hides details of its frame
564                    structure, so that corresponding packet head
565                    never delivered to user.
566                  */
567                 if (sk->sk_type != SOCK_DGRAM)
568                         skb_push(skb, skb->data - skb_mac_header(skb));
569                 else if (skb->pkt_type == PACKET_OUTGOING) {
570                         /* Special case: outgoing packets have ll header at head */
571                         skb_pull(skb, skb_network_offset(skb));
572                 }
573         }
574 
575         snaplen = skb->len;
576 
        /* the filter result caps the number of bytes kept */
577         res = run_filter(skb, sk, snaplen);
578         if (!res)
579                 goto drop_n_restore;
580         if (snaplen > res)
581                 snaplen = res;
582 
        /* receive buffer exhausted: count the packet as dropped */
583         if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
584             (unsigned)sk->sk_rcvbuf)
585                 goto drop_n_acct;
586 
        /*
         * A shared skb must not be queued: clone it, undo the head
         * adjustment on the original and release our reference to it.
         */
587         if (skb_shared(skb)) {
588                 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
589                 if (nskb == NULL)
590                         goto drop_n_acct;
591 
592                 if (skb_head != skb->data) {
593                         skb->data = skb_head;
594                         skb->len = skb_len;
595                 }
596                 kfree_skb(skb);
597                 skb = nskb;
598         }
599 
        /* compile-time check that the address, extended by
         * MAX_ADDR_LEN - 8 bytes, still fits in skb->cb */
600         BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
601                      sizeof(skb->cb));
602 
603         sll = &PACKET_SKB_CB(skb)->sa.ll;
604         sll->sll_family = AF_PACKET;
605         sll->sll_hatype = dev->type;
606         sll->sll_protocol = skb->protocol;
607         sll->sll_pkttype = skb->pkt_type;
608         if (unlikely(po->origdev))
609                 sll->sll_ifindex = orig_dev->ifindex;
610         else
611                 sll->sll_ifindex = dev->ifindex;
612 
613         sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
614 
        /* record the untrimmed length before cutting to snaplen */
615         PACKET_SKB_CB(skb)->origlen = skb->len;
616 
617         if (pskb_trim(skb, snaplen))
618                 goto drop_n_acct;
619 
620         skb_set_owner_r(skb, sk);
621         skb->dev = NULL;
622         skb_dst_drop(skb);
623 
624         /* drop conntrack reference */
625         nf_reset(skb);
626 
        /* the queue lock also covers the statistics counter */
627         spin_lock(&sk->sk_receive_queue.lock);
628         po->stats.tp_packets++;
629         __skb_queue_tail(&sk->sk_receive_queue, skb);
630         spin_unlock(&sk->sk_receive_queue.lock);
631         sk->sk_data_ready(sk, skb->len);
632         return 0;
633 
634 drop_n_acct:
635         spin_lock(&sk->sk_receive_queue.lock);
636         po->stats.tp_drops++;
637         spin_unlock(&sk->sk_receive_queue.lock);
638 
639 drop_n_restore:
        /* put a shared skb back into the state the caller handed over */
640         if (skb_head != skb->data && skb_shared(skb)) {
641                 skb->data = skb_head;
642                 skb->len = skb_len;
643         }
644 drop:
645         consume_skb(skb);
646         return 0;
647 }
648 
649 #ifdef CONFIG_PACKET_MMAP
650 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
651                        struct packet_type *pt, struct net_device *orig_dev)
652 {
653         struct sock *sk;
654         struct packet_sock *po;
655         struct sockaddr_ll *sll;
656         union {
657                 struct tpacket_hdr *h1;
658                 struct tpacket2_hdr *h2;
659                 void *raw;
660         } h;
661         u8 *skb_head = skb->data;
662         int skb_len = skb->len;
663         unsigned int snaplen, res;
664         unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
665         unsigned short macoff, netoff, hdrlen;
666         struct sk_buff *copy_skb = NULL;
667         struct timeval tv;
668         struct timespec ts;
669 
670         if (skb->pkt_type == PACKET_LOOPBACK)
671                 goto drop;
672 
673         sk = pt->af_packet_priv;
674         po = pkt_sk(sk);
675 
676         if (dev_net(dev) != sock_net(sk))
677                 goto drop;
678 
679         if (dev->header_ops) {
680                 if (sk->sk_type != SOCK_DGRAM)
681                         skb_push(skb, skb->data - skb_mac_header(skb));
682                 else if (skb->pkt_type == PACKET_OUTGOING) {
683                         /* Special case: outgoing packets have ll header at head */
684                         skb_pull(skb, skb_network_offset(skb));
685                 }
686         }
687 
688         if (skb->ip_summed == CHECKSUM_PARTIAL)
689                 status |= TP_STATUS_CSUMNOTREADY;
690 
691         snaplen = skb->len;
692 
693         res = run_filter(skb, sk, snaplen);
694         if (!res)
695                 goto drop_n_restore;
696         if (snaplen > res)
697                 snaplen = res;
698 
699         if (sk->sk_type == SOCK_DGRAM) {
700                 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
701                                   po->tp_reserve;
702         } else {
703                 unsigned maclen = skb_network_offset(skb);
704                 netoff = TPACKET_ALIGN(po->tp_hdrlen +
705                                        (maclen < 16 ? 16 : maclen)) +
706                         po->tp_reserve;
707                 macoff = netoff - maclen;
708         }
709 
710         if (macoff + snaplen > po->rx_ring.frame_size) {
711                 if (po->copy_thresh &&
712                     atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
713                     (unsigned)sk->sk_rcvbuf) {
714                         if (skb_shared(skb)) {
715                                 copy_skb = skb_clone(skb, GFP_ATOMIC);
716                         } else {
717                                 copy_skb = skb_get(skb);
718                                 skb_head = skb->data;
719                         }
720                         if (copy_skb)
721                                 skb_set_owner_r(copy_skb, sk);
722                 }
723                 snaplen = po->rx_ring.frame_size - macoff;
724                 if ((int)snaplen < 0)
725                         snaplen = 0;
726         }
727 
728         spin_lock(&sk->sk_receive_queue.lock);
729         h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
730         if (!h.raw)
731                 goto ring_is_full;
732         packet_increment_head(&po->rx_ring);
733         po->stats.tp_packets++;
734         if (copy_skb) {
735                 status |= TP_STATUS_COPY;
736                 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
737         }
738         if (!po->stats.tp_drops)
739                 status &= ~TP_STATUS_LOSING;
740         spin_unlock(&sk->sk_receive_queue.lock);
741 
742         skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
743 
744         switch (po->tp_version) {
745         case TPACKET_V1:
746                 h.h1->tp_len = skb->len;
747                 h.h1->tp_snaplen = snaplen;
748                 h.h1->tp_mac = macoff;
749                 h.h1->tp_net = netoff;
750                 if (skb->tstamp.tv64)
751                         tv = ktime_to_timeval(skb->tstamp);
752                 else
753                         do_gettimeofday(&tv);
754                 h.h1->tp_sec = tv.tv_sec;
755                 h.h1->tp_usec = tv.tv_usec;
756                 hdrlen = sizeof(*h.h1);
757                 break;
758         case TPACKET_V2:
759                 h.h2->tp_len = skb->len;
760                 h.h2->tp_snaplen = snaplen;
761                 h.h2->tp_mac = macoff;
762                 h.h2->tp_net = netoff;
763                 if (skb->tstamp.tv64)
764                         ts = ktime_to_timespec(skb->tstamp);
765                 else
766                         getnstimeofday(&ts);
767                 h.h2->tp_sec = ts.tv_sec;
768                 h.h2->tp_nsec = ts.tv_nsec;
769                 h.h2->tp_vlan_tci = skb->vlan_tci;
770                 h.h2->tp_padding = 0;
771                 hdrlen = sizeof(*h.h2);
772                 break;
773         default:
774                 BUG();
775         }
776 
777         sll = h.raw + TPACKET_ALIGN(hdrlen);
778         sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
779         sll->sll_family = AF_PACKET;
780         sll->sll_hatype = dev->type;
781         sll->sll_protocol = skb->protocol;
782         sll->sll_pkttype = skb->pkt_type;
783         if (unlikely(po->origdev))
784                 sll->sll_ifindex = orig_dev->ifindex;
785         else
786                 sll->sll_ifindex = dev->ifindex;
787 
788         __packet_set_status(po, h.raw, status);
789         smp_mb();
790         {
791                 struct page *p_start, *p_end;
792                 u8 *h_end = h.raw + macoff + snaplen - 1;
793 
794                 p_start = virt_to_page(h.raw);
795                 p_end = virt_to_page(h_end);
796                 while (p_start <= p_end) {
797                         flush_dcache_page(p_start);
798                         p_start++;
799                 }
800         }
801 
802         sk->sk_data_ready(sk, 0);
803 
804 drop_n_restore:
805         if (skb_head != skb->data && skb_shared(skb)) {
806                 skb->data = skb_head;
807                 skb->len = skb_len;
808         }
809 drop:
810         kfree_skb(skb);
811         return 0;
812 
813 ring_is_full:
814         po->stats.tp_drops++;
815         spin_unlock(&sk->sk_receive_queue.lock);
816 
817         sk->sk_data_ready(sk, 0);
818         kfree_skb(copy_skb);
819         goto drop_n_restore;
820 }
821 
822 static void tpacket_destruct_skb(struct sk_buff *skb)
823 {
824         struct packet_sock *po = pkt_sk(skb->sk);
825         void *ph;
826 
827         BUG_ON(skb == NULL);
828 
829         if (likely(po->tx_ring.pg_vec)) {
830                 ph = skb_shinfo(skb)->destructor_arg;
831                 BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
832                 atomic_dec(&po->tx_ring.pending);
833                 __packet_set_status(po, ph, TP_STATUS_AVAILABLE);
834         }
835 
836         sock_wfree(skb);
837 }
838 
839 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
840                 void *frame, struct net_device *dev, int size_max,
841                 __be16 proto, unsigned char *addr)
842 {
843         union {
844                 struct tpacket_hdr *h1;
845                 struct tpacket2_hdr *h2;
846                 void *raw;
847         } ph;
848         int to_write, offset, len, tp_len, nr_frags, len_max;
849         struct socket *sock = po->sk.sk_socket;
850         struct page *page;
851         void *data;
852         int err;
853 
854         ph.raw = frame;
855 
856         skb->protocol = proto;
857         skb->dev = dev;
858         skb->priority = po->sk.sk_priority;
859         skb_shinfo(skb)->destructor_arg = ph.raw;
860 
861         switch (po->tp_version) {
862         case TPACKET_V2:
863                 tp_len = ph.h2->tp_len;
864                 break;
865         default:
866                 tp_len = ph.h1->tp_len;
867                 break;
868         }
869         if (unlikely(tp_len > size_max)) {
870                 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
871                 return -EMSGSIZE;
872         }
873 
874         skb_reserve(skb, LL_RESERVED_SPACE(dev));
875         skb_reset_network_header(skb);
876 
877         data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
878         to_write = tp_len;
879 
880         if (sock->type == SOCK_DGRAM) {
881                 err = dev_hard_header(skb, dev, ntohs(proto), addr,
882                                 NULL, tp_len);
883                 if (unlikely(err < 0))
884                         return -EINVAL;
885         } else if (dev->hard_header_len) {
886                 /* net device doesn't like empty head */
887                 if (unlikely(tp_len <= dev->hard_header_len)) {
888                         pr_err("packet size is too short (%d < %d)\n",
889                                tp_len, dev->hard_header_len);
890                         return -EINVAL;
891                 }
892 
893                 skb_push(skb, dev->hard_header_len);
894                 err = skb_store_bits(skb, 0, data,
895                                 dev->hard_header_len);
896                 if (unlikely(err))
897                         return err;
898 
899                 data += dev->hard_header_len;
900                 to_write -= dev->hard_header_len;
901         }
902 
903         err = -EFAULT;
904         page = virt_to_page(data);
905         offset = offset_in_page(data);
906         len_max = PAGE_SIZE - offset;
907         len = ((to_write > len_max) ? len_max : to_write);
908 
909         skb->data_len = to_write;
910         skb->len += to_write;
911         skb->truesize += to_write;
912         atomic_add(to_write, &po->sk.sk_wmem_alloc);
913 
914         while (likely(to_write)) {
915                 nr_frags = skb_shinfo(skb)->nr_frags;
916 
917                 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
918                         pr_err("Packet exceed the number of skb frags(%lu)\n",
919                                MAX_SKB_FRAGS);
920                         return -EFAULT;
921                 }
922 
923                 flush_dcache_page(page);
924                 get_page(page);
925                 skb_fill_page_desc(skb,
926                                 nr_frags,
927                                 page++, offset, len);
928                 to_write -= len;
929                 offset = 0;
930                 len_max = PAGE_SIZE;
931                 len = ((to_write > len_max) ? len_max : to_write);
932         }
933 
934         return tp_len;
935 }
936 
937 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
938 {
939         struct socket *sock;
940         struct sk_buff *skb;
941         struct net_device *dev;
942         __be16 proto;
943         int ifindex, err, reserve = 0;
944         void *ph;
945         struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
946         int tp_len, size_max;
947         unsigned char *addr;
948         int len_sum = 0;
949         int status = 0;
950 
951         sock = po->sk.sk_socket;
952 
953         mutex_lock(&po->pg_vec_lock);
954 
955         err = -EBUSY;
956         if (saddr == NULL) {
957                 ifindex = po->ifindex;
958                 proto   = po->num;
959                 addr    = NULL;
960         } else {
961                 err = -EINVAL;
962                 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
963                         goto out;
964                 if (msg->msg_namelen < (saddr->sll_halen
965                                         + offsetof(struct sockaddr_ll,
966                                                 sll_addr)))
967                         goto out;
968                 ifindex = saddr->sll_ifindex;
969                 proto   = saddr->sll_protocol;
970                 addr    = saddr->sll_addr;
971         }
972 
973         dev = dev_get_by_index(sock_net(&po->sk), ifindex);
974         err = -ENXIO;
975         if (unlikely(dev == NULL))
976                 goto out;
977 
978         reserve = dev->hard_header_len;
979 
980         err = -ENETDOWN;
981         if (unlikely(!(dev->flags & IFF_UP)))
982                 goto out_put;
983 
984         size_max = po->tx_ring.frame_size
985                 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
986 
987         if (size_max > dev->mtu + reserve)
988                 size_max = dev->mtu + reserve;
989 
990         do {
991                 ph = packet_current_frame(po, &po->tx_ring,
992                                 TP_STATUS_SEND_REQUEST);
993 
994                 if (unlikely(ph == NULL)) {
995                         schedule();
996                         continue;
997                 }
998 
999                 status = TP_STATUS_SEND_REQUEST;
1000                 skb = sock_alloc_send_skb(&po->sk,
1001                                 LL_ALLOCATED_SPACE(dev)
1002                                 + sizeof(struct sockaddr_ll),
1003                                 0, &err);
1004 
1005                 if (unlikely(skb == NULL))
1006                         goto out_status;
1007 
1008                 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
1009                                 addr);
1010 
1011                 if (unlikely(tp_len < 0)) {
1012                         if (po->tp_loss) {
1013                                 __packet_set_status(po, ph,
1014                                                 TP_STATUS_AVAILABLE);
1015                                 packet_increment_head(&po->tx_ring);
1016                                 kfree_skb(skb);
1017                                 continue;
1018                         } else {
1019                                 status = TP_STATUS_WRONG_FORMAT;
1020                                 err = tp_len;
1021                                 goto out_status;
1022                         }
1023                 }
1024 
1025                 skb->destructor = tpacket_destruct_skb;
1026                 __packet_set_status(po, ph, TP_STATUS_SENDING);
1027                 atomic_inc(&po->tx_ring.pending);
1028 
1029                 status = TP_STATUS_SEND_REQUEST;
1030                 err = dev_queue_xmit(skb);
1031                 if (unlikely(err > 0)) {
1032                         err = net_xmit_errno(err);
1033                         if (err && __packet_get_status(po, ph) ==
1034                                    TP_STATUS_AVAILABLE) {
1035                                 /* skb was destructed already */
1036                                 skb = NULL;
1037                                 goto out_status;
1038                         }
1039                         /*
1040                          * skb was dropped but not destructed yet;
1041                          * let's treat it like congestion or err < 0
1042                          */
1043                         err = 0;
1044                 }
1045                 packet_increment_head(&po->tx_ring);
1046                 len_sum += tp_len;
1047         } while (likely((ph != NULL) || ((!(msg->msg_flags & MSG_DONTWAIT))
1048                                         && (atomic_read(&po->tx_ring.pending))))
1049               );
1050 
1051         err = len_sum;
1052         goto out_put;
1053 
1054 out_status:
1055         __packet_set_status(po, ph, status);
1056         kfree_skb(skb);
1057 out_put:
1058         dev_put(dev);
1059 out:
1060         mutex_unlock(&po->pg_vec_lock);
1061         return err;
1062 }
1063 #endif
1064 
1065 static int packet_snd(struct socket *sock,
1066                           struct msghdr *msg, size_t len)
1067 {
1068         struct sock *sk = sock->sk;
1069         struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
1070         struct sk_buff *skb;
1071         struct net_device *dev;
1072         __be16 proto;
1073         unsigned char *addr;
1074         int ifindex, err, reserve = 0;
1075 
1076         /*
1077          *      Get and verify the address.
1078          */
1079 
1080         if (saddr == NULL) {
1081                 struct packet_sock *po = pkt_sk(sk);
1082 
1083                 ifindex = po->ifindex;
1084                 proto   = po->num;
1085                 addr    = NULL;
1086         } else {
1087                 err = -EINVAL;
1088                 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
1089                         goto out;
1090                 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
1091                         goto out;
1092                 ifindex = saddr->sll_ifindex;
1093                 proto   = saddr->sll_protocol;
1094                 addr    = saddr->sll_addr;
1095         }
1096 
1097 
1098         dev = dev_get_by_index(sock_net(sk), ifindex);
1099         err = -ENXIO;
1100         if (dev == NULL)
1101                 goto out_unlock;
1102         if (sock->type == SOCK_RAW)
1103                 reserve = dev->hard_header_len;
1104 
1105         err = -ENETDOWN;
1106         if (!(dev->flags & IFF_UP))
1107                 goto out_unlock;
1108 
1109         err = -EMSGSIZE;
1110         if (len > dev->mtu+reserve)
1111                 goto out_unlock;
1112 
1113         skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
1114                                 msg->msg_flags & MSG_DONTWAIT, &err);
1115         if (skb == NULL)
1116                 goto out_unlock;
1117 
1118         skb_reserve(skb, LL_RESERVED_SPACE(dev));
1119         skb_reset_network_header(skb);
1120 
1121         err = -EINVAL;
1122         if (sock->type == SOCK_DGRAM &&
1123             dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
1124                 goto out_free;
1125 
1126         /* Returns -EFAULT on error */
1127         err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1128         if (err)
1129                 goto out_free;
1130 
1131         skb->protocol = proto;
1132         skb->dev = dev;
1133         skb->priority = sk->sk_priority;
1134 
1135         /*
1136          *      Now send it
1137          */
1138 
1139         err = dev_queue_xmit(skb);
1140         if (err > 0 && (err = net_xmit_errno(err)) != 0)
1141                 goto out_unlock;
1142 
1143         dev_put(dev);
1144 
1145         return len;
1146 
1147 out_free:
1148         kfree_skb(skb);
1149 out_unlock:
1150         if (dev)
1151                 dev_put(dev);
1152 out:
1153         return err;
1154 }
1155 
1156 static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
1157                 struct msghdr *msg, size_t len)
1158 {
1159 #ifdef CONFIG_PACKET_MMAP
1160         struct sock *sk = sock->sk;
1161         struct packet_sock *po = pkt_sk(sk);
1162         if (po->tx_ring.pg_vec)
1163                 return tpacket_snd(po, msg);
1164         else
1165 #endif
1166                 return packet_snd(sock, msg, len);
1167 }
1168 
1169 /*
1170  *      Close a PACKET socket. This is fairly simple. We immediately go
1171  *      to 'closed' state and remove our protocol entry in the device list.
1172  */
1173 
1174 static int packet_release(struct socket *sock)
1175 {
1176         struct sock *sk = sock->sk;
1177         struct packet_sock *po;
1178         struct net *net;
1179 #ifdef CONFIG_PACKET_MMAP
1180         struct tpacket_req req;
1181 #endif
1182 
1183         if (!sk)
1184                 return 0;
1185 
1186         net = sock_net(sk);
1187         po = pkt_sk(sk);
1188 
1189         write_lock_bh(&net->packet.sklist_lock);
1190         sk_del_node_init(sk);
1191         sock_prot_inuse_add(net, sk->sk_prot, -1);
1192         write_unlock_bh(&net->packet.sklist_lock);
1193 
1194         /*
1195          *      Unhook packet receive handler.
1196          */
1197 
1198         if (po->running) {
1199                 /*
1200                  *      Remove the protocol hook
1201                  */
1202                 dev_remove_pack(&po->prot_hook);
1203                 po->running = 0;
1204                 po->num = 0;
1205                 __sock_put(sk);
1206         }
1207 
1208         packet_flush_mclist(sk);
1209 
1210 #ifdef CONFIG_PACKET_MMAP
1211         memset(&req, 0, sizeof(req));
1212 
1213         if (po->rx_ring.pg_vec)
1214                 packet_set_ring(sk, &req, 1, 0);
1215 
1216         if (po->tx_ring.pg_vec)
1217                 packet_set_ring(sk, &req, 1, 1);
1218 #endif
1219 
1220         /*
1221          *      Now the socket is dead. No more input will appear.
1222          */
1223 
1224         sock_orphan(sk);
1225         sock->sk = NULL;
1226 
1227         /* Purge queues */
1228 
1229         skb_queue_purge(&sk->sk_receive_queue);
1230         sk_refcnt_debug_release(sk);
1231 
1232         sock_put(sk);
1233         return 0;
1234 }
1235 
1236 /*
1237  *      Attach a packet hook.
1238  */
1239 
1240 static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
1241 {
1242         struct packet_sock *po = pkt_sk(sk);
1243         /*
1244          *      Detach an existing hook if present.
1245          */
1246 
1247         lock_sock(sk);
1248 
1249         spin_lock(&po->bind_lock);
1250         if (po->running) {
1251                 __sock_put(sk);
1252                 po->running = 0;
1253                 po->num = 0;
1254                 spin_unlock(&po->bind_lock);
1255                 dev_remove_pack(&po->prot_hook);
1256                 spin_lock(&po->bind_lock);
1257         }
1258 
1259         po->num = protocol;
1260         po->prot_hook.type = protocol;
1261         po->prot_hook.dev = dev;
1262 
1263         po->ifindex = dev ? dev->ifindex : 0;
1264 
1265         if (protocol == 0)
1266                 goto out_unlock;
1267 
1268         if (!dev || (dev->flags & IFF_UP)) {
1269                 dev_add_pack(&po->prot_hook);
1270                 sock_hold(sk);
1271                 po->running = 1;
1272         } else {
1273                 sk->sk_err = ENETDOWN;
1274                 if (!sock_flag(sk, SOCK_DEAD))
1275                         sk->sk_error_report(sk);
1276         }
1277 
1278 out_unlock:
1279         spin_unlock(&po->bind_lock);
1280         release_sock(sk);
1281         return 0;
1282 }
1283 
1284 /*
1285  *      Bind a packet socket to a device
1286  */
1287 
1288 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
1289                             int addr_len)
1290 {
1291         struct sock *sk = sock->sk;
1292         char name[15];
1293         struct net_device *dev;
1294         int err = -ENODEV;
1295 
1296         /*
1297          *      Check legality
1298          */
1299 
1300         if (addr_len != sizeof(struct sockaddr))
1301                 return -EINVAL;
1302         strlcpy(name, uaddr->sa_data, sizeof(name));
1303 
1304         dev = dev_get_by_name(sock_net(sk), name);
1305         if (dev) {
1306                 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
1307                 dev_put(dev);
1308         }
1309         return err;
1310 }
1311 
1312 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1313 {
1314         struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
1315         struct sock *sk = sock->sk;
1316         struct net_device *dev = NULL;
1317         int err;
1318 
1319 
1320         /*
1321          *      Check legality
1322          */
1323 
1324         if (addr_len < sizeof(struct sockaddr_ll))
1325                 return -EINVAL;
1326         if (sll->sll_family != AF_PACKET)
1327                 return -EINVAL;
1328 
1329         if (sll->sll_ifindex) {
1330                 err = -ENODEV;
1331                 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
1332                 if (dev == NULL)
1333                         goto out;
1334         }
1335         err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1336         if (dev)
1337                 dev_put(dev);
1338 
1339 out:
1340         return err;
1341 }
1342 
1343 static struct proto packet_proto = {
1344         .name     = "PACKET",
1345         .owner    = THIS_MODULE,
1346         .obj_size = sizeof(struct packet_sock),
1347 };
1348 
1349 /*
1350  *      Create a packet of type SOCK_PACKET.
1351  */
1352 
1353 static int packet_create(struct net *net, struct socket *sock, int protocol)
1354 {
1355         struct sock *sk;
1356         struct packet_sock *po;
1357         __be16 proto = (__force __be16)protocol; /* weird, but documented */
1358         int err;
1359 
1360         if (!capable(CAP_NET_RAW))
1361                 return -EPERM;
1362         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
1363             sock->type != SOCK_PACKET)
1364                 return -ESOCKTNOSUPPORT;
1365 
1366         sock->state = SS_UNCONNECTED;
1367 
1368         err = -ENOBUFS;
1369         sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
1370         if (sk == NULL)
1371                 goto out;
1372 
1373         sock->ops = &packet_ops;
1374         if (sock->type == SOCK_PACKET)
1375                 sock->ops = &packet_ops_spkt;
1376 
1377         sock_init_data(sock, sk);
1378 
1379         po = pkt_sk(sk);
1380         sk->sk_family = PF_PACKET;
1381         po->num = proto;
1382 
1383         sk->sk_destruct = packet_sock_destruct;
1384         sk_refcnt_debug_inc(sk);
1385 
1386         /*
1387          *      Attach a protocol block
1388          */
1389 
1390         spin_lock_init(&po->bind_lock);
1391         mutex_init(&po->pg_vec_lock);
1392         po->prot_hook.func = packet_rcv;
1393 
1394         if (sock->type == SOCK_PACKET)
1395                 po->prot_hook.func = packet_rcv_spkt;
1396 
1397         po->prot_hook.af_packet_priv = sk;
1398 
1399         if (proto) {
1400                 po->prot_hook.type = proto;
1401                 dev_add_pack(&po->prot_hook);
1402                 sock_hold(sk);
1403                 po->running = 1;
1404         }
1405 
1406         write_lock_bh(&net->packet.sklist_lock);
1407         sk_add_node(sk, &net->packet.sklist);
1408         sock_prot_inuse_add(net, &packet_proto, 1);
1409         write_unlock_bh(&net->packet.sklist_lock);
1410         return 0;
1411 out:
1412         return err;
1413 }
1414 
1415 /*
1416  *      Pull a packet from our receive queue and hand it to the user.
1417  *      If necessary we block.
1418  */
1419 
1420 static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1421                           struct msghdr *msg, size_t len, int flags)
1422 {
1423         struct sock *sk = sock->sk;
1424         struct sk_buff *skb;
1425         int copied, err;
1426 
1427         err = -EINVAL;
1428         if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1429                 goto out;
1430 
1431 #if 0
1432         /* What error should we return now? EUNATTACH? */
1433         if (pkt_sk(sk)->ifindex < 0)
1434                 return -ENODEV;
1435 #endif
1436 
1437         /*
1438          *      Call the generic datagram receiver. This handles all sorts
1439          *      of horrible races and re-entrancy so we can forget about it
1440          *      in the protocol layers.
1441          *
1442          *      Now it will return ENETDOWN, if device have just gone down,
1443          *      but then it will block.
1444          */
1445 
1446         skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
1447 
1448         /*
1449          *      An error occurred so return it. Because skb_recv_datagram()
1450          *      handles the blocking we don't see and worry about blocking
1451          *      retries.
1452          */
1453 
1454         if (skb == NULL)
1455                 goto out;
1456 
1457         /* You lose any data beyond the buffer you gave. If it worries
1458          * a user program they can ask the device for its MTU
1459          * anyway.
1460          */
1461         copied = skb->len;
1462         if (copied > len) {
1463                 copied = len;
1464                 msg->msg_flags |= MSG_TRUNC;
1465         }
1466 
1467         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1468         if (err)
1469                 goto out_free;
1470 
1471         sock_recv_timestamp(msg, sk, skb);
1472 
1473         if (msg->msg_name) {
1474                 /* If the address length field is there to be filled
1475                  * in, we fill it in now.
1476                  */
1477                 if (sock->type == SOCK_PACKET) {
1478                         msg->msg_namelen = sizeof(struct sockaddr_pkt);
1479                 } else {
1480                         struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
1481                         msg->msg_namelen = sll->sll_halen +
1482                                 offsetof(struct sockaddr_ll, sll_addr);
1483                 }
1484                 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1485                        msg->msg_namelen);
1486         }
1487 
1488         if (pkt_sk(sk)->auxdata) {
1489                 struct tpacket_auxdata aux;
1490 
1491                 aux.tp_status = TP_STATUS_USER;
1492                 if (skb->ip_summed == CHECKSUM_PARTIAL)
1493                         aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1494                 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1495                 aux.tp_snaplen = skb->len;
1496                 aux.tp_mac = 0;
1497                 aux.tp_net = skb_network_offset(skb);
1498                 aux.tp_vlan_tci = skb->vlan_tci;
1499 
1500                 aux.tp_padding = 0;
1501                 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
1502         }
1503 
1504         /*
1505          *      Free or return the buffer as appropriate. Again this
1506          *      hides all the races and re-entrancy issues from us.
1507          */
1508         err = (flags&MSG_TRUNC) ? skb->len : copied;
1509 
1510 out_free:
1511         skb_free_datagram(sk, skb);
1512 out:
1513         return err;
1514 }
1515 
1516 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1517                                int *uaddr_len, int peer)
1518 {
1519         struct net_device *dev;
1520         struct sock *sk = sock->sk;
1521 
1522         if (peer)
1523                 return -EOPNOTSUPP;
1524 
1525         uaddr->sa_family = AF_PACKET;
1526         memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
1527         dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex);
1528         if (dev) {
1529                 strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
1530                 dev_put(dev);
1531         }
1532         *uaddr_len = sizeof(*uaddr);
1533 
1534         return 0;
1535 }
1536 
1537 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1538                           int *uaddr_len, int peer)
1539 {
1540         struct net_device *dev;
1541         struct sock *sk = sock->sk;
1542         struct packet_sock *po = pkt_sk(sk);
1543         struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
1544 
1545         if (peer)
1546                 return -EOPNOTSUPP;
1547 
1548         sll->sll_family = AF_PACKET;
1549         sll->sll_ifindex = po->ifindex;
1550         sll->sll_protocol = po->num;
1551         sll->sll_pkttype = 0;
1552         dev = dev_get_by_index(sock_net(sk), po->ifindex);
1553         if (dev) {
1554                 sll->sll_hatype = dev->type;
1555                 sll->sll_halen = dev->addr_len;
1556                 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1557                 dev_put(dev);
1558         } else {
1559                 sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
1560                 sll->sll_halen = 0;
1561         }
1562         *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
1563 
1564         return 0;
1565 }
1566 
1567 static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1568                          int what)
1569 {
1570         switch (i->type) {
1571         case PACKET_MR_MULTICAST:
1572                 if (what > 0)
1573                         return dev_mc_add(dev, i->addr, i->alen, 0);
1574                 else
1575                         return dev_mc_delete(dev, i->addr, i->alen, 0);
1576                 break;
1577         case PACKET_MR_PROMISC:
1578                 return dev_set_promiscuity(dev, what);
1579                 break;
1580         case PACKET_MR_ALLMULTI:
1581                 return dev_set_allmulti(dev, what);
1582                 break;
1583         case PACKET_MR_UNICAST:
1584                 if (what > 0)
1585                         return dev_unicast_add(dev, i->addr);
1586                 else
1587                         return dev_unicast_delete(dev, i->addr);
1588                 break;
1589         default:
1590                 break;
1591         }
1592         return 0;
1593 }
1594 
1595 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1596 {
1597         for ( ; i; i = i->next) {
1598                 if (i->ifindex == dev->ifindex)
1599                         packet_dev_mc(dev, i, what);
1600         }
1601 }
1602 
1603 static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1604 {
1605         struct packet_sock *po = pkt_sk(sk);
1606         struct packet_mclist *ml, *i;
1607         struct net_device *dev;
1608         int err;
1609 
1610         rtnl_lock();
1611 
1612         err = -ENODEV;
1613         dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
1614         if (!dev)
1615                 goto done;
1616 
1617         err = -EINVAL;
1618         if (mreq->mr_alen > dev->addr_len)
1619                 goto done;
1620 
1621         err = -ENOBUFS;
1622         i = kmalloc(sizeof(*i), GFP_KERNEL);
1623         if (i == NULL)
1624                 goto done;
1625 
1626         err = 0;
1627         for (ml = po->mclist; ml; ml = ml->next) {
1628                 if (ml->ifindex == mreq->mr_ifindex &&
1629                     ml->type == mreq->mr_type &&
1630                     ml->alen == mreq->mr_alen &&
1631                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1632                         ml->count++;
1633                         /* Free the new element ... */
1634                         kfree(i);
1635                         goto done;
1636                 }
1637         }
1638 
1639         i->type = mreq->mr_type;
1640         i->ifindex = mreq->mr_ifindex;
1641         i->alen = mreq->mr_alen;
1642         memcpy(i->addr, mreq->mr_address, i->alen);
1643         i->count = 1;
1644         i->next = po->mclist;
1645         po->mclist = i;
1646         err = packet_dev_mc(dev, i, 1);
1647         if (err) {
1648                 po->mclist = i->next;
1649                 kfree(i);
1650         }
1651 
1652 done:
1653         rtnl_unlock();
1654         return err;
1655 }
1656 
1657 static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1658 {
1659         struct packet_mclist *ml, **mlp;
1660 
1661         rtnl_lock();
1662 
1663         for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1664                 if (ml->ifindex == mreq->mr_ifindex &&
1665                     ml->type == mreq->mr_type &&
1666                     ml->alen == mreq->mr_alen &&
1667                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1668                         if (--ml->count == 0) {
1669                                 struct net_device *dev;
1670                                 *mlp = ml->next;
1671                                 dev = dev_get_by_index(sock_net(sk), ml->ifindex);
1672                                 if (dev) {
1673                                         packet_dev_mc(dev, ml, -1);
1674                                         dev_put(dev);
1675                                 }
1676                                 kfree(ml);
1677                         }
1678                         rtnl_unlock();
1679                         return 0;
1680                 }
1681         }
1682         rtnl_unlock();
1683         return -EADDRNOTAVAIL;
1684 }
1685 
1686 static void packet_flush_mclist(struct sock *sk)
1687 {
1688         struct packet_sock *po = pkt_sk(sk);
1689         struct packet_mclist *ml;
1690 
1691         if (!po->mclist)
1692                 return;
1693 
1694         rtnl_lock();
1695         while ((ml = po->mclist) != NULL) {
1696                 struct net_device *dev;
1697 
1698                 po->mclist = ml->next;
1699                 dev = dev_get_by_index(sock_net(sk), ml->ifindex);
1700                 if (dev != NULL) {
1701                         packet_dev_mc(dev, ml, -1);
1702                         dev_put(dev);
1703                 }
1704                 kfree(ml);
1705         }
1706         rtnl_unlock();
1707 }
1708 
1709 static int
1710 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
1711 {
1712         struct sock *sk = sock->sk;
1713         struct packet_sock *po = pkt_sk(sk);
1714         int ret;
1715 
1716         if (level != SOL_PACKET)
1717                 return -ENOPROTOOPT;
1718 
1719         switch (optname) {
1720         case PACKET_ADD_MEMBERSHIP:
1721         case PACKET_DROP_MEMBERSHIP:
1722         {
1723                 struct packet_mreq_max mreq;
1724                 int len = optlen;
1725                 memset(&mreq, 0, sizeof(mreq));
1726                 if (len < sizeof(struct packet_mreq))
1727                         return -EINVAL;
1728                 if (len > sizeof(mreq))
1729                         len = sizeof(mreq);
1730                 if (copy_from_user(&mreq, optval, len))
1731                         return -EFAULT;
1732                 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1733                         return -EINVAL;
1734                 if (optname == PACKET_ADD_MEMBERSHIP)
1735                         ret = packet_mc_add(sk, &mreq);
1736                 else
1737                         ret = packet_mc_drop(sk, &mreq);
1738                 return ret;
1739         }
1740 
1741 #ifdef CONFIG_PACKET_MMAP
1742         case PACKET_RX_RING:
1743         case PACKET_TX_RING:
1744         {
1745                 struct tpacket_req req;
1746 
1747                 if (optlen < sizeof(req))
1748                         return -EINVAL;
1749                 if (copy_from_user(&req, optval, sizeof(req)))
1750                         return -EFAULT;
1751                 return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
1752         }
1753         case PACKET_COPY_THRESH:
1754         {
1755                 int val;
1756 
1757                 if (optlen != sizeof(val))
1758                         return -EINVAL;
1759                 if (copy_from_user(&val, optval, sizeof(val)))
1760                         return -EFAULT;
1761 
1762                 pkt_sk(sk)->copy_thresh = val;
1763                 return 0;
1764         }
1765         case PACKET_VERSION:
1766         {
1767                 int val;
1768 
1769                 if (optlen != sizeof(val))
1770                         return -EINVAL;
1771                 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1772                         return -EBUSY;
1773                 if (copy_from_user(&val, optval, sizeof(val)))
1774                         return -EFAULT;
1775                 switch (val) {
1776                 case TPACKET_V1:
1777                 case TPACKET_V2:
1778                         po->tp_version = val;
1779                         return 0;
1780                 default:
1781                         return -EINVAL;
1782                 }
1783         }
1784         case PACKET_RESERVE:
1785         {
1786                 unsigned int val;
1787 
1788                 if (optlen != sizeof(val))
1789                         return -EINVAL;
1790                 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1791                         return -EBUSY;
1792                 if (copy_from_user(&val, optval, sizeof(val)))
1793                         return -EFAULT;
1794                 po->tp_reserve = val;
1795                 return 0;
1796         }
1797         case PACKET_LOSS:
1798         {
1799                 unsigned int val;
1800 
1801                 if (optlen != sizeof(val))
1802                         return -EINVAL;
1803                 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1804                         return -EBUSY;
1805                 if (copy_from_user(&val, optval, sizeof(val)))
1806                         return -EFAULT;
1807                 po->tp_loss = !!val;
1808                 return 0;
1809         }
1810 #endif
1811         case PACKET_AUXDATA:
1812         {
1813                 int val;
1814 
1815                 if (optlen < sizeof(val))
1816                         return -EINVAL;
1817                 if (copy_from_user(&val, optval, sizeof(val)))
1818                         return -EFAULT;
1819 
1820                 po->auxdata = !!val;
1821                 return 0;
1822         }
1823         case PACKET_ORIGDEV:
1824         {
1825                 int val;
1826 
1827                 if (optlen < sizeof(val))
1828                         return -EINVAL;
1829                 if (copy_from_user(&val, optval, sizeof(val)))
1830                         return -EFAULT;
1831 
1832                 po->origdev = !!val;
1833                 return 0;
1834         }
1835         default:
1836                 return -ENOPROTOOPT;
1837         }
1838 }
1839 
1840 static int packet_getsockopt(struct socket *sock, int level, int optname,
1841                              char __user *optval, int __user *optlen)
1842 {
1843         int len;
1844         int val;
1845         struct sock *sk = sock->sk;
1846         struct packet_sock *po = pkt_sk(sk);
1847         void *data;
1848         struct tpacket_stats st;
1849 
1850         if (level != SOL_PACKET)
1851                 return -ENOPROTOOPT;
1852 
1853         if (get_user(len, optlen))
1854                 return -EFAULT;
1855 
1856         if (len < 0)
1857                 return -EINVAL;
1858 
1859         switch (optname) {
1860         case PACKET_STATISTICS:
1861                 if (len > sizeof(struct tpacket_stats))
1862                         len = sizeof(struct tpacket_stats);
1863                 spin_lock_bh(&sk->sk_receive_queue.lock);
1864                 st = po->stats;
1865                 memset(&po->stats, 0, sizeof(st));
1866                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1867                 st.tp_packets += st.tp_drops;
1868 
1869                 data = &st;
1870                 break;
1871         case PACKET_AUXDATA:
1872                 if (len > sizeof(int))
1873                         len = sizeof(int);
1874                 val = po->auxdata;
1875 
1876                 data = &val;
1877                 break;
1878         case PACKET_ORIGDEV:
1879                 if (len > sizeof(int))
1880                         len = sizeof(int);
1881                 val = po->origdev;
1882 
1883                 data = &val;
1884                 break;
1885 #ifdef CONFIG_PACKET_MMAP
1886         case PACKET_VERSION:
1887                 if (len > sizeof(int))
1888                         len = sizeof(int);
1889                 val = po->tp_version;
1890                 data = &val;
1891                 break;
1892         case PACKET_HDRLEN:
1893                 if (len > sizeof(int))
1894                         len = sizeof(int);
1895                 if (copy_from_user(&val, optval, len))
1896                         return -EFAULT;
1897                 switch (val) {
1898                 case TPACKET_V1:
1899                         val = sizeof(struct tpacket_hdr);
1900                         break;
1901                 case TPACKET_V2:
1902                         val = sizeof(struct tpacket2_hdr);
1903                         break;
1904                 default:
1905                         return -EINVAL;
1906                 }
1907                 data = &val;
1908                 break;
1909         case PACKET_RESERVE:
1910                 if (len > sizeof(unsigned int))
1911                         len = sizeof(unsigned int);
1912                 val = po->tp_reserve;
1913                 data = &val;
1914                 break;
1915         case PACKET_LOSS:
1916                 if (len > sizeof(unsigned int))
1917                         len = sizeof(unsigned int);
1918                 val = po->tp_loss;
1919                 data = &val;
1920                 break;
1921 #endif
1922         default:
1923                 return -ENOPROTOOPT;
1924         }
1925 
1926         if (put_user(len, optlen))
1927                 return -EFAULT;
1928         if (copy_to_user(optval, data, len))
1929                 return -EFAULT;
1930         return 0;
1931 }
1932 
1933 
1934 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1935 {
1936         struct sock *sk;
1937         struct hlist_node *node;
1938         struct net_device *dev = data;
1939         struct net *net = dev_net(dev);
1940 
1941         read_lock(&net->packet.sklist_lock);
1942         sk_for_each(sk, node, &net->packet.sklist) {
1943                 struct packet_sock *po = pkt_sk(sk);
1944 
1945                 switch (msg) {
1946                 case NETDEV_UNREGISTER:
1947                         if (po->mclist)
1948                                 packet_dev_mclist(dev, po->mclist, -1);
1949                         /* fallthrough */
1950 
1951                 case NETDEV_DOWN:
1952                         if (dev->ifindex == po->ifindex) {
1953                                 spin_lock(&po->bind_lock);
1954                                 if (po->running) {
1955                                         __dev_remove_pack(&po->prot_hook);
1956                                         __sock_put(sk);
1957                                         po->running = 0;
1958                                         sk->sk_err = ENETDOWN;
1959                                         if (!sock_flag(sk, SOCK_DEAD))
1960                                                 sk->sk_error_report(sk);
1961                                 }
1962                                 if (msg == NETDEV_UNREGISTER) {
1963                                         po->ifindex = -1;
1964                                         po->prot_hook.dev = NULL;
1965                                 }
1966                                 spin_unlock(&po->bind_lock);
1967                         }
1968                         break;
1969                 case NETDEV_UP:
1970                         spin_lock(&po->bind_lock);
1971                         if (dev->ifindex == po->ifindex && po->num &&
1972                             !po->running) {
1973                                 dev_add_pack(&po->prot_hook);
1974                                 sock_hold(sk);
1975                                 po->running = 1;
1976                         }
1977                         spin_unlock(&po->bind_lock);
1978                         break;
1979                 }
1980         }
1981         read_unlock(&net->packet.sklist_lock);
1982         return NOTIFY_DONE;
1983 }
1984 
1985 
1986 static int packet_ioctl(struct socket *sock, unsigned int cmd,
1987                         unsigned long arg)
1988 {
1989         struct sock *sk = sock->sk;
1990 
1991         switch (cmd) {
1992         case SIOCOUTQ:
1993         {
1994                 int amount = sk_wmem_alloc_get(sk);
1995 
1996                 return put_user(amount, (int __user *)arg);
1997         }
1998         case SIOCINQ:
1999         {
2000                 struct sk_buff *skb;
2001                 int amount = 0;
2002 
2003                 spin_lock_bh(&sk->sk_receive_queue.lock);
2004                 skb = skb_peek(&sk->sk_receive_queue);
2005                 if (skb)
2006                         amount = skb->len;
2007                 spin_unlock_bh(&sk->sk_receive_queue.lock);
2008                 return put_user(amount, (int __user *)arg);
2009         }
2010         case SIOCGSTAMP:
2011                 return sock_get_timestamp(sk, (struct timeval __user *)arg);
2012         case SIOCGSTAMPNS:
2013                 return sock_get_timestampns(sk, (struct timespec __user *)arg);
2014 
2015 #ifdef CONFIG_INET
2016         case SIOCADDRT:
2017         case SIOCDELRT:
2018         case SIOCDARP:
2019         case SIOCGARP:
2020         case SIOCSARP:
2021         case SIOCGIFADDR:
2022         case SIOCSIFADDR:
2023         case SIOCGIFBRDADDR:
2024         case SIOCSIFBRDADDR:
2025         case SIOCGIFNETMASK:
2026         case SIOCSIFNETMASK:
2027         case SIOCGIFDSTADDR:
2028         case SIOCSIFDSTADDR:
2029         case SIOCSIFFLAGS:
2030                 if (!net_eq(sock_net(sk), &init_net))
2031                         return -ENOIOCTLCMD;
2032                 return inet_dgram_ops.ioctl(sock, cmd, arg);
2033 #endif
2034 
2035         default:
2036                 return -ENOIOCTLCMD;
2037         }
2038         return 0;
2039 }
2040 
2041 #ifndef CONFIG_PACKET_MMAP
2042 #define packet_mmap sock_no_mmap
2043 #define packet_poll datagram_poll
2044 #else
2045 
2046 static unsigned int packet_poll(struct file *file, struct socket *sock,
2047                                 poll_table *wait)
2048 {
2049         struct sock *sk = sock->sk;
2050         struct packet_sock *po = pkt_sk(sk);
2051         unsigned int mask = datagram_poll(file, sock, wait);
2052 
2053         spin_lock_bh(&sk->sk_receive_queue.lock);
2054         if (po->rx_ring.pg_vec) {
2055                 if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
2056                         mask |= POLLIN | POLLRDNORM;
2057         }
2058         spin_unlock_bh(&sk->sk_receive_queue.lock);
2059         spin_lock_bh(&sk->sk_write_queue.lock);
2060         if (po->tx_ring.pg_vec) {
2061                 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
2062                         mask |= POLLOUT | POLLWRNORM;
2063         }
2064         spin_unlock_bh(&sk->sk_write_queue.lock);
2065         return mask;
2066 }
2067 
2068 
2069 /* Dirty? Well, I still did not learn better way to account
2070  * for user mmaps.
2071  */
2072 
2073 static void packet_mm_open(struct vm_area_struct *vma)
2074 {
2075         struct file *file = vma->vm_file;
2076         struct socket *sock = file->private_data;
2077         struct sock *sk = sock->sk;
2078 
2079         if (sk)
2080                 atomic_inc(&pkt_sk(sk)->mapped);
2081 }
2082 
2083 static void packet_mm_close(struct vm_area_struct *vma)
2084 {
2085         struct file *file = vma->vm_file;
2086         struct socket *sock = file->private_data;
2087         struct sock *sk = sock->sk;
2088 
2089         if (sk)
2090                 atomic_dec(&pkt_sk(sk)->mapped);
2091 }
2092 
2093 static const struct vm_operations_struct packet_mmap_ops = {
2094         .open   =       packet_mm_open,
2095         .close  =       packet_mm_close,
2096 };
2097 
/*
 * free_pg_vec - release a ring's block vector.
 * @pg_vec: array of @len block addresses; entries may be NULL
 * @order:  page order each block was allocated with
 * @len:    number of entries in @pg_vec
 *
 * Frees every non-NULL block and then the array itself.
 */
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
        unsigned int i; /* same type as @len: no signed/unsigned compare */

        for (i = 0; i < len; i++) {
                if (likely(pg_vec[i]))
                        free_pages((unsigned long) pg_vec[i], order);
        }
        kfree(pg_vec);
}
2108 
2109 static inline char *alloc_one_pg_vec_page(unsigned long order)
2110 {
2111         gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
2112 
2113         return (char *) __get_free_pages(gfp_flags, order);
2114 }
2115 
2116 static char **alloc_pg_vec(struct tpacket_req *req, int order)
2117 {
2118         unsigned int block_nr = req->tp_block_nr;
2119         char **pg_vec;
2120         int i;
2121 
2122         pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
2123         if (unlikely(!pg_vec))
2124                 goto out;
2125 
2126         for (i = 0; i < block_nr; i++) {
2127                 pg_vec[i] = alloc_one_pg_vec_page(order);
2128                 if (unlikely(!pg_vec[i]))
2129                         goto out_free_pgvec;
2130         }
2131 
2132 out:
2133         return pg_vec;
2134 
2135 out_free_pgvec:
2136         free_pg_vec(pg_vec, order, block_nr);
2137         pg_vec = NULL;
2138         goto out;
2139 }
2140 
2141 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2142                 int closing, int tx_ring)
2143 {
2144         char **pg_vec = NULL;
2145         struct packet_sock *po = pkt_sk(sk);
2146         int was_running, order = 0;
2147         struct packet_ring_buffer *rb;
2148         struct sk_buff_head *rb_queue;
2149         __be16 num;
2150         int err;
2151 
2152         rb = tx_ring ? &po->tx_ring : &po->rx_ring;
2153         rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
2154 
2155         err = -EBUSY;
2156         if (!closing) {
2157                 if (atomic_read(&po->mapped))
2158                         goto out;
2159                 if (atomic_read(&rb->pending))
2160                         goto out;
2161         }
2162 
2163         if (req->tp_block_nr) {
2164                 /* Sanity tests and some calculations */
2165                 err = -EBUSY;
2166                 if (unlikely(rb->pg_vec))
2167                         goto out;
2168 
2169                 switch (po->tp_version) {
2170                 case TPACKET_V1:
2171                         po->tp_hdrlen = TPACKET_HDRLEN;
2172                         break;
2173                 case TPACKET_V2:
2174                         po->tp_hdrlen = TPACKET2_HDRLEN;
2175                         break;
2176                 }
2177 
2178                 err = -EINVAL;
2179                 if (unlikely((int)req->tp_block_size <= 0))
2180                         goto out;
2181                 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
2182                         goto out;
2183                 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
2184                                         po->tp_reserve))
2185                         goto out;
2186                 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
2187                         goto out;
2188 
2189                 rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
2190                 if (unlikely(rb->frames_per_block <= 0))
2191                         goto out;
2192                 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
2193                                         req->tp_frame_nr))
2194                         goto out;
2195 
2196                 err = -ENOMEM;
2197                 order = get_order(req->tp_block_size);
2198                 pg_vec = alloc_pg_vec(req, order);
2199                 if (unlikely(!pg_vec))
2200                         goto out;
2201         }
2202         /* Done */
2203         else {
2204                 err = -EINVAL;
2205                 if (unlikely(req->tp_frame_nr))
2206                         goto out;
2207         }
2208 
2209         lock_sock(sk);
2210 
2211         /* Detach socket from network */
2212         spin_lock(&po->bind_lock);
2213         was_running = po->running;
2214         num = po->num;
2215         if (was_running) {
2216                 __dev_remove_pack(&po->prot_hook);
2217                 po->num = 0;
2218                 po->running = 0;
2219                 __sock_put(sk);
2220         }
2221         spin_unlock(&po->bind_lock);
2222 
2223         synchronize_net();
2224 
2225         err = -EBUSY;
2226         mutex_lock(&po->pg_vec_lock);
2227         if (closing || atomic_read(&po->mapped) == 0) {
2228                 err = 0;
2229 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
2230                 spin_lock_bh(&rb_queue->lock);
2231                 pg_vec = XC(rb->pg_vec, pg_vec);
2232                 rb->frame_max = (req->tp_frame_nr - 1);
2233                 rb->head = 0;
2234                 rb->frame_size = req->tp_frame_size;
2235                 spin_unlock_bh(&rb_queue->lock);
2236 
2237                 order = XC(rb->pg_vec_order, order);
2238                 req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);
2239 
2240                 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
2241                 po->prot_hook.func = (po->rx_ring.pg_vec) ?
2242                                                 tpacket_rcv : packet_rcv;
2243                 skb_queue_purge(rb_queue);
2244 #undef XC
2245                 if (atomic_read(&po->mapped))
2246                         pr_err("packet_mmap: vma is busy: %d\n",
2247                                atomic_read(&po->mapped));
2248         }
2249         mutex_unlock(&po->pg_vec_lock);
2250 
2251         spin_lock(&po->bind_lock);
2252         if (was_running && !po->running) {
2253                 sock_hold(sk);
2254                 po->running = 1;
2255                 po->num = num;
2256                 dev_add_pack(&po->prot_hook);
2257         }
2258         spin_unlock(&po->bind_lock);
2259 
2260         release_sock(sk);
2261 
2262         if (pg_vec)
2263                 free_pg_vec(pg_vec, order, req->tp_block_nr);
2264 out:
2265         return err;
2266 }
2267 
2268 static int packet_mmap(struct file *file, struct socket *sock,
2269                 struct vm_area_struct *vma)
2270 {
2271         struct sock *sk = sock->sk;
2272         struct packet_sock *po = pkt_sk(sk);
2273         unsigned long size, expected_size;
2274         struct packet_ring_buffer *rb;
2275         unsigned long start;
2276         int err = -EINVAL;
2277         int i;
2278 
2279         if (vma->vm_pgoff)
2280                 return -EINVAL;
2281 
2282         mutex_lock(&po->pg_vec_lock);
2283 
2284         expected_size = 0;
2285         for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2286                 if (rb->pg_vec) {
2287                         expected_size += rb->pg_vec_len
2288                                                 * rb->pg_vec_pages
2289                                                 * PAGE_SIZE;
2290                 }
2291         }
2292 
2293         if (expected_size == 0)
2294                 goto out;
2295 
2296         size = vma->vm_end - vma->vm_start;
2297         if (size != expected_size)
2298                 goto out;
2299 
2300         start = vma->vm_start;
2301         for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2302                 if (rb->pg_vec == NULL)
2303                         continue;
2304 
2305                 for (i = 0; i < rb->pg_vec_len; i++) {
2306                         struct page *page = virt_to_page(rb->pg_vec[i]);
2307                         int pg_num;
2308 
2309                         for (pg_num = 0; pg_num < rb->pg_vec_pages;
2310                                         pg_num++, page++) {
2311                                 err = vm_insert_page(vma, start, page);
2312                                 if (unlikely(err))
2313                                         goto out;
2314                                 start += PAGE_SIZE;
2315                         }
2316                 }
2317         }
2318 
2319         atomic_inc(&po->mapped);
2320         vma->vm_ops = &packet_mmap_ops;
2321         err = 0;
2322 
2323 out:
2324         mutex_unlock(&po->pg_vec_lock);
2325         return err;
2326 }
2327 #endif
2328 
2329 
2330 static const struct proto_ops packet_ops_spkt = {
2331         .family =       PF_PACKET,
2332         .owner =        THIS_MODULE,
2333         .release =      packet_release,
2334         .bind =         packet_bind_spkt,
2335         .connect =      sock_no_connect,
2336         .socketpair =   sock_no_socketpair,
2337         .accept =       sock_no_accept,
2338         .getname =      packet_getname_spkt,
2339         .poll =         datagram_poll,
2340         .ioctl =        packet_ioctl,
2341         .listen =       sock_no_listen,
2342         .shutdown =     sock_no_shutdown,
2343         .setsockopt =   sock_no_setsockopt,
2344         .getsockopt =   sock_no_getsockopt,
2345         .sendmsg =      packet_sendmsg_spkt,
2346         .recvmsg =      packet_recvmsg,
2347         .mmap =         sock_no_mmap,
2348         .sendpage =     sock_no_sendpage,
2349 };
2350 
2351 static const struct proto_ops packet_ops = {
2352         .family =       PF_PACKET,
2353         .owner =        THIS_MODULE,
2354         .release =      packet_release,
2355         .bind =         packet_bind,
2356         .connect =      sock_no_connect,
2357         .socketpair =   sock_no_socketpair,
2358         .accept =       sock_no_accept,
2359         .getname =      packet_getname,
2360         .poll =         packet_poll,
2361         .ioctl =        packet_ioctl,
2362         .listen =       sock_no_listen,
2363         .shutdown =     sock_no_shutdown,
2364         .setsockopt =   packet_setsockopt,
2365         .getsockopt =   packet_getsockopt,
2366         .sendmsg =      packet_sendmsg,
2367         .recvmsg =      packet_recvmsg,
2368         .mmap =         packet_mmap,
2369         .sendpage =     sock_no_sendpage,
2370 };
2371 
2372 static struct net_proto_family packet_family_ops = {
2373         .family =       PF_PACKET,
2374         .create =       packet_create,
2375         .owner  =       THIS_MODULE,
2376 };
2377 
2378 static struct notifier_block packet_netdev_notifier = {
2379         .notifier_call =        packet_notifier,
2380 };
2381 
2382 #ifdef CONFIG_PROC_FS
2383 static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
2384 {
2385         struct sock *s;
2386         struct hlist_node *node;
2387 
2388         sk_for_each(s, node, &net->packet.sklist) {
2389                 if (!off--)
2390                         return s;
2391         }
2392         return NULL;
2393 }
2394 
2395 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
2396         __acquires(seq_file_net(seq)->packet.sklist_lock)
2397 {
2398         struct net *net = seq_file_net(seq);
2399         read_lock(&net->packet.sklist_lock);
2400         return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
2401 }
2402 
2403 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2404 {
2405         struct net *net = seq_file_net(seq);
2406         ++*pos;
2407         return  (v == SEQ_START_TOKEN)
2408                 ? sk_head(&net->packet.sklist)
2409                 : sk_next((struct sock *)v) ;
2410 }
2411 
2412 static void packet_seq_stop(struct seq_file *seq, void *v)
2413         __releases(seq_file_net(seq)->packet.sklist_lock)
2414 {
2415         struct net *net = seq_file_net(seq);
2416         read_unlock(&net->packet.sklist_lock);
2417 }
2418 
2419 static int packet_seq_show(struct seq_file *seq, void *v)
2420 {
2421         if (v == SEQ_START_TOKEN)
2422                 seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
2423         else {
2424                 struct sock *s = v;
2425                 const struct packet_sock *po = pkt_sk(s);
2426 
2427                 seq_printf(seq,
2428                            "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
2429                            s,
2430                            atomic_read(&s->sk_refcnt),
2431                            s->sk_type,
2432                            ntohs(po->num),
2433                            po->ifindex,
2434                            po->running,
2435                            atomic_read(&s->sk_rmem_alloc),
2436                            sock_i_uid(s),
2437                            sock_i_ino(s));
2438         }
2439 
2440         return 0;
2441 }
2442 
2443 static const struct seq_operations packet_seq_ops = {
2444         .start  = packet_seq_start,
2445         .next   = packet_seq_next,
2446         .stop   = packet_seq_stop,
2447         .show   = packet_seq_show,
2448 };
2449 
2450 static int packet_seq_open(struct inode *inode, struct file *file)
2451 {
2452         return seq_open_net(inode, file, &packet_seq_ops,
2453                             sizeof(struct seq_net_private));
2454 }
2455 
2456 static const struct file_operations packet_seq_fops = {
2457         .owner          = THIS_MODULE,
2458         .open           = packet_seq_open,
2459         .read           = seq_read,
2460         .llseek         = seq_lseek,
2461         .release        = seq_release_net,
2462 };
2463 
2464 #endif
2465 
2466 static int packet_net_init(struct net *net)
2467 {
2468         rwlock_init(&net->packet.sklist_lock);
2469         INIT_HLIST_HEAD(&net->packet.sklist);
2470 
2471         if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2472                 return -ENOMEM;
2473 
2474         return 0;
2475 }
2476 
/* Per-namespace teardown: remove the "packet" proc entry again. */
static void packet_net_exit(struct net *net)
{
        proc_net_remove(net, "packet");
}
2481 
2482 static struct pernet_operations packet_net_ops = {
2483         .init = packet_net_init,
2484         .exit = packet_net_exit,
2485 };
2486 
2487 
2488 static void __exit packet_exit(void)
2489 {
2490         unregister_netdevice_notifier(&packet_netdev_notifier);
2491         unregister_pernet_subsys(&packet_net_ops);
2492         sock_unregister(PF_PACKET);
2493         proto_unregister(&packet_proto);
2494 }
2495 
2496 static int __init packet_init(void)
2497 {
2498         int rc = proto_register(&packet_proto, 0);
2499 
2500         if (rc != 0)
2501                 goto out;
2502 
2503         sock_register(&packet_family_ops);
2504         register_pernet_subsys(&packet_net_ops);
2505         register_netdevice_notifier(&packet_netdev_notifier);
2506 out:
2507         return rc;
2508 }
2509 
2510 module_init(packet_init);
2511 module_exit(packet_exit);
2512 MODULE_LICENSE("GPL");
2513 MODULE_ALIAS_NETPROTO(PF_PACKET);
2514 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp