
TOMOYO Linux Cross Reference
Linux/net/netlink/af_netlink.c

  1 /*
  2  * NETLINK      Kernel-user communication protocol.
  3  *
  4  *              Authors:        Alan Cox <alan@lxorguk.ukuu.org.uk>
  5  *                              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  6  *                              Patrick McHardy <kaber@trash.net>
  7  *
  8  *              This program is free software; you can redistribute it and/or
  9  *              modify it under the terms of the GNU General Public License
 10  *              as published by the Free Software Foundation; either version
 11  *              2 of the License, or (at your option) any later version.
 12  *
 13  * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 14  *                               added netlink_proto_exit
 15  * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 16  *                               use nlk_sk, as sk->protinfo is on a diet 8)
 17  * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 18  *                               - inc module use count of module that owns
 19  *                                 the kernel socket in case userspace opens
 20  *                                 socket of same protocol
 21  *                               - remove all module support, since netlink is
 22  *                                 mandatory if CONFIG_NET=y these days
 23  */
 24 
 25 #include <linux/module.h>
 26 
 27 #include <linux/capability.h>
 28 #include <linux/kernel.h>
 29 #include <linux/init.h>
 30 #include <linux/signal.h>
 31 #include <linux/sched.h>
 32 #include <linux/errno.h>
 33 #include <linux/string.h>
 34 #include <linux/stat.h>
 35 #include <linux/socket.h>
 36 #include <linux/un.h>
 37 #include <linux/fcntl.h>
 38 #include <linux/termios.h>
 39 #include <linux/sockios.h>
 40 #include <linux/net.h>
 41 #include <linux/fs.h>
 42 #include <linux/slab.h>
 43 #include <asm/uaccess.h>
 44 #include <linux/skbuff.h>
 45 #include <linux/netdevice.h>
 46 #include <linux/rtnetlink.h>
 47 #include <linux/proc_fs.h>
 48 #include <linux/seq_file.h>
 49 #include <linux/notifier.h>
 50 #include <linux/security.h>
 51 #include <linux/jhash.h>
 52 #include <linux/jiffies.h>
 53 #include <linux/random.h>
 54 #include <linux/bitops.h>
 55 #include <linux/mm.h>
 56 #include <linux/types.h>
 57 #include <linux/audit.h>
 58 #include <linux/mutex.h>
 59 #include <linux/vmalloc.h>
 60 #include <linux/if_arp.h>
 61 #include <asm/cacheflush.h>
 62 
 63 #include <net/net_namespace.h>
 64 #include <net/sock.h>
 65 #include <net/scm.h>
 66 #include <net/netlink.h>
 67 
 68 #include "af_netlink.h"
 69 
 70 struct listeners {
 71         struct rcu_head         rcu;
 72         unsigned long           masks[0];
 73 };
 74 
 75 /* state bits */
 76 #define NETLINK_CONGESTED       0x0
 77 
 78 /* flags */
 79 #define NETLINK_KERNEL_SOCKET   0x1
 80 #define NETLINK_RECV_PKTINFO    0x2
 81 #define NETLINK_BROADCAST_SEND_ERROR    0x4
 82 #define NETLINK_RECV_NO_ENOBUFS 0x8
 83 
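/*
 * The last three flag bits mirror per-socket options that userspace toggles
 * with setsockopt() at the SOL_NETLINK level (NETLINK_PKTINFO,
 * NETLINK_BROADCAST_ERROR and NETLINK_NO_ENOBUFS); NETLINK_KERNEL_SOCKET is
 * purely internal.  A rough userspace sketch, assuming an already open
 * netlink socket "nl_fd":
 *
 *	int one = 1;
 *
 *	// suppress ENOBUFS reports on receive-queue overruns
 *	if (setsockopt(nl_fd, SOL_NETLINK, NETLINK_NO_ENOBUFS,
 *		       &one, sizeof(one)) < 0)
 *		perror("setsockopt(NETLINK_NO_ENOBUFS)");
 */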
 84 static inline int netlink_is_kernel(struct sock *sk)
 85 {
 86         return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
 87 }
 88 
 89 struct netlink_table *nl_table;
 90 EXPORT_SYMBOL_GPL(nl_table);
 91 
 92 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
 93 
 94 static int netlink_dump(struct sock *sk);
 95 static void netlink_skb_destructor(struct sk_buff *skb);
 96 
 97 DEFINE_RWLOCK(nl_table_lock);
 98 EXPORT_SYMBOL_GPL(nl_table_lock);
 99 static atomic_t nl_table_users = ATOMIC_INIT(0);
100 
101 #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
102 
103 static ATOMIC_NOTIFIER_HEAD(netlink_chain);
104 
105 static DEFINE_SPINLOCK(netlink_tap_lock);
106 static struct list_head netlink_tap_all __read_mostly;
107 
108 static inline u32 netlink_group_mask(u32 group)
109 {
110         return group ? 1 << (group - 1) : 0;
111 }
112 
113 static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid)
114 {
115         return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
116 }
117 
118 static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
119                                            gfp_t gfp_mask)
120 {
121         unsigned int len = skb_end_offset(skb);
122         struct sk_buff *new;
123 
124         new = alloc_skb(len, gfp_mask);
125         if (new == NULL)
126                 return NULL;
127 
128         NETLINK_CB(new).portid = NETLINK_CB(skb).portid;
129         NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group;
130         NETLINK_CB(new).creds = NETLINK_CB(skb).creds;
131 
132         memcpy(skb_put(new, len), skb->data, len);
133         return new;
134 }
135 
136 int netlink_add_tap(struct netlink_tap *nt)
137 {
138         if (unlikely(nt->dev->type != ARPHRD_NETLINK))
139                 return -EINVAL;
140 
141         spin_lock(&netlink_tap_lock);
142         list_add_rcu(&nt->list, &netlink_tap_all);
143         spin_unlock(&netlink_tap_lock);
144 
145         if (nt->module)
146                 __module_get(nt->module);
147 
148         return 0;
149 }
150 EXPORT_SYMBOL_GPL(netlink_add_tap);
151 
152 static int __netlink_remove_tap(struct netlink_tap *nt)
153 {
154         bool found = false;
155         struct netlink_tap *tmp;
156 
157         spin_lock(&netlink_tap_lock);
158 
159         list_for_each_entry(tmp, &netlink_tap_all, list) {
160                 if (nt == tmp) {
161                         list_del_rcu(&nt->list);
162                         found = true;
163                         goto out;
164                 }
165         }
166 
167         pr_warn("__netlink_remove_tap: %p not found\n", nt);
168 out:
169         spin_unlock(&netlink_tap_lock);
170 
171         if (found && nt->module)
172                 module_put(nt->module);
173 
174         return found ? 0 : -ENODEV;
175 }
176 
177 int netlink_remove_tap(struct netlink_tap *nt)
178 {
179         int ret;
180 
181         ret = __netlink_remove_tap(nt);
182         synchronize_net();
183 
184         return ret;
185 }
186 EXPORT_SYMBOL_GPL(netlink_remove_tap);
187 
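/*
 * netlink_add_tap()/netlink_remove_tap() exist for modules that want a copy
 * of every (whitelisted) netlink message, such as the nlmon monitoring
 * driver.  A minimal registration sketch, assuming the caller owns an
 * ARPHRD_NETLINK net_device "dev" (the "my_tap" names are illustrative):
 *
 *	static struct netlink_tap my_tap;
 *
 *	static int my_tap_open(struct net_device *dev)
 *	{
 *		my_tap.dev    = dev;
 *		my_tap.module = THIS_MODULE;
 *		return netlink_add_tap(&my_tap);    // -EINVAL unless dev->type == ARPHRD_NETLINK
 *	}
 *
 *	static int my_tap_close(struct net_device *dev)
 *	{
 *		return netlink_remove_tap(&my_tap); // waits out RCU readers via synchronize_net()
 *	}
 */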
188 static bool netlink_filter_tap(const struct sk_buff *skb)
189 {
190         struct sock *sk = skb->sk;
191         bool pass = false;
192 
193         /* We take the more conservative approach and
194          * whitelist socket protocols that may pass.
195          */
196         switch (sk->sk_protocol) {
197         case NETLINK_ROUTE:
198         case NETLINK_USERSOCK:
199         case NETLINK_SOCK_DIAG:
200         case NETLINK_NFLOG:
201         case NETLINK_XFRM:
202         case NETLINK_FIB_LOOKUP:
203         case NETLINK_NETFILTER:
204         case NETLINK_GENERIC:
205                 pass = true;
206                 break;
207         }
208 
209         return pass;
210 }
211 
212 static int __netlink_deliver_tap_skb(struct sk_buff *skb,
213                                      struct net_device *dev)
214 {
215         struct sk_buff *nskb;
216         struct sock *sk = skb->sk;
217         int ret = -ENOMEM;
218 
219         dev_hold(dev);
220 
221         if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
222                 nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
223         else
224                 nskb = skb_clone(skb, GFP_ATOMIC);
225         if (nskb) {
226                 nskb->dev = dev;
227                 nskb->protocol = htons((u16) sk->sk_protocol);
228                 nskb->pkt_type = netlink_is_kernel(sk) ?
229                                  PACKET_KERNEL : PACKET_USER;
230                 skb_reset_network_header(nskb);
231                 ret = dev_queue_xmit(nskb);
232                 if (unlikely(ret > 0))
233                         ret = net_xmit_errno(ret);
234         }
235 
236         dev_put(dev);
237         return ret;
238 }
239 
240 static void __netlink_deliver_tap(struct sk_buff *skb)
241 {
242         int ret;
243         struct netlink_tap *tmp;
244 
245         if (!netlink_filter_tap(skb))
246                 return;
247 
248         list_for_each_entry_rcu(tmp, &netlink_tap_all, list) {
249                 ret = __netlink_deliver_tap_skb(skb, tmp->dev);
250                 if (unlikely(ret))
251                         break;
252         }
253 }
254 
255 static void netlink_deliver_tap(struct sk_buff *skb)
256 {
257         rcu_read_lock();
258 
259         if (unlikely(!list_empty(&netlink_tap_all)))
260                 __netlink_deliver_tap(skb);
261 
262         rcu_read_unlock();
263 }
264 
265 static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src,
266                                        struct sk_buff *skb)
267 {
268         if (!(netlink_is_kernel(dst) && netlink_is_kernel(src)))
269                 netlink_deliver_tap(skb);
270 }
271 
272 static void netlink_overrun(struct sock *sk)
273 {
274         struct netlink_sock *nlk = nlk_sk(sk);
275 
276         if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
277                 if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
278                         sk->sk_err = ENOBUFS;
279                         sk->sk_error_report(sk);
280                 }
281         }
282         atomic_inc(&sk->sk_drops);
283 }
284 
285 static void netlink_rcv_wake(struct sock *sk)
286 {
287         struct netlink_sock *nlk = nlk_sk(sk);
288 
289         if (skb_queue_empty(&sk->sk_receive_queue))
290                 clear_bit(NETLINK_CONGESTED, &nlk->state);
291         if (!test_bit(NETLINK_CONGESTED, &nlk->state))
292                 wake_up_interruptible(&nlk->wait);
293 }
294 
295 #ifdef CONFIG_NETLINK_MMAP
296 static bool netlink_rx_is_mmaped(struct sock *sk)
297 {
298         return nlk_sk(sk)->rx_ring.pg_vec != NULL;
299 }
300 
301 static bool netlink_tx_is_mmaped(struct sock *sk)
302 {
303         return nlk_sk(sk)->tx_ring.pg_vec != NULL;
304 }
305 
306 static __pure struct page *pgvec_to_page(const void *addr)
307 {
308         if (is_vmalloc_addr(addr))
309                 return vmalloc_to_page(addr);
310         else
311                 return virt_to_page(addr);
312 }
313 
314 static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
315 {
316         unsigned int i;
317 
318         for (i = 0; i < len; i++) {
319                 if (pg_vec[i] != NULL) {
320                         if (is_vmalloc_addr(pg_vec[i]))
321                                 vfree(pg_vec[i]);
322                         else
323                                 free_pages((unsigned long)pg_vec[i], order);
324                 }
325         }
326         kfree(pg_vec);
327 }
328 
329 static void *alloc_one_pg_vec_page(unsigned long order)
330 {
331         void *buffer;
332         gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
333                           __GFP_NOWARN | __GFP_NORETRY;
334 
335         buffer = (void *)__get_free_pages(gfp_flags, order);
336         if (buffer != NULL)
337                 return buffer;
338 
339         buffer = vzalloc((1 << order) * PAGE_SIZE);
340         if (buffer != NULL)
341                 return buffer;
342 
343         gfp_flags &= ~__GFP_NORETRY;
344         return (void *)__get_free_pages(gfp_flags, order);
345 }
346 
347 static void **alloc_pg_vec(struct netlink_sock *nlk,
348                            struct nl_mmap_req *req, unsigned int order)
349 {
350         unsigned int block_nr = req->nm_block_nr;
351         unsigned int i;
352         void **pg_vec;
353 
354         pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
355         if (pg_vec == NULL)
356                 return NULL;
357 
358         for (i = 0; i < block_nr; i++) {
359                 pg_vec[i] = alloc_one_pg_vec_page(order);
360                 if (pg_vec[i] == NULL)
361                         goto err1;
362         }
363 
364         return pg_vec;
365 err1:
366         free_pg_vec(pg_vec, order, block_nr);
367         return NULL;
368 }
369 
370 
371 static void
372 __netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
373                    unsigned int order)
374 {
375         struct netlink_sock *nlk = nlk_sk(sk);
376         struct sk_buff_head *queue;
377         struct netlink_ring *ring;
378 
379         queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
380         ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
381 
382         spin_lock_bh(&queue->lock);
383 
384         ring->frame_max         = req->nm_frame_nr - 1;
385         ring->head              = 0;
386         ring->frame_size        = req->nm_frame_size;
387         ring->pg_vec_pages      = req->nm_block_size / PAGE_SIZE;
388 
389         swap(ring->pg_vec_len, req->nm_block_nr);
390         swap(ring->pg_vec_order, order);
391         swap(ring->pg_vec, pg_vec);
392 
393         __skb_queue_purge(queue);
394         spin_unlock_bh(&queue->lock);
395 
396         WARN_ON(atomic_read(&nlk->mapped));
397 
398         if (pg_vec)
399                 free_pg_vec(pg_vec, order, req->nm_block_nr);
400 }
401 
402 static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
403                             bool tx_ring)
404 {
405         struct netlink_sock *nlk = nlk_sk(sk);
406         struct netlink_ring *ring;
407         void **pg_vec = NULL;
408         unsigned int order = 0;
409 
410         ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
411 
412         if (atomic_read(&nlk->mapped))
413                 return -EBUSY;
414         if (atomic_read(&ring->pending))
415                 return -EBUSY;
416 
417         if (req->nm_block_nr) {
418                 if (ring->pg_vec != NULL)
419                         return -EBUSY;
420 
421                 if ((int)req->nm_block_size <= 0)
422                         return -EINVAL;
423                 if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE))
424                         return -EINVAL;
425                 if (req->nm_frame_size < NL_MMAP_HDRLEN)
426                         return -EINVAL;
427                 if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
428                         return -EINVAL;
429 
430                 ring->frames_per_block = req->nm_block_size /
431                                          req->nm_frame_size;
432                 if (ring->frames_per_block == 0)
433                         return -EINVAL;
434                 if (ring->frames_per_block * req->nm_block_nr !=
435                     req->nm_frame_nr)
436                         return -EINVAL;
437 
438                 order = get_order(req->nm_block_size);
439                 pg_vec = alloc_pg_vec(nlk, req, order);
440                 if (pg_vec == NULL)
441                         return -ENOMEM;
442         } else {
443                 if (req->nm_frame_nr)
444                         return -EINVAL;
445         }
446 
447         mutex_lock(&nlk->pg_vec_lock);
448         if (atomic_read(&nlk->mapped) == 0) {
449                 __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
450                 mutex_unlock(&nlk->pg_vec_lock);
451                 return 0;
452         }
453 
454         mutex_unlock(&nlk->pg_vec_lock);
455 
456         if (pg_vec)
457                 free_pg_vec(pg_vec, order, req->nm_block_nr);
458 
459         return -EBUSY;
460 }
461 
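/*
 * netlink_set_ring() is reached from setsockopt() with the NETLINK_RX_RING or
 * NETLINK_TX_RING option and a struct nl_mmap_req describing the ring
 * geometry.  A rough userspace sketch of the setup sequence, assuming an open
 * netlink socket "nl_fd" (the sizes are picked only for illustration):
 *
 *	struct nl_mmap_req req = {
 *		.nm_block_size	= 4096,			// multiple of PAGE_SIZE
 *		.nm_block_nr	= 64,
 *		.nm_frame_size	= 2048,			// NL_MMAP_MSG_ALIGNMENT aligned
 *		.nm_frame_nr	= 64 * 4096 / 2048,	// frames_per_block * block_nr
 *	};
 *
 *	setsockopt(nl_fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req));
 *	setsockopt(nl_fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req));
 *	ring = mmap(NULL, 2 * 64 * 4096, PROT_READ | PROT_WRITE,
 *		    MAP_SHARED, nl_fd, 0);
 */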
462 static void netlink_mm_open(struct vm_area_struct *vma)
463 {
464         struct file *file = vma->vm_file;
465         struct socket *sock = file->private_data;
466         struct sock *sk = sock->sk;
467 
468         if (sk)
469                 atomic_inc(&nlk_sk(sk)->mapped);
470 }
471 
472 static void netlink_mm_close(struct vm_area_struct *vma)
473 {
474         struct file *file = vma->vm_file;
475         struct socket *sock = file->private_data;
476         struct sock *sk = sock->sk;
477 
478         if (sk)
479                 atomic_dec(&nlk_sk(sk)->mapped);
480 }
481 
482 static const struct vm_operations_struct netlink_mmap_ops = {
483         .open   = netlink_mm_open,
484         .close  = netlink_mm_close,
485 };
486 
487 static int netlink_mmap(struct file *file, struct socket *sock,
488                         struct vm_area_struct *vma)
489 {
490         struct sock *sk = sock->sk;
491         struct netlink_sock *nlk = nlk_sk(sk);
492         struct netlink_ring *ring;
493         unsigned long start, size, expected;
494         unsigned int i;
495         int err = -EINVAL;
496 
497         if (vma->vm_pgoff)
498                 return -EINVAL;
499 
500         mutex_lock(&nlk->pg_vec_lock);
501 
502         expected = 0;
503         for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
504                 if (ring->pg_vec == NULL)
505                         continue;
506                 expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
507         }
508 
509         if (expected == 0)
510                 goto out;
511 
512         size = vma->vm_end - vma->vm_start;
513         if (size != expected)
514                 goto out;
515 
516         start = vma->vm_start;
517         for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
518                 if (ring->pg_vec == NULL)
519                         continue;
520 
521                 for (i = 0; i < ring->pg_vec_len; i++) {
522                         struct page *page;
523                         void *kaddr = ring->pg_vec[i];
524                         unsigned int pg_num;
525 
526                         for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
527                                 page = pgvec_to_page(kaddr);
528                                 err = vm_insert_page(vma, start, page);
529                                 if (err < 0)
530                                         goto out;
531                                 start += PAGE_SIZE;
532                                 kaddr += PAGE_SIZE;
533                         }
534                 }
535         }
536 
537         atomic_inc(&nlk->mapped);
538         vma->vm_ops = &netlink_mmap_ops;
539         err = 0;
540 out:
541         mutex_unlock(&nlk->pg_vec_lock);
542         return err;
543 }
544 
545 static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
546 {
547 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
548         struct page *p_start, *p_end;
549 
550         /* First page is flushed through netlink_{get,set}_status */
551         p_start = pgvec_to_page(hdr + PAGE_SIZE);
552         p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
553         while (p_start <= p_end) {
554                 flush_dcache_page(p_start);
555                 p_start++;
556         }
557 #endif
558 }
559 
560 static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
561 {
562         smp_rmb();
563         flush_dcache_page(pgvec_to_page(hdr));
564         return hdr->nm_status;
565 }
566 
567 static void netlink_set_status(struct nl_mmap_hdr *hdr,
568                                enum nl_mmap_status status)
569 {
570         smp_mb();
571         hdr->nm_status = status;
572         flush_dcache_page(pgvec_to_page(hdr));
573 }
574 
575 static struct nl_mmap_hdr *
576 __netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
577 {
578         unsigned int pg_vec_pos, frame_off;
579 
580         pg_vec_pos = pos / ring->frames_per_block;
581         frame_off  = pos % ring->frames_per_block;
582 
583         return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
584 }
585 
586 static struct nl_mmap_hdr *
587 netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
588                      enum nl_mmap_status status)
589 {
590         struct nl_mmap_hdr *hdr;
591 
592         hdr = __netlink_lookup_frame(ring, pos);
593         if (netlink_get_status(hdr) != status)
594                 return NULL;
595 
596         return hdr;
597 }
598 
599 static struct nl_mmap_hdr *
600 netlink_current_frame(const struct netlink_ring *ring,
601                       enum nl_mmap_status status)
602 {
603         return netlink_lookup_frame(ring, ring->head, status);
604 }
605 
606 static struct nl_mmap_hdr *
607 netlink_previous_frame(const struct netlink_ring *ring,
608                        enum nl_mmap_status status)
609 {
610         unsigned int prev;
611 
612         prev = ring->head ? ring->head - 1 : ring->frame_max;
613         return netlink_lookup_frame(ring, prev, status);
614 }
615 
616 static void netlink_increment_head(struct netlink_ring *ring)
617 {
618         ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
619 }
620 
621 static void netlink_forward_ring(struct netlink_ring *ring)
622 {
623         unsigned int head = ring->head, pos = head;
624         const struct nl_mmap_hdr *hdr;
625 
626         do {
627                 hdr = __netlink_lookup_frame(ring, pos);
628                 if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
629                         break;
630                 if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
631                         break;
632                 netlink_increment_head(ring);
633         } while (ring->head != head);
634 }
635 
636 static bool netlink_dump_space(struct netlink_sock *nlk)
637 {
638         struct netlink_ring *ring = &nlk->rx_ring;
639         struct nl_mmap_hdr *hdr;
640         unsigned int n;
641 
642         hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
643         if (hdr == NULL)
644                 return false;
645 
646         n = ring->head + ring->frame_max / 2;
647         if (n > ring->frame_max)
648                 n -= ring->frame_max;
649 
650         hdr = __netlink_lookup_frame(ring, n);
651 
652         return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
653 }
654 
655 static unsigned int netlink_poll(struct file *file, struct socket *sock,
656                                  poll_table *wait)
657 {
658         struct sock *sk = sock->sk;
659         struct netlink_sock *nlk = nlk_sk(sk);
660         unsigned int mask;
661         int err;
662 
663         if (nlk->rx_ring.pg_vec != NULL) {
664                 /* Memory mapped sockets don't call recvmsg(), so flow control
665                  * for dumps is performed here. A dump is allowed to continue
666                  * if at least half the ring is unused.
667                  */
668                 while (nlk->cb_running && netlink_dump_space(nlk)) {
669                         err = netlink_dump(sk);
670                         if (err < 0) {
671                                 sk->sk_err = -err;
672                                 sk->sk_error_report(sk);
673                                 break;
674                         }
675                 }
676                 netlink_rcv_wake(sk);
677         }
678 
679         mask = datagram_poll(file, sock, wait);
680 
681         spin_lock_bh(&sk->sk_receive_queue.lock);
682         if (nlk->rx_ring.pg_vec) {
683                 netlink_forward_ring(&nlk->rx_ring);
684                 if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
685                         mask |= POLLIN | POLLRDNORM;
686         }
687         spin_unlock_bh(&sk->sk_receive_queue.lock);
688 
689         spin_lock_bh(&sk->sk_write_queue.lock);
690         if (nlk->tx_ring.pg_vec) {
691                 if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
692                         mask |= POLLOUT | POLLWRNORM;
693         }
694         spin_unlock_bh(&sk->sk_write_queue.lock);
695 
696         return mask;
697 }
698 
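/*
 * With a mapped RX ring, reception is driven from userspace: poll() for
 * POLLIN, walk the ring, and hand each frame back by writing
 * NL_MMAP_STATUS_UNUSED to its header.  A rough consumer-side sketch,
 * assuming frame(n) yields the nl_mmap_hdr of ring slot n and nl_fd/buf
 * are the socket and a fallback buffer:
 *
 *	struct nl_mmap_hdr *hdr = frame(head);
 *
 *	switch (hdr->nm_status) {
 *	case NL_MMAP_STATUS_VALID:		// payload follows the header
 *		process((void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
 *		break;
 *	case NL_MMAP_STATUS_COPY:		// frame too small, fall back to a copy
 *		recv(nl_fd, buf, sizeof(buf), 0);
 *		break;
 *	}
 *	hdr->nm_status = NL_MMAP_STATUS_UNUSED;
 *	head = (head + 1) % frame_nr;
 */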
699 static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
700 {
701         return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
702 }
703 
704 static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
705                                    struct netlink_ring *ring,
706                                    struct nl_mmap_hdr *hdr)
707 {
708         unsigned int size;
709         void *data;
710 
711         size = ring->frame_size - NL_MMAP_HDRLEN;
712         data = (void *)hdr + NL_MMAP_HDRLEN;
713 
714         skb->head       = data;
715         skb->data       = data;
716         skb_reset_tail_pointer(skb);
717         skb->end        = skb->tail + size;
718         skb->len        = 0;
719 
720         skb->destructor = netlink_skb_destructor;
721         NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
722         NETLINK_CB(skb).sk = sk;
723 }
724 
725 static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
726                                 u32 dst_portid, u32 dst_group,
727                                 struct sock_iocb *siocb)
728 {
729         struct netlink_sock *nlk = nlk_sk(sk);
730         struct netlink_ring *ring;
731         struct nl_mmap_hdr *hdr;
732         struct sk_buff *skb;
733         unsigned int maxlen;
734         int err = 0, len = 0;
735 
736         mutex_lock(&nlk->pg_vec_lock);
737 
738         ring   = &nlk->tx_ring;
739         maxlen = ring->frame_size - NL_MMAP_HDRLEN;
740 
741         do {
742                 unsigned int nm_len;
743 
744                 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
745                 if (hdr == NULL) {
746                         if (!(msg->msg_flags & MSG_DONTWAIT) &&
747                             atomic_read(&nlk->tx_ring.pending))
748                                 schedule();
749                         continue;
750                 }
751 
752                 nm_len = ACCESS_ONCE(hdr->nm_len);
753                 if (nm_len > maxlen) {
754                         err = -EINVAL;
755                         goto out;
756                 }
757 
758                 netlink_frame_flush_dcache(hdr, nm_len);
759 
760                 skb = alloc_skb(nm_len, GFP_KERNEL);
761                 if (skb == NULL) {
762                         err = -ENOBUFS;
763                         goto out;
764                 }
765                 __skb_put(skb, nm_len);
766                 memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
767                 netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
768 
769                 netlink_increment_head(ring);
770 
771                 NETLINK_CB(skb).portid    = nlk->portid;
772                 NETLINK_CB(skb).dst_group = dst_group;
773                 NETLINK_CB(skb).creds     = siocb->scm->creds;
774 
775                 err = security_netlink_send(sk, skb);
776                 if (err) {
777                         kfree_skb(skb);
778                         goto out;
779                 }
780 
781                 if (unlikely(dst_group)) {
782                         atomic_inc(&skb->users);
783                         netlink_broadcast(sk, skb, dst_portid, dst_group,
784                                           GFP_KERNEL);
785                 }
786                 err = netlink_unicast(sk, skb, dst_portid,
787                                       msg->msg_flags & MSG_DONTWAIT);
788                 if (err < 0)
789                         goto out;
790                 len += err;
791 
792         } while (hdr != NULL ||
793                  (!(msg->msg_flags & MSG_DONTWAIT) &&
794                   atomic_read(&nlk->tx_ring.pending)));
795 
796         if (len > 0)
797                 err = len;
798 out:
799         mutex_unlock(&nlk->pg_vec_lock);
800         return err;
801 }
802 
803 static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
804 {
805         struct nl_mmap_hdr *hdr;
806 
807         hdr = netlink_mmap_hdr(skb);
808         hdr->nm_len     = skb->len;
809         hdr->nm_group   = NETLINK_CB(skb).dst_group;
810         hdr->nm_pid     = NETLINK_CB(skb).creds.pid;
811         hdr->nm_uid     = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
812         hdr->nm_gid     = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
813         netlink_frame_flush_dcache(hdr, hdr->nm_len);
814         netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
815 
816         NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
817         kfree_skb(skb);
818 }
819 
820 static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
821 {
822         struct netlink_sock *nlk = nlk_sk(sk);
823         struct netlink_ring *ring = &nlk->rx_ring;
824         struct nl_mmap_hdr *hdr;
825 
826         spin_lock_bh(&sk->sk_receive_queue.lock);
827         hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
828         if (hdr == NULL) {
829                 spin_unlock_bh(&sk->sk_receive_queue.lock);
830                 kfree_skb(skb);
831                 netlink_overrun(sk);
832                 return;
833         }
834         netlink_increment_head(ring);
835         __skb_queue_tail(&sk->sk_receive_queue, skb);
836         spin_unlock_bh(&sk->sk_receive_queue.lock);
837 
838         hdr->nm_len     = skb->len;
839         hdr->nm_group   = NETLINK_CB(skb).dst_group;
840         hdr->nm_pid     = NETLINK_CB(skb).creds.pid;
841         hdr->nm_uid     = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
842         hdr->nm_gid     = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
843         netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
844 }
845 
846 #else /* CONFIG_NETLINK_MMAP */
847 #define netlink_rx_is_mmaped(sk)        false
848 #define netlink_tx_is_mmaped(sk)        false
849 #define netlink_mmap                    sock_no_mmap
850 #define netlink_poll                    datagram_poll
851 #define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb)     0
852 #endif /* CONFIG_NETLINK_MMAP */
853 
854 static void netlink_skb_destructor(struct sk_buff *skb)
855 {
856 #ifdef CONFIG_NETLINK_MMAP
857         struct nl_mmap_hdr *hdr;
858         struct netlink_ring *ring;
859         struct sock *sk;
860 
861         /* If a packet from the kernel to userspace was freed because of an
862          * error without being delivered to userspace, the kernel must reset
863          * the status. In the direction userspace to kernel, the status is
864          * always reset here after the packet was processed and freed.
865          */
866         if (netlink_skb_is_mmaped(skb)) {
867                 hdr = netlink_mmap_hdr(skb);
868                 sk = NETLINK_CB(skb).sk;
869 
870                 if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
871                         netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
872                         ring = &nlk_sk(sk)->tx_ring;
873                 } else {
874                         if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
875                                 hdr->nm_len = 0;
876                                 netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
877                         }
878                         ring = &nlk_sk(sk)->rx_ring;
879                 }
880 
881                 WARN_ON(atomic_read(&ring->pending) == 0);
882                 atomic_dec(&ring->pending);
883                 sock_put(sk);
884 
885                 skb->head = NULL;
886         }
887 #endif
888         if (is_vmalloc_addr(skb->head)) {
889                 if (!skb->cloned ||
890                     !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
891                         vfree(skb->head);
892 
893                 skb->head = NULL;
894         }
895         if (skb->sk != NULL)
896                 sock_rfree(skb);
897 }
898 
899 static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
900 {
901         WARN_ON(skb->sk != NULL);
902         skb->sk = sk;
903         skb->destructor = netlink_skb_destructor;
904         atomic_add(skb->truesize, &sk->sk_rmem_alloc);
905         sk_mem_charge(sk, skb->truesize);
906 }
907 
908 static void netlink_sock_destruct(struct sock *sk)
909 {
910         struct netlink_sock *nlk = nlk_sk(sk);
911 
912         if (nlk->cb_running) {
913                 if (nlk->cb.done)
914                         nlk->cb.done(&nlk->cb);
915 
916                 module_put(nlk->cb.module);
917                 kfree_skb(nlk->cb.skb);
918         }
919 
920         skb_queue_purge(&sk->sk_receive_queue);
921 #ifdef CONFIG_NETLINK_MMAP
922         if (1) {
923                 struct nl_mmap_req req;
924 
925                 memset(&req, 0, sizeof(req));
926                 if (nlk->rx_ring.pg_vec)
927                         __netlink_set_ring(sk, &req, false, NULL, 0);
928                 memset(&req, 0, sizeof(req));
929                 if (nlk->tx_ring.pg_vec)
930                         __netlink_set_ring(sk, &req, true, NULL, 0);
931         }
932 #endif /* CONFIG_NETLINK_MMAP */
933 
934         if (!sock_flag(sk, SOCK_DEAD)) {
935                 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
936                 return;
937         }
938 
939         WARN_ON(atomic_read(&sk->sk_rmem_alloc));
940         WARN_ON(atomic_read(&sk->sk_wmem_alloc));
941         WARN_ON(nlk_sk(sk)->groups);
942 }
943 
944 /* Taking this lock without WQ_FLAG_EXCLUSIVE is fine on UP but _very_ bad on
945  * SMP: when several writers sleep and the reader wakes them up, all but one
946  * immediately hit the write lock and grab all the CPUs. Exclusive sleep
947  * solves this, _but_ remember, it adds useless work on UP machines.
948  */
949 
950 void netlink_table_grab(void)
951         __acquires(nl_table_lock)
952 {
953         might_sleep();
954 
955         write_lock_irq(&nl_table_lock);
956 
957         if (atomic_read(&nl_table_users)) {
958                 DECLARE_WAITQUEUE(wait, current);
959 
960                 add_wait_queue_exclusive(&nl_table_wait, &wait);
961                 for (;;) {
962                         set_current_state(TASK_UNINTERRUPTIBLE);
963                         if (atomic_read(&nl_table_users) == 0)
964                                 break;
965                         write_unlock_irq(&nl_table_lock);
966                         schedule();
967                         write_lock_irq(&nl_table_lock);
968                 }
969 
970                 __set_current_state(TASK_RUNNING);
971                 remove_wait_queue(&nl_table_wait, &wait);
972         }
973 }
974 
975 void netlink_table_ungrab(void)
976         __releases(nl_table_lock)
977 {
978         write_unlock_irq(&nl_table_lock);
979         wake_up(&nl_table_wait);
980 }
981 
982 static inline void
983 netlink_lock_table(void)
984 {
985         /* read_lock() synchronizes us to netlink_table_grab */
986 
987         read_lock(&nl_table_lock);
988         atomic_inc(&nl_table_users);
989         read_unlock(&nl_table_lock);
990 }
991 
992 static inline void
993 netlink_unlock_table(void)
994 {
995         if (atomic_dec_and_test(&nl_table_users))
996                 wake_up(&nl_table_wait);
997 }
998 
999 static bool netlink_compare(struct net *net, struct sock *sk)
1000 {
1001         return net_eq(sock_net(sk), net);
1002 }
1003 
1004 static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
1005 {
1006         struct netlink_table *table = &nl_table[protocol];
1007         struct nl_portid_hash *hash = &table->hash;
1008         struct hlist_head *head;
1009         struct sock *sk;
1010 
1011         read_lock(&nl_table_lock);
1012         head = nl_portid_hashfn(hash, portid);
1013         sk_for_each(sk, head) {
1014                 if (table->compare(net, sk) &&
1015                     (nlk_sk(sk)->portid == portid)) {
1016                         sock_hold(sk);
1017                         goto found;
1018                 }
1019         }
1020         sk = NULL;
1021 found:
1022         read_unlock(&nl_table_lock);
1023         return sk;
1024 }
1025 
1026 static struct hlist_head *nl_portid_hash_zalloc(size_t size)
1027 {
1028         if (size <= PAGE_SIZE)
1029                 return kzalloc(size, GFP_ATOMIC);
1030         else
1031                 return (struct hlist_head *)
1032                         __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
1033                                          get_order(size));
1034 }
1035 
1036 static void nl_portid_hash_free(struct hlist_head *table, size_t size)
1037 {
1038         if (size <= PAGE_SIZE)
1039                 kfree(table);
1040         else
1041                 free_pages((unsigned long)table, get_order(size));
1042 }
1043 
1044 static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow)
1045 {
1046         unsigned int omask, mask, shift;
1047         size_t osize, size;
1048         struct hlist_head *otable, *table;
1049         int i;
1050 
1051         omask = mask = hash->mask;
1052         osize = size = (mask + 1) * sizeof(*table);
1053         shift = hash->shift;
1054 
1055         if (grow) {
1056                 if (++shift > hash->max_shift)
1057                         return 0;
1058                 mask = mask * 2 + 1;
1059                 size *= 2;
1060         }
1061 
1062         table = nl_portid_hash_zalloc(size);
1063         if (!table)
1064                 return 0;
1065 
1066         otable = hash->table;
1067         hash->table = table;
1068         hash->mask = mask;
1069         hash->shift = shift;
1070         get_random_bytes(&hash->rnd, sizeof(hash->rnd));
1071 
1072         for (i = 0; i <= omask; i++) {
1073                 struct sock *sk;
1074                 struct hlist_node *tmp;
1075 
1076                 sk_for_each_safe(sk, tmp, &otable[i])
1077                         __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid));
1078         }
1079 
1080         nl_portid_hash_free(otable, osize);
1081         hash->rehash_time = jiffies + 10 * 60 * HZ;
1082         return 1;
1083 }
1084 
1085 static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len)
1086 {
1087         int avg = hash->entries >> hash->shift;
1088 
1089         if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1))
1090                 return 1;
1091 
1092         if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
1093                 nl_portid_hash_rehash(hash, 0);
1094                 return 1;
1095         }
1096 
1097         return 0;
1098 }
1099 
1100 static const struct proto_ops netlink_ops;
1101 
1102 static void
1103 netlink_update_listeners(struct sock *sk)
1104 {
1105         struct netlink_table *tbl = &nl_table[sk->sk_protocol];
1106         unsigned long mask;
1107         unsigned int i;
1108         struct listeners *listeners;
1109 
1110         listeners = nl_deref_protected(tbl->listeners);
1111         if (!listeners)
1112                 return;
1113 
1114         for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
1115                 mask = 0;
1116                 sk_for_each_bound(sk, &tbl->mc_list) {
1117                         if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
1118                                 mask |= nlk_sk(sk)->groups[i];
1119                 }
1120                 listeners->masks[i] = mask;
1121         }
1122         /* this function is only called with the netlink table "grabbed", which
1123          * makes sure updates are visible before bind or setsockopt return. */
1124 }
1125 
1126 static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
1127 {
1128         struct netlink_table *table = &nl_table[sk->sk_protocol];
1129         struct nl_portid_hash *hash = &table->hash;
1130         struct hlist_head *head;
1131         int err = -EADDRINUSE;
1132         struct sock *osk;
1133         int len;
1134 
1135         netlink_table_grab();
1136         head = nl_portid_hashfn(hash, portid);
1137         len = 0;
1138         sk_for_each(osk, head) {
1139                 if (table->compare(net, osk) &&
1140                     (nlk_sk(osk)->portid == portid))
1141                         break;
1142                 len++;
1143         }
1144         if (osk)
1145                 goto err;
1146 
1147         err = -EBUSY;
1148         if (nlk_sk(sk)->portid)
1149                 goto err;
1150 
1151         err = -ENOMEM;
1152         if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
1153                 goto err;
1154 
1155         if (len && nl_portid_hash_dilute(hash, len))
1156                 head = nl_portid_hashfn(hash, portid);
1157         hash->entries++;
1158         nlk_sk(sk)->portid = portid;
1159         sk_add_node(sk, head);
1160         err = 0;
1161 
1162 err:
1163         netlink_table_ungrab();
1164         return err;
1165 }
1166 
1167 static void netlink_remove(struct sock *sk)
1168 {
1169         netlink_table_grab();
1170         if (sk_del_node_init(sk))
1171                 nl_table[sk->sk_protocol].hash.entries--;
1172         if (nlk_sk(sk)->subscriptions)
1173                 __sk_del_bind_node(sk);
1174         netlink_table_ungrab();
1175 }
1176 
1177 static struct proto netlink_proto = {
1178         .name     = "NETLINK",
1179         .owner    = THIS_MODULE,
1180         .obj_size = sizeof(struct netlink_sock),
1181 };
1182 
1183 static int __netlink_create(struct net *net, struct socket *sock,
1184                             struct mutex *cb_mutex, int protocol)
1185 {
1186         struct sock *sk;
1187         struct netlink_sock *nlk;
1188 
1189         sock->ops = &netlink_ops;
1190 
1191         sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
1192         if (!sk)
1193                 return -ENOMEM;
1194 
1195         sock_init_data(sock, sk);
1196 
1197         nlk = nlk_sk(sk);
1198         if (cb_mutex) {
1199                 nlk->cb_mutex = cb_mutex;
1200         } else {
1201                 nlk->cb_mutex = &nlk->cb_def_mutex;
1202                 mutex_init(nlk->cb_mutex);
1203         }
1204         init_waitqueue_head(&nlk->wait);
1205 #ifdef CONFIG_NETLINK_MMAP
1206         mutex_init(&nlk->pg_vec_lock);
1207 #endif
1208 
1209         sk->sk_destruct = netlink_sock_destruct;
1210         sk->sk_protocol = protocol;
1211         return 0;
1212 }
1213 
1214 static int netlink_create(struct net *net, struct socket *sock, int protocol,
1215                           int kern)
1216 {
1217         struct module *module = NULL;
1218         struct mutex *cb_mutex;
1219         struct netlink_sock *nlk;
1220         void (*bind)(int group);
1221         int err = 0;
1222 
1223         sock->state = SS_UNCONNECTED;
1224 
1225         if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
1226                 return -ESOCKTNOSUPPORT;
1227 
1228         if (protocol < 0 || protocol >= MAX_LINKS)
1229                 return -EPROTONOSUPPORT;
1230 
1231         netlink_lock_table();
1232 #ifdef CONFIG_MODULES
1233         if (!nl_table[protocol].registered) {
1234                 netlink_unlock_table();
1235                 request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
1236                 netlink_lock_table();
1237         }
1238 #endif
1239         if (nl_table[protocol].registered &&
1240             try_module_get(nl_table[protocol].module))
1241                 module = nl_table[protocol].module;
1242         else
1243                 err = -EPROTONOSUPPORT;
1244         cb_mutex = nl_table[protocol].cb_mutex;
1245         bind = nl_table[protocol].bind;
1246         netlink_unlock_table();
1247 
1248         if (err < 0)
1249                 goto out;
1250 
1251         err = __netlink_create(net, sock, cb_mutex, protocol);
1252         if (err < 0)
1253                 goto out_module;
1254 
1255         local_bh_disable();
1256         sock_prot_inuse_add(net, &netlink_proto, 1);
1257         local_bh_enable();
1258 
1259         nlk = nlk_sk(sock->sk);
1260         nlk->module = module;
1261         nlk->netlink_bind = bind;
1262 out:
1263         return err;
1264 
1265 out_module:
1266         module_put(module);
1267         goto out;
1268 }
1269 
1270 static int netlink_release(struct socket *sock)
1271 {
1272         struct sock *sk = sock->sk;
1273         struct netlink_sock *nlk;
1274 
1275         if (!sk)
1276                 return 0;
1277 
1278         netlink_remove(sk);
1279         sock_orphan(sk);
1280         nlk = nlk_sk(sk);
1281 
1282         /*
1283          * OK. Socket is unlinked, any packets that arrive now
1284          * will be purged.
1285          */
1286 
1287         sock->sk = NULL;
1288         wake_up_interruptible_all(&nlk->wait);
1289 
1290         skb_queue_purge(&sk->sk_write_queue);
1291 
1292         if (nlk->portid) {
1293                 struct netlink_notify n = {
1294                                                 .net = sock_net(sk),
1295                                                 .protocol = sk->sk_protocol,
1296                                                 .portid = nlk->portid,
1297                                           };
1298                 atomic_notifier_call_chain(&netlink_chain,
1299                                 NETLINK_URELEASE, &n);
1300         }
1301 
1302         module_put(nlk->module);
1303 
1304         netlink_table_grab();
1305         if (netlink_is_kernel(sk)) {
1306                 BUG_ON(nl_table[sk->sk_protocol].registered == 0);
1307                 if (--nl_table[sk->sk_protocol].registered == 0) {
1308                         struct listeners *old;
1309 
1310                         old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
1311                         RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
1312                         kfree_rcu(old, rcu);
1313                         nl_table[sk->sk_protocol].module = NULL;
1314                         nl_table[sk->sk_protocol].bind = NULL;
1315                         nl_table[sk->sk_protocol].flags = 0;
1316                         nl_table[sk->sk_protocol].registered = 0;
1317                 }
1318         } else if (nlk->subscriptions) {
1319                 netlink_update_listeners(sk);
1320         }
1321         netlink_table_ungrab();
1322 
1323         kfree(nlk->groups);
1324         nlk->groups = NULL;
1325 
1326         local_bh_disable();
1327         sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
1328         local_bh_enable();
1329         sock_put(sk);
1330         return 0;
1331 }
1332 
1333 static int netlink_autobind(struct socket *sock)
1334 {
1335         struct sock *sk = sock->sk;
1336         struct net *net = sock_net(sk);
1337         struct netlink_table *table = &nl_table[sk->sk_protocol];
1338         struct nl_portid_hash *hash = &table->hash;
1339         struct hlist_head *head;
1340         struct sock *osk;
1341         s32 portid = task_tgid_vnr(current);
1342         int err;
1343         static s32 rover = -4097;
1344 
1345 retry:
1346         cond_resched();
1347         netlink_table_grab();
1348         head = nl_portid_hashfn(hash, portid);
1349         sk_for_each(osk, head) {
1350                 if (!table->compare(net, osk))
1351                         continue;
1352                 if (nlk_sk(osk)->portid == portid) {
1353                         /* Bind collision, search negative portid values. */
1354                         portid = rover--;
1355                         if (rover > -4097)
1356                                 rover = -4097;
1357                         netlink_table_ungrab();
1358                         goto retry;
1359                 }
1360         }
1361         netlink_table_ungrab();
1362 
1363         err = netlink_insert(sk, net, portid);
1364         if (err == -EADDRINUSE)
1365                 goto retry;
1366 
1367         /* If 2 threads race to autobind, that is fine.  */
1368         if (err == -EBUSY)
1369                 err = 0;
1370 
1371         return err;
1372 }
1373 
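/*
 * netlink_autobind() hands out an address whenever userspace binds with
 * nl_pid == 0 (or sends without binding at all).  Roughly, from userspace
 * (nl_fd being an open netlink socket):
 *
 *	struct sockaddr_nl sa = { .nl_family = AF_NETLINK };	// nl_pid left 0
 *	socklen_t len = sizeof(sa);
 *
 *	bind(nl_fd, (struct sockaddr *)&sa, sizeof(sa));
 *	getsockname(nl_fd, (struct sockaddr *)&sa, &len);
 *	// sa.nl_pid now holds the kernel-assigned port id
 */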
1374 /**
1375  * __netlink_ns_capable - General netlink message capability test
1376  * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace.
1377  * @user_ns: The user namespace of the capability to use
1378  * @cap: The capability to use
1379  *
1380  * Test to see if the opener of the socket we received the message
1381  * from had the capability @cap in the user namespace @user_ns when the
1382  * netlink socket was created, and the sender of the message has it too.
1383  */
1384 bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
1385                         struct user_namespace *user_ns, int cap)
1386 {
1387         return ((nsp->flags & NETLINK_SKB_DST) ||
1388                 file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) &&
1389                 ns_capable(user_ns, cap);
1390 }
1391 EXPORT_SYMBOL(__netlink_ns_capable);
1392 
1393 /**
1394  * netlink_ns_capable - General netlink message capability test
1395  * @skb: socket buffer holding a netlink command from userspace
1396  * @user_ns: The user namespace of the capability to use
1397  * @cap: The capability to use
1398  *
1399  * Test to see if the opener of the socket we received the message
1400  * from had the capability @cap in the user namespace @user_ns when the
1401  * netlink socket was created, and the sender of the message has it too.
1402  */
1403 bool netlink_ns_capable(const struct sk_buff *skb,
1404                         struct user_namespace *user_ns, int cap)
1405 {
1406         return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap);
1407 }
1408 EXPORT_SYMBOL(netlink_ns_capable);
1409 
1410 /**
1411  * netlink_capable - Netlink global message capability test
1412  * @skb: socket buffer holding a netlink command from userspace
1413  * @cap: The capability to use
1414  *
1415  * Test to see if the opener of the socket we received the message
1416  * from had the capability @cap in all user namespaces when the netlink
1417  * socket was created, and the sender of the message has it too.
1418  */
1419 bool netlink_capable(const struct sk_buff *skb, int cap)
1420 {
1421         return netlink_ns_capable(skb, &init_user_ns, cap);
1422 }
1423 EXPORT_SYMBOL(netlink_capable);
1424 
1425 /**
1426  * netlink_net_capable - Netlink network namespace message capability test
1427  * @skb: socket buffer holding a netlink command from userspace
1428  * @cap: The capability to use
1429  *
1430  * Test to see if the opener of the socket we received the message
1431  * from had the capability @cap over the network namespace of the socket
1432  * we received the message from when the netlink socket was created, and
1433  * the sender of the message has it too.
1434  */
1435 bool netlink_net_capable(const struct sk_buff *skb, int cap)
1436 {
1437         return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap);
1438 }
1439 EXPORT_SYMBOL(netlink_net_capable);
1440 
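/*
 * These capability helpers are what protocol message handlers use for
 * permission checks before acting on a request, e.g. (a generic sketch,
 * not lifted from any particular subsystem):
 *
 *	static int my_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
 *	{
 *		if (!netlink_net_capable(skb, CAP_NET_ADMIN))
 *			return -EPERM;
 *		// ... perform the privileged operation ...
 *		return 0;
 *	}
 */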
1441 static inline int netlink_allowed(const struct socket *sock, unsigned int flag)
1442 {
1443         return (nl_table[sock->sk->sk_protocol].flags & flag) ||
1444                 ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
1445 }
1446 
1447 static void
1448 netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
1449 {
1450         struct netlink_sock *nlk = nlk_sk(sk);
1451 
1452         if (nlk->subscriptions && !subscriptions)
1453                 __sk_del_bind_node(sk);
1454         else if (!nlk->subscriptions && subscriptions)
1455                 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
1456         nlk->subscriptions = subscriptions;
1457 }
1458 
1459 static int netlink_realloc_groups(struct sock *sk)
1460 {
1461         struct netlink_sock *nlk = nlk_sk(sk);
1462         unsigned int groups;
1463         unsigned long *new_groups;
1464         int err = 0;
1465 
1466         netlink_table_grab();
1467 
1468         groups = nl_table[sk->sk_protocol].groups;
1469         if (!nl_table[sk->sk_protocol].registered) {
1470                 err = -ENOENT;
1471                 goto out_unlock;
1472         }
1473 
1474         if (nlk->ngroups >= groups)
1475                 goto out_unlock;
1476 
1477         new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
1478         if (new_groups == NULL) {
1479                 err = -ENOMEM;
1480                 goto out_unlock;
1481         }
1482         memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
1483                NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));
1484 
1485         nlk->groups = new_groups;
1486         nlk->ngroups = groups;
1487  out_unlock:
1488         netlink_table_ungrab();
1489         return err;
1490 }
1491 
1492 static int netlink_bind(struct socket *sock, struct sockaddr *addr,
1493                         int addr_len)
1494 {
1495         struct sock *sk = sock->sk;
1496         struct net *net = sock_net(sk);
1497         struct netlink_sock *nlk = nlk_sk(sk);
1498         struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
1499         int err;
1500 
1501         if (addr_len < sizeof(struct sockaddr_nl))
1502                 return -EINVAL;
1503 
1504         if (nladdr->nl_family != AF_NETLINK)
1505                 return -EINVAL;
1506 
1507         /* Only superuser is allowed to listen to multicasts */
1508         if (nladdr->nl_groups) {
1509                 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
1510                         return -EPERM;
1511                 err = netlink_realloc_groups(sk);
1512                 if (err)
1513                         return err;
1514         }
1515 
1516         if (nlk->portid) {
1517                 if (nladdr->nl_pid != nlk->portid)
1518                         return -EINVAL;
1519         } else {
1520                 err = nladdr->nl_pid ?
1521                         netlink_insert(sk, net, nladdr->nl_pid) :
1522                         netlink_autobind(sock);
1523                 if (err)
1524                         return err;
1525         }
1526 
1527         if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
1528                 return 0;
1529 
1530         netlink_table_grab();
1531         netlink_update_subscriptions(sk, nlk->subscriptions +
1532                                          hweight32(nladdr->nl_groups) -
1533                                          hweight32(nlk->groups[0]));
1534         nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
1535         netlink_update_listeners(sk);
1536         netlink_table_ungrab();
1537 
1538         if (nlk->netlink_bind && nlk->groups[0]) {
1539                 int i;
1540 
1541                 for (i=0; i<nlk->ngroups; i++) {
1542                         if (test_bit(i, nlk->groups))
1543                                 nlk->netlink_bind(i);
1544                 }
1545         }
1546 
1547         return 0;
1548 }
1549 
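/*
 * Multicast subscription also goes through bind(): nl_groups is a 32-bit
 * mask covering the first 32 groups (higher group numbers use the
 * NETLINK_ADD_MEMBERSHIP socket option instead).  A rough userspace sketch
 * for an rtnetlink link-event listener, nl_fd being the socket:
 *
 *	struct sockaddr_nl sa = {
 *		.nl_family = AF_NETLINK,
 *		.nl_groups = RTMGRP_LINK,	// a bitmask, not a group index
 *	};
 *
 *	bind(nl_fd, (struct sockaddr *)&sa, sizeof(sa));
 */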
1550 static int netlink_connect(struct socket *sock, struct sockaddr *addr,
1551                            int alen, int flags)
1552 {
1553         int err = 0;
1554         struct sock *sk = sock->sk;
1555         struct netlink_sock *nlk = nlk_sk(sk);
1556         struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
1557 
1558         if (alen < sizeof(addr->sa_family))
1559                 return -EINVAL;
1560 
1561         if (addr->sa_family == AF_UNSPEC) {
1562                 sk->sk_state    = NETLINK_UNCONNECTED;
1563                 nlk->dst_portid = 0;
1564                 nlk->dst_group  = 0;
1565                 return 0;
1566         }
1567         if (addr->sa_family != AF_NETLINK)
1568                 return -EINVAL;
1569 
1570         if ((nladdr->nl_groups || nladdr->nl_pid) &&
1571             !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
1572                 return -EPERM;
1573 
1574         if (!nlk->portid)
1575                 err = netlink_autobind(sock);
1576 
1577         if (err == 0) {
1578                 sk->sk_state    = NETLINK_CONNECTED;
1579                 nlk->dst_portid = nladdr->nl_pid;
1580                 nlk->dst_group  = ffs(nladdr->nl_groups);
1581         }
1582 
1583         return err;
1584 }
1585 
1586 static int netlink_getname(struct socket *sock, struct sockaddr *addr,
1587                            int *addr_len, int peer)
1588 {
1589         struct sock *sk = sock->sk;
1590         struct netlink_sock *nlk = nlk_sk(sk);
1591         DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);
1592 
1593         nladdr->nl_family = AF_NETLINK;
1594         nladdr->nl_pad = 0;
1595         *addr_len = sizeof(*nladdr);
1596 
1597         if (peer) {
1598                 nladdr->nl_pid = nlk->dst_portid;
1599                 nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
1600         } else {
1601                 nladdr->nl_pid = nlk->portid;
1602                 nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
1603         }
1604         return 0;
1605 }
1606 
1607 static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
1608 {
1609         struct sock *sock;
1610         struct netlink_sock *nlk;
1611 
1612         sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
1613         if (!sock)
1614                 return ERR_PTR(-ECONNREFUSED);
1615 
1616         /* Don't bother queuing skb if kernel socket has no input function */
1617         nlk = nlk_sk(sock);
1618         if (sock->sk_state == NETLINK_CONNECTED &&
1619             nlk->dst_portid != nlk_sk(ssk)->portid) {
1620                 sock_put(sock);
1621                 return ERR_PTR(-ECONNREFUSED);
1622         }
1623         return sock;
1624 }
1625 
1626 struct sock *netlink_getsockbyfilp(struct file *filp)
1627 {
1628         struct inode *inode = file_inode(filp);
1629         struct sock *sock;
1630 
1631         if (!S_ISSOCK(inode->i_mode))
1632                 return ERR_PTR(-ENOTSOCK);
1633 
1634         sock = SOCKET_I(inode)->sk;
1635         if (sock->sk_family != AF_NETLINK)
1636                 return ERR_PTR(-EINVAL);
1637 
1638         sock_hold(sock);
1639         return sock;
1640 }
1641 
1642 static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
1643                                                int broadcast)
1644 {
1645         struct sk_buff *skb;
1646         void *data;
1647 
1648         if (size <= NLMSG_GOODSIZE || broadcast)
1649                 return alloc_skb(size, GFP_KERNEL);
1650 
1651         size = SKB_DATA_ALIGN(size) +
1652                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1653 
1654         data = vmalloc(size);
1655         if (data == NULL)
1656                 return NULL;
1657 
1658         skb = __build_skb(data, size);
1659         if (skb == NULL)
1660                 vfree(data);
1661         else
1662                 skb->destructor = netlink_skb_destructor;
1663 
1664         return skb;
1665 }
1666 
1667 /*
1668  * Attach a skb to a netlink socket.
1669  * The caller must hold a reference to the destination socket. On error, the
1670  * reference is dropped. The skb is not sent to the destination, just
1671  * all error checks are performed and memory in the queue is reserved.
1672  * Return values:
1673  * < 0: error. skb freed, reference to sock dropped.
1674  * 0: continue
1675  * 1: repeat lookup - reference dropped while waiting for socket memory.
1676  */
1677 int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
1678                       long *timeo, struct sock *ssk)
1679 {
1680         struct netlink_sock *nlk;
1681 
1682         nlk = nlk_sk(sk);
1683 
1684         if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
1685              test_bit(NETLINK_CONGESTED, &nlk->state)) &&
1686             !netlink_skb_is_mmaped(skb)) {
1687                 DECLARE_WAITQUEUE(wait, current);
1688                 if (!*timeo) {
1689                         if (!ssk || netlink_is_kernel(ssk))
1690                                 netlink_overrun(sk);
1691                         sock_put(sk);
1692                         kfree_skb(skb);
1693                         return -EAGAIN;
1694                 }
1695 
1696                 __set_current_state(TASK_INTERRUPTIBLE);
1697                 add_wait_queue(&nlk->wait, &wait);
1698 
1699                 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
1700                      test_bit(NETLINK_CONGESTED, &nlk->state)) &&
1701                     !sock_flag(sk, SOCK_DEAD))
1702                         *timeo = schedule_timeout(*timeo);
1703 
1704                 __set_current_state(TASK_RUNNING);
1705                 remove_wait_queue(&nlk->wait, &wait);
1706                 sock_put(sk);
1707 
1708                 if (signal_pending(current)) {
1709                         kfree_skb(skb);
1710                         return sock_intr_errno(*timeo);
1711                 }
1712                 return 1;
1713         }
1714         netlink_skb_set_owner_r(skb, sk);
1715         return 0;
1716 }
1717 
1718 static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
1719 {
1720         int len = skb->len;
1721 
1722         netlink_deliver_tap(skb);
1723 
1724 #ifdef CONFIG_NETLINK_MMAP
1725         if (netlink_skb_is_mmaped(skb))
1726                 netlink_queue_mmaped_skb(sk, skb);
1727         else if (netlink_rx_is_mmaped(sk))
1728                 netlink_ring_set_copied(sk, skb);
1729         else
1730 #endif /* CONFIG_NETLINK_MMAP */
1731                 skb_queue_tail(&sk->sk_receive_queue, skb);
1732         sk->sk_data_ready(sk, len);
1733         return len;
1734 }
1735 
1736 int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
1737 {
1738         int len = __netlink_sendskb(sk, skb);
1739 
1740         sock_put(sk);
1741         return len;
1742 }
1743 
1744 void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
1745 {
1746         kfree_skb(skb);
1747         sock_put(sk);
1748 }
1749 
1750 static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
1751 {
1752         int delta;
1753 
1754         WARN_ON(skb->sk != NULL);
1755         if (netlink_skb_is_mmaped(skb))
1756                 return skb;
1757 
1758         delta = skb->end - skb->tail;
1759         if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
1760                 return skb;
1761 
1762         if (skb_shared(skb)) {
1763                 struct sk_buff *nskb = skb_clone(skb, allocation);
1764                 if (!nskb)
1765                         return skb;
1766                 consume_skb(skb);
1767                 skb = nskb;
1768         }
1769 
1770         if (!pskb_expand_head(skb, 0, -delta, allocation))
1771                 skb->truesize -= delta;
1772 
1773         return skb;
1774 }
1775 
1776 static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
1777                                   struct sock *ssk)
1778 {
1779         int ret;
1780         struct netlink_sock *nlk = nlk_sk(sk);
1781 
1782         ret = -ECONNREFUSED;
1783         if (nlk->netlink_rcv != NULL) {
1784                 ret = skb->len;
1785                 netlink_skb_set_owner_r(skb, sk);
1786                 NETLINK_CB(skb).sk = ssk;
1787                 netlink_deliver_tap_kernel(sk, ssk, skb);
1788                 nlk->netlink_rcv(skb);
1789                 consume_skb(skb);
1790         } else {
1791                 kfree_skb(skb);
1792         }
1793         sock_put(sk);
1794         return ret;
1795 }
1796 
1797 int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
1798                     u32 portid, int nonblock)
1799 {
1800         struct sock *sk;
1801         int err;
1802         long timeo;
1803 
1804         skb = netlink_trim(skb, gfp_any());
1805 
1806         timeo = sock_sndtimeo(ssk, nonblock);
1807 retry:
1808         sk = netlink_getsockbyportid(ssk, portid);
1809         if (IS_ERR(sk)) {
1810                 kfree_skb(skb);
1811                 return PTR_ERR(sk);
1812         }
1813         if (netlink_is_kernel(sk))
1814                 return netlink_unicast_kernel(sk, skb, ssk);
1815 
1816         if (sk_filter(sk, skb)) {
1817                 err = skb->len;
1818                 kfree_skb(skb);
1819                 sock_put(sk);
1820                 return err;
1821         }
1822 
1823         err = netlink_attachskb(sk, skb, &timeo, ssk);
1824         if (err == 1)
1825                 goto retry;
1826         if (err)
1827                 return err;
1828 
1829         return netlink_sendskb(sk, skb);
1830 }
1831 EXPORT_SYMBOL(netlink_unicast);
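
/*
 * Illustrative kernel-side sketch (not part of af_netlink.c): sending a
 * unicast reply from a kernel netlink socket. example_reply and the payload
 * are hypothetical; NLMSG_DONE stands in for a subsystem-specific message
 * type. netlink_unicast() consumes the skb and returns the delivered length
 * or a negative error, as shown above.
 */
static int example_reply(struct sock *nlsk, u32 portid, u32 seq)
{
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	u32 *payload;

	skb = nlmsg_new(sizeof(*payload), GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	nlh = nlmsg_put(skb, 0, seq, NLMSG_DONE, sizeof(*payload), 0);
	if (!nlh) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}
	payload = nlmsg_data(nlh);
	*payload = 42;

	return netlink_unicast(nlsk, skb, portid, MSG_DONTWAIT);
}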
1832 
1833 struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
1834                                   u32 dst_portid, gfp_t gfp_mask)
1835 {
1836 #ifdef CONFIG_NETLINK_MMAP
1837         struct sock *sk = NULL;
1838         struct sk_buff *skb;
1839         struct netlink_ring *ring;
1840         struct nl_mmap_hdr *hdr;
1841         unsigned int maxlen;
1842 
1843         sk = netlink_getsockbyportid(ssk, dst_portid);
1844         if (IS_ERR(sk))
1845                 goto out;
1846 
1847         ring = &nlk_sk(sk)->rx_ring;
1848         /* fast-path without atomic ops for common case: non-mmaped receiver */
1849         if (ring->pg_vec == NULL)
1850                 goto out_put;
1851 
1852         if (ring->frame_size - NL_MMAP_HDRLEN < size)
1853                 goto out_put;
1854 
1855         skb = alloc_skb_head(gfp_mask);
1856         if (skb == NULL)
1857                 goto err1;
1858 
1859         spin_lock_bh(&sk->sk_receive_queue.lock);
1860         /* check again under lock */
1861         if (ring->pg_vec == NULL)
1862                 goto out_free;
1863 
1864         /* check again under lock */
1865         maxlen = ring->frame_size - NL_MMAP_HDRLEN;
1866         if (maxlen < size)
1867                 goto out_free;
1868 
1869         netlink_forward_ring(ring);
1870         hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
1871         if (hdr == NULL)
1872                 goto err2;
1873         netlink_ring_setup_skb(skb, sk, ring, hdr);
1874         netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
1875         atomic_inc(&ring->pending);
1876         netlink_increment_head(ring);
1877 
1878         spin_unlock_bh(&sk->sk_receive_queue.lock);
1879         return skb;
1880 
1881 err2:
1882         kfree_skb(skb);
1883         spin_unlock_bh(&sk->sk_receive_queue.lock);
1884         netlink_overrun(sk);
1885 err1:
1886         sock_put(sk);
1887         return NULL;
1888 
1889 out_free:
1890         kfree_skb(skb);
1891         spin_unlock_bh(&sk->sk_receive_queue.lock);
1892 out_put:
1893         sock_put(sk);
1894 out:
1895 #endif
1896         return alloc_skb(size, gfp_mask);
1897 }
1898 EXPORT_SYMBOL_GPL(netlink_alloc_skb);
1899 
1900 int netlink_has_listeners(struct sock *sk, unsigned int group)
1901 {
1902         int res = 0;
1903         struct listeners *listeners;
1904 
1905         BUG_ON(!netlink_is_kernel(sk));
1906 
1907         rcu_read_lock();
1908         listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
1909 
1910         if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
1911                 res = test_bit(group - 1, listeners->masks);
1912 
1913         rcu_read_unlock();
1914 
1915         return res;
1916 }
1917 EXPORT_SYMBOL_GPL(netlink_has_listeners);
1918 
1919 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
1920 {
1921         struct netlink_sock *nlk = nlk_sk(sk);
1922 
1923         if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
1924             !test_bit(NETLINK_CONGESTED, &nlk->state)) {
1925                 netlink_skb_set_owner_r(skb, sk);
1926                 __netlink_sendskb(sk, skb);
1927                 return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
1928         }
1929         return -1;
1930 }
1931 
1932 struct netlink_broadcast_data {
1933         struct sock *exclude_sk;
1934         struct net *net;
1935         u32 portid;
1936         u32 group;
1937         int failure;
1938         int delivery_failure;
1939         int congested;
1940         int delivered;
1941         gfp_t allocation;
1942         struct sk_buff *skb, *skb2;
1943         int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
1944         void *tx_data;
1945 };
1946 
1947 static int do_one_broadcast(struct sock *sk,
1948                                    struct netlink_broadcast_data *p)
1949 {
1950         struct netlink_sock *nlk = nlk_sk(sk);
1951         int val;
1952 
1953         if (p->exclude_sk == sk)
1954                 goto out;
1955 
1956         if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
1957             !test_bit(p->group - 1, nlk->groups))
1958                 goto out;
1959 
1960         if (!net_eq(sock_net(sk), p->net))
1961                 goto out;
1962 
1963         if (p->failure) {
1964                 netlink_overrun(sk);
1965                 goto out;
1966         }
1967 
1968         sock_hold(sk);
1969         if (p->skb2 == NULL) {
1970                 if (skb_shared(p->skb)) {
1971                         p->skb2 = skb_clone(p->skb, p->allocation);
1972                 } else {
1973                         p->skb2 = skb_get(p->skb);
1974                         /*
1975                          * skb ownership may have been set when
1976                          * delivered to a previous socket.
1977                          */
1978                         skb_orphan(p->skb2);
1979                 }
1980         }
1981         if (p->skb2 == NULL) {
1982                 netlink_overrun(sk);
1983                 /* Clone failed. Notify ALL listeners. */
1984                 p->failure = 1;
1985                 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1986                         p->delivery_failure = 1;
1987         } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
1988                 kfree_skb(p->skb2);
1989                 p->skb2 = NULL;
1990         } else if (sk_filter(sk, p->skb2)) {
1991                 kfree_skb(p->skb2);
1992                 p->skb2 = NULL;
1993         } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
1994                 netlink_overrun(sk);
1995                 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1996                         p->delivery_failure = 1;
1997         } else {
1998                 p->congested |= val;
1999                 p->delivered = 1;
2000                 p->skb2 = NULL;
2001         }
2002         sock_put(sk);
2003 
2004 out:
2005         return 0;
2006 }
2007 
2008 int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
2009         u32 group, gfp_t allocation,
2010         int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
2011         void *filter_data)
2012 {
2013         struct net *net = sock_net(ssk);
2014         struct netlink_broadcast_data info;
2015         struct sock *sk;
2016 
2017         skb = netlink_trim(skb, allocation);
2018 
2019         info.exclude_sk = ssk;
2020         info.net = net;
2021         info.portid = portid;
2022         info.group = group;
2023         info.failure = 0;
2024         info.delivery_failure = 0;
2025         info.congested = 0;
2026         info.delivered = 0;
2027         info.allocation = allocation;
2028         info.skb = skb;
2029         info.skb2 = NULL;
2030         info.tx_filter = filter;
2031         info.tx_data = filter_data;
2032 
2033         /* While we sleep in clone, do not allow the socket list to change */
2034 
2035         netlink_lock_table();
2036 
2037         sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
2038                 do_one_broadcast(sk, &info);
2039 
2040         consume_skb(skb);
2041 
2042         netlink_unlock_table();
2043 
2044         if (info.delivery_failure) {
2045                 kfree_skb(info.skb2);
2046                 return -ENOBUFS;
2047         }
2048         consume_skb(info.skb2);
2049 
2050         if (info.delivered) {
2051                 if (info.congested && (allocation & __GFP_WAIT))
2052                         yield();
2053                 return 0;
2054         }
2055         return -ESRCH;
2056 }
2057 EXPORT_SYMBOL(netlink_broadcast_filtered);
2058 
2059 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
2060                       u32 group, gfp_t allocation)
2061 {
2062         return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
2063                 NULL, NULL);
2064 }
2065 EXPORT_SYMBOL(netlink_broadcast);
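
/*
 * Illustrative kernel-side sketch (not part of af_netlink.c): a typical
 * notification path first asks netlink_has_listeners() so the skb is not
 * even allocated when nobody has subscribed, then broadcasts to the group.
 * All names are hypothetical and NLMSG_DONE is a placeholder message type.
 */
static void example_notify(struct sock *nlsk, unsigned int group)
{
	struct sk_buff *skb;
	struct nlmsghdr *nlh;

	if (!netlink_has_listeners(nlsk, group))
		return;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return;

	nlh = nlmsg_put(skb, 0, 0, NLMSG_DONE, 0, 0);
	if (!nlh) {
		kfree_skb(skb);
		return;
	}

	/* portid 0: exclude nobody; netlink_broadcast() consumes the skb */
	netlink_broadcast(nlsk, skb, 0, group, GFP_KERNEL);
}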
2066 
2067 struct netlink_set_err_data {
2068         struct sock *exclude_sk;
2069         u32 portid;
2070         u32 group;
2071         int code;
2072 };
2073 
2074 static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
2075 {
2076         struct netlink_sock *nlk = nlk_sk(sk);
2077         int ret = 0;
2078 
2079         if (sk == p->exclude_sk)
2080                 goto out;
2081 
2082         if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
2083                 goto out;
2084 
2085         if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
2086             !test_bit(p->group - 1, nlk->groups))
2087                 goto out;
2088 
2089         if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
2090                 ret = 1;
2091                 goto out;
2092         }
2093 
2094         sk->sk_err = p->code;
2095         sk->sk_error_report(sk);
2096 out:
2097         return ret;
2098 }
2099 
2100 /**
2101  * netlink_set_err - report error to broadcast listeners
2102  * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
2103  * @portid: the PORTID of a process that we want to skip (if any)
2104  * @group: the broadcast group that will notice the error
2105  * @code: error code, must be negative (as usual in kernelspace)
2106  *
2107  * This function returns the number of broadcast listeners that have set the
2108  * NETLINK_RECV_NO_ENOBUFS socket option.
2109  */
2110 int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
2111 {
2112         struct netlink_set_err_data info;
2113         struct sock *sk;
2114         int ret = 0;
2115 
2116         info.exclude_sk = ssk;
2117         info.portid = portid;
2118         info.group = group;
2119         /* sk->sk_err wants a positive error value */
2120         info.code = -code;
2121 
2122         read_lock(&nl_table_lock);
2123 
2124         sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
2125                 ret += do_one_set_err(sk, &info);
2126 
2127         read_unlock(&nl_table_lock);
2128         return ret;
2129 }
2130 EXPORT_SYMBOL(netlink_set_err);
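
/*
 * Illustrative sketch (not part of af_netlink.c): a kernel producer that had
 * to drop a notification can push ENOBUFS to every member of a group,
 * excluding nobody (portid 0). The code is passed negative and flipped
 * internally, as the kernel-doc above explains; nlsk is hypothetical.
 */
static inline void example_report_overrun(struct sock *nlsk, u32 group)
{
	netlink_set_err(nlsk, 0, group, -ENOBUFS);
}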
2131 
2132 /* must be called with netlink table grabbed */
2133 static void netlink_update_socket_mc(struct netlink_sock *nlk,
2134                                      unsigned int group,
2135                                      int is_new)
2136 {
2137         int old, new = !!is_new, subscriptions;
2138 
2139         old = test_bit(group - 1, nlk->groups);
2140         subscriptions = nlk->subscriptions - old + new;
2141         if (new)
2142                 __set_bit(group - 1, nlk->groups);
2143         else
2144                 __clear_bit(group - 1, nlk->groups);
2145         netlink_update_subscriptions(&nlk->sk, subscriptions);
2146         netlink_update_listeners(&nlk->sk);
2147 }
2148 
2149 static int netlink_setsockopt(struct socket *sock, int level, int optname,
2150                               char __user *optval, unsigned int optlen)
2151 {
2152         struct sock *sk = sock->sk;
2153         struct netlink_sock *nlk = nlk_sk(sk);
2154         unsigned int val = 0;
2155         int err;
2156 
2157         if (level != SOL_NETLINK)
2158                 return -ENOPROTOOPT;
2159 
2160         if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
2161             optlen >= sizeof(int) &&
2162             get_user(val, (unsigned int __user *)optval))
2163                 return -EFAULT;
2164 
2165         switch (optname) {
2166         case NETLINK_PKTINFO:
2167                 if (val)
2168                         nlk->flags |= NETLINK_RECV_PKTINFO;
2169                 else
2170                         nlk->flags &= ~NETLINK_RECV_PKTINFO;
2171                 err = 0;
2172                 break;
2173         case NETLINK_ADD_MEMBERSHIP:
2174         case NETLINK_DROP_MEMBERSHIP: {
2175                 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
2176                         return -EPERM;
2177                 err = netlink_realloc_groups(sk);
2178                 if (err)
2179                         return err;
2180                 if (!val || val - 1 >= nlk->ngroups)
2181                         return -EINVAL;
2182                 netlink_table_grab();
2183                 netlink_update_socket_mc(nlk, val,
2184                                          optname == NETLINK_ADD_MEMBERSHIP);
2185                 netlink_table_ungrab();
2186 
2187                 if (nlk->netlink_bind)
2188                         nlk->netlink_bind(val);
2189 
2190                 err = 0;
2191                 break;
2192         }
2193         case NETLINK_BROADCAST_ERROR:
2194                 if (val)
2195                         nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
2196                 else
2197                         nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
2198                 err = 0;
2199                 break;
2200         case NETLINK_NO_ENOBUFS:
2201                 if (val) {
2202                         nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
2203                         clear_bit(NETLINK_CONGESTED, &nlk->state);
2204                         wake_up_interruptible(&nlk->wait);
2205                 } else {
2206                         nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
2207                 }
2208                 err = 0;
2209                 break;
2210 #ifdef CONFIG_NETLINK_MMAP
2211         case NETLINK_RX_RING:
2212         case NETLINK_TX_RING: {
2213                 struct nl_mmap_req req;
2214 
2215                 /* Rings might consume more memory than queue limits, require
2216                  * CAP_NET_ADMIN.
2217                  */
2218                 if (!capable(CAP_NET_ADMIN))
2219                         return -EPERM;
2220                 if (optlen < sizeof(req))
2221                         return -EINVAL;
2222                 if (copy_from_user(&req, optval, sizeof(req)))
2223                         return -EFAULT;
2224                 err = netlink_set_ring(sk, &req,
2225                                        optname == NETLINK_TX_RING);
2226                 break;
2227         }
2228 #endif /* CONFIG_NETLINK_MMAP */
2229         default:
2230                 err = -ENOPROTOOPT;
2231         }
2232         return err;
2233 }
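
/*
 * Illustrative userspace sketch (not part of af_netlink.c): unlike bind(),
 * which is limited to the 32-bit nl_groups mask, NETLINK_ADD_MEMBERSHIP and
 * NETLINK_DROP_MEMBERSHIP take the group number itself, so groups above 32
 * can also be joined. toggle_membership is a hypothetical helper.
 */
#include <sys/socket.h>
#include <linux/netlink.h>

static int toggle_membership(int fd, int group, int join)
{
	int optname = join ? NETLINK_ADD_MEMBERSHIP : NETLINK_DROP_MEMBERSHIP;

	return setsockopt(fd, SOL_NETLINK, optname, &group, sizeof(group));
}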
2234 
2235 static int netlink_getsockopt(struct socket *sock, int level, int optname,
2236                               char __user *optval, int __user *optlen)
2237 {
2238         struct sock *sk = sock->sk;
2239         struct netlink_sock *nlk = nlk_sk(sk);
2240         int len, val, err;
2241 
2242         if (level != SOL_NETLINK)
2243                 return -ENOPROTOOPT;
2244 
2245         if (get_user(len, optlen))
2246                 return -EFAULT;
2247         if (len < 0)
2248                 return -EINVAL;
2249 
2250         switch (optname) {
2251         case NETLINK_PKTINFO:
2252                 if (len < sizeof(int))
2253                         return -EINVAL;
2254                 len = sizeof(int);
2255                 val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
2256                 if (put_user(len, optlen) ||
2257                     put_user(val, optval))
2258                         return -EFAULT;
2259                 err = 0;
2260                 break;
2261         case NETLINK_BROADCAST_ERROR:
2262                 if (len < sizeof(int))
2263                         return -EINVAL;
2264                 len = sizeof(int);
2265                 val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
2266                 if (put_user(len, optlen) ||
2267                     put_user(val, optval))
2268                         return -EFAULT;
2269                 err = 0;
2270                 break;
2271         case NETLINK_NO_ENOBUFS:
2272                 if (len < sizeof(int))
2273                         return -EINVAL;
2274                 len = sizeof(int);
2275                 val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
2276                 if (put_user(len, optlen) ||
2277                     put_user(val, optval))
2278                         return -EFAULT;
2279                 err = 0;
2280                 break;
2281         default:
2282                 err = -ENOPROTOOPT;
2283         }
2284         return err;
2285 }
2286 
2287 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
2288 {
2289         struct nl_pktinfo info;
2290 
2291         info.group = NETLINK_CB(skb).dst_group;
2292         put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
2293 }
2294 
2295 static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
2296                            struct msghdr *msg, size_t len)
2297 {
2298         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
2299         struct sock *sk = sock->sk;
2300         struct netlink_sock *nlk = nlk_sk(sk);
2301         DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
2302         u32 dst_portid;
2303         u32 dst_group;
2304         struct sk_buff *skb;
2305         int err;
2306         struct scm_cookie scm;
2307         u32 netlink_skb_flags = 0;
2308 
2309         if (msg->msg_flags&MSG_OOB)
2310                 return -EOPNOTSUPP;
2311 
2312         if (NULL == siocb->scm)
2313                 siocb->scm = &scm;
2314 
2315         err = scm_send(sock, msg, siocb->scm, true);
2316         if (err < 0)
2317                 return err;
2318 
2319         if (msg->msg_namelen) {
2320                 err = -EINVAL;
2321                 if (addr->nl_family != AF_NETLINK)
2322                         goto out;
2323                 dst_portid = addr->nl_pid;
2324                 dst_group = ffs(addr->nl_groups);
2325                 err =  -EPERM;
2326                 if ((dst_group || dst_portid) &&
2327                     !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
2328                         goto out;
2329                 netlink_skb_flags |= NETLINK_SKB_DST;
2330         } else {
2331                 dst_portid = nlk->dst_portid;
2332                 dst_group = nlk->dst_group;
2333         }
2334 
2335         if (!nlk->portid) {
2336                 err = netlink_autobind(sock);
2337                 if (err)
2338                         goto out;
2339         }
2340 
2341         if (netlink_tx_is_mmaped(sk) &&
2342             msg->msg_iov->iov_base == NULL) {
2343                 err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
2344                                            siocb);
2345                 goto out;
2346         }
2347 
2348         err = -EMSGSIZE;
2349         if (len > sk->sk_sndbuf - 32)
2350                 goto out;
2351         err = -ENOBUFS;
2352         skb = netlink_alloc_large_skb(len, dst_group);
2353         if (skb == NULL)
2354                 goto out;
2355 
2356         NETLINK_CB(skb).portid  = nlk->portid;
2357         NETLINK_CB(skb).dst_group = dst_group;
2358         NETLINK_CB(skb).creds   = siocb->scm->creds;
2359         NETLINK_CB(skb).flags   = netlink_skb_flags;
2360 
2361         err = -EFAULT;
2362         if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
2363                 kfree_skb(skb);
2364                 goto out;
2365         }
2366 
2367         err = security_netlink_send(sk, skb);
2368         if (err) {
2369                 kfree_skb(skb);
2370                 goto out;
2371         }
2372 
2373         if (dst_group) {
2374                 atomic_inc(&skb->users);
2375                 netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
2376         }
2377         err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
2378 
2379 out:
2380         scm_destroy(siocb->scm);
2381         return err;
2382 }
2383 
2384 static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
2385                            struct msghdr *msg, size_t len,
2386                            int flags)
2387 {
2388         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
2389         struct scm_cookie scm;
2390         struct sock *sk = sock->sk;
2391         struct netlink_sock *nlk = nlk_sk(sk);
2392         int noblock = flags&MSG_DONTWAIT;
2393         size_t copied;
2394         struct sk_buff *skb, *data_skb;
2395         int err, ret;
2396 
2397         if (flags&MSG_OOB)
2398                 return -EOPNOTSUPP;
2399 
2400         copied = 0;
2401 
2402         skb = skb_recv_datagram(sk, flags, noblock, &err);
2403         if (skb == NULL)
2404                 goto out;
2405 
2406         data_skb = skb;
2407 
2408 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES
2409         if (unlikely(skb_shinfo(skb)->frag_list)) {
2410                 /*
2411                  * If this skb has a frag_list, then here that means that we
2412                  * will have to use the frag_list skb's data for compat tasks
2413                  * and the regular skb's data for normal (non-compat) tasks.
2414                  *
2415                  * If we need to send the compat skb, assign it to the
2416                  * 'data_skb' variable so that it will be used below for data
2417                  * copying. We keep 'skb' for everything else, including
2418                  * freeing both later.
2419                  */
2420                 if (flags & MSG_CMSG_COMPAT)
2421                         data_skb = skb_shinfo(skb)->frag_list;
2422         }
2423 #endif
2424 
2425         copied = data_skb->len;
2426         if (len < copied) {
2427                 msg->msg_flags |= MSG_TRUNC;
2428                 copied = len;
2429         }
2430 
2431         skb_reset_transport_header(data_skb);
2432         err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);
2433 
2434         if (msg->msg_name) {
2435                 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
2436                 addr->nl_family = AF_NETLINK;
2437                 addr->nl_pad    = 0;
2438                 addr->nl_pid    = NETLINK_CB(skb).portid;
2439                 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
2440                 msg->msg_namelen = sizeof(*addr);
2441         }
2442 
2443         if (nlk->flags & NETLINK_RECV_PKTINFO)
2444                 netlink_cmsg_recv_pktinfo(msg, skb);
2445 
2446         if (NULL == siocb->scm) {
2447                 memset(&scm, 0, sizeof(scm));
2448                 siocb->scm = &scm;
2449         }
2450         siocb->scm->creds = *NETLINK_CREDS(skb);
2451         if (flags & MSG_TRUNC)
2452                 copied = data_skb->len;
2453 
2454         skb_free_datagram(sk, skb);
2455 
2456         if (nlk->cb_running &&
2457             atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
2458                 ret = netlink_dump(sk);
2459                 if (ret) {
2460                         sk->sk_err = -ret;
2461                         sk->sk_error_report(sk);
2462                 }
2463         }
2464 
2465         scm_recv(sock, msg, siocb->scm, flags);
2466 out:
2467         netlink_rcv_wake(sk);
2468         return err ? : copied;
2469 }
2470 
2471 static void netlink_data_ready(struct sock *sk, int len)
2472 {
2473         BUG();
2474 }
2475 
2476 /*
2477  *      We export these functions to other modules. They provide a
2478  *      complete set of kernel non-blocking support for message
2479  *      queueing.
2480  */
2481 
2482 struct sock *
2483 __netlink_kernel_create(struct net *net, int unit, struct module *module,
2484                         struct netlink_kernel_cfg *cfg)
2485 {
2486         struct socket *sock;
2487         struct sock *sk;
2488         struct netlink_sock *nlk;
2489         struct listeners *listeners = NULL;
2490         struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
2491         unsigned int groups;
2492 
2493         BUG_ON(!nl_table);
2494 
2495         if (unit < 0 || unit >= MAX_LINKS)
2496                 return NULL;
2497 
2498         if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
2499                 return NULL;
2500 
2501         /*
2502          * We just have to hold a reference on the net from sk, but don't
2503          * get_net() it. Besides, we cannot get and then put the net here.
2504          * So we create one inside init_net and then move it to net.
2505          */
2506 
2507         if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
2508                 goto out_sock_release_nosk;
2509 
2510         sk = sock->sk;
2511         sk_change_net(sk, net);
2512 
2513         if (!cfg || cfg->groups < 32)
2514                 groups = 32;
2515         else
2516                 groups = cfg->groups;
2517 
2518         listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2519         if (!listeners)
2520                 goto out_sock_release;
2521 
2522         sk->sk_data_ready = netlink_data_ready;
2523         if (cfg && cfg->input)
2524                 nlk_sk(sk)->netlink_rcv = cfg->input;
2525 
2526         if (netlink_insert(sk, net, 0))
2527                 goto out_sock_release;
2528 
2529         nlk = nlk_sk(sk);
2530         nlk->flags |= NETLINK_KERNEL_SOCKET;
2531 
2532         netlink_table_grab();
2533         if (!nl_table[unit].registered) {
2534                 nl_table[unit].groups = groups;
2535                 rcu_assign_pointer(nl_table[unit].listeners, listeners);
2536                 nl_table[unit].cb_mutex = cb_mutex;
2537                 nl_table[unit].module = module;
2538                 if (cfg) {
2539                         nl_table[unit].bind = cfg->bind;
2540                         nl_table[unit].flags = cfg->flags;
2541                         if (cfg->compare)
2542                                 nl_table[unit].compare = cfg->compare;
2543                 }
2544                 nl_table[unit].registered = 1;
2545         } else {
2546                 kfree(listeners);
2547                 nl_table[unit].registered++;
2548         }
2549         netlink_table_ungrab();
2550         return sk;
2551 
2552 out_sock_release:
2553         kfree(listeners);
2554         netlink_kernel_release(sk);
2555         return NULL;
2556 
2557 out_sock_release_nosk:
2558         sock_release(sock);
2559         return NULL;
2560 }
2561 EXPORT_SYMBOL(__netlink_kernel_create);
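
/*
 * Illustrative sketch (not part of af_netlink.c): creating a kernel-side
 * netlink socket through the netlink_kernel_create() wrapper, which passes
 * THIS_MODULE down to __netlink_kernel_create(). MY_NETLINK_UNIT is a
 * placeholder for an unused protocol number below MAX_LINKS; the other
 * example_* names are hypothetical as well.
 */
#define MY_NETLINK_UNIT 31	/* hypothetical protocol number < MAX_LINKS */

static struct sock *example_nl_sock;

static void example_input(struct sk_buff *skb)
{
	/* called from netlink_unicast_kernel() for each message sent to us */
}

static int __init example_netlink_init(void)
{
	struct netlink_kernel_cfg cfg = {
		.groups = 32,
		.input  = example_input,
	};

	example_nl_sock = netlink_kernel_create(&init_net, MY_NETLINK_UNIT, &cfg);
	return example_nl_sock ? 0 : -ENOMEM;
}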
2562 
2563 void
2564 netlink_kernel_release(struct sock *sk)
2565 {
2566         sk_release_kernel(sk);
2567 }
2568 EXPORT_SYMBOL(netlink_kernel_release);
2569 
2570 int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
2571 {
2572         struct listeners *new, *old;
2573         struct netlink_table *tbl = &nl_table[sk->sk_protocol];
2574 
2575         if (groups < 32)
2576                 groups = 32;
2577 
2578         if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
2579                 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
2580                 if (!new)
2581                         return -ENOMEM;
2582                 old = nl_deref_protected(tbl->listeners);
2583                 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
2584                 rcu_assign_pointer(tbl->listeners, new);
2585 
2586                 kfree_rcu(old, rcu);
2587         }
2588         tbl->groups = groups;
2589 
2590         return 0;
2591 }
2592 
2593 /**
2594  * netlink_change_ngroups - change number of multicast groups
2595  *
2596  * This changes the number of multicast groups that are available
2597  * on a certain netlink family. Note that it is not possible to
2598  * change the number of groups to below 32. Also note that it does
2599  * not implicitly call netlink_clear_multicast_users() when the
2600  * number of groups is reduced.
2601  *
2602  * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
2603  * @groups: The new number of groups.
2604  */
2605 int netlink_change_ngroups(struct sock *sk, unsigned int groups)
2606 {
2607         int err;
2608 
2609         netlink_table_grab();
2610         err = __netlink_change_ngroups(sk, groups);
2611         netlink_table_ungrab();
2612 
2613         return err;
2614 }
2615 
2616 void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
2617 {
2618         struct sock *sk;
2619         struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
2620 
2621         sk_for_each_bound(sk, &tbl->mc_list)
2622                 netlink_update_socket_mc(nlk_sk(sk), group, 0);
2623 }
2624 
2625 struct nlmsghdr *
2626 __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
2627 {
2628         struct nlmsghdr *nlh;
2629         int size = nlmsg_msg_size(len);
2630 
2631         nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
2632         nlh->nlmsg_type = type;
2633         nlh->nlmsg_len = size;
2634         nlh->nlmsg_flags = flags;
2635         nlh->nlmsg_pid = portid;
2636         nlh->nlmsg_seq = seq;
2637         if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
2638                 memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
2639         return nlh;
2640 }
2641 EXPORT_SYMBOL(__nlmsg_put);
2642 
2643 /*
2644  * It looks a bit ugly.
2645  * It would be better to create a kernel thread.
2646  */
2647 
2648 static int netlink_dump(struct sock *sk)
2649 {
2650         struct netlink_sock *nlk = nlk_sk(sk);
2651         struct netlink_callback *cb;
2652         struct sk_buff *skb = NULL;
2653         struct nlmsghdr *nlh;
2654         struct module *module;
2655         int len, err = -ENOBUFS;
2656         int alloc_size;
2657 
2658         mutex_lock(nlk->cb_mutex);
2659         if (!nlk->cb_running) {
2660                 err = -EINVAL;
2661                 goto errout_skb;
2662         }
2663 
2664         cb = &nlk->cb;
2665         alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
2666 
2667         if (!netlink_rx_is_mmaped(sk) &&
2668             atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2669                 goto errout_skb;
2670         skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);
2671         if (!skb)
2672                 goto errout_skb;
2673         netlink_skb_set_owner_r(skb, sk);
2674 
2675         len = cb->dump(skb, cb);
2676 
2677         if (len > 0) {
2678                 mutex_unlock(nlk->cb_mutex);
2679 
2680                 if (sk_filter(sk, skb))
2681                         kfree_skb(skb);
2682                 else
2683                         __netlink_sendskb(sk, skb);
2684                 return 0;
2685         }
2686 
2687         nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
2688         if (!nlh)
2689                 goto errout_skb;
2690 
2691         nl_dump_check_consistent(cb, nlh);
2692 
2693         memcpy(nlmsg_data(nlh), &len, sizeof(len));
2694 
2695         if (sk_filter(sk, skb))
2696                 kfree_skb(skb);
2697         else
2698                 __netlink_sendskb(sk, skb);
2699 
2700         if (cb->done)
2701                 cb->done(cb);
2702 
2703         nlk->cb_running = false;
2704         module = cb->module;
2705         skb = cb->skb;
2706         mutex_unlock(nlk->cb_mutex);
2707         module_put(module);
2708         consume_skb(skb);
2709         return 0;
2710 
2711 errout_skb:
2712         mutex_unlock(nlk->cb_mutex);
2713         kfree_skb(skb);
2714         return err;
2715 }
2716 
2717 int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
2718                          const struct nlmsghdr *nlh,
2719                          struct netlink_dump_control *control)
2720 {
2721         struct netlink_callback *cb;
2722         struct sock *sk;
2723         struct netlink_sock *nlk;
2724         int ret;
2725 
2726         /* Memory mapped dump requests need to be copied to avoid looping
2727          * on the pending state in netlink_mmap_sendmsg() while the CB holds
2728          * a reference to the skb.
2729          */
2730         if (netlink_skb_is_mmaped(skb)) {
2731                 skb = skb_copy(skb, GFP_KERNEL);
2732                 if (skb == NULL)
2733                         return -ENOBUFS;
2734         } else
2735                 atomic_inc(&skb->users);
2736 
2737         sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
2738         if (sk == NULL) {
2739                 ret = -ECONNREFUSED;
2740                 goto error_free;
2741         }
2742 
2743         nlk = nlk_sk(sk);
2744         mutex_lock(nlk->cb_mutex);
2745         /* A dump is in progress... */
2746         if (nlk->cb_running) {
2747                 ret = -EBUSY;
2748                 goto error_unlock;
2749         }
2750         /* add a reference to the module that cb->dump belongs to */
2751         if (!try_module_get(control->module)) {
2752                 ret = -EPROTONOSUPPORT;
2753                 goto error_unlock;
2754         }
2755 
2756         cb = &nlk->cb;
2757         memset(cb, 0, sizeof(*cb));
2758         cb->dump = control->dump;
2759         cb->done = control->done;
2760         cb->nlh = nlh;
2761         cb->data = control->data;
2762         cb->module = control->module;
2763         cb->min_dump_alloc = control->min_dump_alloc;
2764         cb->skb = skb;
2765 
2766         nlk->cb_running = true;
2767 
2768         mutex_unlock(nlk->cb_mutex);
2769 
2770         ret = netlink_dump(sk);
2771         sock_put(sk);
2772 
2773         if (ret)
2774                 return ret;
2775 
2776         /* We successfully started a dump, by returning -EINTR we
2777          * signal not to send ACK even if it was requested.
2778          */
2779         return -EINTR;
2780 
2781 error_unlock:
2782         sock_put(sk);
2783         mutex_unlock(nlk->cb_mutex);
2784 error_free:
2785         kfree_skb(skb);
2786         return ret;
2787 }
2788 EXPORT_SYMBOL(__netlink_dump_start);
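
/*
 * Illustrative sketch (not part of af_netlink.c): the usual pattern in a
 * request handler is to hand NLM_F_DUMP requests to the dump machinery
 * through the netlink_dump_start() wrapper around __netlink_dump_start().
 * All example_* names are hypothetical; skb->sk is the kernel socket here
 * because netlink_unicast_kernel() made it the skb owner.
 */
static int example_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	/* fill skb; return skb->len while more data remains, 0 when done */
	return 0;
}

static int example_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
			.dump = example_dump,
		};

		return netlink_dump_start(skb->sk, skb, nlh, &c);
	}

	return -EOPNOTSUPP;
}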
2789 
2790 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
2791 {
2792         struct sk_buff *skb;
2793         struct nlmsghdr *rep;
2794         struct nlmsgerr *errmsg;
2795         size_t payload = sizeof(*errmsg);
2796 
2797         /* error messages get the original request appended */
2798         if (err)
2799                 payload += nlmsg_len(nlh);
2800 
2801         skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
2802                                 NETLINK_CB(in_skb).portid, GFP_KERNEL);
2803         if (!skb) {
2804                 struct sock *sk;
2805 
2806                 sk = netlink_lookup(sock_net(in_skb->sk),
2807                                     in_skb->sk->sk_protocol,
2808                                     NETLINK_CB(in_skb).portid);
2809                 if (sk) {
2810                         sk->sk_err = ENOBUFS;
2811                         sk->sk_error_report(sk);
2812                         sock_put(sk);
2813                 }
2814                 return;
2815         }
2816 
2817         rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
2818                           NLMSG_ERROR, payload, 0);
2819         errmsg = nlmsg_data(rep);
2820         errmsg->error = err;
2821         memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
2822         netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
2823 }
2824 EXPORT_SYMBOL(netlink_ack);
2825 
2826 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
2827                                                      struct nlmsghdr *))
2828 {
2829         struct nlmsghdr *nlh;
2830         int err;
2831 
2832         while (skb->len >= nlmsg_total_size(0)) {
2833                 int msglen;
2834 
2835                 nlh = nlmsg_hdr(skb);
2836                 err = 0;
2837 
2838                 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
2839                         return 0;
2840 
2841                 /* Only requests are handled by the kernel */
2842                 if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
2843                         goto ack;
2844 
2845                 /* Skip control messages */
2846                 if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
2847                         goto ack;
2848 
2849                 err = cb(skb, nlh);
2850                 if (err == -EINTR)
2851                         goto skip;
2852 
2853 ack:
2854                 if (nlh->nlmsg_flags & NLM_F_ACK || err)
2855                         netlink_ack(skb, nlh, err);
2856 
2857 skip:
2858                 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
2859                 if (msglen > skb->len)
2860                         msglen = skb->len;
2861                 skb_pull(skb, msglen);
2862         }
2863 
2864         return 0;
2865 }
2866 EXPORT_SYMBOL(netlink_rcv_skb);
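
/*
 * Illustrative sketch (not part of af_netlink.c): a kernel socket's
 * cfg->input callback typically just feeds each incoming skb to
 * netlink_rcv_skb() with a per-message handler; ACKs and error replies are
 * then generated by netlink_ack() as shown above. Names are hypothetical.
 */
static int demo_msg_handler(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	/* validate and process a single request message */
	return 0;
}

static void demo_input(struct sk_buff *skb)
{
	netlink_rcv_skb(skb, &demo_msg_handler);
}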
2867 
2868 /**
2869  * nlmsg_notify - send a notification netlink message
2870  * @sk: netlink socket to use
2871  * @skb: notification message
2872  * @portid: destination netlink portid for reports or 0
2873  * @group: destination multicast group or 0
2874  * @report: 1 to report back, 0 to disable
2875  * @flags: allocation flags
2876  */
2877 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
2878                  unsigned int group, int report, gfp_t flags)
2879 {
2880         int err = 0;
2881 
2882         if (group) {
2883                 int exclude_portid = 0;
2884 
2885                 if (report) {
2886                         atomic_inc(&skb->users);
2887                         exclude_portid = portid;
2888                 }
2889 
2890                 /* errors are reported via the destination's sk->sk_err; delivery
2891                  * errors are propagated only if NETLINK_BROADCAST_ERROR is set */
2892                 err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
2893         }
2894 
2895         if (report) {
2896                 int err2;
2897 
2898                 err2 = nlmsg_unicast(sk, skb, portid);
2899                 if (!err || err == -ESRCH)
2900                         err = err2;
2901         }
2902 
2903         return err;
2904 }
2905 EXPORT_SYMBOL(nlmsg_notify);
2906 
2907 #ifdef CONFIG_PROC_FS
2908 struct nl_seq_iter {
2909         struct seq_net_private p;
2910         int link;
2911         int hash_idx;
2912 };
2913 
2914 static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
2915 {
2916         struct nl_seq_iter *iter = seq->private;
2917         int i, j;
2918         struct sock *s;
2919         loff_t off = 0;
2920 
2921         for (i = 0; i < MAX_LINKS; i++) {
2922                 struct nl_portid_hash *hash = &nl_table[i].hash;
2923 
2924                 for (j = 0; j <= hash->mask; j++) {
2925                         sk_for_each(s, &hash->table[j]) {
2926                                 if (sock_net(s) != seq_file_net(seq))
2927                                         continue;
2928                                 if (off == pos) {
2929                                         iter->link = i;
2930                                         iter->hash_idx = j;
2931                                         return s;
2932                                 }
2933                                 ++off;
2934                         }
2935                 }
2936         }
2937         return NULL;
2938 }
2939 
2940 static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
2941         __acquires(nl_table_lock)
2942 {
2943         read_lock(&nl_table_lock);
2944         return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2945 }
2946 
2947 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2948 {
2949         struct sock *s;
2950         struct nl_seq_iter *iter;
2951         struct net *net;
2952         int i, j;
2953 
2954         ++*pos;
2955 
2956         if (v == SEQ_START_TOKEN)
2957                 return netlink_seq_socket_idx(seq, 0);
2958 
2959         net = seq_file_net(seq);
2960         iter = seq->private;
2961         s = v;
2962         do {
2963                 s = sk_next(s);
2964         } while (s && !nl_table[s->sk_protocol].compare(net, s));
2965         if (s)
2966                 return s;
2967 
2968         i = iter->link;
2969         j = iter->hash_idx + 1;
2970 
2971         do {
2972                 struct nl_portid_hash *hash = &nl_table[i].hash;
2973 
2974                 for (; j <= hash->mask; j++) {
2975                         s = sk_head(&hash->table[j]);
2976 
2977                         while (s && !nl_table[s->sk_protocol].compare(net, s))
2978                                 s = sk_next(s);
2979                         if (s) {
2980                                 iter->link = i;
2981                                 iter->hash_idx = j;
2982                                 return s;
2983                         }
2984                 }
2985 
2986                 j = 0;
2987         } while (++i < MAX_LINKS);
2988 
2989         return NULL;
2990 }
2991 
2992 static void netlink_seq_stop(struct seq_file *seq, void *v)
2993         __releases(nl_table_lock)
2994 {
2995         read_unlock(&nl_table_lock);
2996 }
2997 
2998 
2999 static int netlink_seq_show(struct seq_file *seq, void *v)
3000 {
3001         if (v == SEQ_START_TOKEN) {
3002                 seq_puts(seq,
3003                          "sk       Eth Pid    Groups   "
3004                          "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
3005         } else {
3006                 struct sock *s = v;
3007                 struct netlink_sock *nlk = nlk_sk(s);
3008 
3009                 seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %d %-8d %-8d %-8lu\n",
3010                            s,
3011                            s->sk_protocol,
3012                            nlk->portid,
3013                            nlk->groups ? (u32)nlk->groups[0] : 0,
3014                            sk_rmem_alloc_get(s),
3015                            sk_wmem_alloc_get(s),
3016                            nlk->cb_running,
3017                            atomic_read(&s->sk_refcnt),
3018                            atomic_read(&s->sk_drops),
3019                            sock_i_ino(s)
3020                         );
3021 
3022         }
3023         return 0;
3024 }
3025 
3026 static const struct seq_operations netlink_seq_ops = {
3027         .start  = netlink_seq_start,
3028         .next   = netlink_seq_next,
3029         .stop   = netlink_seq_stop,
3030         .show   = netlink_seq_show,
3031 };
3032 
3033 
3034 static int netlink_seq_open(struct inode *inode, struct file *file)
3035 {
3036         return seq_open_net(inode, file, &netlink_seq_ops,
3037                                 sizeof(struct nl_seq_iter));
3038 }
3039 
3040 static const struct file_operations netlink_seq_fops = {
3041         .owner          = THIS_MODULE,
3042         .open           = netlink_seq_open,
3043         .read           = seq_read,
3044         .llseek         = seq_lseek,
3045         .release        = seq_release_net,
3046 };
3047 
3048 #endif
3049 
3050 int netlink_register_notifier(struct notifier_block *nb)
3051 {
3052         return atomic_notifier_chain_register(&netlink_chain, nb);
3053 }
3054 EXPORT_SYMBOL(netlink_register_notifier);
3055 
3056 int netlink_unregister_notifier(struct notifier_block *nb)
3057 {
3058         return atomic_notifier_chain_unregister(&netlink_chain, nb);
3059 }
3060 EXPORT_SYMBOL(netlink_unregister_notifier);
3061 
3062 static const struct proto_ops netlink_ops = {
3063         .family =       PF_NETLINK,
3064         .owner =        THIS_MODULE,
3065         .release =      netlink_release,
3066         .bind =         netlink_bind,
3067         .connect =      netlink_connect,
3068         .socketpair =   sock_no_socketpair,
3069         .accept =       sock_no_accept,
3070         .getname =      netlink_getname,
3071         .poll =         netlink_poll,
3072         .ioctl =        sock_no_ioctl,
3073         .listen =       sock_no_listen,
3074         .shutdown =     sock_no_shutdown,
3075         .setsockopt =   netlink_setsockopt,
3076         .getsockopt =   netlink_getsockopt,
3077         .sendmsg =      netlink_sendmsg,
3078         .recvmsg =      netlink_recvmsg,
3079         .mmap =         netlink_mmap,
3080         .sendpage =     sock_no_sendpage,
3081 };
3082 
3083 static const struct net_proto_family netlink_family_ops = {
3084         .family = PF_NETLINK,
3085         .create = netlink_create,
3086         .owner  = THIS_MODULE,  /* for consistency 8) */
3087 };
3088 
3089 static int __net_init netlink_net_init(struct net *net)
3090 {
3091 #ifdef CONFIG_PROC_FS
3092         if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops))
3093                 return -ENOMEM;
3094 #endif
3095         return 0;
3096 }
3097 
3098 static void __net_exit netlink_net_exit(struct net *net)
3099 {
3100 #ifdef CONFIG_PROC_FS
3101         remove_proc_entry("netlink", net->proc_net);
3102 #endif
3103 }
3104 
3105 static void __init netlink_add_usersock_entry(void)
3106 {
3107         struct listeners *listeners;
3108         int groups = 32;
3109 
3110         listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
3111         if (!listeners)
3112                 panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
3113 
3114         netlink_table_grab();
3115 
3116         nl_table[NETLINK_USERSOCK].groups = groups;
3117         rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
3118         nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
3119         nl_table[NETLINK_USERSOCK].registered = 1;
3120         nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;
3121 
3122         netlink_table_ungrab();
3123 }
3124 
3125 static struct pernet_operations __net_initdata netlink_net_ops = {
3126         .init = netlink_net_init,
3127         .exit = netlink_net_exit,
3128 };
3129 
3130 static int __init netlink_proto_init(void)
3131 {
3132         int i;
3133         unsigned long limit;
3134         unsigned int order;
3135         int err = proto_register(&netlink_proto, 0);
3136 
3137         if (err != 0)
3138                 goto out;
3139 
3140         BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
3141 
3142         nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
3143         if (!nl_table)
3144                 goto panic;
3145 
3146         if (totalram_pages >= (128 * 1024))
3147                 limit = totalram_pages >> (21 - PAGE_SHIFT);
3148         else
3149                 limit = totalram_pages >> (23 - PAGE_SHIFT);
3150 
3151         order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
3152         limit = (1UL << order) / sizeof(struct hlist_head);
3153         order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
3154 
3155         for (i = 0; i < MAX_LINKS; i++) {
3156                 struct nl_portid_hash *hash = &nl_table[i].hash;
3157 
3158                 hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table));
3159                 if (!hash->table) {
3160                         while (i-- > 0)
3161                                 nl_portid_hash_free(nl_table[i].hash.table,
3162                                                  1 * sizeof(*hash->table));
3163                         kfree(nl_table);
3164                         goto panic;
3165                 }
3166                 hash->max_shift = order;
3167                 hash->shift = 0;
3168                 hash->mask = 0;
3169                 hash->rehash_time = jiffies;
3170 
3171                 nl_table[i].compare = netlink_compare;
3172         }
3173 
3174         INIT_LIST_HEAD(&netlink_tap_all);
3175 
3176         netlink_add_usersock_entry();
3177 
3178         sock_register(&netlink_family_ops);
3179         register_pernet_subsys(&netlink_net_ops);
3180         /* The netlink device handler may be needed early. */
3181         rtnetlink_init();
3182 out:
3183         return err;
3184 panic:
3185         panic("netlink_init: Cannot allocate nl_table\n");
3186 }
3187 
3188 core_initcall(netlink_proto_init);
3189 
