
TOMOYO Linux Cross Reference
Linux/net/core/sock.c


  1 /*
  2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  3  *              operating system.  INET is implemented using the  BSD Socket
  4  *              interface as the means of communication with the user level.
  5  *
  6  *              Generic socket support routines. Memory allocators, socket lock/release
  7  *              handler for protocols to use and generic option handler.
  8  *
  9  *
 10  * Authors:     Ross Biro
 11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 12  *              Florian La Roche, <flla@stud.uni-sb.de>
 13  *              Alan Cox, <A.Cox@swansea.ac.uk>
 14  *
 15  * Fixes:
 16  *              Alan Cox        :       Numerous verify_area() problems
 17  *              Alan Cox        :       Connecting on a connecting socket
 18  *                                      now returns an error for tcp.
 19  *              Alan Cox        :       sock->protocol is set correctly.
 20  *                                      and is not sometimes left as 0.
 21  *              Alan Cox        :       connect handles icmp errors on a
 22  *                                      connect properly. Unfortunately there
 23  *                                      is a restart syscall nasty there. I
 24  *                                      can't match BSD without hacking the C
 25  *                                      library. Ideas urgently sought!
 26  *              Alan Cox        :       Disallow bind() to addresses that are
 27  *                                      not ours - especially broadcast ones!!
 28  *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
 29  *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
 30  *                                      instead they leave that for the DESTROY timer.
 31  *              Alan Cox        :       Clean up error flag in accept
 32  *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
 33  *                                      was buggy. Put a remove_sock() in the handler
 34  *                                      for memory when we hit 0. Also altered the timer
 35  *                                      code. The ACK stuff can wait and needs major
 36  *                                      TCP layer surgery.
 37  *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
 38  *                                      and fixed timer/inet_bh race.
 39  *              Alan Cox        :       Added zapped flag for TCP
 40  *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
 41  *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 42  *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
 43  *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
 44  *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 45  *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
 46  *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
 47  *      Pauline Middelink       :       identd support
 48  *              Alan Cox        :       Fixed connect() taking signals I think.
 49  *              Alan Cox        :       SO_LINGER supported
 50  *              Alan Cox        :       Error reporting fixes
 51  *              Anonymous       :       inet_create tidied up (sk->reuse setting)
 52  *              Alan Cox        :       inet sockets don't set sk->type!
 53  *              Alan Cox        :       Split socket option code
 54  *              Alan Cox        :       Callbacks
 55  *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
 56  *              Alex            :       Removed restriction on inet fioctl
 57  *              Alan Cox        :       Splitting INET from NET core
 58  *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
 59  *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
 60  *              Alan Cox        :       Split IP from generic code
 61  *              Alan Cox        :       New kfree_skbmem()
 62  *              Alan Cox        :       Make SO_DEBUG superuser only.
 63  *              Alan Cox        :       Allow anyone to clear SO_DEBUG
 64  *                                      (compatibility fix)
 65  *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
 66  *              Alan Cox        :       Allocator for a socket is settable.
 67  *              Alan Cox        :       SO_ERROR includes soft errors.
 68  *              Alan Cox        :       Allow NULL arguments on some SO_ opts
 69  *              Alan Cox        :       Generic socket allocation to make hooks
 70  *                                      easier (suggested by Craig Metz).
 71  *              Michael Pall    :       SO_ERROR returns positive errno again
 72  *              Steve Whitehouse:       Added default destructor to free
 73  *                                      protocol private data.
 74  *              Steve Whitehouse:       Added various other default routines
 75  *                                      common to several socket families.
 76  *              Chris Evans     :       Call suser() check last on F_SETOWN
 77  *              Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 78  *              Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
 79  *              Andi Kleen      :       Fix write_space callback
 80  *              Chris Evans     :       Security fixes - signedness again
 81  *              Arnaldo C. Melo :       cleanups, use skb_queue_purge
 82  *
 83  * To Fix:
 84  *
 85  *
 86  *              This program is free software; you can redistribute it and/or
 87  *              modify it under the terms of the GNU General Public License
 88  *              as published by the Free Software Foundation; either version
 89  *              2 of the License, or (at your option) any later version.
 90  */
 91 
 92 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 93 
 94 #include <linux/capability.h>
 95 #include <linux/errno.h>
 96 #include <linux/errqueue.h>
 97 #include <linux/types.h>
 98 #include <linux/socket.h>
 99 #include <linux/in.h>
100 #include <linux/kernel.h>
101 #include <linux/module.h>
102 #include <linux/proc_fs.h>
103 #include <linux/seq_file.h>
104 #include <linux/sched.h>
105 #include <linux/timer.h>
106 #include <linux/string.h>
107 #include <linux/sockios.h>
108 #include <linux/net.h>
109 #include <linux/mm.h>
110 #include <linux/slab.h>
111 #include <linux/interrupt.h>
112 #include <linux/poll.h>
113 #include <linux/tcp.h>
114 #include <linux/init.h>
115 #include <linux/highmem.h>
116 #include <linux/user_namespace.h>
117 #include <linux/static_key.h>
118 #include <linux/memcontrol.h>
119 #include <linux/prefetch.h>
120 
121 #include <asm/uaccess.h>
122 
123 #include <linux/netdevice.h>
124 #include <net/protocol.h>
125 #include <linux/skbuff.h>
126 #include <net/net_namespace.h>
127 #include <net/request_sock.h>
128 #include <net/sock.h>
129 #include <linux/net_tstamp.h>
130 #include <net/xfrm.h>
131 #include <linux/ipsec.h>
132 #include <net/cls_cgroup.h>
133 #include <net/netprio_cgroup.h>
134 
135 #include <linux/filter.h>
136 
137 #include <trace/events/sock.h>
138 
139 #ifdef CONFIG_INET
140 #include <net/tcp.h>
141 #endif
142 
143 #include <net/busy_poll.h>
144 
145 static DEFINE_MUTEX(proto_list_mutex);
146 static LIST_HEAD(proto_list);
147 
148 /**
149  * sk_ns_capable - General socket capability test
150  * @sk: Socket to use a capability on or through
151  * @user_ns: The user namespace of the capability to use
152  * @cap: The capability to use
153  *
 154  * Test to see if the opener of the socket had the capability @cap in the
 155  * user namespace @user_ns when the socket was created, and whether the
 156  * current process has it as well.
157  */
158 bool sk_ns_capable(const struct sock *sk,
159                    struct user_namespace *user_ns, int cap)
160 {
161         return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
162                 ns_capable(user_ns, cap);
163 }
164 EXPORT_SYMBOL(sk_ns_capable);
165 
166 /**
167  * sk_capable - Socket global capability test
168  * @sk: Socket to use a capability on or through
169  * @cap: The global capability to use
170  *
 171  * Test to see if the opener of the socket had the global capability @cap
 172  * (i.e. in the initial user namespace) when the socket was created, and
 173  * whether the current process has it as well.
174  */
175 bool sk_capable(const struct sock *sk, int cap)
176 {
177         return sk_ns_capable(sk, &init_user_ns, cap);
178 }
179 EXPORT_SYMBOL(sk_capable);
180 
181 /**
182  * sk_net_capable - Network namespace socket capability test
183  * @sk: Socket to use a capability on or through
184  * @cap: The capability to use
185  *
 186  * Test to see if the opener of the socket had the capability @cap over the
 187  * network namespace the socket is a member of when the socket was created,
 188  * and whether the current process has it as well.
189  */
190 bool sk_net_capable(const struct sock *sk, int cap)
191 {
192         return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
193 }
194 EXPORT_SYMBOL(sk_net_capable);
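
The three helpers above differ only in which user namespace the capability is checked against: @user_ns explicitly, the initial namespace, or the user namespace that owns the socket's network namespace. A minimal kernel-side sketch of the typical use, gating a privileged per-socket operation (the function and option are hypothetical, not defined in this file):

static int hypo_set_admin_option(struct sock *sk, int val)
{
        /* require CAP_NET_ADMIN relative to the socket's network namespace */
        if (!sk_net_capable(sk, CAP_NET_ADMIN))
                return -EPERM;

        /* ... apply the privileged option to sk here ... */
        return 0;
}
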
195 
196 
197 #ifdef CONFIG_MEMCG_KMEM
198 int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
199 {
200         struct proto *proto;
201         int ret = 0;
202 
203         mutex_lock(&proto_list_mutex);
204         list_for_each_entry(proto, &proto_list, node) {
205                 if (proto->init_cgroup) {
206                         ret = proto->init_cgroup(memcg, ss);
207                         if (ret)
208                                 goto out;
209                 }
210         }
211 
212         mutex_unlock(&proto_list_mutex);
213         return ret;
214 out:
215         list_for_each_entry_continue_reverse(proto, &proto_list, node)
216                 if (proto->destroy_cgroup)
217                         proto->destroy_cgroup(memcg);
218         mutex_unlock(&proto_list_mutex);
219         return ret;
220 }
221 
222 void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
223 {
224         struct proto *proto;
225 
226         mutex_lock(&proto_list_mutex);
227         list_for_each_entry_reverse(proto, &proto_list, node)
228                 if (proto->destroy_cgroup)
229                         proto->destroy_cgroup(memcg);
230         mutex_unlock(&proto_list_mutex);
231 }
232 #endif
233 
234 /*
235  * Each address family might have different locking rules, so we have
236  * one slock key per address family:
237  */
238 static struct lock_class_key af_family_keys[AF_MAX];
239 static struct lock_class_key af_family_slock_keys[AF_MAX];
240 
241 #if defined(CONFIG_MEMCG_KMEM)
242 struct static_key memcg_socket_limit_enabled;
243 EXPORT_SYMBOL(memcg_socket_limit_enabled);
244 #endif
245 
246 /*
247  * Make lock validator output more readable. (we pre-construct these
248  * strings build-time, so that runtime initialization of socket
249  * locks is fast):
250  */
251 static const char *const af_family_key_strings[AF_MAX+1] = {
252   "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
253   "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
254   "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
255   "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
256   "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
257   "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
258   "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
259   "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
260   "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
261   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
262   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
263   "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
264   "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
265   "sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_MAX"
266 };
267 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
268   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
269   "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
270   "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
271   "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
272   "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
273   "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
274   "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
275   "slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
276   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
277   "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
278   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
279   "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
280   "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
281   "slock-AF_NFC"   , "slock-AF_VSOCK"    ,"slock-AF_MAX"
282 };
283 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
284   "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
285   "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
286   "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
287   "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
288   "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
289   "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
290   "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
291   "clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
292   "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
293   "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
294   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
295   "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
296   "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
297   "clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_MAX"
298 };
299 
300 /*
301  * sk_callback_lock locking rules are per-address-family,
302  * so split the lock classes by using a per-AF key:
303  */
304 static struct lock_class_key af_callback_keys[AF_MAX];
305 
306 /* Take into consideration the size of the struct sk_buff overhead in the
307  * determination of these values, since that is non-constant across
308  * platforms.  This makes socket queueing behavior and performance
309  * not depend upon such differences.
310  */
311 #define _SK_MEM_PACKETS         256
312 #define _SK_MEM_OVERHEAD        SKB_TRUESIZE(256)
313 #define SK_WMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
314 #define SK_RMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
315 
316 /* Run time adjustable parameters. */
317 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
318 EXPORT_SYMBOL(sysctl_wmem_max);
319 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
320 EXPORT_SYMBOL(sysctl_rmem_max);
321 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
322 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
323 
324 /* Maximal space eaten by iovec or ancillary data plus some space */
325 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
326 EXPORT_SYMBOL(sysctl_optmem_max);
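
These run-time adjustable limits are exposed as sysctls under /proc/sys/net/core/ (wmem_max, rmem_max, wmem_default, rmem_default, optmem_max). A small standalone userspace sketch that prints the current values by reading those procfs files:

#include <stdio.h>

int main(void)
{
        const char *files[] = {
                "/proc/sys/net/core/wmem_max",
                "/proc/sys/net/core/rmem_max",
                "/proc/sys/net/core/wmem_default",
                "/proc/sys/net/core/rmem_default",
                "/proc/sys/net/core/optmem_max",
        };
        unsigned int i;

        for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
                FILE *f = fopen(files[i], "r");
                long val;

                if (!f)
                        continue;
                if (fscanf(f, "%ld", &val) == 1)
                        printf("%s = %ld\n", files[i], val);
                fclose(f);
        }
        return 0;
}
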
327 
328 struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
329 EXPORT_SYMBOL_GPL(memalloc_socks);
330 
331 /**
332  * sk_set_memalloc - sets %SOCK_MEMALLOC
333  * @sk: socket to set it on
334  *
335  * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
336  * It's the responsibility of the admin to adjust min_free_kbytes
337  * to meet the requirements
338  */
339 void sk_set_memalloc(struct sock *sk)
340 {
341         sock_set_flag(sk, SOCK_MEMALLOC);
342         sk->sk_allocation |= __GFP_MEMALLOC;
343         static_key_slow_inc(&memalloc_socks);
344 }
345 EXPORT_SYMBOL_GPL(sk_set_memalloc);
346 
347 void sk_clear_memalloc(struct sock *sk)
348 {
349         sock_reset_flag(sk, SOCK_MEMALLOC);
350         sk->sk_allocation &= ~__GFP_MEMALLOC;
351         static_key_slow_dec(&memalloc_socks);
352 
353         /*
354          * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
355          * progress of swapping. However, if SOCK_MEMALLOC is cleared while
356          * it has rmem allocations there is a risk that the user of the
357          * socket cannot make forward progress due to exceeding the rmem
358          * limits. By rights, sk_clear_memalloc() should only be called
359          * on sockets being torn down but warn and reset the accounting if
360          * that assumption breaks.
361          */
362         if (WARN_ON(sk->sk_forward_alloc))
363                 sk_mem_reclaim(sk);
364 }
365 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
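
A hedged kernel-side sketch of the intended pairing for the two helpers above: a subsystem carrying swap traffic over the network (all names below are hypothetical) marks its transport socket with sk_set_memalloc() for the socket's lifetime and clears the flag only when tearing it down:

static void hypo_swap_transport_start(struct socket *sock)
{
        /* let this socket dip into emergency reserves under memory pressure */
        sk_set_memalloc(sock->sk);
}

static void hypo_swap_transport_stop(struct socket *sock)
{
        /* only safe on teardown, see the comment in sk_clear_memalloc() above */
        sk_clear_memalloc(sock->sk);
        sock_release(sock);
}
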
366 
367 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
368 {
369         int ret;
370         unsigned long pflags = current->flags;
371 
372         /* these should have been dropped before queueing */
373         BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
374 
375         current->flags |= PF_MEMALLOC;
376         ret = sk->sk_backlog_rcv(sk, skb);
377         tsk_restore_flags(current, pflags, PF_MEMALLOC);
378 
379         return ret;
380 }
381 EXPORT_SYMBOL(__sk_backlog_rcv);
382 
383 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
384 {
385         struct timeval tv;
386 
387         if (optlen < sizeof(tv))
388                 return -EINVAL;
389         if (copy_from_user(&tv, optval, sizeof(tv)))
390                 return -EFAULT;
391         if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
392                 return -EDOM;
393 
394         if (tv.tv_sec < 0) {
395                 static int warned __read_mostly;
396 
397                 *timeo_p = 0;
398                 if (warned < 10 && net_ratelimit()) {
399                         warned++;
400                         pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
401                                 __func__, current->comm, task_pid_nr(current));
402                 }
403                 return 0;
404         }
405         *timeo_p = MAX_SCHEDULE_TIMEOUT;
406         if (tv.tv_sec == 0 && tv.tv_usec == 0)
407                 return 0;
408         if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
409                 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
410         return 0;
411 }
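
From userspace these timeouts are set with SO_RCVTIMEO/SO_SNDTIMEO and a struct timeval; a value of 0.0 seconds means "wait forever" (the MAX_SCHEDULE_TIMEOUT case above), and a tv_usec outside [0, 1000000) is rejected with EDOM. A minimal standalone sketch:

#include <stdio.h>
#include <sys/socket.h>
#include <sys/time.h>

int main(void)
{
        struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };      /* 5 s receive timeout */
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        if (fd < 0)
                return 1;

        if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0)
                perror("setsockopt(SO_RCVTIMEO)");

        /* a blocking recv() on fd now fails with EAGAIN/EWOULDBLOCK after ~5 s */
        return 0;
}
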
412 
413 static void sock_warn_obsolete_bsdism(const char *name)
414 {
415         static int warned;
416         static char warncomm[TASK_COMM_LEN];
417         if (strcmp(warncomm, current->comm) && warned < 5) {
418                 strcpy(warncomm,  current->comm);
419                 pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
420                         warncomm, name);
421                 warned++;
422         }
423 }
424 
425 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
426 {
427         if (sk->sk_flags & flags) {
428                 sk->sk_flags &= ~flags;
429                 if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
430                         net_disable_timestamp();
431         }
432 }
433 
434 
435 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
436 {
437         int err;
438         unsigned long flags;
439         struct sk_buff_head *list = &sk->sk_receive_queue;
440 
441         if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
442                 atomic_inc(&sk->sk_drops);
443                 trace_sock_rcvqueue_full(sk, skb);
444                 return -ENOMEM;
445         }
446 
447         err = sk_filter(sk, skb);
448         if (err)
449                 return err;
450 
451         if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
452                 atomic_inc(&sk->sk_drops);
453                 return -ENOBUFS;
454         }
455 
456         skb->dev = NULL;
457         skb_set_owner_r(skb, sk);
458 
 459         /* we escape from the RCU-protected region, make sure we don't leak
 460          * a non-refcounted dst
461          */
462         skb_dst_force(skb);
463 
464         spin_lock_irqsave(&list->lock, flags);
465         skb->dropcount = atomic_read(&sk->sk_drops);
466         __skb_queue_tail(list, skb);
467         spin_unlock_irqrestore(&list->lock, flags);
468 
469         if (!sock_flag(sk, SOCK_DEAD))
470                 sk->sk_data_ready(sk);
471         return 0;
472 }
473 EXPORT_SYMBOL(sock_queue_rcv_skb);
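
An illustrative sketch of how a datagram protocol's receive path might use sock_queue_rcv_skb(); the handler itself is hypothetical, only the error convention (a negative return such as -ENOMEM when the receive queue is full, or -ENOBUFS when no receive memory could be scheduled, means the skb was not consumed) comes from the function above:

static int hypo_proto_queue_rcv(struct sock *sk, struct sk_buff *skb)
{
        int err = sock_queue_rcv_skb(sk, skb);

        if (err < 0) {
                /* skb was not queued; the caller still owns and must free it */
                kfree_skb(skb);
                return NET_RX_DROP;
        }
        return NET_RX_SUCCESS;
}
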
474 
475 int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
476 {
477         int rc = NET_RX_SUCCESS;
478 
479         if (sk_filter(sk, skb))
480                 goto discard_and_relse;
481 
482         skb->dev = NULL;
483 
484         if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
485                 atomic_inc(&sk->sk_drops);
486                 goto discard_and_relse;
487         }
488         if (nested)
489                 bh_lock_sock_nested(sk);
490         else
491                 bh_lock_sock(sk);
492         if (!sock_owned_by_user(sk)) {
493                 /*
494                  * trylock + unlock semantics:
495                  */
496                 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
497 
498                 rc = sk_backlog_rcv(sk, skb);
499 
500                 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
501         } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
502                 bh_unlock_sock(sk);
503                 atomic_inc(&sk->sk_drops);
504                 goto discard_and_relse;
505         }
506 
507         bh_unlock_sock(sk);
508 out:
509         sock_put(sk);
510         return rc;
511 discard_and_relse:
512         kfree_skb(skb);
513         goto out;
514 }
515 EXPORT_SYMBOL(sk_receive_skb);
516 
517 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
518 {
519         struct dst_entry *dst = __sk_dst_get(sk);
520 
521         if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
522                 sk_tx_queue_clear(sk);
523                 RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
524                 dst_release(dst);
525                 return NULL;
526         }
527 
528         return dst;
529 }
530 EXPORT_SYMBOL(__sk_dst_check);
531 
532 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
533 {
534         struct dst_entry *dst = sk_dst_get(sk);
535 
536         if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
537                 sk_dst_reset(sk);
538                 dst_release(dst);
539                 return NULL;
540         }
541 
542         return dst;
543 }
544 EXPORT_SYMBOL(sk_dst_check);
545 
546 static int sock_setbindtodevice(struct sock *sk, char __user *optval,
547                                 int optlen)
548 {
549         int ret = -ENOPROTOOPT;
550 #ifdef CONFIG_NETDEVICES
551         struct net *net = sock_net(sk);
552         char devname[IFNAMSIZ];
553         int index;
554 
555         /* Sorry... */
556         ret = -EPERM;
557         if (!ns_capable(net->user_ns, CAP_NET_RAW))
558                 goto out;
559 
560         ret = -EINVAL;
561         if (optlen < 0)
562                 goto out;
563 
564         /* Bind this socket to a particular device like "eth0",
565          * as specified in the passed interface name. If the
566          * name is "" or the option length is zero the socket
567          * is not bound.
568          */
569         if (optlen > IFNAMSIZ - 1)
570                 optlen = IFNAMSIZ - 1;
571         memset(devname, 0, sizeof(devname));
572 
573         ret = -EFAULT;
574         if (copy_from_user(devname, optval, optlen))
575                 goto out;
576 
577         index = 0;
578         if (devname[0] != '\0') {
579                 struct net_device *dev;
580 
581                 rcu_read_lock();
582                 dev = dev_get_by_name_rcu(net, devname);
583                 if (dev)
584                         index = dev->ifindex;
585                 rcu_read_unlock();
586                 ret = -ENODEV;
587                 if (!dev)
588                         goto out;
589         }
590 
591         lock_sock(sk);
592         sk->sk_bound_dev_if = index;
593         sk_dst_reset(sk);
594         release_sock(sk);
595 
596         ret = 0;
597 
598 out:
599 #endif
600 
601         return ret;
602 }
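
The userspace view of sock_setbindtodevice(): SO_BINDTODEVICE takes an interface name, needs CAP_NET_RAW, and an empty name (or a zero option length) removes the binding again. A small standalone sketch (on Linux the option constant is available via <sys/socket.h>; "eth0" is just an example name):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

int main(void)
{
        const char ifname[] = "eth0";           /* example interface name */
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        if (fd < 0)
                return 1;

        /* bind traffic on this socket to one device; needs CAP_NET_RAW */
        if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
                       ifname, strlen(ifname) + 1) < 0)
                perror("setsockopt(SO_BINDTODEVICE)");

        /* an empty string (or optlen 0) unbinds the socket again */
        if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "", 0) < 0)
                perror("unbind");

        return 0;
}
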
603 
604 static int sock_getbindtodevice(struct sock *sk, char __user *optval,
605                                 int __user *optlen, int len)
606 {
607         int ret = -ENOPROTOOPT;
608 #ifdef CONFIG_NETDEVICES
609         struct net *net = sock_net(sk);
610         char devname[IFNAMSIZ];
611 
612         if (sk->sk_bound_dev_if == 0) {
613                 len = 0;
614                 goto zero;
615         }
616 
617         ret = -EINVAL;
618         if (len < IFNAMSIZ)
619                 goto out;
620 
621         ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
622         if (ret)
623                 goto out;
624 
625         len = strlen(devname) + 1;
626 
627         ret = -EFAULT;
628         if (copy_to_user(optval, devname, len))
629                 goto out;
630 
631 zero:
632         ret = -EFAULT;
633         if (put_user(len, optlen))
634                 goto out;
635 
636         ret = 0;
637 
638 out:
639 #endif
640 
641         return ret;
642 }
643 
644 static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
645 {
646         if (valbool)
647                 sock_set_flag(sk, bit);
648         else
649                 sock_reset_flag(sk, bit);
650 }
651 
652 bool sk_mc_loop(struct sock *sk)
653 {
654         if (dev_recursion_level())
655                 return false;
656         if (!sk)
657                 return true;
658         switch (sk->sk_family) {
659         case AF_INET:
660                 return inet_sk(sk)->mc_loop;
661 #if IS_ENABLED(CONFIG_IPV6)
662         case AF_INET6:
663                 return inet6_sk(sk)->mc_loop;
664 #endif
665         }
666         WARN_ON(1);
667         return true;
668 }
669 EXPORT_SYMBOL(sk_mc_loop);
670 
671 /*
672  *      This is meant for all protocols to use and covers goings on
673  *      at the socket level. Everything here is generic.
674  */
675 
676 int sock_setsockopt(struct socket *sock, int level, int optname,
677                     char __user *optval, unsigned int optlen)
678 {
679         struct sock *sk = sock->sk;
680         int val;
681         int valbool;
682         struct linger ling;
683         int ret = 0;
684 
685         /*
686          *      Options without arguments
687          */
688 
689         if (optname == SO_BINDTODEVICE)
690                 return sock_setbindtodevice(sk, optval, optlen);
691 
692         if (optlen < sizeof(int))
693                 return -EINVAL;
694 
695         if (get_user(val, (int __user *)optval))
696                 return -EFAULT;
697 
698         valbool = val ? 1 : 0;
699 
700         lock_sock(sk);
701 
702         switch (optname) {
703         case SO_DEBUG:
704                 if (val && !capable(CAP_NET_ADMIN))
705                         ret = -EACCES;
706                 else
707                         sock_valbool_flag(sk, SOCK_DBG, valbool);
708                 break;
709         case SO_REUSEADDR:
710                 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
711                 break;
712         case SO_REUSEPORT:
713                 sk->sk_reuseport = valbool;
714                 break;
715         case SO_TYPE:
716         case SO_PROTOCOL:
717         case SO_DOMAIN:
718         case SO_ERROR:
719                 ret = -ENOPROTOOPT;
720                 break;
721         case SO_DONTROUTE:
722                 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
723                 sk_dst_reset(sk);
724                 break;
725         case SO_BROADCAST:
726                 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
727                 break;
728         case SO_SNDBUF:
 729                 /* Don't error on this; BSD doesn't, and if you think
730                  * about it this is right. Otherwise apps have to
731                  * play 'guess the biggest size' games. RCVBUF/SNDBUF
732                  * are treated in BSD as hints
733                  */
734                 val = min_t(u32, val, sysctl_wmem_max);
735 set_sndbuf:
736                 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
737                 sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
738                 /* Wake up sending tasks if we upped the value. */
739                 sk->sk_write_space(sk);
740                 break;
741 
742         case SO_SNDBUFFORCE:
743                 if (!capable(CAP_NET_ADMIN)) {
744                         ret = -EPERM;
745                         break;
746                 }
747                 goto set_sndbuf;
748 
749         case SO_RCVBUF:
 750                 /* Don't error on this; BSD doesn't, and if you think
751                  * about it this is right. Otherwise apps have to
752                  * play 'guess the biggest size' games. RCVBUF/SNDBUF
753                  * are treated in BSD as hints
754                  */
755                 val = min_t(u32, val, sysctl_rmem_max);
756 set_rcvbuf:
757                 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
758                 /*
759                  * We double it on the way in to account for
760                  * "struct sk_buff" etc. overhead.   Applications
761                  * assume that the SO_RCVBUF setting they make will
762                  * allow that much actual data to be received on that
763                  * socket.
764                  *
765                  * Applications are unaware that "struct sk_buff" and
766                  * other overheads allocate from the receive buffer
767                  * during socket buffer allocation.
768                  *
769                  * And after considering the possible alternatives,
770                  * returning the value we actually used in getsockopt
771                  * is the most desirable behavior.
772                  */
773                 sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
774                 break;
775 
776         case SO_RCVBUFFORCE:
777                 if (!capable(CAP_NET_ADMIN)) {
778                         ret = -EPERM;
779                         break;
780                 }
781                 goto set_rcvbuf;
782 
783         case SO_KEEPALIVE:
784 #ifdef CONFIG_INET
785                 if (sk->sk_protocol == IPPROTO_TCP &&
786                     sk->sk_type == SOCK_STREAM)
787                         tcp_set_keepalive(sk, valbool);
788 #endif
789                 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
790                 break;
791 
792         case SO_OOBINLINE:
793                 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
794                 break;
795 
796         case SO_NO_CHECK:
797                 sk->sk_no_check_tx = valbool;
798                 break;
799 
800         case SO_PRIORITY:
801                 if ((val >= 0 && val <= 6) ||
802                     ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
803                         sk->sk_priority = val;
804                 else
805                         ret = -EPERM;
806                 break;
807 
808         case SO_LINGER:
809                 if (optlen < sizeof(ling)) {
810                         ret = -EINVAL;  /* 1003.1g */
811                         break;
812                 }
813                 if (copy_from_user(&ling, optval, sizeof(ling))) {
814                         ret = -EFAULT;
815                         break;
816                 }
817                 if (!ling.l_onoff)
818                         sock_reset_flag(sk, SOCK_LINGER);
819                 else {
820 #if (BITS_PER_LONG == 32)
821                         if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
822                                 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
823                         else
824 #endif
825                                 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
826                         sock_set_flag(sk, SOCK_LINGER);
827                 }
828                 break;
829 
830         case SO_BSDCOMPAT:
831                 sock_warn_obsolete_bsdism("setsockopt");
832                 break;
833 
834         case SO_PASSCRED:
835                 if (valbool)
836                         set_bit(SOCK_PASSCRED, &sock->flags);
837                 else
838                         clear_bit(SOCK_PASSCRED, &sock->flags);
839                 break;
840 
841         case SO_TIMESTAMP:
842         case SO_TIMESTAMPNS:
843                 if (valbool)  {
844                         if (optname == SO_TIMESTAMP)
845                                 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
846                         else
847                                 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
848                         sock_set_flag(sk, SOCK_RCVTSTAMP);
849                         sock_enable_timestamp(sk, SOCK_TIMESTAMP);
850                 } else {
851                         sock_reset_flag(sk, SOCK_RCVTSTAMP);
852                         sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
853                 }
854                 break;
855 
856         case SO_TIMESTAMPING:
857                 if (val & ~SOF_TIMESTAMPING_MASK) {
858                         ret = -EINVAL;
859                         break;
860                 }
861                 if (val & SOF_TIMESTAMPING_OPT_ID &&
862                     !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
863                         if (sk->sk_protocol == IPPROTO_TCP &&
864                             sk->sk_type == SOCK_STREAM) {
865                                 if (sk->sk_state != TCP_ESTABLISHED) {
866                                         ret = -EINVAL;
867                                         break;
868                                 }
869                                 sk->sk_tskey = tcp_sk(sk)->snd_una;
870                         } else {
871                                 sk->sk_tskey = 0;
872                         }
873                 }
874                 sk->sk_tsflags = val;
875                 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
876                         sock_enable_timestamp(sk,
877                                               SOCK_TIMESTAMPING_RX_SOFTWARE);
878                 else
879                         sock_disable_timestamp(sk,
880                                                (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
881                 break;
882 
883         case SO_RCVLOWAT:
884                 if (val < 0)
885                         val = INT_MAX;
886                 sk->sk_rcvlowat = val ? : 1;
887                 break;
888 
889         case SO_RCVTIMEO:
890                 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
891                 break;
892 
893         case SO_SNDTIMEO:
894                 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
895                 break;
896 
897         case SO_ATTACH_FILTER:
898                 ret = -EINVAL;
899                 if (optlen == sizeof(struct sock_fprog)) {
900                         struct sock_fprog fprog;
901 
902                         ret = -EFAULT;
903                         if (copy_from_user(&fprog, optval, sizeof(fprog)))
904                                 break;
905 
906                         ret = sk_attach_filter(&fprog, sk);
907                 }
908                 break;
909 
910         case SO_DETACH_FILTER:
911                 ret = sk_detach_filter(sk);
912                 break;
913 
914         case SO_LOCK_FILTER:
915                 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
916                         ret = -EPERM;
917                 else
918                         sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
919                 break;
920 
921         case SO_PASSSEC:
922                 if (valbool)
923                         set_bit(SOCK_PASSSEC, &sock->flags);
924                 else
925                         clear_bit(SOCK_PASSSEC, &sock->flags);
926                 break;
927         case SO_MARK:
928                 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
929                         ret = -EPERM;
930                 else
931                         sk->sk_mark = val;
932                 break;
933 
934                 /* We implement the SO_SNDLOWAT etc to
935                    not be settable (1003.1g 5.3) */
936         case SO_RXQ_OVFL:
937                 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
938                 break;
939 
940         case SO_WIFI_STATUS:
941                 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
942                 break;
943 
944         case SO_PEEK_OFF:
945                 if (sock->ops->set_peek_off)
946                         ret = sock->ops->set_peek_off(sk, val);
947                 else
948                         ret = -EOPNOTSUPP;
949                 break;
950 
951         case SO_NOFCS:
952                 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
953                 break;
954 
955         case SO_SELECT_ERR_QUEUE:
956                 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
957                 break;
958 
959 #ifdef CONFIG_NET_RX_BUSY_POLL
960         case SO_BUSY_POLL:
961                 /* allow unprivileged users to decrease the value */
962                 if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
963                         ret = -EPERM;
964                 else {
965                         if (val < 0)
966                                 ret = -EINVAL;
967                         else
968                                 sk->sk_ll_usec = val;
969                 }
970                 break;
971 #endif
972 
973         case SO_MAX_PACING_RATE:
974                 sk->sk_max_pacing_rate = val;
975                 sk->sk_pacing_rate = min(sk->sk_pacing_rate,
976                                          sk->sk_max_pacing_rate);
977                 break;
978 
979         default:
980                 ret = -ENOPROTOOPT;
981                 break;
982         }
983         release_sock(sk);
984         return ret;
985 }
986 EXPORT_SYMBOL(sock_setsockopt);
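
As the SO_SNDBUF/SO_RCVBUF comments above explain, the kernel doubles the requested value to account for struct sk_buff overhead and reports the doubled value back through getsockopt(). A short standalone sketch showing that round trip:

#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        int req = 65536, got = 0;
        socklen_t len = sizeof(got);

        if (fd < 0)
                return 1;

        /* request 64 KiB of receive buffer (clamped to net.core.rmem_max) */
        setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &req, sizeof(req));

        /* the kernel stores and reports roughly twice the requested value */
        getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &got, &len);
        printf("asked for %d, kernel set %d\n", req, got);

        return 0;
}
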
987 
988 
989 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
990                           struct ucred *ucred)
991 {
992         ucred->pid = pid_vnr(pid);
993         ucred->uid = ucred->gid = -1;
994         if (cred) {
995                 struct user_namespace *current_ns = current_user_ns();
996 
997                 ucred->uid = from_kuid_munged(current_ns, cred->euid);
998                 ucred->gid = from_kgid_munged(current_ns, cred->egid);
999         }
1000 }
1001 
1002 int sock_getsockopt(struct socket *sock, int level, int optname,
1003                     char __user *optval, int __user *optlen)
1004 {
1005         struct sock *sk = sock->sk;
1006 
1007         union {
1008                 int val;
1009                 struct linger ling;
1010                 struct timeval tm;
1011         } v;
1012 
1013         int lv = sizeof(int);
1014         int len;
1015 
1016         if (get_user(len, optlen))
1017                 return -EFAULT;
1018         if (len < 0)
1019                 return -EINVAL;
1020 
1021         memset(&v, 0, sizeof(v));
1022 
1023         switch (optname) {
1024         case SO_DEBUG:
1025                 v.val = sock_flag(sk, SOCK_DBG);
1026                 break;
1027 
1028         case SO_DONTROUTE:
1029                 v.val = sock_flag(sk, SOCK_LOCALROUTE);
1030                 break;
1031 
1032         case SO_BROADCAST:
1033                 v.val = sock_flag(sk, SOCK_BROADCAST);
1034                 break;
1035 
1036         case SO_SNDBUF:
1037                 v.val = sk->sk_sndbuf;
1038                 break;
1039 
1040         case SO_RCVBUF:
1041                 v.val = sk->sk_rcvbuf;
1042                 break;
1043 
1044         case SO_REUSEADDR:
1045                 v.val = sk->sk_reuse;
1046                 break;
1047 
1048         case SO_REUSEPORT:
1049                 v.val = sk->sk_reuseport;
1050                 break;
1051 
1052         case SO_KEEPALIVE:
1053                 v.val = sock_flag(sk, SOCK_KEEPOPEN);
1054                 break;
1055 
1056         case SO_TYPE:
1057                 v.val = sk->sk_type;
1058                 break;
1059 
1060         case SO_PROTOCOL:
1061                 v.val = sk->sk_protocol;
1062                 break;
1063 
1064         case SO_DOMAIN:
1065                 v.val = sk->sk_family;
1066                 break;
1067 
1068         case SO_ERROR:
1069                 v.val = -sock_error(sk);
1070                 if (v.val == 0)
1071                         v.val = xchg(&sk->sk_err_soft, 0);
1072                 break;
1073 
1074         case SO_OOBINLINE:
1075                 v.val = sock_flag(sk, SOCK_URGINLINE);
1076                 break;
1077 
1078         case SO_NO_CHECK:
1079                 v.val = sk->sk_no_check_tx;
1080                 break;
1081 
1082         case SO_PRIORITY:
1083                 v.val = sk->sk_priority;
1084                 break;
1085 
1086         case SO_LINGER:
1087                 lv              = sizeof(v.ling);
1088                 v.ling.l_onoff  = sock_flag(sk, SOCK_LINGER);
1089                 v.ling.l_linger = sk->sk_lingertime / HZ;
1090                 break;
1091 
1092         case SO_BSDCOMPAT:
1093                 sock_warn_obsolete_bsdism("getsockopt");
1094                 break;
1095 
1096         case SO_TIMESTAMP:
1097                 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1098                                 !sock_flag(sk, SOCK_RCVTSTAMPNS);
1099                 break;
1100 
1101         case SO_TIMESTAMPNS:
1102                 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
1103                 break;
1104 
1105         case SO_TIMESTAMPING:
1106                 v.val = sk->sk_tsflags;
1107                 break;
1108 
1109         case SO_RCVTIMEO:
1110                 lv = sizeof(struct timeval);
1111                 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
1112                         v.tm.tv_sec = 0;
1113                         v.tm.tv_usec = 0;
1114                 } else {
1115                         v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
1116                         v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
1117                 }
1118                 break;
1119 
1120         case SO_SNDTIMEO:
1121                 lv = sizeof(struct timeval);
1122                 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
1123                         v.tm.tv_sec = 0;
1124                         v.tm.tv_usec = 0;
1125                 } else {
1126                         v.tm.tv_sec = sk->sk_sndtimeo / HZ;
1127                         v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
1128                 }
1129                 break;
1130 
1131         case SO_RCVLOWAT:
1132                 v.val = sk->sk_rcvlowat;
1133                 break;
1134 
1135         case SO_SNDLOWAT:
1136                 v.val = 1;
1137                 break;
1138 
1139         case SO_PASSCRED:
1140                 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1141                 break;
1142 
1143         case SO_PEERCRED:
1144         {
1145                 struct ucred peercred;
1146                 if (len > sizeof(peercred))
1147                         len = sizeof(peercred);
1148                 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1149                 if (copy_to_user(optval, &peercred, len))
1150                         return -EFAULT;
1151                 goto lenout;
1152         }
1153 
1154         case SO_PEERNAME:
1155         {
1156                 char address[128];
1157 
1158                 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
1159                         return -ENOTCONN;
1160                 if (lv < len)
1161                         return -EINVAL;
1162                 if (copy_to_user(optval, address, len))
1163                         return -EFAULT;
1164                 goto lenout;
1165         }
1166 
1167         /* Dubious BSD thing... Probably nobody even uses it, but
1168          * the UNIX standard wants it for whatever reason... -DaveM
1169          */
1170         case SO_ACCEPTCONN:
1171                 v.val = sk->sk_state == TCP_LISTEN;
1172                 break;
1173 
1174         case SO_PASSSEC:
1175                 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1176                 break;
1177 
1178         case SO_PEERSEC:
1179                 return security_socket_getpeersec_stream(sock, optval, optlen, len);
1180 
1181         case SO_MARK:
1182                 v.val = sk->sk_mark;
1183                 break;
1184 
1185         case SO_RXQ_OVFL:
1186                 v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1187                 break;
1188 
1189         case SO_WIFI_STATUS:
1190                 v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1191                 break;
1192 
1193         case SO_PEEK_OFF:
1194                 if (!sock->ops->set_peek_off)
1195                         return -EOPNOTSUPP;
1196 
1197                 v.val = sk->sk_peek_off;
1198                 break;
1199         case SO_NOFCS:
1200                 v.val = sock_flag(sk, SOCK_NOFCS);
1201                 break;
1202 
1203         case SO_BINDTODEVICE:
1204                 return sock_getbindtodevice(sk, optval, optlen, len);
1205 
1206         case SO_GET_FILTER:
1207                 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1208                 if (len < 0)
1209                         return len;
1210 
1211                 goto lenout;
1212 
1213         case SO_LOCK_FILTER:
1214                 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1215                 break;
1216 
1217         case SO_BPF_EXTENSIONS:
1218                 v.val = bpf_tell_extensions();
1219                 break;
1220 
1221         case SO_SELECT_ERR_QUEUE:
1222                 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1223                 break;
1224 
1225 #ifdef CONFIG_NET_RX_BUSY_POLL
1226         case SO_BUSY_POLL:
1227                 v.val = sk->sk_ll_usec;
1228                 break;
1229 #endif
1230 
1231         case SO_MAX_PACING_RATE:
1232                 v.val = sk->sk_max_pacing_rate;
1233                 break;
1234 
1235         default:
1236                 return -ENOPROTOOPT;
1237         }
1238 
1239         if (len > lv)
1240                 len = lv;
1241         if (copy_to_user(optval, &v, len))
1242                 return -EFAULT;
1243 lenout:
1244         if (put_user(len, optlen))
1245                 return -EFAULT;
1246         return 0;
1247 }
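
One of the options handled above, SO_PEERCRED, returns the credentials gathered by cred_to_ucred(): the pid/uid/gid of the peer of a connected AF_UNIX socket, translated into the caller's namespaces. A minimal standalone sketch using a socketpair, where the peer is the process itself:

#define _GNU_SOURCE             /* for struct ucred / SO_PEERCRED on glibc */
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
        int sv[2];
        struct ucred peer;
        socklen_t len = sizeof(peer);

        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
                return 1;

        /* both ends belong to this process, so we see our own credentials */
        if (getsockopt(sv[0], SOL_SOCKET, SO_PEERCRED, &peer, &len) == 0)
                printf("peer pid=%d uid=%d gid=%d\n",
                       (int)peer.pid, (int)peer.uid, (int)peer.gid);

        return 0;
}
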
1248 
1249 /*
1250  * Initialize an sk_lock.
1251  *
1252  * (We also register the sk_lock with the lock validator.)
1253  */
1254 static inline void sock_lock_init(struct sock *sk)
1255 {
1256         sock_lock_init_class_and_name(sk,
1257                         af_family_slock_key_strings[sk->sk_family],
1258                         af_family_slock_keys + sk->sk_family,
1259                         af_family_key_strings[sk->sk_family],
1260                         af_family_keys + sk->sk_family);
1261 }
1262 
1263 /*
1264  * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
1265  * even temporarily, because of RCU lookups. sk_node should also be left as is.
1266  * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end.
1267  */
1268 static void sock_copy(struct sock *nsk, const struct sock *osk)
1269 {
1270 #ifdef CONFIG_SECURITY_NETWORK
1271         void *sptr = nsk->sk_security;
1272 #endif
1273         memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1274 
1275         memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1276                osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1277 
1278 #ifdef CONFIG_SECURITY_NETWORK
1279         nsk->sk_security = sptr;
1280         security_sk_clone(osk, nsk);
1281 #endif
1282 }
1283 
1284 void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
1285 {
1286         unsigned long nulls1, nulls2;
1287 
1288         nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
1289         nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
1290         if (nulls1 > nulls2)
1291                 swap(nulls1, nulls2);
1292 
1293         if (nulls1 != 0)
1294                 memset((char *)sk, 0, nulls1);
1295         memset((char *)sk + nulls1 + sizeof(void *), 0,
1296                nulls2 - nulls1 - sizeof(void *));
1297         memset((char *)sk + nulls2 + sizeof(void *), 0,
1298                size - nulls2 - sizeof(void *));
1299 }
1300 EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
1301 
1302 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1303                 int family)
1304 {
1305         struct sock *sk;
1306         struct kmem_cache *slab;
1307 
1308         slab = prot->slab;
1309         if (slab != NULL) {
1310                 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1311                 if (!sk)
1312                         return sk;
1313                 if (priority & __GFP_ZERO) {
1314                         if (prot->clear_sk)
1315                                 prot->clear_sk(sk, prot->obj_size);
1316                         else
1317                                 sk_prot_clear_nulls(sk, prot->obj_size);
1318                 }
1319         } else
1320                 sk = kmalloc(prot->obj_size, priority);
1321 
1322         if (sk != NULL) {
1323                 kmemcheck_annotate_bitfield(sk, flags);
1324 
1325                 if (security_sk_alloc(sk, family, priority))
1326                         goto out_free;
1327 
1328                 if (!try_module_get(prot->owner))
1329                         goto out_free_sec;
1330                 sk_tx_queue_clear(sk);
1331         }
1332 
1333         return sk;
1334 
1335 out_free_sec:
1336         security_sk_free(sk);
1337 out_free:
1338         if (slab != NULL)
1339                 kmem_cache_free(slab, sk);
1340         else
1341                 kfree(sk);
1342         return NULL;
1343 }
1344 
1345 static void sk_prot_free(struct proto *prot, struct sock *sk)
1346 {
1347         struct kmem_cache *slab;
1348         struct module *owner;
1349 
1350         owner = prot->owner;
1351         slab = prot->slab;
1352 
1353         security_sk_free(sk);
1354         if (slab != NULL)
1355                 kmem_cache_free(slab, sk);
1356         else
1357                 kfree(sk);
1358         module_put(owner);
1359 }
1360 
1361 #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
1362 void sock_update_netprioidx(struct sock *sk)
1363 {
1364         if (in_interrupt())
1365                 return;
1366 
1367         sk->sk_cgrp_prioidx = task_netprioidx(current);
1368 }
1369 EXPORT_SYMBOL_GPL(sock_update_netprioidx);
1370 #endif
1371 
1372 /**
1373  *      sk_alloc - All socket objects are allocated here
1374  *      @net: the applicable net namespace
1375  *      @family: protocol family
1376  *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1377  *      @prot: struct proto associated with this new sock instance
1378  */
1379 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1380                       struct proto *prot)
1381 {
1382         struct sock *sk;
1383 
1384         sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1385         if (sk) {
1386                 sk->sk_family = family;
1387                 /*
1388                  * See comment in struct sock definition to understand
1389                  * why we need sk_prot_creator -acme
1390                  */
1391                 sk->sk_prot = sk->sk_prot_creator = prot;
1392                 sock_lock_init(sk);
1393                 sock_net_set(sk, get_net(net));
1394                 atomic_set(&sk->sk_wmem_alloc, 1);
1395 
1396                 sock_update_classid(sk);
1397                 sock_update_netprioidx(sk);
1398         }
1399 
1400         return sk;
1401 }
1402 EXPORT_SYMBOL(sk_alloc);
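
A hedged sketch of the usual caller of sk_alloc(): a protocol family's create() hook allocates the sock and then initializes the generic fields. The proto structure and function below are hypothetical; sock_init_data() is the generic initializer defined later in this file:

static struct proto hypo_proto = {
        .name           = "HYPO",
        .owner          = THIS_MODULE,
        .obj_size       = sizeof(struct sock),
};

static int hypo_create(struct net *net, struct socket *sock,
                       int protocol, int kern)
{
        struct sock *sk;

        /* a real family would pass its own AF_* value instead of AF_UNSPEC */
        sk = sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &hypo_proto);
        if (!sk)
                return -ENOBUFS;

        sock_init_data(sock, sk);
        sk->sk_protocol = protocol;
        return 0;
}
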
1403 
1404 static void __sk_free(struct sock *sk)
1405 {
1406         struct sk_filter *filter;
1407 
1408         if (sk->sk_destruct)
1409                 sk->sk_destruct(sk);
1410 
1411         filter = rcu_dereference_check(sk->sk_filter,
1412                                        atomic_read(&sk->sk_wmem_alloc) == 0);
1413         if (filter) {
1414                 sk_filter_uncharge(sk, filter);
1415                 RCU_INIT_POINTER(sk->sk_filter, NULL);
1416         }
1417 
1418         sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1419 
1420         if (atomic_read(&sk->sk_omem_alloc))
1421                 pr_debug("%s: optmem leakage (%d bytes) detected\n",
1422                          __func__, atomic_read(&sk->sk_omem_alloc));
1423 
1424         if (sk->sk_frag.page) {
1425                 put_page(sk->sk_frag.page);
1426                 sk->sk_frag.page = NULL;
1427         }
1428 
1429         if (sk->sk_peer_cred)
1430                 put_cred(sk->sk_peer_cred);
1431         put_pid(sk->sk_peer_pid);
1432         put_net(sock_net(sk));
1433         sk_prot_free(sk->sk_prot_creator, sk);
1434 }
1435 
1436 void sk_free(struct sock *sk)
1437 {
1438         /*
1439          * We subtract one from sk_wmem_alloc and can know if
1440          * some packets are still in some tx queue.
1441          * If not null, sock_wfree() will call __sk_free(sk) later
1442          */
1443         if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1444                 __sk_free(sk);
1445 }
1446 EXPORT_SYMBOL(sk_free);
1447 
1448 /*
1449  * The last sock_put should drop a reference to sk->sk_net. It has already
1450  * been dropped in sk_change_net. Taking a reference to the stopping namespace
1451  * is not an option.
1452  * Take a reference to the socket to remove it from the hash _alive_ and after
1453  * that destroy it in the context of init_net.
1454  */
1455 void sk_release_kernel(struct sock *sk)
1456 {
1457         if (sk == NULL || sk->sk_socket == NULL)
1458                 return;
1459 
1460         sock_hold(sk);
1461         sock_release(sk->sk_socket);
1462         release_net(sock_net(sk));
1463         sock_net_set(sk, get_net(&init_net));
1464         sock_put(sk);
1465 }
1466 EXPORT_SYMBOL(sk_release_kernel);
1467 
1468 static void sk_update_clone(const struct sock *sk, struct sock *newsk)
1469 {
1470         if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1471                 sock_update_memcg(newsk);
1472 }
1473 
1474 /**
1475  *      sk_clone_lock - clone a socket, and lock its clone
1476  *      @sk: the socket to clone
1477  *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1478  *
1479  *      Caller must unlock socket even in error path (bh_unlock_sock(newsk))
1480  */
1481 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1482 {
1483         struct sock *newsk;
1484         bool is_charged = true;
1485 
1486         newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
1487         if (newsk != NULL) {
1488                 struct sk_filter *filter;
1489 
1490                 sock_copy(newsk, sk);
1491 
1492                 newsk->sk_prot_creator = sk->sk_prot;
1493 
1494                 /* SANITY */
1495                 get_net(sock_net(newsk));
1496                 sk_node_init(&newsk->sk_node);
1497                 sock_lock_init(newsk);
1498                 bh_lock_sock(newsk);
1499                 newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;
1500                 newsk->sk_backlog.len = 0;
1501 
1502                 atomic_set(&newsk->sk_rmem_alloc, 0);
1503                 /*
1504                  * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
1505                  */
1506                 atomic_set(&newsk->sk_wmem_alloc, 1);
1507                 atomic_set(&newsk->sk_omem_alloc, 0);
1508                 skb_queue_head_init(&newsk->sk_receive_queue);
1509                 skb_queue_head_init(&newsk->sk_write_queue);
1510 
1511                 spin_lock_init(&newsk->sk_dst_lock);
1512                 rwlock_init(&newsk->sk_callback_lock);
1513                 lockdep_set_class_and_name(&newsk->sk_callback_lock,
1514                                 af_callback_keys + newsk->sk_family,
1515                                 af_family_clock_key_strings[newsk->sk_family]);
1516 
1517                 newsk->sk_dst_cache     = NULL;
1518                 newsk->sk_wmem_queued   = 0;
1519                 newsk->sk_forward_alloc = 0;
1520                 newsk->sk_send_head     = NULL;
1521                 newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1522 
1523                 sock_reset_flag(newsk, SOCK_DONE);
1524                 skb_queue_head_init(&newsk->sk_error_queue);
1525 
1526                 filter = rcu_dereference_protected(newsk->sk_filter, 1);
1527                 if (filter != NULL)
1528                         /* though it's an empty new sock, the charging may fail
1529                          * if sysctl_optmem_max was changed between the creation of
1530                          * the original socket and this clone
1531                          */
1532                         is_charged = sk_filter_charge(newsk, filter);
1533 
1534                 if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk))) {
1535                         /* We need to make sure that we don't uncharge the new
1536                          * socket if we couldn't charge it in the first place
1537                          * as otherwise we uncharge the parent's filter.
1538                          */
1539                         if (!is_charged)
1540                                 RCU_INIT_POINTER(newsk->sk_filter, NULL);
1541                         /* It is still a raw copy of the parent, so invalidate
1542                          * its destructor and do a plain sk_free() */
1543                         newsk->sk_destruct = NULL;
1544                         bh_unlock_sock(newsk);
1545                         sk_free(newsk);
1546                         newsk = NULL;
1547                         goto out;
1548                 }
1549 
1550                 newsk->sk_err      = 0;
1551                 newsk->sk_priority = 0;
1552                 /*
1553                  * Before updating sk_refcnt, we must commit prior changes to memory
1554                  * (Documentation/RCU/rculist_nulls.txt for details)
1555                  */
1556                 smp_wmb();
1557                 atomic_set(&newsk->sk_refcnt, 2);
1558 
1559                 /*
1560                  * Increment the counter in the same struct proto as the master
1561                  * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1562                  * is the same as sk->sk_prot->socks, as this field was copied
1563                  * with memcpy).
1564                  *
1565                  * This _changes_ the previous behaviour, where
1566                  * tcp_create_openreq_child was always incrementing the
1567                  * equivalent of tcp_prot->socks (inet_sock_nr), so this has
1568                  * to be taken into account in all callers. -acme
1569                  */
1570                 sk_refcnt_debug_inc(newsk);
1571                 sk_set_socket(newsk, NULL);
1572                 newsk->sk_wq = NULL;
1573 
1574                 sk_update_clone(sk, newsk);
1575 
1576                 if (newsk->sk_prot->sockets_allocated)
1577                         sk_sockets_allocated_inc(newsk);
1578 
1579                 if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
1580                         net_enable_timestamp();
1581         }
1582 out:
1583         return newsk;
1584 }
1585 EXPORT_SYMBOL_GPL(sk_clone_lock);
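
/* Illustrative sketch (not part of the kernel source): sk_clone_lock() returns
 * the clone locked with bh_lock_sock(), so a caller such as a protocol accept
 * path is expected to finish its own setup of the clone and then unlock it,
 * roughly:
 *
 *	struct sock *newsk = sk_clone_lock(sk, GFP_ATOMIC);
 *
 *	if (newsk) {
 *		// ... protocol specific initialisation of newsk ...
 *		bh_unlock_sock(newsk);
 *	}
 */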
1586 
1587 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1588 {
1589         __sk_dst_set(sk, dst);
1590         sk->sk_route_caps = dst->dev->features;
1591         if (sk->sk_route_caps & NETIF_F_GSO)
1592                 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1593         sk->sk_route_caps &= ~sk->sk_route_nocaps;
1594         if (sk_can_gso(sk)) {
1595                 if (dst->header_len) {
1596                         sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1597                 } else {
1598                         sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1599                         sk->sk_gso_max_size = dst->dev->gso_max_size;
1600                         sk->sk_gso_max_segs = dst->dev->gso_max_segs;
1601                 }
1602         }
1603 }
1604 EXPORT_SYMBOL_GPL(sk_setup_caps);
1605 
1606 /*
1607  *      Simple resource managers for sockets.
1608  */
1609 
1610 
1611 /*
1612  * Write buffer destructor automatically called from kfree_skb.
1613  */
1614 void sock_wfree(struct sk_buff *skb)
1615 {
1616         struct sock *sk = skb->sk;
1617         unsigned int len = skb->truesize;
1618 
1619         if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
1620                 /*
1621                  * Keep a reference on sk_wmem_alloc; it will be released
1622                  * after the sk_write_space() call
1623                  */
1624                 atomic_sub(len - 1, &sk->sk_wmem_alloc);
1625                 sk->sk_write_space(sk);
1626                 len = 1;
1627         }
1628         /*
1629          * if sk_wmem_alloc reaches 0, we must finish what sk_free()
1630          * could not do because of in-flight packets
1631          */
1632         if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
1633                 __sk_free(sk);
1634 }
1635 EXPORT_SYMBOL(sock_wfree);
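
/* Illustrative note (not part of the kernel source): skbs reach this destructor
 * via skb_set_owner_w(), which charges skb->truesize to sk->sk_wmem_alloc and
 * sets skb->destructor = sock_wfree, so a typical write path looks roughly like:
 *
 *	skb = alloc_skb(size, GFP_KERNEL);
 *	if (skb)
 *		skb_set_owner_w(skb, sk);
 *	...
 *	kfree_skb(skb);		// runs sock_wfree() and uncharges the socket
 */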
1636 
1637 void skb_orphan_partial(struct sk_buff *skb)
1638 {
1639         if (skb->destructor == sock_wfree
1640 #ifdef CONFIG_INET
1641             || skb->destructor == tcp_wfree
1642 #endif
1643                 ) {
1644                 struct sock *sk = skb->sk;
1645 
1646                 if (atomic_inc_not_zero(&sk->sk_refcnt)) {
1647                         atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
1648                         skb->destructor = sock_efree;
1649                 }
1650         } else {
1651                 skb_orphan(skb);
1652         }
1653 }
1654 EXPORT_SYMBOL(skb_orphan_partial);
1655 
1656 /*
1657  * Read buffer destructor automatically called from kfree_skb.
1658  */
1659 void sock_rfree(struct sk_buff *skb)
1660 {
1661         struct sock *sk = skb->sk;
1662         unsigned int len = skb->truesize;
1663 
1664         atomic_sub(len, &sk->sk_rmem_alloc);
1665         sk_mem_uncharge(sk, len);
1666 }
1667 EXPORT_SYMBOL(sock_rfree);
1668 
1669 void sock_efree(struct sk_buff *skb)
1670 {
1671         sock_put(skb->sk);
1672 }
1673 EXPORT_SYMBOL(sock_efree);
1674 
1675 #ifdef CONFIG_INET
1676 void sock_edemux(struct sk_buff *skb)
1677 {
1678         struct sock *sk = skb->sk;
1679 
1680         if (sk->sk_state == TCP_TIME_WAIT)
1681                 inet_twsk_put(inet_twsk(sk));
1682         else
1683                 sock_put(sk);
1684 }
1685 EXPORT_SYMBOL(sock_edemux);
1686 #endif
1687 
1688 kuid_t sock_i_uid(struct sock *sk)
1689 {
1690         kuid_t uid;
1691 
1692         read_lock_bh(&sk->sk_callback_lock);
1693         uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
1694         read_unlock_bh(&sk->sk_callback_lock);
1695         return uid;
1696 }
1697 EXPORT_SYMBOL(sock_i_uid);
1698 
1699 unsigned long sock_i_ino(struct sock *sk)
1700 {
1701         unsigned long ino;
1702 
1703         read_lock_bh(&sk->sk_callback_lock);
1704         ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1705         read_unlock_bh(&sk->sk_callback_lock);
1706         return ino;
1707 }
1708 EXPORT_SYMBOL(sock_i_ino);
1709 
1710 /*
1711  * Allocate an skb from the socket's send buffer.
1712  */
1713 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1714                              gfp_t priority)
1715 {
1716         if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1717                 struct sk_buff *skb = alloc_skb(size, priority);
1718                 if (skb) {
1719                         skb_set_owner_w(skb, sk);
1720                         return skb;
1721                 }
1722         }
1723         return NULL;
1724 }
1725 EXPORT_SYMBOL(sock_wmalloc);
1726 
1727 /*
1728  * Allocate a memory block from the socket's option memory buffer.
1729  */
1730 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1731 {
1732         if ((unsigned int)size <= sysctl_optmem_max &&
1733             atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1734                 void *mem;
1735                 /* Do the add first, to avoid a race in case kmalloc
1736                  * sleeps.
1737                  */
1738                 atomic_add(size, &sk->sk_omem_alloc);
1739                 mem = kmalloc(size, priority);
1740                 if (mem)
1741                         return mem;
1742                 atomic_sub(size, &sk->sk_omem_alloc);
1743         }
1744         return NULL;
1745 }
1746 EXPORT_SYMBOL(sock_kmalloc);
1747 
1748 /*
1749  * Free an option memory block.
1750  */
1751 void sock_kfree_s(struct sock *sk, void *mem, int size)
1752 {
1753         if (WARN_ON_ONCE(!mem))
1754                 return;
1755         kfree(mem);
1756         atomic_sub(size, &sk->sk_omem_alloc);
1757 }
1758 EXPORT_SYMBOL(sock_kfree_s);
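
/* Illustrative sketch (not part of the kernel source): sock_kmalloc() and
 * sock_kfree_s() are meant to be used as a pair so the sk_omem_alloc charge
 * stays balanced; a hypothetical option handler might do:
 *
 *	void *buf = sock_kmalloc(sk, optlen, GFP_KERNEL);
 *
 *	if (!buf)
 *		return -ENOBUFS;
 *	// ... use buf for the lifetime of the option ...
 *	sock_kfree_s(sk, buf, optlen);
 */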
1759 
1760 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1761    I think these locks should be removed for datagram sockets.
1762  */
1763 static long sock_wait_for_wmem(struct sock *sk, long timeo)
1764 {
1765         DEFINE_WAIT(wait);
1766 
1767         clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1768         for (;;) {
1769                 if (!timeo)
1770                         break;
1771                 if (signal_pending(current))
1772                         break;
1773                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1774                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1775                 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1776                         break;
1777                 if (sk->sk_shutdown & SEND_SHUTDOWN)
1778                         break;
1779                 if (sk->sk_err)
1780                         break;
1781                 timeo = schedule_timeout(timeo);
1782         }
1783         finish_wait(sk_sleep(sk), &wait);
1784         return timeo;
1785 }
1786 
1787 
1788 /*
1789  *      Generic send/receive buffer handlers
1790  */
1791 
1792 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1793                                      unsigned long data_len, int noblock,
1794                                      int *errcode, int max_page_order)
1795 {
1796         struct sk_buff *skb;
1797         long timeo;
1798         int err;
1799 
1800         timeo = sock_sndtimeo(sk, noblock);
1801         for (;;) {
1802                 err = sock_error(sk);
1803                 if (err != 0)
1804                         goto failure;
1805 
1806                 err = -EPIPE;
1807                 if (sk->sk_shutdown & SEND_SHUTDOWN)
1808                         goto failure;
1809 
1810                 if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
1811                         break;
1812 
1813                 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1814                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1815                 err = -EAGAIN;
1816                 if (!timeo)
1817                         goto failure;
1818                 if (signal_pending(current))
1819                         goto interrupted;
1820                 timeo = sock_wait_for_wmem(sk, timeo);
1821         }
1822         skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
1823                                    errcode, sk->sk_allocation);
1824         if (skb)
1825                 skb_set_owner_w(skb, sk);
1826         return skb;
1827 
1828 interrupted:
1829         err = sock_intr_errno(timeo);
1830 failure:
1831         *errcode = err;
1832         return NULL;
1833 }
1834 EXPORT_SYMBOL(sock_alloc_send_pskb);
1835 
1836 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1837                                     int noblock, int *errcode)
1838 {
1839         return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
1840 }
1841 EXPORT_SYMBOL(sock_alloc_send_skb);
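
/* Illustrative sketch (not part of the kernel source): a datagram sendmsg
 * implementation would typically allocate its buffer like this, where hlen is a
 * hypothetical header reserve and err is returned to the caller on failure:
 *
 *	skb = sock_alloc_send_skb(sk, hlen + len,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		goto out;	// err was filled in by the helper
 */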
1842 
1843 /* On 32bit arches, an skb frag is limited to 2^15 */
1844 #define SKB_FRAG_PAGE_ORDER     get_order(32768)
1845 
1846 /**
1847  * skb_page_frag_refill - check that a page_frag contains enough room
1848  * @sz: minimum size of the fragment we want to get
1849  * @pfrag: pointer to page_frag
1850  * @gfp: priority for memory allocation
1851  *
1852  * Note: While this allocator tries to use high order pages, there is
1853  * no guarantee that allocations succeed. Therefore, @sz MUST be
1854  * less than or equal to PAGE_SIZE.
1855  */
1856 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
1857 {
1858         if (pfrag->page) {
1859                 if (atomic_read(&pfrag->page->_count) == 1) {
1860                         pfrag->offset = 0;
1861                         return true;
1862                 }
1863                 if (pfrag->offset + sz <= pfrag->size)
1864                         return true;
1865                 put_page(pfrag->page);
1866         }
1867 
1868         pfrag->offset = 0;
1869         if (SKB_FRAG_PAGE_ORDER) {
1870                 pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP |
1871                                           __GFP_NOWARN | __GFP_NORETRY,
1872                                           SKB_FRAG_PAGE_ORDER);
1873                 if (likely(pfrag->page)) {
1874                         pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
1875                         return true;
1876                 }
1877         }
1878         pfrag->page = alloc_page(gfp);
1879         if (likely(pfrag->page)) {
1880                 pfrag->size = PAGE_SIZE;
1881                 return true;
1882         }
1883         return false;
1884 }
1885 EXPORT_SYMBOL(skb_page_frag_refill);
1886 
1887 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
1888 {
1889         if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
1890                 return true;
1891 
1892         sk_enter_memory_pressure(sk);
1893         sk_stream_moderate_sndbuf(sk);
1894         return false;
1895 }
1896 EXPORT_SYMBOL(sk_page_frag_refill);
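
/* Illustrative sketch (not part of the kernel source): stream sendmsg paths
 * usually pair this with sk_page_frag() before copying user data into the
 * fragment (wait_for_memory is a hypothetical label):
 *
 *	struct page_frag *pfrag = sk_page_frag(sk);
 *
 *	if (!sk_page_frag_refill(sk, pfrag))
 *		goto wait_for_memory;
 *	// up to pfrag->size - pfrag->offset bytes may now be copied at
 *	// pfrag->page + pfrag->offset
 */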
1897 
1898 static void __lock_sock(struct sock *sk)
1899         __releases(&sk->sk_lock.slock)
1900         __acquires(&sk->sk_lock.slock)
1901 {
1902         DEFINE_WAIT(wait);
1903 
1904         for (;;) {
1905                 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1906                                         TASK_UNINTERRUPTIBLE);
1907                 spin_unlock_bh(&sk->sk_lock.slock);
1908                 schedule();
1909                 spin_lock_bh(&sk->sk_lock.slock);
1910                 if (!sock_owned_by_user(sk))
1911                         break;
1912         }
1913         finish_wait(&sk->sk_lock.wq, &wait);
1914 }
1915 
1916 static void __release_sock(struct sock *sk)
1917         __releases(&sk->sk_lock.slock)
1918         __acquires(&sk->sk_lock.slock)
1919 {
1920         struct sk_buff *skb = sk->sk_backlog.head;
1921 
1922         do {
1923                 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1924                 bh_unlock_sock(sk);
1925 
1926                 do {
1927                         struct sk_buff *next = skb->next;
1928 
1929                         prefetch(next);
1930                         WARN_ON_ONCE(skb_dst_is_noref(skb));
1931                         skb->next = NULL;
1932                         sk_backlog_rcv(sk, skb);
1933 
1934                         /*
1935                          * We are in process context here with softirqs
1936                          * disabled, use cond_resched_softirq() to preempt.
1937                          * This is safe to do because we've taken the backlog
1938                          * queue private:
1939                          */
1940                         cond_resched_softirq();
1941 
1942                         skb = next;
1943                 } while (skb != NULL);
1944 
1945                 bh_lock_sock(sk);
1946         } while ((skb = sk->sk_backlog.head) != NULL);
1947 
1948         /*
1949          * Doing the zeroing here guarantees we cannot loop forever
1950          * while a wild producer attempts to flood us.
1951          */
1952         sk->sk_backlog.len = 0;
1953 }
1954 
1955 /**
1956  * sk_wait_data - wait for data to arrive at sk_receive_queue
1957  * @sk:    sock to wait on
1958  * @timeo: for how long
1959  *
1960  * The socket state, including sk->sk_err, is changed only under the lock,
1961  * hence we may omit checks after joining the wait queue.
1962  * We check the receive queue before schedule() only as an optimization;
1963  * it is very likely that release_sock() added new data.
1964  */
1965 int sk_wait_data(struct sock *sk, long *timeo)
1966 {
1967         int rc;
1968         DEFINE_WAIT(wait);
1969 
1970         prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1971         set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1972         rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1973         clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1974         finish_wait(sk_sleep(sk), &wait);
1975         return rc;
1976 }
1977 EXPORT_SYMBOL(sk_wait_data);
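
/* Illustrative sketch (not part of the kernel source): a blocking recvmsg
 * implementation typically loops on this helper while holding the socket lock:
 *
 *	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 *	while (skb_queue_empty(&sk->sk_receive_queue)) {
 *		if (!timeo || signal_pending(current))
 *			break;	// caller maps this to -EAGAIN / -ERESTARTSYS
 *		sk_wait_data(sk, &timeo);
 *	}
 */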
1978 
1979 /**
1980  *      __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
1981  *      @sk: socket
1982  *      @size: memory size to allocate
1983  *      @kind: allocation type
1984  *
1985  *      If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
1986  *      rmem allocation. This function assumes that protocols which have
1987  *      memory_pressure use sk_wmem_queued for write buffer accounting.
1988  */
1989 int __sk_mem_schedule(struct sock *sk, int size, int kind)
1990 {
1991         struct proto *prot = sk->sk_prot;
1992         int amt = sk_mem_pages(size);
1993         long allocated;
1994         int parent_status = UNDER_LIMIT;
1995 
1996         sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
1997 
1998         allocated = sk_memory_allocated_add(sk, amt, &parent_status);
1999 
2000         /* Under limit. */
2001         if (parent_status == UNDER_LIMIT &&
2002                         allocated <= sk_prot_mem_limits(sk, 0)) {
2003                 sk_leave_memory_pressure(sk);
2004                 return 1;
2005         }
2006 
2007         /* Under pressure. (we or our parents) */
2008         if ((parent_status > SOFT_LIMIT) ||
2009                         allocated > sk_prot_mem_limits(sk, 1))
2010                 sk_enter_memory_pressure(sk);
2011 
2012         /* Over hard limit (we or our parents) */
2013         if ((parent_status == OVER_LIMIT) ||
2014                         (allocated > sk_prot_mem_limits(sk, 2)))
2015                 goto suppress_allocation;
2016 
2017         /* guarantee minimum buffer size under pressure */
2018         if (kind == SK_MEM_RECV) {
2019                 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
2020                         return 1;
2021 
2022         } else { /* SK_MEM_SEND */
2023                 if (sk->sk_type == SOCK_STREAM) {
2024                         if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
2025                                 return 1;
2026                 } else if (atomic_read(&sk->sk_wmem_alloc) <
2027                            prot->sysctl_wmem[0])
2028                                 return 1;
2029         }
2030 
2031         if (sk_has_memory_pressure(sk)) {
2032                 int alloc;
2033 
2034                 if (!sk_under_memory_pressure(sk))
2035                         return 1;
2036                 alloc = sk_sockets_allocated_read_positive(sk);
2037                 if (sk_prot_mem_limits(sk, 2) > alloc *
2038                     sk_mem_pages(sk->sk_wmem_queued +
2039                                  atomic_read(&sk->sk_rmem_alloc) +
2040                                  sk->sk_forward_alloc))
2041                         return 1;
2042         }
2043 
2044 suppress_allocation:
2045 
2046         if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2047                 sk_stream_moderate_sndbuf(sk);
2048 
2049                 /* Fail only if socket is _under_ its sndbuf.
2050                  * In this case we cannot block, so we have to fail.
2051                  */
2052                 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2053                         return 1;
2054         }
2055 
2056         trace_sock_exceed_buf_limit(sk, prot, allocated);
2057 
2058         /* Alas. Undo changes. */
2059         sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
2060 
2061         sk_memory_allocated_sub(sk, amt);
2062 
2063         return 0;
2064 }
2065 EXPORT_SYMBOL(__sk_mem_schedule);
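
/* Illustrative note (not part of the kernel source): protocols normally call
 * this through the sk_wmem_schedule()/sk_rmem_schedule() inline helpers, e.g.
 * before queueing a received skb:
 *
 *	if (!sk_rmem_schedule(sk, skb, skb->truesize))
 *		return -ENOBUFS;	// hypothetical caller policy
 *	skb_set_owner_r(skb, sk);
 */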
2066 
2067 /**
2068  *      __sk_mem_reclaim - reclaim memory_allocated
2069  *      @sk: socket
2070  */
2071 void __sk_mem_reclaim(struct sock *sk)
2072 {
2073         sk_memory_allocated_sub(sk,
2074                                 sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
2075         sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
2076 
2077         if (sk_under_memory_pressure(sk) &&
2078             (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2079                 sk_leave_memory_pressure(sk);
2080 }
2081 EXPORT_SYMBOL(__sk_mem_reclaim);
2082 
2083 
2084 /*
2085  * Set of default routines for initialising struct proto_ops when
2086  * the protocol does not support a particular function. In certain
2087  * cases where it makes no sense for a protocol to have a "do nothing"
2088  * function, some default processing is provided.
2089  */
2090 
2091 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2092 {
2093         return -EOPNOTSUPP;
2094 }
2095 EXPORT_SYMBOL(sock_no_bind);
2096 
2097 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
2098                     int len, int flags)
2099 {
2100         return -EOPNOTSUPP;
2101 }
2102 EXPORT_SYMBOL(sock_no_connect);
2103 
2104 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2105 {
2106         return -EOPNOTSUPP;
2107 }
2108 EXPORT_SYMBOL(sock_no_socketpair);
2109 
2110 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
2111 {
2112         return -EOPNOTSUPP;
2113 }
2114 EXPORT_SYMBOL(sock_no_accept);
2115 
2116 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2117                     int *len, int peer)
2118 {
2119         return -EOPNOTSUPP;
2120 }
2121 EXPORT_SYMBOL(sock_no_getname);
2122 
2123 unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
2124 {
2125         return 0;
2126 }
2127 EXPORT_SYMBOL(sock_no_poll);
2128 
2129 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2130 {
2131         return -EOPNOTSUPP;
2132 }
2133 EXPORT_SYMBOL(sock_no_ioctl);
2134 
2135 int sock_no_listen(struct socket *sock, int backlog)
2136 {
2137         return -EOPNOTSUPP;
2138 }
2139 EXPORT_SYMBOL(sock_no_listen);
2140 
2141 int sock_no_shutdown(struct socket *sock, int how)
2142 {
2143         return -EOPNOTSUPP;
2144 }
2145 EXPORT_SYMBOL(sock_no_shutdown);
2146 
2147 int sock_no_setsockopt(struct socket *sock, int level, int optname,
2148                     char __user *optval, unsigned int optlen)
2149 {
2150         return -EOPNOTSUPP;
2151 }
2152 EXPORT_SYMBOL(sock_no_setsockopt);
2153 
2154 int sock_no_getsockopt(struct socket *sock, int level, int optname,
2155                     char __user *optval, int __user *optlen)
2156 {
2157         return -EOPNOTSUPP;
2158 }
2159 EXPORT_SYMBOL(sock_no_getsockopt);
2160 
2161 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
2162                     size_t len)
2163 {
2164         return -EOPNOTSUPP;
2165 }
2166 EXPORT_SYMBOL(sock_no_sendmsg);
2167 
2168 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
2169                     size_t len, int flags)
2170 {
2171         return -EOPNOTSUPP;
2172 }
2173 EXPORT_SYMBOL(sock_no_recvmsg);
2174 
2175 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2176 {
2177         /* Mirror missing mmap method error code */
2178         return -ENODEV;
2179 }
2180 EXPORT_SYMBOL(sock_no_mmap);
2181 
2182 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2183 {
2184         ssize_t res;
2185         struct msghdr msg = {.msg_flags = flags};
2186         struct kvec iov;
2187         char *kaddr = kmap(page);
2188         iov.iov_base = kaddr + offset;
2189         iov.iov_len = size;
2190         res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2191         kunmap(page);
2192         return res;
2193 }
2194 EXPORT_SYMBOL(sock_no_sendpage);
2195 
2196 /*
2197  *      Default Socket Callbacks
2198  */
2199 
2200 static void sock_def_wakeup(struct sock *sk)
2201 {
2202         struct socket_wq *wq;
2203 
2204         rcu_read_lock();
2205         wq = rcu_dereference(sk->sk_wq);
2206         if (wq_has_sleeper(wq))
2207                 wake_up_interruptible_all(&wq->wait);
2208         rcu_read_unlock();
2209 }
2210 
2211 static void sock_def_error_report(struct sock *sk)
2212 {
2213         struct socket_wq *wq;
2214 
2215         rcu_read_lock();
2216         wq = rcu_dereference(sk->sk_wq);
2217         if (wq_has_sleeper(wq))
2218                 wake_up_interruptible_poll(&wq->wait, POLLERR);
2219         sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2220         rcu_read_unlock();
2221 }
2222 
2223 static void sock_def_readable(struct sock *sk)
2224 {
2225         struct socket_wq *wq;
2226 
2227         rcu_read_lock();
2228         wq = rcu_dereference(sk->sk_wq);
2229         if (wq_has_sleeper(wq))
2230                 wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
2231                                                 POLLRDNORM | POLLRDBAND);
2232         sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2233         rcu_read_unlock();
2234 }
2235 
2236 static void sock_def_write_space(struct sock *sk)
2237 {
2238         struct socket_wq *wq;
2239 
2240         rcu_read_lock();
2241 
2242         /* Do not wake up a writer until he can make "significant"
2243          * progress.  --DaveM
2244          */
2245         if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
2246                 wq = rcu_dereference(sk->sk_wq);
2247                 if (wq_has_sleeper(wq))
2248                         wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
2249                                                 POLLWRNORM | POLLWRBAND);
2250 
2251                 /* Should agree with poll, otherwise some programs break */
2252                 if (sock_writeable(sk))
2253                         sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
2254         }
2255 
2256         rcu_read_unlock();
2257 }
2258 
2259 static void sock_def_destruct(struct sock *sk)
2260 {
2261         kfree(sk->sk_protinfo);
2262 }
2263 
2264 void sk_send_sigurg(struct sock *sk)
2265 {
2266         if (sk->sk_socket && sk->sk_socket->file)
2267                 if (send_sigurg(&sk->sk_socket->file->f_owner))
2268                         sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2269 }
2270 EXPORT_SYMBOL(sk_send_sigurg);
2271 
2272 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
2273                     unsigned long expires)
2274 {
2275         if (!mod_timer(timer, expires))
2276                 sock_hold(sk);
2277 }
2278 EXPORT_SYMBOL(sk_reset_timer);
2279 
2280 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2281 {
2282         if (del_timer(timer))
2283                 __sock_put(sk);
2284 }
2285 EXPORT_SYMBOL(sk_stop_timer);
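
/* Illustrative note (not part of the kernel source): these two are meant to be
 * used together so the socket reference taken when the timer is armed is
 * always dropped again (delay is a hypothetical value):
 *
 *	sk_reset_timer(sk, &sk->sk_timer, jiffies + delay);	// sock_hold() if newly armed
 *	...
 *	sk_stop_timer(sk, &sk->sk_timer);			// __sock_put() if it was pending
 */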
2286 
2287 void sock_init_data(struct socket *sock, struct sock *sk)
2288 {
2289         skb_queue_head_init(&sk->sk_receive_queue);
2290         skb_queue_head_init(&sk->sk_write_queue);
2291         skb_queue_head_init(&sk->sk_error_queue);
2292 
2293         sk->sk_send_head        =       NULL;
2294 
2295         init_timer(&sk->sk_timer);
2296 
2297         sk->sk_allocation       =       GFP_KERNEL;
2298         sk->sk_rcvbuf           =       sysctl_rmem_default;
2299         sk->sk_sndbuf           =       sysctl_wmem_default;
2300         sk->sk_state            =       TCP_CLOSE;
2301         sk_set_socket(sk, sock);
2302 
2303         sock_set_flag(sk, SOCK_ZAPPED);
2304 
2305         if (sock) {
2306                 sk->sk_type     =       sock->type;
2307                 sk->sk_wq       =       sock->wq;
2308                 sock->sk        =       sk;
2309         } else
2310                 sk->sk_wq       =       NULL;
2311 
2312         spin_lock_init(&sk->sk_dst_lock);
2313         rwlock_init(&sk->sk_callback_lock);
2314         lockdep_set_class_and_name(&sk->sk_callback_lock,
2315                         af_callback_keys + sk->sk_family,
2316                         af_family_clock_key_strings[sk->sk_family]);
2317 
2318         sk->sk_state_change     =       sock_def_wakeup;
2319         sk->sk_data_ready       =       sock_def_readable;
2320         sk->sk_write_space      =       sock_def_write_space;
2321         sk->sk_error_report     =       sock_def_error_report;
2322         sk->sk_destruct         =       sock_def_destruct;
2323 
2324         sk->sk_frag.page        =       NULL;
2325         sk->sk_frag.offset      =       0;
2326         sk->sk_peek_off         =       -1;
2327 
2328         sk->sk_peer_pid         =       NULL;
2329         sk->sk_peer_cred        =       NULL;
2330         sk->sk_write_pending    =       0;
2331         sk->sk_rcvlowat         =       1;
2332         sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
2333         sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
2334 
2335         sk->sk_stamp = ktime_set(-1L, 0);
2336 #if BITS_PER_LONG==32
2337         seqlock_init(&sk->sk_stamp_seq);
2338 #endif
2339 
2340 #ifdef CONFIG_NET_RX_BUSY_POLL
2341         sk->sk_napi_id          =       0;
2342         sk->sk_ll_usec          =       sysctl_net_busy_read;
2343 #endif
2344 
2345         sk->sk_max_pacing_rate = ~0U;
2346         sk->sk_pacing_rate = ~0U;
2347         /*
2348          * Before updating sk_refcnt, we must commit prior changes to memory
2349          * (Documentation/RCU/rculist_nulls.txt for details)
2350          */
2351         smp_wmb();
2352         atomic_set(&sk->sk_refcnt, 1);
2353         atomic_set(&sk->sk_drops, 0);
2354 }
2355 EXPORT_SYMBOL(sock_init_data);
2356 
2357 void lock_sock_nested(struct sock *sk, int subclass)
2358 {
2359         might_sleep();
2360         spin_lock_bh(&sk->sk_lock.slock);
2361         if (sk->sk_lock.owned)
2362                 __lock_sock(sk);
2363         sk->sk_lock.owned = 1;
2364         spin_unlock(&sk->sk_lock.slock);
2365         /*
2366          * The sk_lock has mutex_lock() semantics here:
2367          */
2368         mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
2369         local_bh_enable();
2370 }
2371 EXPORT_SYMBOL(lock_sock_nested);
2372 
2373 void release_sock(struct sock *sk)
2374 {
2375         /*
2376          * The sk_lock has mutex_unlock() semantics:
2377          */
2378         mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
2379 
2380         spin_lock_bh(&sk->sk_lock.slock);
2381         if (sk->sk_backlog.tail)
2382                 __release_sock(sk);
2383 
2384         /* Warning : release_cb() might need to release sk ownership,
2385          * ie call sock_release_ownership(sk) before us.
2386          */
2387         if (sk->sk_prot->release_cb)
2388                 sk->sk_prot->release_cb(sk);
2389 
2390         sock_release_ownership(sk);
2391         if (waitqueue_active(&sk->sk_lock.wq))
2392                 wake_up(&sk->sk_lock.wq);
2393         spin_unlock_bh(&sk->sk_lock.slock);
2394 }
2395 EXPORT_SYMBOL(release_sock);
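
/* Illustrative sketch (not part of the kernel source): the usual process
 * context pattern around these primitives is simply:
 *
 *	lock_sock(sk);
 *	// ... modify socket state, this section may sleep ...
 *	release_sock(sk);	// also processes the backlog queued by softirqs
 */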
2396 
2397 /**
2398  * lock_sock_fast - fast version of lock_sock
2399  * @sk: socket
2400  *
2401  * This version should be used for very small sections, where the process won't block.
2402  * Returns false if the fast path is taken:
2403  *   sk_lock.slock locked, owned = 0, BH disabled
2404  * Returns true if the slow path is taken:
2405  *   sk_lock.slock unlocked, owned = 1, BH enabled
2406  */
2407 bool lock_sock_fast(struct sock *sk)
2408 {
2409         might_sleep();
2410         spin_lock_bh(&sk->sk_lock.slock);
2411 
2412         if (!sk->sk_lock.owned)
2413                 /*
2414                  * Note: we return with BH still disabled on the fast path
2415                  */
2416                 return false;
2417 
2418         __lock_sock(sk);
2419         sk->sk_lock.owned = 1;
2420         spin_unlock(&sk->sk_lock.slock);
2421         /*
2422          * The sk_lock has mutex_lock() semantics here:
2423          */
2424         mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2425         local_bh_enable();
2426         return true;
2427 }
2428 EXPORT_SYMBOL(lock_sock_fast);
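
/* Illustrative sketch (not part of the kernel source): lock_sock_fast() is
 * paired with unlock_sock_fast(), which must be given the returned value so it
 * knows which path was taken:
 *
 *	bool slow = lock_sock_fast(sk);
 *	// ... very short critical section ...
 *	unlock_sock_fast(sk, slow);
 */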
2429 
2430 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
2431 {
2432         struct timeval tv;
2433         if (!sock_flag(sk, SOCK_TIMESTAMP))
2434                 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2435         tv = ktime_to_timeval(sk->sk_stamp);
2436         if (tv.tv_sec == -1)
2437                 return -ENOENT;
2438         if (tv.tv_sec == 0) {
2439                 sk->sk_stamp = ktime_get_real();
2440                 tv = ktime_to_timeval(sk->sk_stamp);
2441         }
2442         return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
2443 }
2444 EXPORT_SYMBOL(sock_get_timestamp);
2445 
2446 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2447 {
2448         struct timespec ts;
2449         if (!sock_flag(sk, SOCK_TIMESTAMP))
2450                 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2451         ts = ktime_to_timespec(sk->sk_stamp);
2452         if (ts.tv_sec == -1)
2453                 return -ENOENT;
2454         if (ts.tv_sec == 0) {
2455                 sk->sk_stamp = ktime_get_real();
2456                 ts = ktime_to_timespec(sk->sk_stamp);
2457         }
2458         return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
2459 }
2460 EXPORT_SYMBOL(sock_get_timestampns);
2461 
2462 void sock_enable_timestamp(struct sock *sk, int flag)
2463 {
2464         if (!sock_flag(sk, flag)) {
2465                 unsigned long previous_flags = sk->sk_flags;
2466 
2467                 sock_set_flag(sk, flag);
2468                 /*
2469                  * we just set one of the two flags which require net
2470                  * time stamping, but time stamping might have been on
2471                  * already because of the other one
2472                  */
2473                 if (!(previous_flags & SK_FLAGS_TIMESTAMP))
2474                         net_enable_timestamp();
2475         }
2476 }
2477 
2478 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
2479                        int level, int type)
2480 {
2481         struct sock_exterr_skb *serr;
2482         struct sk_buff *skb;
2483         int copied, err;
2484 
2485         err = -EAGAIN;
2486         skb = sock_dequeue_err_skb(sk);
2487         if (skb == NULL)
2488                 goto out;
2489 
2490         copied = skb->len;
2491         if (copied > len) {
2492                 msg->msg_flags |= MSG_TRUNC;
2493                 copied = len;
2494         }
2495         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
2496         if (err)
2497                 goto out_free_skb;
2498 
2499         sock_recv_timestamp(msg, sk, skb);
2500 
2501         serr = SKB_EXT_ERR(skb);
2502         put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
2503 
2504         msg->msg_flags |= MSG_ERRQUEUE;
2505         err = copied;
2506 
2507 out_free_skb:
2508         kfree_skb(skb);
2509 out:
2510         return err;
2511 }
2512 EXPORT_SYMBOL(sock_recv_errqueue);
2513 
2514 /*
2515  *      Get a socket option on a socket.
2516  *
2517  *      FIX: POSIX 1003.1g is very ambiguous here. It states that
2518  *      asynchronous errors should be reported by getsockopt. We assume
2519  *      this means if you specify SO_ERROR (otherwise what's the point of it).
2520  */
2521 int sock_common_getsockopt(struct socket *sock, int level, int optname,
2522                            char __user *optval, int __user *optlen)
2523 {
2524         struct sock *sk = sock->sk;
2525 
2526         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2527 }
2528 EXPORT_SYMBOL(sock_common_getsockopt);
2529 
2530 #ifdef CONFIG_COMPAT
2531 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
2532                                   char __user *optval, int __user *optlen)
2533 {
2534         struct sock *sk = sock->sk;
2535 
2536         if (sk->sk_prot->compat_getsockopt != NULL)
2537                 return sk->sk_prot->compat_getsockopt(sk, level, optname,
2538                                                       optval, optlen);
2539         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2540 }
2541 EXPORT_SYMBOL(compat_sock_common_getsockopt);
2542 #endif
2543 
2544 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
2545                         struct msghdr *msg, size_t size, int flags)
2546 {
2547         struct sock *sk = sock->sk;
2548         int addr_len = 0;
2549         int err;
2550 
2551         err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
2552                                    flags & ~MSG_DONTWAIT, &addr_len);
2553         if (err >= 0)
2554                 msg->msg_namelen = addr_len;
2555         return err;
2556 }
2557 EXPORT_SYMBOL(sock_common_recvmsg);
2558 
2559 /*
2560  *      Set socket options on an inet socket.
2561  */
2562 int sock_common_setsockopt(struct socket *sock, int level, int optname,
2563                            char __user *optval, unsigned int optlen)
2564 {
2565         struct sock *sk = sock->sk;
2566 
2567         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2568 }
2569 EXPORT_SYMBOL(sock_common_setsockopt);
2570 
2571 #ifdef CONFIG_COMPAT
2572 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
2573                                   char __user *optval, unsigned int optlen)
2574 {
2575         struct sock *sk = sock->sk;
2576 
2577         if (sk->sk_prot->compat_setsockopt != NULL)
2578                 return sk->sk_prot->compat_setsockopt(sk, level, optname,
2579                                                       optval, optlen);
2580         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2581 }
2582 EXPORT_SYMBOL(compat_sock_common_setsockopt);
2583 #endif
2584 
2585 void sk_common_release(struct sock *sk)
2586 {
2587         if (sk->sk_prot->destroy)
2588                 sk->sk_prot->destroy(sk);
2589 
2590         /*
2591          * Observation: when sk_common_release is called, processes have
2592          * no access to the socket, but the network stack still has.
2593          * Step one, detach it from networking:
2594          *
2595          * A. Remove from hash tables.
2596          */
2597 
2598         sk->sk_prot->unhash(sk);
2599 
2600         /*
2601          * At this point the socket cannot receive new packets, but it is possible
2602          * that some packets are in flight because some CPU is running the receive
2603          * path and did the hash table lookup before we unhashed the socket. They
2604          * will reach the receive queue and will be purged by the socket destructor.
2605          *
2606          * Also, we still have packets pending on the receive queue and, probably,
2607          * our own packets waiting in device queues. sock_destroy will drain the
2608          * receive queue, but transmitted packets will delay socket destruction
2609          * until the last reference is released.
2610          */
2611 
2612         sock_orphan(sk);
2613 
2614         xfrm_sk_free_policy(sk);
2615 
2616         sk_refcnt_debug_release(sk);
2617 
2618         sock_put(sk);
2619 }
2620 EXPORT_SYMBOL(sk_common_release);
2621 
2622 #ifdef CONFIG_PROC_FS
2623 #define PROTO_INUSE_NR  64      /* should be enough for the first time */
2624 struct prot_inuse {
2625         int val[PROTO_INUSE_NR];
2626 };
2627 
2628 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2629 
2630 #ifdef CONFIG_NET_NS
2631 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2632 {
2633         __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
2634 }
2635 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2636 
2637 int sock_prot_inuse_get(struct net *net, struct proto *prot)
2638 {
2639         int cpu, idx = prot->inuse_idx;
2640         int res = 0;
2641 
2642         for_each_possible_cpu(cpu)
2643                 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2644 
2645         return res >= 0 ? res : 0;
2646 }
2647 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2648 
2649 static int __net_init sock_inuse_init_net(struct net *net)
2650 {
2651         net->core.inuse = alloc_percpu(struct prot_inuse);
2652         return net->core.inuse ? 0 : -ENOMEM;
2653 }
2654 
2655 static void __net_exit sock_inuse_exit_net(struct net *net)
2656 {
2657         free_percpu(net->core.inuse);
2658 }
2659 
2660 static struct pernet_operations net_inuse_ops = {
2661         .init = sock_inuse_init_net,
2662         .exit = sock_inuse_exit_net,
2663 };
2664 
2665 static __init int net_inuse_init(void)
2666 {
2667         if (register_pernet_subsys(&net_inuse_ops))
2668                 panic("Cannot initialize net inuse counters");
2669 
2670         return 0;
2671 }
2672 
2673 core_initcall(net_inuse_init);
2674 #else
2675 static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2676 
2677 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2678 {
2679         __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
2680 }
2681 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2682 
2683 int sock_prot_inuse_get(struct net *net, struct proto *prot)
2684 {
2685         int cpu, idx = prot->inuse_idx;
2686         int res = 0;
2687 
2688         for_each_possible_cpu(cpu)
2689                 res += per_cpu(prot_inuse, cpu).val[idx];
2690 
2691         return res >= 0 ? res : 0;
2692 }
2693 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2694 #endif
2695 
2696 static void assign_proto_idx(struct proto *prot)
2697 {
2698         prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
2699 
2700         if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
2701                 pr_err("PROTO_INUSE_NR exhausted\n");
2702                 return;
2703         }
2704 
2705         set_bit(prot->inuse_idx, proto_inuse_idx);
2706 }
2707 
2708 static void release_proto_idx(struct proto *prot)
2709 {
2710         if (prot->inuse_idx != PROTO_INUSE_NR - 1)
2711                 clear_bit(prot->inuse_idx, proto_inuse_idx);
2712 }
2713 #else
2714 static inline void assign_proto_idx(struct proto *prot)
2715 {
2716 }
2717 
2718 static inline void release_proto_idx(struct proto *prot)
2719 {
2720 }
2721 #endif
2722 
2723 int proto_register(struct proto *prot, int alloc_slab)
2724 {
2725         if (alloc_slab) {
2726                 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
2727                                         SLAB_HWCACHE_ALIGN | prot->slab_flags,
2728                                         NULL);
2729 
2730                 if (prot->slab == NULL) {
2731                         pr_crit("%s: Can't create sock SLAB cache!\n",
2732                                 prot->name);
2733                         goto out;
2734                 }
2735 
2736                 if (prot->rsk_prot != NULL) {
2737                         prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
2738                         if (prot->rsk_prot->slab_name == NULL)
2739                                 goto out_free_sock_slab;
2740 
2741                         prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
2742                                                                  prot->rsk_prot->obj_size, 0,
2743                                                                  SLAB_HWCACHE_ALIGN, NULL);
2744 
2745                         if (prot->rsk_prot->slab == NULL) {
2746                                 pr_crit("%s: Can't create request sock SLAB cache!\n",
2747                                         prot->name);
2748                                 goto out_free_request_sock_slab_name;
2749                         }
2750                 }
2751 
2752                 if (prot->twsk_prot != NULL) {
2753                         prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
2754 
2755                         if (prot->twsk_prot->twsk_slab_name == NULL)
2756                                 goto out_free_request_sock_slab;
2757 
2758                         prot->twsk_prot->twsk_slab =
2759                                 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
2760                                                   prot->twsk_prot->twsk_obj_size,
2761                                                   0,
2762                                                   SLAB_HWCACHE_ALIGN |
2763                                                         prot->slab_flags,
2764                                                   NULL);
2765                         if (prot->twsk_prot->twsk_slab == NULL)
2766                                 goto out_free_timewait_sock_slab_name;
2767                 }
2768         }
2769 
2770         mutex_lock(&proto_list_mutex);
2771         list_add(&prot->node, &proto_list);
2772         assign_proto_idx(prot);
2773         mutex_unlock(&proto_list_mutex);
2774         return 0;
2775 
2776 out_free_timewait_sock_slab_name:
2777         kfree(prot->twsk_prot->twsk_slab_name);
2778 out_free_request_sock_slab:
2779         if (prot->rsk_prot && prot->rsk_prot->slab) {
2780                 kmem_cache_destroy(prot->rsk_prot->slab);
2781                 prot->rsk_prot->slab = NULL;
2782         }
2783 out_free_request_sock_slab_name:
2784         if (prot->rsk_prot)
2785                 kfree(prot->rsk_prot->slab_name);
2786 out_free_sock_slab:
2787         kmem_cache_destroy(prot->slab);
2788         prot->slab = NULL;
2789 out:
2790         return -ENOBUFS;
2791 }
2792 EXPORT_SYMBOL(proto_register);
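
/* Illustrative sketch (not part of the kernel source): a minimal registration,
 * assuming a hypothetical foo protocol with its own struct foo_sock:
 *
 *	static struct proto foo_prot = {
 *		.name	  = "FOO",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct foo_sock),
 *	};
 *
 *	err = proto_register(&foo_prot, 1);	// 1 = allocate a slab cache
 *	...
 *	proto_unregister(&foo_prot);
 */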
2793 
2794 void proto_unregister(struct proto *prot)
2795 {
2796         mutex_lock(&proto_list_mutex);
2797         release_proto_idx(prot);
2798         list_del(&prot->node);
2799         mutex_unlock(&proto_list_mutex);
2800 
2801         if (prot->slab != NULL) {
2802                 kmem_cache_destroy(prot->slab);
2803                 prot->slab = NULL;
2804         }
2805 
2806         if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
2807                 kmem_cache_destroy(prot->rsk_prot->slab);
2808                 kfree(prot->rsk_prot->slab_name);
2809                 prot->rsk_prot->slab = NULL;
2810         }
2811 
2812         if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
2813                 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
2814                 kfree(prot->twsk_prot->twsk_slab_name);
2815                 prot->twsk_prot->twsk_slab = NULL;
2816         }
2817 }
2818 EXPORT_SYMBOL(proto_unregister);
2819 
2820 #ifdef CONFIG_PROC_FS
2821 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
2822         __acquires(proto_list_mutex)
2823 {
2824         mutex_lock(&proto_list_mutex);
2825         return seq_list_start_head(&proto_list, *pos);
2826 }
2827 
2828 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2829 {
2830         return seq_list_next(v, &proto_list, pos);
2831 }
2832 
2833 static void proto_seq_stop(struct seq_file *seq, void *v)
2834         __releases(proto_list_mutex)
2835 {
2836         mutex_unlock(&proto_list_mutex);
2837 }
2838 
2839 static char proto_method_implemented(const void *method)
2840 {
2841         return method == NULL ? 'n' : 'y';
2842 }
2843 static long sock_prot_memory_allocated(struct proto *proto)
2844 {
2845         return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
2846 }
2847 
2848 static char *sock_prot_memory_pressure(struct proto *proto)
2849 {
2850         return proto->memory_pressure != NULL ?
2851                (proto_memory_pressure(proto) ? "yes" : "no") : "NI";
2852 }
2853 
2854 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
2855 {
2856 
2857         seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
2858                         "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
2859                    proto->name,
2860                    proto->obj_size,
2861                    sock_prot_inuse_get(seq_file_net(seq), proto),
2862                    sock_prot_memory_allocated(proto),
2863                    sock_prot_memory_pressure(proto),
2864                    proto->max_header,
2865                    proto->slab == NULL ? "no" : "yes",
2866                    module_name(proto->owner),
2867                    proto_method_implemented(proto->close),
2868                    proto_method_implemented(proto->connect),
2869                    proto_method_implemented(proto->disconnect),
2870                    proto_method_implemented(proto->accept),
2871                    proto_method_implemented(proto->ioctl),
2872                    proto_method_implemented(proto->init),
2873                    proto_method_implemented(proto->destroy),
2874                    proto_method_implemented(proto->shutdown),
2875                    proto_method_implemented(proto->setsockopt),
2876                    proto_method_implemented(proto->getsockopt),
2877                    proto_method_implemented(proto->sendmsg),
2878                    proto_method_implemented(proto->recvmsg),
2879                    proto_method_implemented(proto->sendpage),
2880                    proto_method_implemented(proto->bind),
2881                    proto_method_implemented(proto->backlog_rcv),
2882                    proto_method_implemented(proto->hash),
2883                    proto_method_implemented(proto->unhash),
2884                    proto_method_implemented(proto->get_port),
2885                    proto_method_implemented(proto->enter_memory_pressure));
2886 }
2887 
2888 static int proto_seq_show(struct seq_file *seq, void *v)
2889 {
2890         if (v == &proto_list)
2891                 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
2892                            "protocol",
2893                            "size",
2894                            "sockets",
2895                            "memory",
2896                            "press",
2897                            "maxhdr",
2898                            "slab",
2899                            "module",
2900                            "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
2901         else
2902                 proto_seq_printf(seq, list_entry(v, struct proto, node));
2903         return 0;
2904 }
2905 
2906 static const struct seq_operations proto_seq_ops = {
2907         .start  = proto_seq_start,
2908         .next   = proto_seq_next,
2909         .stop   = proto_seq_stop,
2910         .show   = proto_seq_show,
2911 };
2912 
2913 static int proto_seq_open(struct inode *inode, struct file *file)
2914 {
2915         return seq_open_net(inode, file, &proto_seq_ops,
2916                             sizeof(struct seq_net_private));
2917 }
2918 
2919 static const struct file_operations proto_seq_fops = {
2920         .owner          = THIS_MODULE,
2921         .open           = proto_seq_open,
2922         .read           = seq_read,
2923         .llseek         = seq_lseek,
2924         .release        = seq_release_net,
2925 };
2926 
2927 static __net_init int proto_init_net(struct net *net)
2928 {
2929         if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
2930                 return -ENOMEM;
2931 
2932         return 0;
2933 }
2934 
2935 static __net_exit void proto_exit_net(struct net *net)
2936 {
2937         remove_proc_entry("protocols", net->proc_net);
2938 }
2939 
2940 
2941 static __net_initdata struct pernet_operations proto_net_ops = {
2942         .init = proto_init_net,
2943         .exit = proto_exit_net,
2944 };
2945 
2946 static int __init proto_init(void)
2947 {
2948         return register_pernet_subsys(&proto_net_ops);
2949 }
2950 
2951 subsys_initcall(proto_init);
2952 
2953 #endif /* CONFIG_PROC_FS */
2954 
