~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/unix/af_unix.c

Version: ~ [ linux-5.12-rc1 ] ~ [ linux-5.11.2 ] ~ [ linux-5.10.19 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.101 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.177 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.222 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.258 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.258 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 /*
  3  * NET4:        Implementation of BSD Unix domain sockets.
  4  *
  5  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
  6  *
  7  * Fixes:
  8  *              Linus Torvalds  :       Assorted bug cures.
  9  *              Niibe Yutaka    :       async I/O support.
 10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
 11  *              Alan Cox        :       Limit size of allocated blocks.
 12  *              Alan Cox        :       Fixed the stupid socketpair bug.
 13  *              Alan Cox        :       BSD compatibility fine tuning.
 14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
 15  *              Alan Cox        :       Sorted out a proper draft version of
 16  *                                      file descriptor passing hacked up from
 17  *                                      Mike Shaver's work.
 18  *              Marty Leisner   :       Fixes to fd passing
 19  *              Nick Nevin      :       recvmsg bugfix.
 20  *              Alan Cox        :       Started proper garbage collector
 21  *              Heiko EiBfeldt  :       Missing verify_area check
 22  *              Alan Cox        :       Started POSIXisms
 23  *              Andreas Schwab  :       Replace inode by dentry for proper
 24  *                                      reference counting
 25  *              Kirk Petersen   :       Made this a module
 26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
 27  *                                      Lots of bug fixes.
 28  *           Alexey Kuznetosv   :       Repaired (I hope) bugs introduces
 29  *                                      by above two patches.
 30  *           Andrea Arcangeli   :       If possible we block in connect(2)
 31  *                                      if the max backlog of the listen socket
 32  *                                      is been reached. This won't break
 33  *                                      old apps and it will avoid huge amount
 34  *                                      of socks hashed (this for unix_gc()
 35  *                                      performances reasons).
 36  *                                      Security fix that limits the max
 37  *                                      number of socks to 2*max_files and
 38  *                                      the number of skb queueable in the
 39  *                                      dgram receiver.
 40  *              Artur Skawina   :       Hash function optimizations
 41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
 42  *            Malcolm Beattie   :       Set peercred for socketpair
 43  *           Michal Ostrowski   :       Module initialization cleanup.
 44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
 45  *                                      the core infrastructure is doing that
 46  *                                      for all net proto families now (2.5.69+)
 47  *
 48  * Known differences from reference BSD that was tested:
 49  *
 50  *      [TO FIX]
 51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
 52  *              other the moment one end closes.
 53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
 54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
 55  *      [NOT TO FIX]
 56  *      accept() returns a path name even if the connecting socket has closed
 57  *              in the meantime (BSD loses the path and gives up).
 58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
 59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
 61  *      BSD af_unix apparently has connect forgetting to block properly.
 62  *              (need to check this with the POSIX spec in detail)
 63  *
 64  * Differences from 2.0.0-11-... (ANK)
 65  *      Bug fixes and improvements.
 66  *              - client shutdown killed server socket.
 67  *              - removed all useless cli/sti pairs.
 68  *
 69  *      Semantic changes/extensions.
 70  *              - generic control message passing.
 71  *              - SCM_CREDENTIALS control message.
 72  *              - "Abstract" (not FS based) socket bindings.
 73  *                Abstract names are sequences of bytes (not zero terminated)
 74  *                started by 0, so that this name space does not intersect
 75  *                with BSD names.
 76  */
 77 
 78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 79 
 80 #include <linux/module.h>
 81 #include <linux/kernel.h>
 82 #include <linux/signal.h>
 83 #include <linux/sched/signal.h>
 84 #include <linux/errno.h>
 85 #include <linux/string.h>
 86 #include <linux/stat.h>
 87 #include <linux/dcache.h>
 88 #include <linux/namei.h>
 89 #include <linux/socket.h>
 90 #include <linux/un.h>
 91 #include <linux/fcntl.h>
 92 #include <linux/termios.h>
 93 #include <linux/sockios.h>
 94 #include <linux/net.h>
 95 #include <linux/in.h>
 96 #include <linux/fs.h>
 97 #include <linux/slab.h>
 98 #include <linux/uaccess.h>
 99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <net/net_namespace.h>
102 #include <net/sock.h>
103 #include <net/tcp_states.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <linux/seq_file.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/rtnetlink.h>
111 #include <linux/mount.h>
112 #include <net/checksum.h>
113 #include <linux/security.h>
114 #include <linux/freezer.h>
115 #include <linux/file.h>
116 
117 #include "scm.h"
118 
/* Global hash table of AF_UNIX sockets.  Bound sockets live in slots
 * [0, UNIX_HASH_SIZE); unbound sockets are parked in the upper half
 * [UNIX_HASH_SIZE, 2 * UNIX_HASH_SIZE) (see unix_sockets_unbound()).
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
/* Protects unix_socket_table; see the "SMP locking strategy" comment below. */
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/* Count of live AF_UNIX sockets; capped against get_max_files() in
 * unix_create1() and decremented in unix_sock_destructor().
 */
static atomic_long_t unix_nr_socks;
124 
125 
126 static struct hlist_head *unix_sockets_unbound(void *addr)
127 {
128         unsigned long hash = (unsigned long)addr;
129 
130         hash ^= hash >> 16;
131         hash ^= hash >> 8;
132         hash %= UNIX_HASH_SIZE;
133         return &unix_socket_table[UNIX_HASH_SIZE + hash];
134 }
135 
/* True if sk is bound to an abstract (non-filesystem) address: those hash
 * into the lower half of unix_socket_table.
 */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
137 
#ifdef CONFIG_SECURITY_NETWORK
/* Stash the sender's LSM security ID from the scm cookie into the skb
 * control block so it travels with the message.
 */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        UNIXCB(skb).secid = scm->secid;
}

/* Recover the security ID carried by a received skb into the scm cookie. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        scm->secid = UNIXCB(skb).secid;
}

/* True if the skb carries the same security ID as the scm cookie. */
static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
        return (scm->secid == UNIXCB(skb).secid);
}
#else
/* !CONFIG_SECURITY_NETWORK: no security IDs; comparisons trivially match. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
        return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
165 
166 /*
167  *  SMP locking strategy:
168  *    hash table is protected with spinlock unix_table_lock
169  *    each socket state is protected by separate spin lock.
170  */
171 
172 static inline unsigned int unix_hash_fold(__wsum n)
173 {
174         unsigned int hash = (__force unsigned int)csum_fold(n);
175 
176         hash ^= hash>>8;
177         return hash&(UNIX_HASH_SIZE-1);
178 }
179 
/* Current peer of a connected AF_UNIX socket (NULL when unconnected). */
#define unix_peer(sk) (unix_sk(sk)->peer)
181 
/* Does osk consider sk to be its peer? */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
186 
/* sk may send to osk if osk is unconnected, or connected back to sk. */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return !unix_peer(osk) || unix_our_peer(sk, osk);
}
191 
/* True if sk's receive queue has grown past its backlog limit.
 * NOTE(review): some callers appear to invoke this without sk's state lock;
 * upstream later annotated such lockless reads (READ_ONCE) -- confirm
 * locking at each call site before relying on this under concurrency.
 */
static inline int unix_recvq_full(struct sock const *sk)
{
        return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}
196 
197 struct sock *unix_peer_get(struct sock *s)
198 {
199         struct sock *peer;
200 
201         unix_state_lock(s);
202         peer = unix_peer(s);
203         if (peer)
204                 sock_hold(peer);
205         unix_state_unlock(s);
206         return peer;
207 }
208 EXPORT_SYMBOL_GPL(unix_peer_get);
209 
/* Drop one reference on a unix_address, freeing it on the last put. */
static inline void unix_release_addr(struct unix_address *addr)
{
        if (refcount_dec_and_test(&addr->refcnt))
                kfree(addr);
}
215 
216 /*
217  *      Check unix socket name:
218  *              - should be not zero length.
219  *              - if started by not zero, should be NULL terminated (FS object)
220  *              - if started by zero, it is abstract name.
221  */
222 
/* Validate and canonicalize a sockaddr_un.  For filesystem names, returns
 * the length covering family + NUL-terminated path.  For abstract names,
 * returns len unchanged and stores the name hash in *hashp (left 0 for
 * filesystem names).  Returns -EINVAL on malformed input.
 */
static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
        *hashp = 0;

        /* Must contain more than just the family field, and fit the struct. */
        if (len <= sizeof(short) || len > sizeof(*sunaddr))
                return -EINVAL;
        if (!sunaddr || sunaddr->sun_family != AF_UNIX)
                return -EINVAL;
        if (sunaddr->sun_path[0]) {
                /*
                 * This may look like an off by one error but it is a bit more
                 * subtle. 108 is the longest valid AF_UNIX path for a binding.
                 * sun_path[108] doesn't as such exist.  However in kernel space
                 * we are guaranteed that it is a valid memory location in our
                 * kernel address buffer.
                 */
                ((char *)sunaddr)[len] = 0;
                len = strlen(sunaddr->sun_path)+1+sizeof(short);
                return len;
        }

        /* Abstract name: hash the whole family+bytes blob. */
        *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
        return len;
}
247 
/* Unhash sk from its table chain; caller holds unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}
252 
/* Add sk to a hash chain; caller holds unix_table_lock.  Inserting an
 * already-hashed socket is a bug (WARN).
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        WARN_ON(!sk_unhashed(sk));
        sk_add_node(sk, list);
}
258 
/* Locked wrapper around __unix_remove_socket(). */
static inline void unix_remove_socket(struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_remove_socket(sk);
        spin_unlock(&unix_table_lock);
}
265 
/* Locked wrapper around __unix_insert_socket(). */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_insert_socket(list, sk);
        spin_unlock(&unix_table_lock);
}
272 
273 static struct sock *__unix_find_socket_byname(struct net *net,
274                                               struct sockaddr_un *sunname,
275                                               int len, int type, unsigned int hash)
276 {
277         struct sock *s;
278 
279         sk_for_each(s, &unix_socket_table[hash ^ type]) {
280                 struct unix_sock *u = unix_sk(s);
281 
282                 if (!net_eq(sock_net(s), net))
283                         continue;
284 
285                 if (u->addr->len == len &&
286                     !memcmp(u->addr->name, sunname, len))
287                         return s;
288         }
289         return NULL;
290 }
291 
292 static inline struct sock *unix_find_socket_byname(struct net *net,
293                                                    struct sockaddr_un *sunname,
294                                                    int len, int type,
295                                                    unsigned int hash)
296 {
297         struct sock *s;
298 
299         spin_lock(&unix_table_lock);
300         s = __unix_find_socket_byname(net, sunname, len, type, hash);
301         if (s)
302                 sock_hold(s);
303         spin_unlock(&unix_table_lock);
304         return s;
305 }
306 
307 static struct sock *unix_find_socket_byinode(struct inode *i)
308 {
309         struct sock *s;
310 
311         spin_lock(&unix_table_lock);
312         sk_for_each(s,
313                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
314                 struct dentry *dentry = unix_sk(s)->path.dentry;
315 
316                 if (dentry && d_backing_inode(dentry) == i) {
317                         sock_hold(s);
318                         goto found;
319                 }
320         }
321         s = NULL;
322 found:
323         spin_unlock(&unix_table_lock);
324         return s;
325 }
326 
327 /* Support code for asymmetrically connected dgram sockets
328  *
329  * If a datagram socket is connected to a socket not itself connected
330  * to the first socket (eg, /dev/log), clients may only enqueue more
331  * messages if the present receive queue of the server socket is not
332  * "too large". This means there's a second writeability condition
333  * poll and sendmsg need to test. The dgram recv code will do a wake
334  * up on the peer_wait wait queue of a socket upon reception of a
335  * datagram which needs to be propagated to sleeping would-be writers
336  * since these might not have sent anything so far. This can't be
337  * accomplished via poll_wait because the lifetime of the server
338  * socket might be less than that of its clients if these break their
339  * association with it or if the server socket is closed while clients
340  * are still connected to it and there's no way to inform "a polling
341  * implementation" that it should let go of a certain wait queue
342  *
343  * In order to propagate a wake up, a wait_queue_entry_t of the client
344  * socket is enqueued on the peer_wait queue of the server socket
345  * whose wake function does a wake_up on the ordinary client socket
346  * wait queue. This connection is established whenever a write (or
347  * poll for write) hit the flow control condition and broken when the
348  * association to the server socket is dissolved or after a wake up
349  * was relayed.
350  */
351 
/* Wake function for a client's peer_wake entry queued on a server's
 * peer_wait queue (see the "asymmetrically connected dgram" comment
 * above).  Dissolves the association -- each entry relays at most one
 * wake-up -- then forwards the wake-up to the client's own wait queue.
 * Wake functions run under the wait-queue lock, which is why the
 * unlocked __remove_wait_queue() is safe here.
 */
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
                                      void *key)
{
        struct unix_sock *u;
        wait_queue_head_t *u_sleep;

        u = container_of(q, struct unix_sock, peer_wake);

        /* peer_wake.private holds the server sock this entry is queued on. */
        __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
                            q);
        u->peer_wake.private = NULL;

        /* relaying can only happen while the wq still exists */
        u_sleep = sk_sleep(&u->sk);
        if (u_sleep)
                wake_up_interruptible_poll(u_sleep, key_to_poll(key));

        return 0;
}
371 
372 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
373 {
374         struct unix_sock *u, *u_other;
375         int rc;
376 
377         u = unix_sk(sk);
378         u_other = unix_sk(other);
379         rc = 0;
380         spin_lock(&u_other->peer_wait.lock);
381 
382         if (!u->peer_wake.private) {
383                 u->peer_wake.private = other;
384                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
385 
386                 rc = 1;
387         }
388 
389         spin_unlock(&u_other->peer_wait.lock);
390         return rc;
391 }
392 
393 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
394                                             struct sock *other)
395 {
396         struct unix_sock *u, *u_other;
397 
398         u = unix_sk(sk);
399         u_other = unix_sk(other);
400         spin_lock(&u_other->peer_wait.lock);
401 
402         if (u->peer_wake.private == other) {
403                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
404                 u->peer_wake.private = NULL;
405         }
406 
407         spin_unlock(&u_other->peer_wait.lock);
408 }
409 
410 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
411                                                    struct sock *other)
412 {
413         unix_dgram_peer_wake_disconnect(sk, other);
414         wake_up_interruptible_poll(sk_sleep(sk),
415                                    EPOLLOUT |
416                                    EPOLLWRNORM |
417                                    EPOLLWRBAND);
418 }
419 
/* preconditions:
 *      - unix_peer(sk) == other
 *      - association is stable
 *
 * Returns 1 if the caller should wait: the peer's queue is full and the
 * peer is alive, and the relay association is left in place so a future
 * receive on 'other' wakes us.  Returns 0 if writing may proceed, in
 * which case any association created here is dropped again.
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
        int connected;

        connected = unix_dgram_peer_wake_connect(sk, other);

        /* If other is SOCK_DEAD, we want to make sure we signal
         * POLLOUT, such that a subsequent write() can get a
         * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
         * to other and its full, we will hang waiting for POLLOUT.
         */
        if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
                return 1;

        if (connected)
                unix_dgram_peer_wake_disconnect(sk, other);

        return 0;
}
443 
/* Writable when not listening and committed write memory is at most a
 * quarter of the send buffer (wmem_alloc * 4 <= sk_sndbuf).
 */
static int unix_writable(const struct sock *sk)
{
        return sk->sk_state != TCP_LISTEN &&
               (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}
449 
/* sk_write_space callback: wake poll/epoll writers and deliver async
 * notification once the socket becomes writable again.
 */
static void unix_write_space(struct sock *sk)
{
        struct socket_wq *wq;

        rcu_read_lock();        /* sk_wq is RCU-protected */
        if (unix_writable(sk)) {
                wq = rcu_dereference(sk->sk_wq);
                if (skwq_has_sleeper(wq))
                        wake_up_interruptible_sync_poll(&wq->wait,
                                EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
                sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
        }
        rcu_read_unlock();
}
464 
/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
        if (!skb_queue_empty(&sk->sk_receive_queue)) {
                skb_queue_purge(&sk->sk_receive_queue);
                /* Dropped packets freed queue space: wake would-be writers. */
                wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

                /* If one link of bidirectional dgram pipe is disconnected,
                 * we signal error. Messages are lost. Do not make this,
                 * when peer was not connected to us.
                 */
                if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
                        other->sk_err = ECONNRESET;
                        other->sk_error_report(other);
                }
        }
}
485 
/* sk_destruct callback: final cleanup once the last reference is gone. */
static void unix_sock_destructor(struct sock *sk)
{
        struct unix_sock *u = unix_sk(sk);

        skb_queue_purge(&sk->sk_receive_queue);

        /* By now nothing may still reference or have hashed the socket. */
        WARN_ON(refcount_read(&sk->sk_wmem_alloc));
        WARN_ON(!sk_unhashed(sk));
        WARN_ON(sk->sk_socket);
        if (!sock_flag(sk, SOCK_DEAD)) {
                pr_info("Attempt to release alive unix socket: %p\n", sk);
                return;
        }

        if (u->addr)
                unix_release_addr(u->addr);

        /* Undo the accounting done in unix_create1(). */
        atomic_long_dec(&unix_nr_socks);
        local_bh_disable();
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
        local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
        pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
                atomic_long_read(&unix_nr_socks));
#endif
}
512 
/* Core teardown for an AF_UNIX socket.
 *
 * Marks the socket dead, unhashes it, notifies/detaches its peer, flushes
 * queued skbs (recursively releasing embryo connections still parked on a
 * listener's queue) and drops the filesystem path reference of a bound
 * socket.  @embrion is non-zero when tearing down such a never-accepted
 * embryo connection.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
        struct unix_sock *u = unix_sk(sk);
        struct path path;
        struct sock *skpair;
        struct sk_buff *skb;
        int state;

        unix_remove_socket(sk);

        /* Clear state */
        unix_state_lock(sk);
        sock_orphan(sk);
        sk->sk_shutdown = SHUTDOWN_MASK;
        path         = u->path;         /* path_put() after dropping the lock */
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;
        unix_state_unlock(sk);

        wake_up_interruptible_all(&u->peer_wait);

        skpair = unix_peer(sk);

        if (skpair != NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
                        /* No more writes */
                        skpair->sk_shutdown = SHUTDOWN_MASK;
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                skpair->sk_err = ECONNRESET;
                        unix_state_unlock(skpair);
                        skpair->sk_state_change(skpair);
                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
                }

                unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
                unix_peer(sk) = NULL;
        }

        /* Try to flush out this socket. Throw out buffers at least */

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                if (state == TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);  /* embryo connection */
                /* passed fds are erased in the kfree_skb hook        */
                UNIXCB(skb).consumed = skb->len;
                kfree_skb(skb);
        }

        if (path.dentry)
                path_put(&path);

        sock_put(sk);

        /* ---- Socket is dead now and most probably destroyed ---- */

        /*
         * Fixme: BSD difference: In BSD all sockets connected to us get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *        dereference.
         *
         * Can't we simply set sock->err?
         *
         *        What the above comment does talk about? --ANK(980817)
         */

        if (unix_tot_inflight)
                unix_gc();              /* Garbage collect fds */
}
586 
/* Install the current task's tgid and credentials as sk's peer
 * credentials, releasing any previously held references.
 * NOTE(review): the put/get sequence is not serialized against concurrent
 * readers of sk_peer_pid/sk_peer_cred here; upstream later added explicit
 * locking around peercred updates -- confirm callers hold a sufficient
 * lock in this tree.
 */
static void init_peercred(struct sock *sk)
{
        put_pid(sk->sk_peer_pid);
        if (sk->sk_peer_cred)
                put_cred(sk->sk_peer_cred);
        sk->sk_peer_pid  = get_pid(task_tgid(current));
        sk->sk_peer_cred = get_current_cred();
}
595 
/* Copy peersk's peer pid/credentials into sk, taking new references and
 * dropping any previously held ones.
 * NOTE(review): same locking caveat as init_peercred() above.
 */
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
        put_pid(sk->sk_peer_pid);
        if (sk->sk_peer_cred)
                put_cred(sk->sk_peer_cred);
        sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
        sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}
604 
/* Switch a bound SOCK_STREAM/SOCK_SEQPACKET socket into listening state.
 * Returns 0, -EOPNOTSUPP for other socket types, or -EINVAL for unbound
 * sockets or wrong state.
 */
static int unix_listen(struct socket *sock, int backlog)
{
        int err;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
        struct pid *old_pid = NULL;     /* never set; put_pid(NULL) is a no-op */

        err = -EOPNOTSUPP;
        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
                goto out;       /* Only stream/seqpacket sockets accept */
        err = -EINVAL;
        if (!u->addr)
                goto out;       /* No listens on an unbound socket */
        unix_state_lock(sk);
        if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
                goto out_unlock;
        /* A raised backlog frees room: wake tasks blocked in connect(). */
        if (backlog > sk->sk_max_ack_backlog)
                wake_up_interruptible_all(&u->peer_wait);
        sk->sk_max_ack_backlog  = backlog;
        sk->sk_state            = TCP_LISTEN;
        /* set credentials so connect can copy them */
        init_peercred(sk);
        err = 0;

out_unlock:
        unix_state_unlock(sk);
        put_pid(old_pid);
out:
        return err;
}
635 
/* Forward declarations for the proto_ops tables defined below. */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
                                    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
                                    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
                                       struct pipe_inode_info *, size_t size,
                                       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
                                  int);
665 
/* Set the MSG_PEEK offset under the per-socket I/O mutex.
 * Returns 0, or -EINTR if interrupted while waiting for the mutex.
 * NOTE(review): sk_peek_off may be read without this mutex elsewhere;
 * upstream later made this store a WRITE_ONCE() -- consider backporting.
 */
static int unix_set_peek_off(struct sock *sk, int val)
{
        struct unix_sock *u = unix_sk(sk);

        if (mutex_lock_interruptible(&u->iolock))
                return -EINTR;

        sk->sk_peek_off = val;
        mutex_unlock(&u->iolock);

        return 0;
}
678 
679 
/* proto_ops for SOCK_STREAM AF_UNIX sockets. */
static const struct proto_ops unix_stream_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_poll,
        .ioctl =        unix_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl = unix_compat_ioctl,
#endif
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_stream_sendmsg,
        .recvmsg =      unix_stream_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     unix_stream_sendpage,
        .splice_read =  unix_stream_splice_read,
        .set_peek_off = unix_set_peek_off,
};
705 
/* proto_ops for SOCK_DGRAM AF_UNIX sockets: no accept/listen, dgram
 * connect and the dgram-specific poll (peer-queue writability).
 */
static const struct proto_ops unix_dgram_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_dgram_connect,
        .socketpair =   unix_socketpair,
        .accept =       sock_no_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl = unix_compat_ioctl,
#endif
        .listen =       sock_no_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_dgram_sendmsg,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};
730 
/* proto_ops for SOCK_SEQPACKET AF_UNIX sockets: stream-style connect and
 * accept, seqpacket send/recv, and the dgram poll routine.
 */
static const struct proto_ops unix_seqpacket_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl = unix_compat_ioctl,
#endif
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_seqpacket_sendmsg,
        .recvmsg =      unix_seqpacket_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};
755 
/* Protocol descriptor; obj_size makes sk_alloc() size each socket as a
 * full struct unix_sock.
 */
static struct proto unix_proto = {
        .name                   = "UNIX",
        .owner                  = THIS_MODULE,
        .obj_size               = sizeof(struct unix_sock),
};
761 
/* Allocate and initialise a new AF_UNIX sock.  Returns NULL when the
 * global cap (2 * get_max_files()) is exceeded or sk_alloc() fails.
 * The new socket starts life on the "unbound" hash list.
 */
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
        struct sock *sk = NULL;
        struct unix_sock *u;

        /* Optimistically take a slot; backed out at "out:" on failure.
         * inc and read are separate, so the cap is only approximate.
         */
        atomic_long_inc(&unix_nr_socks);
        if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
                goto out;

        sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
        if (!sk)
                goto out;

        sock_init_data(sock, sk);

        sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
        sk->sk_write_space      = unix_write_space;
        sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
        sk->sk_destruct         = unix_sock_destructor;
        u         = unix_sk(sk);
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        spin_lock_init(&u->lock);
        atomic_long_set(&u->inflight, 0);
        INIT_LIST_HEAD(&u->link);
        mutex_init(&u->iolock); /* single task reading lock */
        mutex_init(&u->bindlock); /* single task binding lock */
        init_waitqueue_head(&u->peer_wait);
        init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
        unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
        if (sk == NULL)
                atomic_long_dec(&unix_nr_socks);
        else {
                local_bh_disable();
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
                local_bh_enable();
        }
        return sk;
}
802 
803 static int unix_create(struct net *net, struct socket *sock, int protocol,
804                        int kern)
805 {
806         if (protocol && protocol != PF_UNIX)
807                 return -EPROTONOSUPPORT;
808 
809         sock->state = SS_UNCONNECTED;
810 
811         switch (sock->type) {
812         case SOCK_STREAM:
813                 sock->ops = &unix_stream_ops;
814                 break;
815                 /*
816                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
817                  *      nothing uses it.
818                  */
819         case SOCK_RAW:
820                 sock->type = SOCK_DGRAM;
821                 /* fall through */
822         case SOCK_DGRAM:
823                 sock->ops = &unix_dgram_ops;
824                 break;
825         case SOCK_SEQPACKET:
826                 sock->ops = &unix_seqpacket_ops;
827                 break;
828         default:
829                 return -ESOCKTNOSUPPORT;
830         }
831 
832         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
833 }
834 
835 static int unix_release(struct socket *sock)
836 {
837         struct sock *sk = sock->sk;
838 
839         if (!sk)
840                 return 0;
841 
842         unix_release_sock(sk, 0);
843         sock->sk = NULL;
844 
845         return 0;
846 }
847 
/* Assign an automatic abstract-namespace address ("\0xxxxx", five hex
 * digits) to an unbound socket, as done for unnamed senders with
 * SOCK_PASSCRED set.  Serialised against explicit bind() by u->bindlock;
 * uniqueness is checked under unix_table_lock.  Returns 0 on success
 * (also when the socket already has an address), -EINTR if interrupted
 * waiting for bindlock, -ENOMEM, or -ENOSPC when all 2^20 names are
 * taken.
 */
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;	/* global cursor; races are harmless, only uniqueness matters */
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;	/* already bound: nothing to do */

	err = -ENOMEM;
	/* sun_family + "\0" + 5 hex digits + NUL fit in sizeof(short) + 16 */
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take long time if many names
		 * are already in use.
		 */
		cond_resched();
		/* Give up if all names seems to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	/* Move from the unbound list into the hash bucket for this name.
	 * smp_store_release() pairs with smp_load_acquire() of u->addr
	 * (e.g. in unix_getname()) so readers see a fully built address.
	 */
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
908 
/* Resolve a destination address to its bound socket.  Filesystem names
 * (sun_path[0] != 0) go through a path lookup and inode match; abstract
 * names are looked up directly in the hash table.  On success returns
 * the peer sock with a reference held (taken by the byinode/byname
 * lookup helpers); on failure stores a negative errno in *error and
 * returns NULL.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		/* Connecting requires write permission on the socket inode. */
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		/* Only touch atime when we will actually use the socket. */
		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);	/* drop ref taken by the lookup */
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
963 
964 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
965 {
966         struct dentry *dentry;
967         struct path path;
968         int err = 0;
969         /*
970          * Get the parent directory, calculate the hash for last
971          * component.
972          */
973         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
974         err = PTR_ERR(dentry);
975         if (IS_ERR(dentry))
976                 return err;
977 
978         /*
979          * All right, let's create it.
980          */
981         err = security_path_mknod(&path, dentry, mode, 0);
982         if (!err) {
983                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
984                 if (!err) {
985                         res->mnt = mntget(path.mnt);
986                         res->dentry = dget(dentry);
987                 }
988         }
989         done_path_create(&path, dentry);
990         return err;
991 }
992 
/* bind(2) for AF_UNIX.  A bare sun_family (addr_len == sizeof(short))
 * requests autobind; a pathname address creates a socket inode first
 * (outside bindlock, to keep lock ordering with the VFS); an abstract
 * address is checked for uniqueness under unix_table_lock.  Returns 0
 * or a negative errno (-EADDRINUSE if the name/path exists, -EINVAL if
 * already bound or the address is malformed).
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		/* Family only, no name: pick an abstract name for the caller. */
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;	/* unix_mkname() returns the canonicalised length */

	if (sun_path[0]) {
		/* Filesystem name: create the inode before taking bindlock. */
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	err = -EINVAL;
	if (u->addr)
		goto out_up;	/* already bound (possibly raced with autobind) */

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		/* Pathname sockets live in the byinode buckets; UNIX_HASH_SIZE
		 * marks the address as not byname-hashable.
		 */
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;	/* ownership of path moves to the socket */
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	/* Publish the address; release-store pairs with acquire-loads of
	 * u->addr elsewhere (unix_getname(), stream connect).
	 */
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	if (err)
		path_put(&path);	/* drop the mknod refs on any failure */
out:
	return err;
}
1083 
/* Lock the state of two socks deadlock-free by always taking the lower
 * address first; tolerates sk2 == NULL or sk1 == sk2 (single lock).
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	/* Canonical ordering by pointer value prevents ABBA deadlock. */
	first  = (sk1 < sk2) ? sk1 : sk2;
	second = (sk1 < sk2) ? sk2 : sk1;

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1098 
/* Undo unix_state_double_lock(); unlock order does not matter. */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	/* Second lock only exists when the socks were distinct and non-NULL. */
	if (sk2 && sk2 != sk1)
		unix_state_unlock(sk2);
}
1108 
/* connect(2) for datagram sockets: set (or with AF_UNSPEC, clear) the
 * default peer.  Both socks' state locks are taken together so the
 * peer switch is atomic with respect to concurrent sends.  Returns 0
 * or a negative errno.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* SOCK_PASSCRED senders need an address for the peer to see. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;	/* transfer the lookup ref to sk */
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1188 
/* Sleep until the peer's receive queue may have drained, or the timeout
 * expires.  Called with other's state lock held; the lock is dropped
 * here after queueing ourselves on peer_wait, so a wakeup between the
 * condition check and schedule_timeout() is not lost.  Returns the
 * remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* Re-check the blocking condition under the lock before sleeping. */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1209 
/* connect(2) for stream/seqpacket sockets.  Allocates the server-side
 * sock and a notification skb up front, then finds the listener,
 * handshakes under both state locks, and queues the embryo sock on the
 * listener's receive queue for accept(2) to pick up.  Returns 0 or a
 * negative errno.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* SOCK_PASSCRED requires us to have an address the peer can see. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		/* Backlog full: wait for room or fail for non-blocking. */
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		/* State changed while we were unlocked: start over. */
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* queue the embryo sock and notify the listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);	/* discard the embryo */
	if (other)
		sock_put(other);
	return err;
}
1398 
1399 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1400 {
1401         struct sock *ska = socka->sk, *skb = sockb->sk;
1402 
1403         /* Join our sockets back to back */
1404         sock_hold(ska);
1405         sock_hold(skb);
1406         unix_peer(ska) = skb;
1407         unix_peer(skb) = ska;
1408         init_peercred(ska);
1409         init_peercred(skb);
1410 
1411         if (ska->sk_type != SOCK_DGRAM) {
1412                 ska->sk_state = TCP_ESTABLISHED;
1413                 skb->sk_state = TCP_ESTABLISHED;
1414                 socka->state  = SS_CONNECTED;
1415                 sockb->state  = SS_CONNECTED;
1416         }
1417         return 0;
1418 }
1419 
1420 static void unix_sock_inherit_flags(const struct socket *old,
1421                                     struct socket *new)
1422 {
1423         if (test_bit(SOCK_PASSCRED, &old->flags))
1424                 set_bit(SOCK_PASSCRED, &new->flags);
1425         if (test_bit(SOCK_PASSSEC, &old->flags))
1426                 set_bit(SOCK_PASSSEC, &new->flags);
1427 }
1428 
/* accept(2): dequeue one embryo sock (queued by unix_stream_connect())
 * from the listener's receive queue and graft it onto the new socket.
 * Returns 0 or a negative errno (-EAGAIN for non-blocking with an
 * empty queue, -EINVAL on receive shutdown or a non-listening socket).
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;	/* the embryo sock rides on the skb */
	skb_free_datagram(sk, skb);
	/* A backlog slot opened up: wake a blocked connector, if any. */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
1472 
1473 
/* getsockname(2)/getpeername(2).  Copies the bound address (or a bare
 * AF_UNIX family for unbound sockets) into uaddr.  Returns the address
 * length on success, -ENOTCONN when peer is requested but absent.
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);	/* takes a ref on the peer */

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);	/* balance the sock_put() below */
	}

	/* Acquire pairs with the release-store publishing u->addr in
	 * unix_bind()/unix_autobind(), so addr contents are fully visible.
	 */
	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = sizeof(short);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}
1505 
1506 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1507 {
1508         int err = 0;
1509 
1510         UNIXCB(skb).pid  = get_pid(scm->pid);
1511         UNIXCB(skb).uid = scm->creds.uid;
1512         UNIXCB(skb).gid = scm->creds.gid;
1513         UNIXCB(skb).fp = NULL;
1514         unix_get_secdata(scm, skb);
1515         if (scm->fp && send_fds)
1516                 err = unix_attach_fds(scm, skb);
1517 
1518         skb->destructor = unix_destruct_scm;
1519         return err;
1520 }
1521 
1522 static bool unix_passcred_enabled(const struct socket *sock,
1523                                   const struct sock *other)
1524 {
1525         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1526                !other->sk_socket ||
1527                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1528 }
1529 
1530 /*
1531  * Some apps rely on write() giving SCM_CREDENTIALS
1532  * We include credentials if source or destination socket
1533  * asserted SOCK_PASSCRED.
1534  */
1535 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1536                             const struct sock *other)
1537 {
1538         if (UNIXCB(skb).pid)
1539                 return;
1540         if (unix_passcred_enabled(sock, other)) {
1541                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1542                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1543         }
1544 }
1545 
1546 static int maybe_init_creds(struct scm_cookie *scm,
1547                             struct socket *socket,
1548                             const struct sock *other)
1549 {
1550         int err;
1551         struct msghdr msg = { .msg_controllen = 0 };
1552 
1553         err = scm_send(socket, &msg, scm, false);
1554         if (err)
1555                 return err;
1556 
1557         if (unix_passcred_enabled(socket, other)) {
1558                 scm->pid = get_pid(task_tgid(current));
1559                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1560         }
1561         return err;
1562 }
1563 
1564 static bool unix_skb_scm_eq(struct sk_buff *skb,
1565                             struct scm_cookie *scm)
1566 {
1567         const struct unix_skb_parms *u = &UNIXCB(skb);
1568 
1569         return u->pid == scm->pid &&
1570                uid_eq(u->uid, scm->creds.uid) &&
1571                gid_eq(u->gid, scm->creds.gid) &&
1572                unix_secdata_eq(scm, skb);
1573 }
1574 
1575 /*
1576  *      Send AF_UNIX data.
1577  */
1578 
1579 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1580                               size_t len)
1581 {
1582         struct sock *sk = sock->sk;
1583         struct net *net = sock_net(sk);
1584         struct unix_sock *u = unix_sk(sk);
1585         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1586         struct sock *other = NULL;
1587         int namelen = 0; /* fake GCC */
1588         int err;
1589         unsigned int hash;
1590         struct sk_buff *skb;
1591         long timeo;
1592         struct scm_cookie scm;
1593         int data_len = 0;
1594         int sk_locked;
1595 
1596         wait_for_unix_gc();
1597         err = scm_send(sock, msg, &scm, false);
1598         if (err < 0)
1599                 return err;
1600 
1601         err = -EOPNOTSUPP;
1602         if (msg->msg_flags&MSG_OOB)
1603                 goto out;
1604 
1605         if (msg->msg_namelen) {
1606                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1607                 if (err < 0)
1608                         goto out;
1609                 namelen = err;
1610         } else {
1611                 sunaddr = NULL;
1612                 err = -ENOTCONN;
1613                 other = unix_peer_get(sk);
1614                 if (!other)
1615                         goto out;
1616         }
1617 
1618         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1619             && (err = unix_autobind(sock)) != 0)
1620                 goto out;
1621 
1622         err = -EMSGSIZE;
1623         if (len > sk->sk_sndbuf - 32)
1624                 goto out;
1625 
1626         if (len > SKB_MAX_ALLOC) {
1627                 data_len = min_t(size_t,
1628                                  len - SKB_MAX_ALLOC,
1629                                  MAX_SKB_FRAGS * PAGE_SIZE);
1630                 data_len = PAGE_ALIGN(data_len);
1631 
1632                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1633         }
1634 
1635         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1636                                    msg->msg_flags & MSG_DONTWAIT, &err,
1637                                    PAGE_ALLOC_COSTLY_ORDER);
1638         if (skb == NULL)
1639                 goto out;
1640 
1641         err = unix_scm_to_skb(&scm, skb, true);
1642         if (err < 0)
1643                 goto out_free;
1644 
1645         skb_put(skb, len - data_len);
1646         skb->data_len = data_len;
1647         skb->len = len;
1648         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1649         if (err)
1650                 goto out_free;
1651 
1652         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1653 
1654 restart:
1655         if (!other) {
1656                 err = -ECONNRESET;
1657                 if (sunaddr == NULL)
1658                         goto out_free;
1659 
1660                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1661                                         hash, &err);
1662                 if (other == NULL)
1663                         goto out_free;
1664         }
1665 
1666         if (sk_filter(other, skb) < 0) {
1667                 /* Toss the packet but do not return any error to the sender */
1668                 err = len;
1669                 goto out_free;
1670         }
1671 
1672         sk_locked = 0;
1673         unix_state_lock(other);
1674 restart_locked:
1675         err = -EPERM;
1676         if (!unix_may_send(sk, other))
1677                 goto out_unlock;
1678 
1679         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1680                 /*
1681                  *      Check with 1003.1g - what should
1682                  *      datagram error
1683                  */
1684                 unix_state_unlock(other);
1685                 sock_put(other);
1686 
1687                 if (!sk_locked)
1688                         unix_state_lock(sk);
1689 
1690                 err = 0;
1691                 if (unix_peer(sk) == other) {
1692                         unix_peer(sk) = NULL;
1693                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1694 
1695                         unix_state_unlock(sk);
1696 
1697                         unix_dgram_disconnected(sk, other);
1698                         sock_put(other);
1699                         err = -ECONNREFUSED;
1700                 } else {
1701                         unix_state_unlock(sk);
1702                 }
1703 
1704                 other = NULL;
1705                 if (err)
1706                         goto out_free;
1707                 goto restart;
1708         }
1709 
1710         err = -EPIPE;
1711         if (other->sk_shutdown & RCV_SHUTDOWN)
1712                 goto out_unlock;
1713 
1714         if (sk->sk_type != SOCK_SEQPACKET) {
1715                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1716                 if (err)
1717                         goto out_unlock;
1718         }
1719 
1720         /* other == sk && unix_peer(other) != sk if
1721          * - unix_peer(sk) == NULL, destination address bound to sk
1722          * - unix_peer(sk) == sk by time of get but disconnected before lock
1723          */
1724         if (other != sk &&
1725             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1726                 if (timeo) {
1727                         timeo = unix_wait_for_peer(other, timeo);
1728 
1729                         err = sock_intr_errno(timeo);
1730                         if (signal_pending(current))
1731                                 goto out_free;
1732 
1733                         goto restart;
1734                 }
1735 
1736                 if (!sk_locked) {
1737                         unix_state_unlock(other);
1738                         unix_state_double_lock(sk, other);
1739                 }
1740 
1741                 if (unix_peer(sk) != other ||
1742                     unix_dgram_peer_wake_me(sk, other)) {
1743                         err = -EAGAIN;
1744                         sk_locked = 1;
1745                         goto out_unlock;
1746                 }
1747 
1748                 if (!sk_locked) {
1749                         sk_locked = 1;
1750                         goto restart_locked;
1751                 }
1752         }
1753 
1754         if (unlikely(sk_locked))
1755                 unix_state_unlock(sk);
1756 
1757         if (sock_flag(other, SOCK_RCVTSTAMP))
1758                 __net_timestamp(skb);
1759         maybe_add_creds(skb, sock, other);
1760         skb_queue_tail(&other->sk_receive_queue, skb);
1761         unix_state_unlock(other);
1762         other->sk_data_ready(other);
1763         sock_put(other);
1764         scm_destroy(&scm);
1765         return len;
1766 
1767 out_unlock:
1768         if (sk_locked)
1769                 unix_state_unlock(sk);
1770         unix_state_unlock(other);
1771 out_free:
1772         kfree_skb(skb);
1773 out:
1774         if (other)
1775                 sock_put(other);
1776         scm_destroy(&scm);
1777         return err;
1778 }
1779 
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.  This bounds the page-fragment
 * portion of each stream skb allocated in unix_stream_sendmsg().
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1784 
/* Stream (SOCK_STREAM) sendmsg: split @len bytes from @msg into skbs
 * and queue them on the connected peer's receive queue.
 *
 * Returns the number of bytes queued, or a negative errno if nothing
 * could be sent.  A partial send returns the bytes already queued.
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;	/* connected peer */
	int err, size;
	struct sk_buff *skb;
	int sent = 0;			/* bytes queued so far */
	struct scm_cookie scm;
	bool fds_sent = false;		/* SCM_RIGHTS attached already? */
	int data_len;

	/* Let the fd garbage collector run before we pin more in-flight
	 * files via SCM_RIGHTS.
	 */
	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	/* Out-of-band data is not supported on AF_UNIX streams. */
	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* A destination address is invalid on a stream socket. */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		/* Bytes beyond the linear head go into page fragments. */
		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* Peer may have died or shut down reception meanwhile. */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	/* Writing to a shut-down stream raises SIGPIPE unless suppressed. */
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}
1883 
/* ->sendpage() for stream sockets: append @size bytes of @page as a
 * page fragment to the last skb on the peer's receive queue when it
 * came from the same sender, otherwise to a freshly allocated skb.
 *
 * Returns @size on success or a negative errno.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;	/* scm cookie not yet initialized */
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	/* Never entered on the first pass: alloc_skb is only reached by
	 * goto from below, after both locks have to be dropped to allow
	 * a (possibly sleeping) skb allocation.
	 */
	if (false) {
alloc_skb:
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	/* Try to coalesce with the skb currently at the queue tail. */
	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		/* Tail unchanged while we allocated newskb: use newskb. */
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		/* Empty queue, or tail carries different credentials:
		 * we need a separate skb of our own.
		 */
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		/* No fragment slot left: retry with a fresh skb. */
		tail = skb;
		goto alloc_skb;
	}

	/* Account the appended bytes to the skb and to the sender. */
	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}
1999 
2000 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2001                                   size_t len)
2002 {
2003         int err;
2004         struct sock *sk = sock->sk;
2005 
2006         err = sock_error(sk);
2007         if (err)
2008                 return err;
2009 
2010         if (sk->sk_state != TCP_ESTABLISHED)
2011                 return -ENOTCONN;
2012 
2013         if (msg->msg_namelen)
2014                 msg->msg_namelen = 0;
2015 
2016         return unix_dgram_sendmsg(sock, msg, len);
2017 }
2018 
2019 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2020                                   size_t size, int flags)
2021 {
2022         struct sock *sk = sock->sk;
2023 
2024         if (sk->sk_state != TCP_ESTABLISHED)
2025                 return -ENOTCONN;
2026 
2027         return unix_dgram_recvmsg(sock, msg, size, flags);
2028 }
2029 
2030 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2031 {
2032         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2033 
2034         if (addr) {
2035                 msg->msg_namelen = addr->len;
2036                 memcpy(msg->msg_name, addr->name, addr->len);
2037         }
2038 }
2039 
2040 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2041                               size_t size, int flags)
2042 {
2043         struct scm_cookie scm;
2044         struct sock *sk = sock->sk;
2045         struct unix_sock *u = unix_sk(sk);
2046         struct sk_buff *skb, *last;
2047         long timeo;
2048         int skip;
2049         int err;
2050 
2051         err = -EOPNOTSUPP;
2052         if (flags&MSG_OOB)
2053                 goto out;
2054 
2055         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2056 
2057         do {
2058                 mutex_lock(&u->iolock);
2059 
2060                 skip = sk_peek_offset(sk, flags);
2061                 skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
2062                                               &last);
2063                 if (skb)
2064                         break;
2065 
2066                 mutex_unlock(&u->iolock);
2067 
2068                 if (err != -EAGAIN)
2069                         break;
2070         } while (timeo &&
2071                  !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2072 
2073         if (!skb) { /* implies iolock unlocked */
2074                 unix_state_lock(sk);
2075                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2076                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2077                     (sk->sk_shutdown & RCV_SHUTDOWN))
2078                         err = 0;
2079                 unix_state_unlock(sk);
2080                 goto out;
2081         }
2082 
2083         if (wq_has_sleeper(&u->peer_wait))
2084                 wake_up_interruptible_sync_poll(&u->peer_wait,
2085                                                 EPOLLOUT | EPOLLWRNORM |
2086                                                 EPOLLWRBAND);
2087 
2088         if (ccs_socket_post_recvmsg_permission(sk, skb, flags)) {
2089                 err = -EAGAIN; /* Hope less harmful than -EPERM. */
2090                 goto out_unlock;
2091         }
2092         if (msg->msg_name)
2093                 unix_copy_addr(msg, skb->sk);
2094 
2095         if (size > skb->len - skip)
2096                 size = skb->len - skip;
2097         else if (size < skb->len - skip)
2098                 msg->msg_flags |= MSG_TRUNC;
2099 
2100         err = skb_copy_datagram_msg(skb, skip, msg, size);
2101         if (err)
2102                 goto out_free;
2103 
2104         if (sock_flag(sk, SOCK_RCVTSTAMP))
2105                 __sock_recv_timestamp(msg, sk, skb);
2106 
2107         memset(&scm, 0, sizeof(scm));
2108 
2109         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2110         unix_set_secdata(&scm, skb);
2111 
2112         if (!(flags & MSG_PEEK)) {
2113                 if (UNIXCB(skb).fp)
2114                         unix_detach_fds(&scm, skb);
2115 
2116                 sk_peek_offset_bwd(sk, skb->len);
2117         } else {
2118                 /* It is questionable: on PEEK we could:
2119                    - do not return fds - good, but too simple 8)
2120                    - return fds, and do not return them on read (old strategy,
2121                      apparently wrong)
2122                    - clone fds (I chose it for now, it is the most universal
2123                      solution)
2124 
2125                    POSIX 1003.1g does not actually define this clearly
2126                    at all. POSIX 1003.1g doesn't define a lot of things
2127                    clearly however!
2128 
2129                 */
2130 
2131                 sk_peek_offset_fwd(sk, size);
2132 
2133                 if (UNIXCB(skb).fp)
2134                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2135         }
2136         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2137 
2138         scm_recv(sock, msg, &scm, flags);
2139 
2140 out_free:
2141         skb_free_datagram(sk, skb);
2142 out_unlock:
2143         mutex_unlock(&u->iolock);
2144 out:
2145         return err;
2146 }
2147 
/*
 *      Sleep until more data has arrived. But check for races..
 *
 * Called with @sk unlocked.  Waits until the receive queue changes
 * relative to the caller's snapshot (@last / @last_len), or an error,
 * shutdown, pending signal or timeout expiry ends the wait.  Returns
 * the remaining timeout.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		/* Did anything change since the caller's snapshot?  A new
		 * tail skb, or the same tail having grown, both count.
		 */
		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);
		if (freezable)
			timeo = freezable_schedule_timeout(timeo);
		else
			timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		/* Socket was torn down while we slept. */
		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}
2190 
2191 static unsigned int unix_skb_len(const struct sk_buff *skb)
2192 {
2193         return skb->len - UNIXCB(skb).consumed;
2194 }
2195 
/* Parameters shared between the generic stream receive loop and its
 * per-consumer actors (copy-to-msghdr for recvmsg, splice-to-pipe for
 * splice_read).
 */
struct unix_stream_read_state {
	/* Delivers one chunk of an skb to the consumer; returns bytes
	 * delivered or a negative error.
	 */
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;		/* recvmsg destination (NULL for splice) */
	struct pipe_inode_info *pipe;	/* splice destination */
	size_t size;			/* total bytes requested */
	int flags;			/* MSG_* flags */
	unsigned int splice_flags;	/* SPLICE_F_* flags */
};
2206 
/* Common receive loop for SOCK_STREAM sockets, shared by recvmsg and
 * splice.  Delivers up to state->size bytes to the consumer via
 * state->recv_actor, gluing together consecutive skbs that carry the
 * same sender credentials.
 *
 * Returns the number of bytes delivered, or a negative errno when
 * nothing was copied.
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	/* Minimum byte count before we may return (SO_RCVLOWAT). */
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			/* Queue is empty: return if we have enough,
			 * report error/shutdown, or wait for more data.
			 */
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		/* Honour a peek offset: step over fully-skipped skbs. */
		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		/* Hold a reference across the actor, which may sleep
		 * while a concurrent reader consumes this skb.
		 */
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(&scm, skb);

			/* Partially consumed skb stays on the queue. */
			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			/* Stop after a message that carried fds. */
			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
2403 
2404 static int unix_stream_read_actor(struct sk_buff *skb,
2405                                   int skip, int chunk,
2406                                   struct unix_stream_read_state *state)
2407 {
2408         int ret;
2409 
2410         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2411                                     state->msg, chunk);
2412         return ret ?: chunk;
2413 }
2414 
2415 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2416                                size_t size, int flags)
2417 {
2418         struct unix_stream_read_state state = {
2419                 .recv_actor = unix_stream_read_actor,
2420                 .socket = sock,
2421                 .msg = msg,
2422                 .size = size,
2423                 .flags = flags
2424         };
2425 
2426         return unix_stream_read_generic(&state, true);
2427 }
2428 
2429 static int unix_stream_splice_actor(struct sk_buff *skb,
2430                                     int skip, int chunk,
2431                                     struct unix_stream_read_state *state)
2432 {
2433         return skb_splice_bits(skb, state->socket->sk,
2434                                UNIXCB(skb).consumed + skip,
2435                                state->pipe, chunk, state->splice_flags);
2436 }
2437 
2438 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2439                                        struct pipe_inode_info *pipe,
2440                                        size_t size, unsigned int flags)
2441 {
2442         struct unix_stream_read_state state = {
2443                 .recv_actor = unix_stream_splice_actor,
2444                 .socket = sock,
2445                 .pipe = pipe,
2446                 .size = size,
2447                 .splice_flags = flags,
2448         };
2449 
2450         if (unlikely(*ppos))
2451                 return -ESPIPE;
2452 
2453         if (sock->file->f_flags & O_NONBLOCK ||
2454             flags & SPLICE_F_NONBLOCK)
2455                 state.flags = MSG_DONTWAIT;
2456 
2457         return unix_stream_read_generic(&state, false);
2458 }
2459 
/* shutdown(2) for AF_UNIX: record the shutdown mode on @sk and, for
 * connection-oriented sockets, mirror the complementary mode onto the
 * connected peer, waking both ends.
 */
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);	/* keep peer alive after dropping our lock */
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		/* Our receive shutdown cuts the peer's sends, and vice
		 * versa.
		 */
		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
2505 
2506 long unix_inq_len(struct sock *sk)
2507 {
2508         struct sk_buff *skb;
2509         long amount = 0;
2510 
2511         if (sk->sk_state == TCP_LISTEN)
2512                 return -EINVAL;
2513 
2514         spin_lock(&sk->sk_receive_queue.lock);
2515         if (sk->sk_type == SOCK_STREAM ||
2516             sk->sk_type == SOCK_SEQPACKET) {
2517                 skb_queue_walk(&sk->sk_receive_queue, skb)
2518                         amount += unix_skb_len(skb);
2519         } else {
2520                 skb = skb_peek(&sk->sk_receive_queue);
2521                 if (skb)
2522                         amount = skb->len;
2523         }
2524         spin_unlock(&sk->sk_receive_queue.lock);
2525 
2526         return amount;
2527 }
2528 EXPORT_SYMBOL_GPL(unix_inq_len);
2529 
/* Bytes queued for transmission on @sk (SIOCOUTQ): the socket's
 * current write-memory charge.
 */
long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2535 
/* SIOCUNIXFILE: open the filesystem object the socket is bound to and
 * return a new O_PATH, O_CLOEXEC file descriptor for it.  Requires
 * CAP_NET_ADMIN in the socket's network namespace.
 */
static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	/* Acquire-load the bound address; presumably pairs with a
	 * release store at bind time — confirm.  No address, no file.
	 */
	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;	/* bound, but to no filesystem object */

	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);

	return fd;
}
2571 
2572 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2573 {
2574         struct sock *sk = sock->sk;
2575         long amount = 0;
2576         int err;
2577 
2578         switch (cmd) {
2579         case SIOCOUTQ:
2580                 amount = unix_outq_len(sk);
2581                 err = put_user(amount, (int __user *)arg);
2582                 break;
2583         case SIOCINQ:
2584                 amount = unix_inq_len(sk);
2585                 if (amount < 0)
2586                         err = amount;
2587                 else
2588                         err = put_user(amount, (int __user *)arg);
2589                 break;
2590         case SIOCUNIXFILE:
2591                 err = unix_open_file(sk);
2592                 break;
2593         default:
2594                 err = -ENOIOCTLCMD;
2595                 break;
2596         }
2597         return err;
2598 }
2599 
#ifdef CONFIG_COMPAT
/* 32-bit compat entry point: widen the user pointer, reuse unix_ioctl(). */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	void __user *argp = compat_ptr(arg);

	return unix_ioctl(sock, cmd, (unsigned long)argp);
}
#endif
2606 
2607 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2608 {
2609         struct sock *sk = sock->sk;
2610         __poll_t mask;
2611 
2612         sock_poll_wait(file, sock, wait);
2613         mask = 0;
2614 
2615         /* exceptional events? */
2616         if (sk->sk_err)
2617                 mask |= EPOLLERR;
2618         if (sk->sk_shutdown == SHUTDOWN_MASK)
2619                 mask |= EPOLLHUP;
2620         if (sk->sk_shutdown & RCV_SHUTDOWN)
2621                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2622 
2623         /* readable? */
2624         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2625                 mask |= EPOLLIN | EPOLLRDNORM;
2626 
2627         /* Connection-based need to check for termination and startup */
2628         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2629             sk->sk_state == TCP_CLOSE)
2630                 mask |= EPOLLHUP;
2631 
2632         /*
2633          * we set writable also when the other side has shut down the
2634          * connection. This prevents stuck sockets.
2635          */
2636         if (unix_writable(sk))
2637                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2638 
2639         return mask;
2640 }
2641 
/*
 * poll() for datagram (and seqpacket) unix sockets.  Differs from
 * unix_poll() in that writability also depends on the connected peer's
 * receive queue, and expensive write-side checks are skipped when the
 * caller did not ask for OUT events.
 */
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet?  Skip the write-side
		 * checks entirely and report only the events so far. */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		/* A connected peer with a full receive queue makes us
		 * unwritable; unix_dgram_peer_wake_me() presumably also
		 * registers us for a wakeup when the peer drains —
		 * confirm against its definition. */
		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		/* not writable: flag that async writers lack space */
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
2699 
#ifdef CONFIG_PROC_FS

/*
 * /proc/net/unix iterator position encoding: the upper bits of the
 * seq_file pos hold the unix_socket_table bucket index, the lower
 * BUCKET_SPACE bits hold a 1-based offset within that bucket.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2707 
2708 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2709 {
2710         unsigned long offset = get_offset(*pos);
2711         unsigned long bucket = get_bucket(*pos);
2712         struct sock *sk;
2713         unsigned long count = 0;
2714 
2715         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2716                 if (sock_net(sk) != seq_file_net(seq))
2717                         continue;
2718                 if (++count == offset)
2719                         break;
2720         }
2721 
2722         return sk;
2723 }
2724 
/*
 * Advance to the next socket in this netns, moving on to subsequent
 * hash buckets as each one is exhausted.  Called with @sk == NULL
 * (from unix_seq_start) to look up the socket at *pos, or with the
 * previous socket to step forward.  Returns NULL at end of table.
 *
 * NOTE(review): the "next_bucket" label is jumped to from the while
 * loop above, entering the do/while body mid-way; on that path
 * "bucket" is assigned before it is tested by the loop condition.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		/* skip sockets from other network namespaces */
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		/* bucket exhausted: restart at offset 1 of the next one */
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}
2751 
2752 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2753         __acquires(unix_table_lock)
2754 {
2755         spin_lock(&unix_table_lock);
2756 
2757         if (!*pos)
2758                 return SEQ_START_TOKEN;
2759 
2760         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2761                 return NULL;
2762 
2763         return unix_next_socket(seq, NULL, pos);
2764 }
2765 
2766 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2767 {
2768         ++*pos;
2769         return unix_next_socket(seq, v, pos);
2770 }
2771 
/* seq_file stop callback: release the lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2777 
/*
 * Emit one line of /proc/net/unix (or the header for the start token).
 * The column layout is long-standing userspace ABI; do not change the
 * format strings.
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		/* "Protocol" column is hard-coded 0 for unix sockets. */
		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* addr stable: unix_table_lock held here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* address length minus the sun_family field */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;	/* drop the pathname's trailing NUL */
			else {
				/* abstract names lead with NUL: print '@' */
				seq_putc(seq, '@');
				i++;
			}
			/* embedded NULs are rendered as '@' as well */
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2822 
/* seq_file callbacks backing /proc/net/unix. */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
#endif	/* CONFIG_PROC_FS */
2830 
/* Registered with sock_register(); unix_create() builds PF_UNIX sockets. */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner  = THIS_MODULE,
};
2836 
2837 
2838 static int __net_init unix_net_init(struct net *net)
2839 {
2840         int error = -ENOMEM;
2841 
2842         net->unx.sysctl_max_dgram_qlen = 10;
2843         if (unix_sysctl_register(net))
2844                 goto out;
2845 
2846 #ifdef CONFIG_PROC_FS
2847         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2848                         sizeof(struct seq_net_private))) {
2849                 unix_sysctl_unregister(net);
2850                 goto out;
2851         }
2852 #endif
2853         error = 0;
2854 out:
2855         return error;
2856 }
2857 
/* Per-network-namespace teardown of the sysctl and procfs entries. */
static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}
2863 
/* Hooked into netns creation/destruction via register_pernet_subsys(). */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2868 
2869 static int __init af_unix_init(void)
2870 {
2871         int rc = -1;
2872 
2873         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2874 
2875         rc = proto_register(&unix_proto, 1);
2876         if (rc != 0) {
2877                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2878                 goto out;
2879         }
2880 
2881         sock_register(&unix_family_ops);
2882         register_pernet_subsys(&unix_net_ops);
2883 out:
2884         return rc;
2885 }
2886 
/* Module unload: unregister everything af_unix_init() set up. */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2893 
/*
 * Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket.  But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
2903 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp