~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/unix/af_unix.c

Version: ~ [ linux-5.12-rc1 ] ~ [ linux-5.11.2 ] ~ [ linux-5.10.19 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.101 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.177 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.222 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.258 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.258 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 /*
  3  * NET4:        Implementation of BSD Unix domain sockets.
  4  *
  5  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
  6  *
  7  * Fixes:
  8  *              Linus Torvalds  :       Assorted bug cures.
  9  *              Niibe Yutaka    :       async I/O support.
 10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
 11  *              Alan Cox        :       Limit size of allocated blocks.
 12  *              Alan Cox        :       Fixed the stupid socketpair bug.
 13  *              Alan Cox        :       BSD compatibility fine tuning.
 14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
 15  *              Alan Cox        :       Sorted out a proper draft version of
 16  *                                      file descriptor passing hacked up from
 17  *                                      Mike Shaver's work.
 18  *              Marty Leisner   :       Fixes to fd passing
 19  *              Nick Nevin      :       recvmsg bugfix.
 20  *              Alan Cox        :       Started proper garbage collector
 21  *              Heiko EiBfeldt  :       Missing verify_area check
 22  *              Alan Cox        :       Started POSIXisms
 23  *              Andreas Schwab  :       Replace inode by dentry for proper
 24  *                                      reference counting
 25  *              Kirk Petersen   :       Made this a module
 26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
 27  *                                      Lots of bug fixes.
 28  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
 29  *                                      by above two patches.
 30  *           Andrea Arcangeli   :       If possible we block in connect(2)
 31  *                                      if the max backlog of the listen socket
 32  *                                      has been reached. This won't break
 33  *                                      old apps and it will avoid a huge amount
 34  *                                      of socks hashed (this for unix_gc()
 35  *                                      performance reasons).
 36  *                                      Security fix that limits the max
 37  *                                      number of socks to 2*max_files and
 38  *                                      the number of skb queueable in the
 39  *                                      dgram receiver.
 40  *              Artur Skawina   :       Hash function optimizations
 41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
 42  *            Malcolm Beattie   :       Set peercred for socketpair
 43  *           Michal Ostrowski   :       Module initialization cleanup.
 44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
 45  *                                      the core infrastructure is doing that
 46  *                                      for all net proto families now (2.5.69+)
 47  *
 48  * Known differences from reference BSD that was tested:
 49  *
 50  *      [TO FIX]
 51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
 52  *              other the moment one end closes.
 53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
 54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
 55  *      [NOT TO FIX]
 56  *      accept() returns a path name even if the connecting socket has closed
 57  *              in the meantime (BSD loses the path and gives up).
 58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
 59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
 61  *      BSD af_unix apparently has connect forgetting to block properly.
 62  *              (need to check this with the POSIX spec in detail)
 63  *
 64  * Differences from 2.0.0-11-... (ANK)
 65  *      Bug fixes and improvements.
 66  *              - client shutdown killed server socket.
 67  *              - removed all useless cli/sti pairs.
 68  *
 69  *      Semantic changes/extensions.
 70  *              - generic control message passing.
 71  *              - SCM_CREDENTIALS control message.
 72  *              - "Abstract" (not FS based) socket bindings.
 73  *                Abstract names are sequences of bytes (not zero terminated)
 74  *                started by 0, so that this name space does not intersect
 75  *                with BSD names.
 76  */
 77 
 78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 79 
 80 #include <linux/module.h>
 81 #include <linux/kernel.h>
 82 #include <linux/signal.h>
 83 #include <linux/sched/signal.h>
 84 #include <linux/errno.h>
 85 #include <linux/string.h>
 86 #include <linux/stat.h>
 87 #include <linux/dcache.h>
 88 #include <linux/namei.h>
 89 #include <linux/socket.h>
 90 #include <linux/un.h>
 91 #include <linux/fcntl.h>
 92 #include <linux/termios.h>
 93 #include <linux/sockios.h>
 94 #include <linux/net.h>
 95 #include <linux/in.h>
 96 #include <linux/fs.h>
 97 #include <linux/slab.h>
 98 #include <linux/uaccess.h>
 99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <net/net_namespace.h>
102 #include <net/sock.h>
103 #include <net/tcp_states.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <linux/seq_file.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/rtnetlink.h>
111 #include <linux/mount.h>
112 #include <net/checksum.h>
113 #include <linux/security.h>
114 #include <linux/freezer.h>
115 #include <linux/file.h>
116 
117 #include "scm.h"
118 
    /*
     * Global socket hash table with 2 * UNIX_HASH_SIZE buckets.
     * unix_sockets_unbound() hands out buckets from the upper half
     * (index >= UNIX_HASH_SIZE); lookups by name or inode in this file
     * index it directly.
     */
119 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
120 EXPORT_SYMBOL_GPL(unix_socket_table);
    /* Spinlock taken around every insert, remove and lookup on the table. */
121 DEFINE_SPINLOCK(unix_table_lock);
122 EXPORT_SYMBOL_GPL(unix_table_lock);
    /*
     * Count of unix sockets: bumped in unix_create1(), dropped in
     * unix_sock_destructor(), and compared against 2 * get_max_files().
     */
123 static atomic_long_t unix_nr_socks;
124 
125 
126 static struct hlist_head *unix_sockets_unbound(void *addr)
127 {
128         unsigned long hash = (unsigned long)addr;
129 
130         hash ^= hash >> 16;
131         hash ^= hash >> 8;
132         hash %= UNIX_HASH_SIZE;
133         return &unix_socket_table[UNIX_HASH_SIZE + hash];
134 }
135 
    /*
     * Non-zero when the hash stored in the socket's address is below
     * UNIX_HASH_SIZE.  Dereferences unix_sk(sk)->addr unconditionally, so
     * it must only be used on a socket whose addr is set.
     */
136 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
137 
138 #ifdef CONFIG_SECURITY_NETWORK
139 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
140 {
141         UNIXCB(skb).secid = scm->secid;
142 }
143 
144 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
145 {
146         scm->secid = UNIXCB(skb).secid;
147 }
148 
149 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
150 {
151         return (scm->secid == UNIXCB(skb).secid);
152 }
153 #else
154 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
155 { }
156 
157 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158 { }
159 
160 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
161 {
162         return true;
163 }
164 #endif /* CONFIG_SECURITY_NETWORK */
165 
166 /*
167  *  SMP locking strategy:
168  *    hash table is protected with spinlock unix_table_lock
169  *    each socket state is protected by separate spin lock.
170  */
171 
172 static inline unsigned int unix_hash_fold(__wsum n)
173 {
174         unsigned int hash = (__force unsigned int)csum_fold(n);
175 
176         hash ^= hash>>8;
177         return hash&(UNIX_HASH_SIZE-1);
178 }
179 
    /*
     * Accessor for the peer pointer of a unix socket.  Expands to an lvalue,
     * so it is used both for reading and for assignment in this file.
     */
180 #define unix_peer(sk) (unix_sk(sk)->peer)
181 
/* Non-zero when @osk's peer pointer refers to @sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
        return sk == unix_peer(osk);
}
186 
187 static inline int unix_may_send(struct sock *sk, struct sock *osk)
188 {
189         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
190 }
191 
192 static inline int unix_recvq_full(struct sock const *sk)
193 {
194         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
195 }
196 
197 struct sock *unix_peer_get(struct sock *s)
198 {
199         struct sock *peer;
200 
201         unix_state_lock(s);
202         peer = unix_peer(s);
203         if (peer)
204                 sock_hold(peer);
205         unix_state_unlock(s);
206         return peer;
207 }
208 EXPORT_SYMBOL_GPL(unix_peer_get);
209 
210 static inline void unix_release_addr(struct unix_address *addr)
211 {
212         if (refcount_dec_and_test(&addr->refcnt))
213                 kfree(addr);
214 }
215 
216 /*
217  *      Check unix socket name:
218  *              - should be not zero length.
219  *              - if started by not zero, should be NULL terminated (FS object)
220  *              - if started by zero, it is abstract name.
221  */
222 
223 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
224 {
225         *hashp = 0;
226 
227         if (len <= sizeof(short) || len > sizeof(*sunaddr))
228                 return -EINVAL;
229         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
230                 return -EINVAL;
231         if (sunaddr->sun_path[0]) {
232                 /*
233                  * This may look like an off by one error but it is a bit more
234                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
235                  * sun_path[108] doesn't as such exist.  However in kernel space
236                  * we are guaranteed that it is a valid memory location in our
237                  * kernel address buffer.
238                  */
239                 ((char *)sunaddr)[len] = 0;
240                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
241                 return len;
242         }
243 
244         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
245         return len;
246 }
247 
    /*
     * Unlink @sk from its hash chain via sk_del_node_init().  Takes no lock
     * itself; unix_remove_socket() is the variant that holds
     * unix_table_lock around this call.
     */
248 static void __unix_remove_socket(struct sock *sk)
249 {
250         sk_del_node_init(sk);
251 }
252 
    /*
     * Add @sk to the hash chain @list.  Warns if @sk is already hashed.
     * Takes no lock itself; unix_insert_socket() is the variant that holds
     * unix_table_lock around this call.
     */
253 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
254 {
255         WARN_ON(!sk_unhashed(sk));
256         sk_add_node(sk, list);
257 }
258 
    /* Unhash @sk while holding unix_table_lock. */
259 static inline void unix_remove_socket(struct sock *sk)
260 {
261         spin_lock(&unix_table_lock);
262         __unix_remove_socket(sk);
263         spin_unlock(&unix_table_lock);
264 }
265 
    /* Hash @sk onto @list while holding unix_table_lock. */
266 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
267 {
268         spin_lock(&unix_table_lock);
269         __unix_insert_socket(list, sk);
270         spin_unlock(&unix_table_lock);
271 }
272 
273 static struct sock *__unix_find_socket_byname(struct net *net,
274                                               struct sockaddr_un *sunname,
275                                               int len, int type, unsigned int hash)
276 {
277         struct sock *s;
278 
279         sk_for_each(s, &unix_socket_table[hash ^ type]) {
280                 struct unix_sock *u = unix_sk(s);
281 
282                 if (!net_eq(sock_net(s), net))
283                         continue;
284 
285                 if (u->addr->len == len &&
286                     !memcmp(u->addr->name, sunname, len))
287                         goto found;
288         }
289         s = NULL;
290 found:
291         return s;
292 }
293 
294 static inline struct sock *unix_find_socket_byname(struct net *net,
295                                                    struct sockaddr_un *sunname,
296                                                    int len, int type,
297                                                    unsigned int hash)
298 {
299         struct sock *s;
300 
301         spin_lock(&unix_table_lock);
302         s = __unix_find_socket_byname(net, sunname, len, type, hash);
303         if (s)
304                 sock_hold(s);
305         spin_unlock(&unix_table_lock);
306         return s;
307 }
308 
309 static struct sock *unix_find_socket_byinode(struct inode *i)
310 {
311         struct sock *s;
312 
313         spin_lock(&unix_table_lock);
314         sk_for_each(s,
315                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
316                 struct dentry *dentry = unix_sk(s)->path.dentry;
317 
318                 if (dentry && d_backing_inode(dentry) == i) {
319                         sock_hold(s);
320                         goto found;
321                 }
322         }
323         s = NULL;
324 found:
325         spin_unlock(&unix_table_lock);
326         return s;
327 }
328 
329 /* Support code for asymmetrically connected dgram sockets
330  *
331  * If a datagram socket is connected to a socket not itself connected
332  * to the first socket (eg, /dev/log), clients may only enqueue more
333  * messages if the present receive queue of the server socket is not
334  * "too large". This means there's a second writeability condition
335  * poll and sendmsg need to test. The dgram recv code will do a wake
336  * up on the peer_wait wait queue of a socket upon reception of a
337  * datagram which needs to be propagated to sleeping would-be writers
338  * since these might not have sent anything so far. This can't be
339  * accomplished via poll_wait because the lifetime of the server
340  * socket might be less than that of its clients if these break their
341  * association with it or if the server socket is closed while clients
342  * are still connected to it and there's no way to inform "a polling
343  * implementation" that it should let go of a certain wait queue
344  *
345  * In order to propagate a wake up, a wait_queue_entry_t of the client
346  * socket is enqueued on the peer_wait queue of the server socket
347  * whose wake function does a wake_up on the ordinary client socket
348  * wait queue. This connection is established whenever a write (or
349  * poll for write) hit the flow control condition and broken when the
350  * association to the server socket is dissolved or after a wake up
351  * was relayed.
352  */
353 
354 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
355                                       void *key)
356 {
357         struct unix_sock *u;
358         wait_queue_head_t *u_sleep;
359 
360         u = container_of(q, struct unix_sock, peer_wake);
361 
362         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
363                             q);
364         u->peer_wake.private = NULL;
365 
366         /* relaying can only happen while the wq still exists */
367         u_sleep = sk_sleep(&u->sk);
368         if (u_sleep)
369                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
370 
371         return 0;
372 }
373 
374 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
375 {
376         struct unix_sock *u, *u_other;
377         int rc;
378 
379         u = unix_sk(sk);
380         u_other = unix_sk(other);
381         rc = 0;
382         spin_lock(&u_other->peer_wait.lock);
383 
384         if (!u->peer_wake.private) {
385                 u->peer_wake.private = other;
386                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
387 
388                 rc = 1;
389         }
390 
391         spin_unlock(&u_other->peer_wait.lock);
392         return rc;
393 }
394 
395 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
396                                             struct sock *other)
397 {
398         struct unix_sock *u, *u_other;
399 
400         u = unix_sk(sk);
401         u_other = unix_sk(other);
402         spin_lock(&u_other->peer_wait.lock);
403 
404         if (u->peer_wake.private == other) {
405                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
406                 u->peer_wake.private = NULL;
407         }
408 
409         spin_unlock(&u_other->peer_wait.lock);
410 }
411 
412 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
413                                                    struct sock *other)
414 {
415         unix_dgram_peer_wake_disconnect(sk, other);
416         wake_up_interruptible_poll(sk_sleep(sk),
417                                    EPOLLOUT |
418                                    EPOLLWRNORM |
419                                    EPOLLWRBAND);
420 }
421 
422 /* preconditions:
423  *      - unix_peer(sk) == other
424  *      - association is stable
425  */
426 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
427 {
428         int connected;
429 
430         connected = unix_dgram_peer_wake_connect(sk, other);
431 
432         /* If other is SOCK_DEAD, we want to make sure we signal
433          * POLLOUT, such that a subsequent write() can get a
434          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
435          * to other and its full, we will hang waiting for POLLOUT.
436          */
437         if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
438                 return 1;
439 
440         if (connected)
441                 unix_dgram_peer_wake_disconnect(sk, other);
442 
443         return 0;
444 }
445 
446 static int unix_writable(const struct sock *sk)
447 {
448         return sk->sk_state != TCP_LISTEN &&
449                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
450 }
451 
452 static void unix_write_space(struct sock *sk)
453 {
454         struct socket_wq *wq;
455 
456         rcu_read_lock();
457         if (unix_writable(sk)) {
458                 wq = rcu_dereference(sk->sk_wq);
459                 if (skwq_has_sleeper(wq))
460                         wake_up_interruptible_sync_poll(&wq->wait,
461                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
462                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
463         }
464         rcu_read_unlock();
465 }
466 
467 /* When dgram socket disconnects (or changes its peer), we clear its receive
468  * queue of packets arrived from previous peer. First, it allows to do
469  * flow control based only on wmem_alloc; second, sk connected to peer
470  * may receive messages only from that peer. */
471 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
472 {
473         if (!skb_queue_empty(&sk->sk_receive_queue)) {
474                 skb_queue_purge(&sk->sk_receive_queue);
475                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
476 
477                 /* If one link of bidirectional dgram pipe is disconnected,
478                  * we signal error. Messages are lost. Do not make this,
479                  * when peer was not connected to us.
480                  */
481                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
482                         other->sk_err = ECONNRESET;
483                         other->sk_error_report(other);
484                 }
485         }
486 }
487 
488 static void unix_sock_destructor(struct sock *sk)
489 {
490         struct unix_sock *u = unix_sk(sk);
491 
492         skb_queue_purge(&sk->sk_receive_queue);
493 
494         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
495         WARN_ON(!sk_unhashed(sk));
496         WARN_ON(sk->sk_socket);
497         if (!sock_flag(sk, SOCK_DEAD)) {
498                 pr_info("Attempt to release alive unix socket: %p\n", sk);
499                 return;
500         }
501 
502         if (u->addr)
503                 unix_release_addr(u->addr);
504 
505         atomic_long_dec(&unix_nr_socks);
506         local_bh_disable();
507         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
508         local_bh_enable();
509 #ifdef UNIX_REFCNT_DEBUG
510         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
511                 atomic_long_read(&unix_nr_socks));
512 #endif
513 }
514 
    /*
     * Tear down a unix socket: unhash it, orphan it and mark it closed,
     * detach it from its peer, drain its receive queue, release its
     * filesystem path and drop a reference on @sk.
     *
     * @sk:      socket being released
     * @embrion: non-zero when called for a socket found on a listener's
     *           receive queue (see the recursive call below); a
     *           stream/seqpacket peer then gets ECONNRESET even when @sk's
     *           own receive queue is empty.
     */
515 static void unix_release_sock(struct sock *sk, int embrion)
516 {
517         struct unix_sock *u = unix_sk(sk);
518         struct path path;
519         struct sock *skpair;
520         struct sk_buff *skb;
521         int state;
522 
523         unix_remove_socket(sk);
524 
525         /* Clear state */
526         unix_state_lock(sk);
527         sock_orphan(sk);
528         sk->sk_shutdown = SHUTDOWN_MASK;
            /* Take over the path so it can be put after the lock is dropped. */
529         path         = u->path;
530         u->path.dentry = NULL;
531         u->path.mnt = NULL;
            /* Remember the old state: the queue drain below treats listeners specially. */
532         state = sk->sk_state;
533         sk->sk_state = TCP_CLOSE;
534         unix_state_unlock(sk);
535 
536         wake_up_interruptible_all(&u->peer_wait);
537 
538         skpair = unix_peer(sk);
539 
540         if (skpair != NULL) {
541                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
542                         unix_state_lock(skpair);
543                         /* No more writes */
544                         skpair->sk_shutdown = SHUTDOWN_MASK;
                            /* Unread data (or an embryo) on our side means the peer's data was lost. */
545                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
546                                 skpair->sk_err = ECONNRESET;
547                         unix_state_unlock(skpair);
548                         skpair->sk_state_change(skpair);
549                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
550                 }
551 
552                 unix_dgram_peer_wake_disconnect(sk, skpair);
553                 sock_put(skpair); /* It may now die */
554                 unix_peer(sk) = NULL;
555         }
556 
557         /* Try to flush out this socket. Throw out buffers at least */
558 
559         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                    /* For a former listener, each queued skb's ->sk is released recursively as an embryo. */
560                 if (state == TCP_LISTEN)
561                         unix_release_sock(skb->sk, 1);
562                 /* passed fds are erased in the kfree_skb hook        */
563                 UNIXCB(skb).consumed = skb->len;
564                 kfree_skb(skb);
565         }
566 
567         if (path.dentry)
568                 path_put(&path);
569 
570         sock_put(sk);
571 
572         /* ---- Socket is dead now and most probably destroyed ---- */
573 
574         /*
575          * Fixme: BSD difference: In BSD all sockets connected to us get
576          *        ECONNRESET and we die on the spot. In Linux we behave
577          *        like files and pipes do and wait for the last
578          *        dereference.
579          *
580          * Can't we simply set sock->err?
581          *
582          *        What the above comment does talk about? --ANK(980817)
583          */
584 
585         if (unix_tot_inflight)
586                 unix_gc();              /* Garbage collect fds */
587 }
588 
589 static void init_peercred(struct sock *sk)
590 {
591         put_pid(sk->sk_peer_pid);
592         if (sk->sk_peer_cred)
593                 put_cred(sk->sk_peer_cred);
594         sk->sk_peer_pid  = get_pid(task_tgid(current));
595         sk->sk_peer_cred = get_current_cred();
596 }
597 
598 static void copy_peercred(struct sock *sk, struct sock *peersk)
599 {
600         put_pid(sk->sk_peer_pid);
601         if (sk->sk_peer_cred)
602                 put_cred(sk->sk_peer_cred);
603         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
604         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
605 }
606 
607 static int unix_listen(struct socket *sock, int backlog)
608 {
609         int err;
610         struct sock *sk = sock->sk;
611         struct unix_sock *u = unix_sk(sk);
612         struct pid *old_pid = NULL;
613 
614         err = -EOPNOTSUPP;
615         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
616                 goto out;       /* Only stream/seqpacket sockets accept */
617         err = -EINVAL;
618         if (!u->addr)
619                 goto out;       /* No listens on an unbound socket */
620         unix_state_lock(sk);
621         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
622                 goto out_unlock;
623         if (backlog > sk->sk_max_ack_backlog)
624                 wake_up_interruptible_all(&u->peer_wait);
625         sk->sk_max_ack_backlog  = backlog;
626         sk->sk_state            = TCP_LISTEN;
627         /* set credentials so connect can copy them */
628         init_peercred(sk);
629         err = 0;
630 
631 out_unlock:
632         unix_state_unlock(sk);
633         put_pid(old_pid);
634 out:
635         return err;
636 }
637 
    /*
     * Forward declarations of the handlers referenced by the proto_ops
     * tables below (unix_stream_ops, unix_dgram_ops, unix_seqpacket_ops).
     */
638 static int unix_release(struct socket *);
639 static int unix_bind(struct socket *, struct sockaddr *, int);
640 static int unix_stream_connect(struct socket *, struct sockaddr *,
641                                int addr_len, int flags);
642 static int unix_socketpair(struct socket *, struct socket *);
643 static int unix_accept(struct socket *, struct socket *, int, bool);
644 static int unix_getname(struct socket *, struct sockaddr *, int);
645 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
646 static __poll_t unix_dgram_poll(struct file *, struct socket *,
647                                     poll_table *);
648 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
649 static int unix_shutdown(struct socket *, int);
650 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
651 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
652 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
653                                     size_t size, int flags);
654 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
655                                        struct pipe_inode_info *, size_t size,
656                                        unsigned int flags);
657 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
658 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
659 static int unix_dgram_connect(struct socket *, struct sockaddr *,
660                               int, int);
661 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
662 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
663                                   int);
664 
665 static int unix_set_peek_off(struct sock *sk, int val)
666 {
667         struct unix_sock *u = unix_sk(sk);
668 
669         if (mutex_lock_interruptible(&u->iolock))
670                 return -EINTR;
671 
672         sk->sk_peek_off = val;
673         mutex_unlock(&u->iolock);
674 
675         return 0;
676 }
677 
678 
    /*
     * Socket operations installed by unix_create() for SOCK_STREAM sockets.
     * setsockopt, getsockopt and mmap use the generic sock_no_* stubs.
     */
679 static const struct proto_ops unix_stream_ops = {
680         .family =       PF_UNIX,
681         .owner =        THIS_MODULE,
682         .release =      unix_release,
683         .bind =         unix_bind,
684         .connect =      unix_stream_connect,
685         .socketpair =   unix_socketpair,
686         .accept =       unix_accept,
687         .getname =      unix_getname,
688         .poll =         unix_poll,
689         .ioctl =        unix_ioctl,
690         .listen =       unix_listen,
691         .shutdown =     unix_shutdown,
692         .setsockopt =   sock_no_setsockopt,
693         .getsockopt =   sock_no_getsockopt,
694         .sendmsg =      unix_stream_sendmsg,
695         .recvmsg =      unix_stream_recvmsg,
696         .mmap =         sock_no_mmap,
697         .sendpage =     unix_stream_sendpage,
698         .splice_read =  unix_stream_splice_read,
699         .set_peek_off = unix_set_peek_off,
700 };
701 
    /*
     * Socket operations installed by unix_create() for SOCK_DGRAM sockets
     * (SOCK_RAW requests are converted to SOCK_DGRAM there and land here
     * too).  accept, listen, setsockopt, getsockopt, mmap and sendpage use
     * the generic sock_no_* stubs.
     */
702 static const struct proto_ops unix_dgram_ops = {
703         .family =       PF_UNIX,
704         .owner =        THIS_MODULE,
705         .release =      unix_release,
706         .bind =         unix_bind,
707         .connect =      unix_dgram_connect,
708         .socketpair =   unix_socketpair,
709         .accept =       sock_no_accept,
710         .getname =      unix_getname,
711         .poll =         unix_dgram_poll,
712         .ioctl =        unix_ioctl,
713         .listen =       sock_no_listen,
714         .shutdown =     unix_shutdown,
715         .setsockopt =   sock_no_setsockopt,
716         .getsockopt =   sock_no_getsockopt,
717         .sendmsg =      unix_dgram_sendmsg,
718         .recvmsg =      unix_dgram_recvmsg,
719         .mmap =         sock_no_mmap,
720         .sendpage =     sock_no_sendpage,
721         .set_peek_off = unix_set_peek_off,
722 };
723 
    /*
     * Socket operations installed by unix_create() for SOCK_SEQPACKET
     * sockets.  Connection handling (connect/accept/listen) is shared with
     * the stream table, while poll uses unix_dgram_poll; sendmsg/recvmsg
     * have seqpacket-specific handlers.
     */
724 static const struct proto_ops unix_seqpacket_ops = {
725         .family =       PF_UNIX,
726         .owner =        THIS_MODULE,
727         .release =      unix_release,
728         .bind =         unix_bind,
729         .connect =      unix_stream_connect,
730         .socketpair =   unix_socketpair,
731         .accept =       unix_accept,
732         .getname =      unix_getname,
733         .poll =         unix_dgram_poll,
734         .ioctl =        unix_ioctl,
735         .listen =       unix_listen,
736         .shutdown =     unix_shutdown,
737         .setsockopt =   sock_no_setsockopt,
738         .getsockopt =   sock_no_getsockopt,
739         .sendmsg =      unix_seqpacket_sendmsg,
740         .recvmsg =      unix_seqpacket_recvmsg,
741         .mmap =         sock_no_mmap,
742         .sendpage =     sock_no_sendpage,
743         .set_peek_off = unix_set_peek_off,
744 };
745 
    /*
     * Protocol descriptor handed to sk_alloc() in unix_create1().
     * obj_size is sizeof(struct unix_sock).
     */
746 static struct proto unix_proto = {
747         .name                   = "UNIX",
748         .owner                  = THIS_MODULE,
749         .obj_size               = sizeof(struct unix_sock),
750 };
751 
752 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
753 {
754         struct sock *sk = NULL;
755         struct unix_sock *u;
756 
757         atomic_long_inc(&unix_nr_socks);
758         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
759                 goto out;
760 
761         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
762         if (!sk)
763                 goto out;
764 
765         sock_init_data(sock, sk);
766 
767         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
768         sk->sk_write_space      = unix_write_space;
769         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
770         sk->sk_destruct         = unix_sock_destructor;
771         u         = unix_sk(sk);
772         u->path.dentry = NULL;
773         u->path.mnt = NULL;
774         spin_lock_init(&u->lock);
775         atomic_long_set(&u->inflight, 0);
776         INIT_LIST_HEAD(&u->link);
777         mutex_init(&u->iolock); /* single task reading lock */
778         mutex_init(&u->bindlock); /* single task binding lock */
779         init_waitqueue_head(&u->peer_wait);
780         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
781         unix_insert_socket(unix_sockets_unbound(sk), sk);
782 out:
783         if (sk == NULL)
784                 atomic_long_dec(&unix_nr_socks);
785         else {
786                 local_bh_disable();
787                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
788                 local_bh_enable();
789         }
790         return sk;
791 }
792 
793 static int unix_create(struct net *net, struct socket *sock, int protocol,
794                        int kern)
795 {
796         if (protocol && protocol != PF_UNIX)
797                 return -EPROTONOSUPPORT;
798 
799         sock->state = SS_UNCONNECTED;
800 
801         switch (sock->type) {
802         case SOCK_STREAM:
803                 sock->ops = &unix_stream_ops;
804                 break;
805                 /*
806                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
807                  *      nothing uses it.
808                  */
809         case SOCK_RAW:
810                 sock->type = SOCK_DGRAM;
811                 /* fall through */
812         case SOCK_DGRAM:
813                 sock->ops = &unix_dgram_ops;
814                 break;
815         case SOCK_SEQPACKET:
816                 sock->ops = &unix_seqpacket_ops;
817                 break;
818         default:
819                 return -ESOCKTNOSUPPORT;
820         }
821 
822         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
823 }
824 
825 static int unix_release(struct socket *sock)
826 {
827         struct sock *sk = sock->sk;
828 
829         if (!sk)
830                 return 0;
831 
832         unix_release_sock(sk, 0);
833         sock->sk = NULL;
834 
835         return 0;
836 }
837 
838 static int unix_autobind(struct socket *sock)
839 {
840         struct sock *sk = sock->sk;
841         struct net *net = sock_net(sk);
842         struct unix_sock *u = unix_sk(sk);
843         static u32 ordernum = 1;
844         struct unix_address *addr;
845         int err;
846         unsigned int retries = 0;
847 
848         err = mutex_lock_interruptible(&u->bindlock);
849         if (err)
850                 return err;
851 
852         err = 0;
853         if (u->addr)
854                 goto out;
855 
856         err = -ENOMEM;
857         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
858         if (!addr)
859                 goto out;
860 
861         addr->name->sun_family = AF_UNIX;
862         refcount_set(&addr->refcnt, 1);
863 
864 retry:
865         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
866         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
867 
868         spin_lock(&unix_table_lock);
869         ordernum = (ordernum+1)&0xFFFFF;
870 
871         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
872                                       addr->hash)) {
873                 spin_unlock(&unix_table_lock);
874                 /*
875                  * __unix_find_socket_byname() may take long time if many names
876                  * are already in use.
877                  */
878                 cond_resched();
879                 /* Give up if all names seems to be in use. */
880                 if (retries++ == 0xFFFFF) {
881                         err = -ENOSPC;
882                         kfree(addr);
883                         goto out;
884                 }
885                 goto retry;
886         }
887         addr->hash ^= sk->sk_type;
888 
889         __unix_remove_socket(sk);
890         smp_store_release(&u->addr, addr);
891         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
892         spin_unlock(&unix_table_lock);
893         err = 0;
894 
895 out:    mutex_unlock(&u->bindlock);
896         return err;
897 }
898 
/*
 * Look up the socket addressed by @sunname.
 *
 * When the first byte of sun_path is zero the socket is searched by name in
 * the hash table.  Otherwise sun_path is resolved as a filesystem path: the
 * inode must be writable by the caller, must be a socket inode, and must
 * have a socket of the requested @type bound to it.
 *
 * Returns the socket found (the failure paths drop it with sock_put(), so a
 * reference is presumably held on return), or NULL with *@error set to a
 * negative errno.  *@error is left untouched on success.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct inode *inode;
	struct path path;
	struct sock *u;
	int err;

	if (!sunname->sun_path[0]) {
		/* Lookup by name in the hash table. */
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!u) {
			err = -ECONNREFUSED;
			goto fail;
		}
		if (unix_sk(u)->path.dentry)
			touch_atime(&unix_sk(u)->path);
		return u;
	}

	/* Lookup through the filesystem. */
	err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	inode = d_backing_inode(path.dentry);
	err = inode_permission(inode, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(inode->i_mode))
		goto put_fail;

	u = unix_find_socket_byinode(inode);
	if (!u)
		goto put_fail;

	if (u->sk_type != type) {
		/* Right inode, wrong kind of socket. */
		path_put(&path);
		sock_put(u);
		err = -EPROTOTYPE;
		goto fail;
	}

	touch_atime(&path);
	path_put(&path);
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
953 
954 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
955 {
956         struct dentry *dentry;
957         struct path path;
958         int err = 0;
959         /*
960          * Get the parent directory, calculate the hash for last
961          * component.
962          */
963         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
964         err = PTR_ERR(dentry);
965         if (IS_ERR(dentry))
966                 return err;
967 
968         /*
969          * All right, let's create it.
970          */
971         err = security_path_mknod(&path, dentry, mode, 0);
972         if (!err) {
973                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
974                 if (!err) {
975                         res->mnt = mntget(path.mnt);
976                         res->dentry = dget(dentry);
977                 }
978         }
979         done_path_create(&path, dentry);
980         return err;
981 }
982 
/*
 * Bind @sock to the address in @uaddr (@addr_len bytes).
 *
 * - An address too short to hold sun_family, or whose family is not
 *   AF_UNIX, is rejected with -EINVAL.
 * - An address consisting of the family only (addr_len == sizeof(short))
 *   is handed to unix_autobind().
 * - If sun_path starts with a non-zero byte, a socket node is created in
 *   the filesystem with unix_mknod() (-EEXIST is reported as -EADDRINUSE)
 *   and the socket is hashed by the inode number of that node.
 * - Otherwise the name is looked up in the hash table under
 *   unix_table_lock and -EADDRINUSE is returned if it is already taken.
 *
 * A socket that already has an address gets -EINVAL.
 * Returns 0 on success or a negative errno.
 *
 * NOTE(review): unix_mknod() runs before bindlock is taken and before the
 * u->addr check; on those later failures only path_put() is done here, so
 * the freshly created node appears to stay in the filesystem - confirm.
 */
983 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
984 {
985         struct sock *sk = sock->sk;
986         struct net *net = sock_net(sk);
987         struct unix_sock *u = unix_sk(sk);
988         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
989         char *sun_path = sunaddr->sun_path;
990         int err;
991         unsigned int hash;
992         struct unix_address *addr;
993         struct hlist_head *list;
994         struct path path = { };
995 
996         err = -EINVAL;
997         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
998             sunaddr->sun_family != AF_UNIX)
999                 goto out;
1000 
         /* Family only, no name: let the kernel pick one. */
1001         if (addr_len == sizeof(short)) {
1002                 err = unix_autobind(sock);
1003                 goto out;
1004         }
1005 
         /* On success unix_mkname() returns the length to use from here on. */
1006         err = unix_mkname(sunaddr, addr_len, &hash);
1007         if (err < 0)
1008                 goto out;
1009         addr_len = err;
1010 
         /* Filesystem name: create the node, honouring the caller's umask. */
1011         if (sun_path[0]) {
1012                 umode_t mode = S_IFSOCK |
1013                        (SOCK_INODE(sock)->i_mode & ~current_umask());
1014                 err = unix_mknod(sun_path, mode, &path);
1015                 if (err) {
1016                         if (err == -EEXIST)
1017                                 err = -EADDRINUSE;
1018                         goto out;
1019                 }
1020         }
1021 
1022         err = mutex_lock_interruptible(&u->bindlock);
1023         if (err)
1024                 goto out_put;
1025 
         /* Binding twice is not allowed. */
1026         err = -EINVAL;
1027         if (u->addr)
1028                 goto out_up;
1029 
1030         err = -ENOMEM;
1031         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1032         if (!addr)
1033                 goto out_up;
1034 
1035         memcpy(addr->name, sunaddr, addr_len);
1036         addr->len = addr_len;
1037         addr->hash = hash ^ sk->sk_type;
1038         refcount_set(&addr->refcnt, 1);
1039 
         /* Both branches below leave unix_table_lock held. */
1040         if (sun_path[0]) {
                 /* Filesystem sockets are chained by inode number. */
1041                 addr->hash = UNIX_HASH_SIZE;
1042                 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1043                 spin_lock(&unix_table_lock);
1044                 u->path = path;
1045                 list = &unix_socket_table[hash];
1046         } else {
1047                 spin_lock(&unix_table_lock);
1048                 err = -EADDRINUSE;
1049                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1050                                               sk->sk_type, hash)) {
1051                         unix_release_addr(addr);
1052                         goto out_unlock;
1053                 }
1054 
1055                 list = &unix_socket_table[addr->hash];
1056         }
1057 
         /* Publish the address and move the socket to its new hash chain. */
1058         err = 0;
1059         __unix_remove_socket(sk);
1060         smp_store_release(&u->addr, addr);
1061         __unix_insert_socket(list, sk);
1062 
1063 out_unlock:
1064         spin_unlock(&unix_table_lock);
1065 out_up:
1066         mutex_unlock(&u->bindlock);
1067 out_put:
         /* On success the path reference now belongs to u->path. */
1068         if (err)
1069                 path_put(&path);
1070 out:
1071         return err;
1072 }
1073 
/*
 * Take the state locks of @sk1 and @sk2.  The lower-addressed socket is
 * always locked first so that concurrent callers use the same order.  If
 * @sk2 is NULL or the same socket as @sk1, only @sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1088 
/*
 * Counterpart of unix_state_double_lock(): release the state lock of @sk1
 * and, when @sk2 is a distinct non-NULL socket, of @sk2 as well.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (unlikely(sk1 == sk2) || !sk2)
		return;

	unix_state_unlock(sk2);
}
1098 
1099 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1100                               int alen, int flags)
1101 {
1102         struct sock *sk = sock->sk;
1103         struct net *net = sock_net(sk);
1104         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1105         struct sock *other;
1106         unsigned int hash;
1107         int err;
1108 
1109         err = -EINVAL;
1110         if (alen < offsetofend(struct sockaddr, sa_family))
1111                 goto out;
1112 
1113         if (addr->sa_family != AF_UNSPEC) {
1114                 err = unix_mkname(sunaddr, alen, &hash);
1115                 if (err < 0)
1116                         goto out;
1117                 alen = err;
1118 
1119                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1120                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1121                         goto out;
1122 
1123 restart:
1124                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1125                 if (!other)
1126                         goto out;
1127 
1128                 unix_state_double_lock(sk, other);
1129 
1130                 /* Apparently VFS overslept socket death. Retry. */
1131                 if (sock_flag(other, SOCK_DEAD)) {
1132                         unix_state_double_unlock(sk, other);
1133                         sock_put(other);
1134                         goto restart;
1135                 }
1136 
1137                 err = -EPERM;
1138                 if (!unix_may_send(sk, other))
1139                         goto out_unlock;
1140 
1141                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1142                 if (err)
1143                         goto out_unlock;
1144 
1145         } else {
1146                 /*
1147                  *      1003.1g breaking connected state with AF_UNSPEC
1148                  */
1149                 other = NULL;
1150                 unix_state_double_lock(sk, other);
1151         }
1152 
1153         /*
1154          * If it was connected, reconnect.
1155          */
1156         if (unix_peer(sk)) {
1157                 struct sock *old_peer = unix_peer(sk);
1158                 unix_peer(sk) = other;
1159                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1160 
1161                 unix_state_double_unlock(sk, other);
1162 
1163                 if (other != old_peer)
1164                         unix_dgram_disconnected(sk, old_peer);
1165                 sock_put(old_peer);
1166         } else {
1167                 unix_peer(sk) = other;
1168                 unix_state_double_unlock(sk, other);
1169         }
1170         return 0;
1171 
1172 out_unlock:
1173         unix_state_double_unlock(sk, other);
1174         sock_put(other);
1175 out:
1176         return err;
1177 }
1178 
1179 static long unix_wait_for_peer(struct sock *other, long timeo)
1180 {
1181         struct unix_sock *u = unix_sk(other);
1182         int sched;
1183         DEFINE_WAIT(wait);
1184 
1185         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1186 
1187         sched = !sock_flag(other, SOCK_DEAD) &&
1188                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1189                 unix_recvq_full(other);
1190 
1191         unix_state_unlock(other);
1192 
1193         if (sched)
1194                 timeo = schedule_timeout(timeo);
1195 
1196         finish_wait(&u->peer_wait, &wait);
1197         return timeo;
1198 }
1199 
/*
 * Connect @sock to the listening socket named by @uaddr.
 *
 * A new sock (newsk) and a one-byte skb are allocated up front.  The
 * listener is then looked up and locked; if its receive queue is full the
 * caller waits (bounded by the send timeout) and starts over.  Once both
 * state locks are held, sk and newsk are made peers of each other, newsk
 * inherits the listener's address and path, and the skb is queued on the
 * listener's receive queue, where unix_accept() dequeues it.
 *
 * Returns 0 on success or a negative errno; on failure the skb and newsk
 * are released and the reference on the listener is dropped.
 */
1200 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1201                                int addr_len, int flags)
1202 {
1203         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1204         struct sock *sk = sock->sk;
1205         struct net *net = sock_net(sk);
1206         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1207         struct sock *newsk = NULL;
1208         struct sock *other = NULL;
1209         struct sk_buff *skb = NULL;
1210         unsigned int hash;
1211         int st;
1212         int err;
1213         long timeo;
1214 
1215         err = unix_mkname(sunaddr, addr_len, &hash);
1216         if (err < 0)
1217                 goto out;
1218         addr_len = err;
1219 
         /* A SOCK_PASSCRED socket without an address is autobound first. */
1220         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1221             (err = unix_autobind(sock)) != 0)
1222                 goto out;
1223 
1224         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1225 
1226         /* First of all allocate resources.
1227            If we will make it after state is locked,
1228            we will have to recheck all again in any case.
1229          */
1230 
1231         err = -ENOMEM;
1232 
1233         /* create new sock for complete connection */
1234         newsk = unix_create1(sock_net(sk), NULL, 0);
1235         if (newsk == NULL)
1236                 goto out;
1237 
1238         /* Allocate skb for sending to listening sock */
1239         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1240         if (skb == NULL)
1241                 goto out;
1242 
1243 restart:
1244         /*  Find listening sock. */
1245         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1246         if (!other)
1247                 goto out;
1248 
1249         /* Latch state of peer */
1250         unix_state_lock(other);
1251 
1252         /* Apparently VFS overslept socket death. Retry. */
1253         if (sock_flag(other, SOCK_DEAD)) {
1254                 unix_state_unlock(other);
1255                 sock_put(other);
1256                 goto restart;
1257         }
1258 
1259         err = -ECONNREFUSED;
1260         if (other->sk_state != TCP_LISTEN)
1261                 goto out_unlock;
1262         if (other->sk_shutdown & RCV_SHUTDOWN)
1263                 goto out_unlock;
1264 
         /* Listener's queue is full: fail if non-blocking, else wait and retry. */
1265         if (unix_recvq_full(other)) {
1266                 err = -EAGAIN;
1267                 if (!timeo)
1268                         goto out_unlock;
1269 
                 /* unix_wait_for_peer() drops other's state lock. */
1270                 timeo = unix_wait_for_peer(other, timeo);
1271 
1272                 err = sock_intr_errno(timeo);
1273                 if (signal_pending(current))
1274                         goto out;
1275                 sock_put(other);
1276                 goto restart;
1277         }
1278 
1279         /* Latch our state.
1280 
1281            It is tricky place. We need to grab our state lock and cannot
1282            drop lock on peer. It is dangerous because deadlock is
1283            possible. Connect to self case and simultaneous
1284            attempt to connect are eliminated by checking socket
1285            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1286            check this before attempt to grab lock.
1287 
1288            Well, and we have to recheck the state after socket locked.
1289          */
1290         st = sk->sk_state;
1291 
1292         switch (st) {
1293         case TCP_CLOSE:
1294                 /* This is ok... continue with connect */
1295                 break;
1296         case TCP_ESTABLISHED:
1297                 /* Socket is already connected */
1298                 err = -EISCONN;
1299                 goto out_unlock;
1300         default:
1301                 err = -EINVAL;
1302                 goto out_unlock;
1303         }
1304 
1305         unix_state_lock_nested(sk);
1306 
         /* Our state changed while we were unlocked: start over. */
1307         if (sk->sk_state != st) {
1308                 unix_state_unlock(sk);
1309                 unix_state_unlock(other);
1310                 sock_put(other);
1311                 goto restart;
1312         }
1313 
1314         err = security_unix_stream_connect(sk, other, newsk);
1315         if (err) {
1316                 unix_state_unlock(sk);
1317                 goto out_unlock;
1318         }
1319 
1320         /* The way is open! Quickly set all the necessary fields... */
1321 
         /* newsk's peer pointer holds a reference on sk. */
1322         sock_hold(sk);
1323         unix_peer(newsk)        = sk;
1324         newsk->sk_state         = TCP_ESTABLISHED;
1325         newsk->sk_type          = sk->sk_type;
1326         init_peercred(newsk);
1327         newu = unix_sk(newsk);
1328         RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1329         otheru = unix_sk(other);
1330 
1331         /* copy address information from listening to new sock
1332          *
1333          * The contents of *(otheru->addr) and otheru->path
1334          * are seen fully set up here, since we have found
1335          * otheru in hash under unix_table_lock.  Insertion
1336          * into the hash chain we'd found it in had been done
1337          * in an earlier critical area protected by unix_table_lock,
1338          * the same one where we'd set *(otheru->addr) contents,
1339          * as well as otheru->path and otheru->addr itself.
1340          *
1341          * Using smp_store_release() here to set newu->addr
1342          * is enough to make those stores, as well as stores
1343          * to newu->path visible to anyone who gets newu->addr
1344          * by smp_load_acquire().  IOW, the same warranties
1345          * as for unix_sock instances bound in unix_bind() or
1346          * in unix_autobind().
1347          */
1348         if (otheru->path.dentry) {
1349                 path_get(&otheru->path);
1350                 newu->path = otheru->path;
1351         }
1352         refcount_inc(&otheru->addr->refcnt);
1353         smp_store_release(&newu->addr, otheru->addr);
1354 
1355         /* Set credentials */
1356         copy_peercred(sk, other);
1357 
1358         sock->state     = SS_CONNECTED;
1359         sk->sk_state    = TCP_ESTABLISHED;
         /* sk's peer pointer holds a reference on newsk. */
1360         sock_hold(newsk);
1361 
1362         smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1363         unix_peer(sk)   = newsk;
1364 
1365         unix_state_unlock(sk);
1366 
1367         /* take ten and send info to listening sock */
1368         spin_lock(&other->sk_receive_queue.lock);
1369         __skb_queue_tail(&other->sk_receive_queue, skb);
1370         spin_unlock(&other->sk_receive_queue.lock);
1371         unix_state_unlock(other);
1372         other->sk_data_ready(other);
1373         sock_put(other);
1374         return 0;
1375 
         /* Failure with other's state lock still held. */
1376 out_unlock:
1377         if (other)
1378                 unix_state_unlock(other);
1379 
         /* Common failure exit: release whatever was acquired so far. */
1380 out:
1381         kfree_skb(skb);
1382         if (newsk)
1383                 unix_release_sock(newsk, 0);
1384         if (other)
1385                 sock_put(other);
1386         return err;
1387 }
1388 
1389 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1390 {
1391         struct sock *ska = socka->sk, *skb = sockb->sk;
1392 
1393         /* Join our sockets back to back */
1394         sock_hold(ska);
1395         sock_hold(skb);
1396         unix_peer(ska) = skb;
1397         unix_peer(skb) = ska;
1398         init_peercred(ska);
1399         init_peercred(skb);
1400 
1401         if (ska->sk_type != SOCK_DGRAM) {
1402                 ska->sk_state = TCP_ESTABLISHED;
1403                 skb->sk_state = TCP_ESTABLISHED;
1404                 socka->state  = SS_CONNECTED;
1405                 sockb->state  = SS_CONNECTED;
1406         }
1407         return 0;
1408 }
1409 
1410 static void unix_sock_inherit_flags(const struct socket *old,
1411                                     struct socket *new)
1412 {
1413         if (test_bit(SOCK_PASSCRED, &old->flags))
1414                 set_bit(SOCK_PASSCRED, &new->flags);
1415         if (test_bit(SOCK_PASSSEC, &old->flags))
1416                 set_bit(SOCK_PASSSEC, &new->flags);
1417 }
1418 
1419 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1420                        bool kern)
1421 {
1422         struct sock *sk = sock->sk;
1423         struct sock *tsk;
1424         struct sk_buff *skb;
1425         int err;
1426 
1427         err = -EOPNOTSUPP;
1428         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1429                 goto out;
1430 
1431         err = -EINVAL;
1432         if (sk->sk_state != TCP_LISTEN)
1433                 goto out;
1434 
1435         /* If socket state is TCP_LISTEN it cannot change (for now...),
1436          * so that no locks are necessary.
1437          */
1438 
1439         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1440         if (!skb) {
1441                 /* This means receive shutdown. */
1442                 if (err == 0)
1443                         err = -EINVAL;
1444                 goto out;
1445         }
1446 
1447         tsk = skb->sk;
1448         skb_free_datagram(sk, skb);
1449         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1450 
1451         /* attach accepted sock to socket */
1452         unix_state_lock(tsk);
1453         newsock->state = SS_CONNECTED;
1454         unix_sock_inherit_flags(sock, newsock);
1455         sock_graft(tsk, newsock);
1456         unix_state_unlock(tsk);
1457         return 0;
1458 
1459 out:
1460         return err;
1461 }
1462 
1463 
1464 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1465 {
1466         struct sock *sk = sock->sk;
1467         struct unix_address *addr;
1468         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1469         int err = 0;
1470 
1471         if (peer) {
1472                 sk = unix_peer_get(sk);
1473 
1474                 err = -ENOTCONN;
1475                 if (!sk)
1476                         goto out;
1477                 err = 0;
1478         } else {
1479                 sock_hold(sk);
1480         }
1481 
1482         addr = smp_load_acquire(&unix_sk(sk)->addr);
1483         if (!addr) {
1484                 sunaddr->sun_family = AF_UNIX;
1485                 sunaddr->sun_path[0] = 0;
1486                 err = sizeof(short);
1487         } else {
1488                 err = addr->len;
1489                 memcpy(sunaddr, addr->name, addr->len);
1490         }
1491         sock_put(sk);
1492 out:
1493         return err;
1494 }
1495 
1496 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1497 {
1498         int err = 0;
1499 
1500         UNIXCB(skb).pid  = get_pid(scm->pid);
1501         UNIXCB(skb).uid = scm->creds.uid;
1502         UNIXCB(skb).gid = scm->creds.gid;
1503         UNIXCB(skb).fp = NULL;
1504         unix_get_secdata(scm, skb);
1505         if (scm->fp && send_fds)
1506                 err = unix_attach_fds(scm, skb);
1507 
1508         skb->destructor = unix_destruct_scm;
1509         return err;
1510 }
1511 
1512 static bool unix_passcred_enabled(const struct socket *sock,
1513                                   const struct sock *other)
1514 {
1515         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1516                !other->sk_socket ||
1517                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1518 }
1519 
1520 /*
1521  * Some apps rely on write() giving SCM_CREDENTIALS
1522  * We include credentials if source or destination socket
1523  * asserted SOCK_PASSCRED.
1524  */
1525 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1526                             const struct sock *other)
1527 {
1528         if (UNIXCB(skb).pid)
1529                 return;
1530         if (unix_passcred_enabled(sock, other)) {
1531                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1532                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1533         }
1534 }
1535 
1536 static int maybe_init_creds(struct scm_cookie *scm,
1537                             struct socket *socket,
1538                             const struct sock *other)
1539 {
1540         int err;
1541         struct msghdr msg = { .msg_controllen = 0 };
1542 
1543         err = scm_send(socket, &msg, scm, false);
1544         if (err)
1545                 return err;
1546 
1547         if (unix_passcred_enabled(socket, other)) {
1548                 scm->pid = get_pid(task_tgid(current));
1549                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1550         }
1551         return err;
1552 }
1553 
1554 static bool unix_skb_scm_eq(struct sk_buff *skb,
1555                             struct scm_cookie *scm)
1556 {
1557         const struct unix_skb_parms *u = &UNIXCB(skb);
1558 
1559         return u->pid == scm->pid &&
1560                uid_eq(u->uid, scm->creds.uid) &&
1561                gid_eq(u->gid, scm->creds.gid) &&
1562                unix_secdata_eq(scm, skb);
1563 }
1564 
1565 /*
1566  *      Send AF_UNIX data.
1567  */
1568 
1569 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1570                               size_t len)
1571 {
1572         struct sock *sk = sock->sk;
1573         struct net *net = sock_net(sk);
1574         struct unix_sock *u = unix_sk(sk);
1575         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1576         struct sock *other = NULL;
1577         int namelen = 0; /* fake GCC */
1578         int err;
1579         unsigned int hash;
1580         struct sk_buff *skb;
1581         long timeo;
1582         struct scm_cookie scm;
1583         int data_len = 0;
1584         int sk_locked;
1585 
1586         wait_for_unix_gc();
1587         err = scm_send(sock, msg, &scm, false);
1588         if (err < 0)
1589                 return err;
1590 
1591         err = -EOPNOTSUPP;
1592         if (msg->msg_flags&MSG_OOB)
1593                 goto out;
1594 
1595         if (msg->msg_namelen) {
1596                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1597                 if (err < 0)
1598                         goto out;
1599                 namelen = err;
1600         } else {
1601                 sunaddr = NULL;
1602                 err = -ENOTCONN;
1603                 other = unix_peer_get(sk);
1604                 if (!other)
1605                         goto out;
1606         }
1607 
1608         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1609             && (err = unix_autobind(sock)) != 0)
1610                 goto out;
1611 
1612         err = -EMSGSIZE;
1613         if (len > sk->sk_sndbuf - 32)
1614                 goto out;
1615 
1616         if (len > SKB_MAX_ALLOC) {
1617                 data_len = min_t(size_t,
1618                                  len - SKB_MAX_ALLOC,
1619                                  MAX_SKB_FRAGS * PAGE_SIZE);
1620                 data_len = PAGE_ALIGN(data_len);
1621 
1622                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1623         }
1624 
1625         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1626                                    msg->msg_flags & MSG_DONTWAIT, &err,
1627                                    PAGE_ALLOC_COSTLY_ORDER);
1628         if (skb == NULL)
1629                 goto out;
1630 
1631         err = unix_scm_to_skb(&scm, skb, true);
1632         if (err < 0)
1633                 goto out_free;
1634 
1635         skb_put(skb, len - data_len);
1636         skb->data_len = data_len;
1637         skb->len = len;
1638         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1639         if (err)
1640                 goto out_free;
1641 
1642         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1643 
1644 restart:
1645         if (!other) {
1646                 err = -ECONNRESET;
1647                 if (sunaddr == NULL)
1648                         goto out_free;
1649 
1650                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1651                                         hash, &err);
1652                 if (other == NULL)
1653                         goto out_free;
1654         }
1655 
1656         if (sk_filter(other, skb) < 0) {
1657                 /* Toss the packet but do not return any error to the sender */
1658                 err = len;
1659                 goto out_free;
1660         }
1661 
1662         sk_locked = 0;
1663         unix_state_lock(other);
1664 restart_locked:
1665         err = -EPERM;
1666         if (!unix_may_send(sk, other))
1667                 goto out_unlock;
1668 
1669         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1670                 /*
1671                  *      Check with 1003.1g - what should
1672                  *      datagram error
1673                  */
1674                 unix_state_unlock(other);
1675                 sock_put(other);
1676 
1677                 if (!sk_locked)
1678                         unix_state_lock(sk);
1679 
1680                 err = 0;
1681                 if (unix_peer(sk) == other) {
1682                         unix_peer(sk) = NULL;
1683                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1684 
1685                         unix_state_unlock(sk);
1686 
1687                         unix_dgram_disconnected(sk, other);
1688                         sock_put(other);
1689                         err = -ECONNREFUSED;
1690                 } else {
1691                         unix_state_unlock(sk);
1692                 }
1693 
1694                 other = NULL;
1695                 if (err)
1696                         goto out_free;
1697                 goto restart;
1698         }
1699 
1700         err = -EPIPE;
1701         if (other->sk_shutdown & RCV_SHUTDOWN)
1702                 goto out_unlock;
1703 
1704         if (sk->sk_type != SOCK_SEQPACKET) {
1705                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1706                 if (err)
1707                         goto out_unlock;
1708         }
1709 
1710         /* other == sk && unix_peer(other) != sk if
1711          * - unix_peer(sk) == NULL, destination address bound to sk
1712          * - unix_peer(sk) == sk by time of get but disconnected before lock
1713          */
1714         if (other != sk &&
1715             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1716                 if (timeo) {
1717                         timeo = unix_wait_for_peer(other, timeo);
1718 
1719                         err = sock_intr_errno(timeo);
1720                         if (signal_pending(current))
1721                                 goto out_free;
1722 
1723                         goto restart;
1724                 }
1725 
1726                 if (!sk_locked) {
1727                         unix_state_unlock(other);
1728                         unix_state_double_lock(sk, other);
1729                 }
1730 
1731                 if (unix_peer(sk) != other ||
1732                     unix_dgram_peer_wake_me(sk, other)) {
1733                         err = -EAGAIN;
1734                         sk_locked = 1;
1735                         goto out_unlock;
1736                 }
1737 
1738                 if (!sk_locked) {
1739                         sk_locked = 1;
1740                         goto restart_locked;
1741                 }
1742         }
1743 
1744         if (unlikely(sk_locked))
1745                 unix_state_unlock(sk);
1746 
1747         if (sock_flag(other, SOCK_RCVTSTAMP))
1748                 __net_timestamp(skb);
1749         maybe_add_creds(skb, sock, other);
1750         skb_queue_tail(&other->sk_receive_queue, skb);
1751         unix_state_unlock(other);
1752         other->sk_data_ready(other);
1753         sock_put(other);
1754         scm_destroy(&scm);
1755         return len;
1756 
1757 out_unlock:
1758         if (sk_locked)
1759                 unix_state_unlock(sk);
1760         unix_state_unlock(other);
1761 out_free:
1762         kfree_skb(skb);
1763 out:
1764         if (other)
1765                 sock_put(other);
1766         scm_destroy(&scm);
1767         return err;
1768 }
1769 
1770 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1771  * bytes, and a minimum of a full page.
1772  */
/* unix_stream_sendmsg() uses UNIX_SKB_FRAGS_SZ to cap the paged part of
 * each skb it builds, and passes get_order(UNIX_SKB_FRAGS_SZ) as the
 * allocation order argument to sock_alloc_send_pskb().
 */
1773 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1774 
/*
 * unix_stream_sendmsg - queue the payload of @msg on the connected peer.
 *
 * @sock: sending socket
 * @msg:  message header; msg_iter supplies the data, msg_flags is checked
 *        for MSG_OOB, MSG_DONTWAIT and MSG_NOSIGNAL
 * @len:  number of bytes requested
 *
 * The data is cut into skbs; each one is appended to the peer's
 * sk_receive_queue while holding the peer's state lock, and the peer's
 * sk_data_ready() callback is invoked after the lock is dropped.
 *
 * Returns the number of bytes queued when that is non-zero (even if a
 * later chunk failed), otherwise a negative errno:
 *   -EOPNOTSUPP  MSG_OOB was requested, or an address was supplied on a
 *                socket that is not TCP_ESTABLISHED
 *   -EISCONN     an address was supplied on an established socket
 *   -ENOTCONN    the socket has no peer
 *   -EPIPE       this side has SEND_SHUTDOWN set, or the peer is dead or
 *                has RCV_SHUTDOWN set
 *   other        whatever scm_send(), sock_alloc_send_pskb(),
 *                unix_scm_to_skb() or skb_copy_datagram_from_iter() report
 */
1775 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1776                                size_t len)
1777 {
1778         struct sock *sk = sock->sk;
1779         struct sock *other = NULL;
1780         int err, size;
1781         struct sk_buff *skb;
1782         int sent = 0;
1783         struct scm_cookie scm;
1784         bool fds_sent = false;
1785         int data_len;
1786 
1787         wait_for_unix_gc();
1788         err = scm_send(sock, msg, &scm, false);
1789         if (err < 0)
1790                 return err;
1791 
1792         err = -EOPNOTSUPP;
1793         if (msg->msg_flags&MSG_OOB)
1794                 goto out_err;
1795 
        /* A destination address is never accepted here; only the error
         * code differs depending on whether the socket is connected.
         */
1796         if (msg->msg_namelen) {
1797                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1798                 goto out_err;
1799         } else {
1800                 err = -ENOTCONN;
                /* The peer pointer is read without sock_hold() in this function. */
1801                 other = unix_peer(sk);
1802                 if (!other)
1803                         goto out_err;
1804         }
1805 
1806         if (sk->sk_shutdown & SEND_SHUTDOWN)
1807                 goto pipe_err;
1808 
1809         while (sent < len) {
1810                 size = len - sent;
1811 
1812                 /* Keep two messages in the pipe so it schedules better */
1813                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1814 
1815                 /* allow fallback to order-0 allocations */
1816                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1817 
                /* data_len is the part of this chunk beyond SKB_MAX_HEAD(0);
                 * it becomes skb->data_len below, while size - data_len is
                 * placed in the linear area via skb_put().
                 */
1818                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1819 
1820                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1821 
1822                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1823                                            msg->msg_flags & MSG_DONTWAIT, &err,
1824                                            get_order(UNIX_SKB_FRAGS_SZ));
1825                 if (!skb)
1826                         goto out_err;
1827 
1828                 /* Only send the fds in the first buffer */
1829                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1830                 if (err < 0) {
1831                         kfree_skb(skb);
1832                         goto out_err;
1833                 }
1834                 fds_sent = true;
1835 
1836                 skb_put(skb, size - data_len);
1837                 skb->data_len = data_len;
1838                 skb->len = size;
1839                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1840                 if (err) {
1841                         kfree_skb(skb);
1842                         goto out_err;
1843                 }
1844 
1845                 unix_state_lock(other);
1846 
                /* Re-checked for every chunk, under the peer's state lock. */
1847                 if (sock_flag(other, SOCK_DEAD) ||
1848                     (other->sk_shutdown & RCV_SHUTDOWN))
1849                         goto pipe_err_free;
1850 
1851                 maybe_add_creds(skb, sock, other);
1852                 skb_queue_tail(&other->sk_receive_queue, skb);
1853                 unix_state_unlock(other);
1854                 other->sk_data_ready(other);
1855                 sent += size;
1856         }
1857 
1858         scm_destroy(&scm);
1859 
1860         return sent;
1861 
        /* Entered with the peer's state lock held and an unqueued skb. */
1862 pipe_err_free:
1863         unix_state_unlock(other);
1864         kfree_skb(skb);
1865 pipe_err:
        /* SIGPIPE is raised only when nothing at all was queued. */
1866         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1867                 send_sig(SIGPIPE, current, 0);
1868         err = -EPIPE;
1869 out_err:
1870         scm_destroy(&scm);
        /* Partial progress takes precedence over the error code. */
1871         return sent ? : err;
1872 }
1873 
/*
 * unix_stream_sendpage - append a page fragment to the peer's receive queue.
 *
 * @socket: sending socket
 * @page:   page holding the data
 * @offset: offset of the data within @page
 * @size:   number of bytes to append
 * @flags:  MSG_* flags; MSG_OOB, MSG_DONTWAIT and MSG_NOSIGNAL are examined
 *
 * The fragment is attached to the skb at the tail of the peer's
 * sk_receive_queue when unix_skb_scm_eq() accepts it and
 * skb_append_pagefrags() succeeds; otherwise an empty skb is allocated,
 * the fragment is attached to that, and it is queued.
 *
 * Returns @size on success or a negative errno:
 *   -EOPNOTSUPP              MSG_OOB was requested
 *   -ENOTCONN                no peer, or socket not TCP_ESTABLISHED
 *   -EAGAIN / -ERESTARTSYS   mutex_lock_interruptible() on the peer's
 *                            iolock failed (which one depends on
 *                            MSG_DONTWAIT)
 *   -EPIPE                   either side is shut down or the peer is dead;
 *                            SIGPIPE is sent unless MSG_NOSIGNAL is set
 *   other                    errors from sock_alloc_send_pskb(),
 *                            maybe_init_creds() or unix_scm_to_skb()
 */
1874 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1875                                     int offset, size_t size, int flags)
1876 {
1877         int err;
1878         bool send_sigpipe = false;
1879         bool init_scm = true;
1880         struct scm_cookie scm;
1881         struct sock *other, *sk = socket->sk;
1882         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1883 
1884         if (flags & MSG_OOB)
1885                 return -EOPNOTSUPP;
1886 
1887         other = unix_peer(sk);
1888         if (!other || sk->sk_state != TCP_ESTABLISHED)
1889                 return -ENOTCONN;
1890 
        /* This block is skipped on the first pass. It is reached only by
         * "goto alloc_skb" further down, with both the peer's state lock
         * and its iolock held: it releases them, allocates an empty skb,
         * and then falls through so the locks are retaken and all checks
         * are repeated.
         */
1891         if (false) {
1892 alloc_skb:
1893                 unix_state_unlock(other);
1894                 mutex_unlock(&unix_sk(other)->iolock);
1895                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1896                                               &err, 0);
1897                 if (!newskb)
1898                         goto err;
1899         }
1900 
1901         /* we must acquire iolock as we modify already present
1902          * skbs in the sk_receive_queue and mess with skb->len
1903          */
1904         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1905         if (err) {
1906                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1907                 goto err;
1908         }
1909 
1910         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1911                 err = -EPIPE;
1912                 send_sigpipe = true;
1913                 goto err_unlock;
1914         }
1915 
1916         unix_state_lock(other);
1917 
1918         if (sock_flag(other, SOCK_DEAD) ||
1919             other->sk_shutdown & RCV_SHUTDOWN) {
1920                 err = -EPIPE;
1921                 send_sigpipe = true;
1922                 goto err_state_unlock;
1923         }
1924 
        /* Credentials are set up once, even if we loop through alloc_skb. */
1925         if (init_scm) {
1926                 err = maybe_init_creds(&scm, socket, other);
1927                 if (err)
1928                         goto err_state_unlock;
1929                 init_scm = false;
1930         }
1931 
1932         skb = skb_peek_tail(&other->sk_receive_queue);
        /* "tail" remembers the queue tail seen before we left to allocate.
         * If the tail is still that skb, it was already found unusable,
         * so go straight to the newly allocated one.
         */
1933         if (tail && tail == skb) {
1934                 skb = newskb;
1935         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
                /* Empty queue, or the tail carries different scm data. */
1936                 if (newskb) {
1937                         skb = newskb;
1938                 } else {
1939                         tail = skb;
1940                         goto alloc_skb;
1941                 }
1942         } else if (newskb) {
1943                 /* this is fast path, we don't necessarily need to
1944                  * call to kfree_skb even though with newskb == NULL
1945                  * this - does no harm
1946                  */
1947                 consume_skb(newskb);
1948                 newskb = NULL;
1949         }
1950 
        /* A non-zero return means the fragment could not be attached to
         * this skb; remember it as the rejected tail and allocate.
         */
1951         if (skb_append_pagefrags(skb, page, offset, size)) {
1952                 tail = skb;
1953                 goto alloc_skb;
1954         }
1955 
        /* Account for the bytes just attached, including against the
         * sender's sk_wmem_alloc.
         */
1956         skb->len += size;
1957         skb->data_len += size;
1958         skb->truesize += size;
1959         refcount_add(size, &sk->sk_wmem_alloc);
1960 
        /* Only a freshly allocated skb still has to be queued. */
1961         if (newskb) {
1962                 err = unix_scm_to_skb(&scm, skb, false);
1963                 if (err)
1964                         goto err_state_unlock;
1965                 spin_lock(&other->sk_receive_queue.lock);
1966                 __skb_queue_tail(&other->sk_receive_queue, newskb);
1967                 spin_unlock(&other->sk_receive_queue.lock);
1968         }
1969 
1970         unix_state_unlock(other);
1971         mutex_unlock(&unix_sk(other)->iolock);
1972 
1973         other->sk_data_ready(other);
1974         scm_destroy(&scm);
1975         return size;
1976 
1977 err_state_unlock:
1978         unix_state_unlock(other);
1979 err_unlock:
1980         mutex_unlock(&unix_sk(other)->iolock);
1981 err:
1982         kfree_skb(newskb);
1983         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
1984                 send_sig(SIGPIPE, current, 0);
        /* scm is only valid once maybe_init_creds() succeeded above. */
1985         if (!init_scm)
1986                 scm_destroy(&scm);
1987         return err;
1988 }
1989 
1990 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
1991                                   size_t len)
1992 {
1993         int err;
1994         struct sock *sk = sock->sk;
1995 
1996         err = sock_error(sk);
1997         if (err)
1998                 return err;
1999 
2000         if (sk->sk_state != TCP_ESTABLISHED)
2001                 return -ENOTCONN;
2002 
2003         if (msg->msg_namelen)
2004                 msg->msg_namelen = 0;
2005 
2006         return unix_dgram_sendmsg(sock, msg, len);
2007 }
2008 
2009 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2010                                   size_t size, int flags)
2011 {
2012         struct sock *sk = sock->sk;
2013 
2014         if (sk->sk_state != TCP_ESTABLISHED)
2015                 return -ENOTCONN;
2016 
2017         return unix_dgram_recvmsg(sock, msg, size, flags);
2018 }
2019 
2020 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2021 {
2022         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2023 
2024         if (addr) {
2025                 msg->msg_namelen = addr->len;
2026                 memcpy(msg->msg_name, addr->name, addr->len);
2027         }
2028 }
2029 
/*
 * unix_dgram_recvmsg - receive one datagram from @sock into @msg.
 *
 * @sock:  receiving socket
 * @msg:   destination; msg_name (if set) receives the sender's address,
 *         msg_flags gains MSG_TRUNC when the datagram did not fit
 * @size:  capacity of the caller's buffer
 * @flags: MSG_* flags (MSG_OOB, MSG_DONTWAIT, MSG_PEEK and MSG_TRUNC are
 *         examined here)
 *
 * Returns the number of bytes copied, or the datagram length past the
 * peek offset when MSG_TRUNC is set in @flags. Returns 0 when a
 * SOCK_SEQPACKET socket with RCV_SHUTDOWN set would otherwise report
 * -EAGAIN. Otherwise returns a negative errno (-EOPNOTSUPP for MSG_OOB,
 * or an error from the skb helpers).
 */
2030 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2031                               size_t size, int flags)
2032 {
2033         struct scm_cookie scm;
2034         struct sock *sk = sock->sk;
2035         struct unix_sock *u = unix_sk(sk);
2036         struct sk_buff *skb, *last;
2037         long timeo;
2038         int skip;
2039         int err;
2040 
2041         err = -EOPNOTSUPP;
2042         if (flags&MSG_OOB)
2043                 goto out;
2044 
2045         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2046 
        /* Try to dequeue under iolock. On success the loop is left with
         * iolock still held; on failure iolock is released, and we retry
         * only for -EAGAIN while the timeout permits waiting.
         */
2047         do {
2048                 mutex_lock(&u->iolock);
2049 
2050                 skip = sk_peek_offset(sk, flags);
2051                 skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
2052                                               &last);
2053                 if (skb)
2054                         break;
2055 
2056                 mutex_unlock(&u->iolock);
2057 
2058                 if (err != -EAGAIN)
2059                         break;
2060         } while (timeo &&
2061                  !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2062 
2063         if (!skb) { /* implies iolock unlocked */
2064                 unix_state_lock(sk);
2065                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2066                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2067                     (sk->sk_shutdown & RCV_SHUTDOWN))
2068                         err = 0;
2069                 unix_state_unlock(sk);
2070                 goto out;
2071         }
2072 
        /* Wake anyone sleeping on this socket's peer_wait queue for
         * writability.
         */
2073         if (wq_has_sleeper(&u->peer_wait))
2074                 wake_up_interruptible_sync_poll(&u->peer_wait,
2075                                                 EPOLLOUT | EPOLLWRNORM |
2076                                                 EPOLLWRBAND);
2077 
        /* NOTE(review): this path skips skb_free_datagram(); presumably the
         * hook disposes of the skb itself when it returns non-zero -
         * confirm against ccs_socket_post_recvmsg_permission().
         */
2078         if (ccs_socket_post_recvmsg_permission(sk, skb, flags)) {
2079                 err = -EAGAIN; /* Hope less harmful than -EPERM. */
2080                 goto out_unlock;
2081         }
2082         if (msg->msg_name)
2083                 unix_copy_addr(msg, skb->sk);
2084 
        /* Clamp the copy to what remains of the datagram past the peek
         * offset, or flag truncation when the buffer is too small.
         */
2085         if (size > skb->len - skip)
2086                 size = skb->len - skip;
2087         else if (size < skb->len - skip)
2088                 msg->msg_flags |= MSG_TRUNC;
2089 
2090         err = skb_copy_datagram_msg(skb, skip, msg, size);
2091         if (err)
2092                 goto out_free;
2093 
2094         if (sock_flag(sk, SOCK_RCVTSTAMP))
2095                 __sock_recv_timestamp(msg, sk, skb);
2096 
2097         memset(&scm, 0, sizeof(scm));
2098 
2099         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2100         unix_set_secdata(&scm, skb);
2101 
2102         if (!(flags & MSG_PEEK)) {
2103                 if (UNIXCB(skb).fp)
2104                         unix_detach_fds(&scm, skb);
2105 
2106                 sk_peek_offset_bwd(sk, skb->len);
2107         } else {
2108                 /* It is questionable: on PEEK we could:
2109                    - do not return fds - good, but too simple 8)
2110                    - return fds, and do not return them on read (old strategy,
2111                      apparently wrong)
2112                    - clone fds (I chose it for now, it is the most universal
2113                      solution)
2114 
2115                    POSIX 1003.1g does not actually define this clearly
2116                    at all. POSIX 1003.1g doesn't define a lot of things
2117                    clearly however!
2118 
2119                 */
2120 
2121                 sk_peek_offset_fwd(sk, size);
2122 
2123                 if (UNIXCB(skb).fp)
2124                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2125         }
        /* With MSG_TRUNC the caller gets the real length, not the copy size. */
2126         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2127 
2128         scm_recv(sock, msg, &scm, flags);
2129 
2130 out_free:
2131         skb_free_datagram(sk, skb);
2132 out_unlock:
2133         mutex_unlock(&u->iolock);
2134 out:
2135         return err;
2136 }
2137 
2138 /*
2139  *      Sleep until more data has arrived. But check for races..
2140  */
/*
 * @sk:        socket whose receive queue is watched
 * @timeo:     remaining timeout; 0 means do not sleep
 * @last:      queue tail the caller last observed (may be NULL)
 * @last_len:  length of @last as the caller observed it
 * @freezable: selects freezable_schedule_timeout() over schedule_timeout()
 *
 * Returns the timeout remaining after the wait.
 */
2141 static long unix_stream_data_wait(struct sock *sk, long timeo,
2142                                   struct sk_buff *last, unsigned int last_len,
2143                                   bool freezable)
2144 {
2145         struct sk_buff *tail;
2146         DEFINE_WAIT(wait);
2147 
2148         unix_state_lock(sk);
2149 
2150         for (;;) {
2151                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2152 
                /* Stop waiting when the queue tail changed or grew, an
                 * error or receive shutdown is pending, a signal arrived,
                 * or the timeout ran out.
                 */
2153                 tail = skb_peek_tail(&sk->sk_receive_queue);
2154                 if (tail != last ||
2155                     (tail && tail->len != last_len) ||
2156                     sk->sk_err ||
2157                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2158                     signal_pending(current) ||
2159                     !timeo)
2160                         break;
2161 
2162                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
                /* The state lock is dropped across the sleep. */
2163                 unix_state_unlock(sk);
2164                 if (freezable)
2165                         timeo = freezable_schedule_timeout(timeo);
2166                 else
2167                         timeo = schedule_timeout(timeo);
2168                 unix_state_lock(sk);
2169 
                /* On this exit SOCKWQ_ASYNC_WAITDATA is left set. */
2170                 if (sock_flag(sk, SOCK_DEAD))
2171                         break;
2172 
2173                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2174         }
2175 
2176         finish_wait(sk_sleep(sk), &wait);
2177         unix_state_unlock(sk);
2178         return timeo;
2179 }
2180 
2181 static unsigned int unix_skb_len(const struct sk_buff *skb)
2182 {
2183         return skb->len - UNIXCB(skb).consumed;
2184 }
2185 
/*
 * Parameters for one unix_stream_read_generic() call, shared by the
 * recvmsg and splice_read entry points.
 */
2186 struct unix_stream_read_state {
        /* Called as recv_actor(skb, skip, chunk, state); a negative return
         * aborts the read, otherwise the return is added to the byte count.
         */
2187         int (*recv_actor)(struct sk_buff *, int, int,
2188                           struct unix_stream_read_state *);
        /* Socket being read. */
2189         struct socket *socket;
        /* Destination message; left unset by unix_stream_splice_read(). */
2190         struct msghdr *msg;
        /* Destination pipe, used by unix_stream_splice_actor(). */
2191         struct pipe_inode_info *pipe;
        /* Maximum number of bytes to read. */
2192         size_t size;
        /* MSG_* flags for the read. */
2193         int flags;
        /* Flags handed to skb_splice_bits() by unix_stream_splice_actor(). */
2194         unsigned int splice_flags;
2195 };
2196 
/*
 * unix_stream_read_generic - common receive loop for stream sockets.
 *
 * @state:     socket, destination, size, flags and the recv_actor callback
 *             that moves bytes out of each skb
 * @freezable: forwarded to unix_stream_data_wait()
 *
 * Walks the socket's receive queue under u->iolock, calling
 * state->recv_actor() on successive skbs until state->size bytes have been
 * delivered or one of the break conditions below is hit. Ancillary data
 * gathered in the local scm cookie is passed to scm_recv() when
 * state->msg is set, and destroyed otherwise.
 *
 * Returns "copied ? : err": the byte count when it is non-zero, otherwise
 * err (0, -EINVAL if the socket is not TCP_ESTABLISHED, -EOPNOTSUPP for
 * MSG_OOB, -ECONNRESET if the socket is dead, -EAGAIN when nothing is
 * available and the timeout is zero, a pending socket error, or the
 * signal errno from sock_intr_errno()). If the actor fails before any
 * byte was copied, copied is set to -EFAULT and that is returned.
 */
2197 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2198                                     bool freezable)
2199 {
2200         struct scm_cookie scm;
2201         struct socket *sock = state->socket;
2202         struct sock *sk = sock->sk;
2203         struct unix_sock *u = unix_sk(sk);
2204         int copied = 0;
2205         int flags = state->flags;
2206         int noblock = flags & MSG_DONTWAIT;
2207         bool check_creds = false;
2208         int target;
2209         int err = 0;
2210         long timeo;
2211         int skip;
2212         size_t size = state->size;
2213         unsigned int last_len;
2214 
2215         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2216                 err = -EINVAL;
2217                 goto out;
2218         }
2219 
2220         if (unlikely(flags & MSG_OOB)) {
2221                 err = -EOPNOTSUPP;
2222                 goto out;
2223         }
2224 
2225         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2226         timeo = sock_rcvtimeo(sk, noblock);
2227 
2228         memset(&scm, 0, sizeof(scm));
2229 
2230         /* Lock the socket to prevent queue disordering
2231          * while sleeps in memcpy_tomsg
2232          */
2233         mutex_lock(&u->iolock);
2234 
        /* A negative peek offset is treated as zero. */
2235         skip = max(sk_peek_offset(sk, flags), 0);
2236 
2237         do {
2238                 int chunk;
2239                 bool drop_skb;
2240                 struct sk_buff *skb, *last;
2241 
                /* redo: start over from the queue head, under the state lock. */
2242 redo:
2243                 unix_state_lock(sk);
2244                 if (sock_flag(sk, SOCK_DEAD)) {
2245                         err = -ECONNRESET;
2246                         goto unlock;
2247                 }
2248                 last = skb = skb_peek(&sk->sk_receive_queue);
2249                 last_len = last ? last->len : 0;
                /* again: entered with the state lock held and skb set to the
                 * next candidate (NULL when the queue is exhausted).
                 */
2250 again:
2251                 if (skb == NULL) {
2252                         if (copied >= target)
2253                                 goto unlock;
2254 
2255                         /*
2256                          *      POSIX 1003.1g mandates this order.
2257                          */
2258 
2259                         err = sock_error(sk);
2260                         if (err)
2261                                 goto unlock;
2262                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2263                                 goto unlock;
2264 
2265                         unix_state_unlock(sk);
2266                         if (!timeo) {
2267                                 err = -EAGAIN;
2268                                 break;
2269                         }
2270 
                        /* iolock is released while sleeping for more data. */
2271                         mutex_unlock(&u->iolock);
2272 
2273                         timeo = unix_stream_data_wait(sk, timeo, last,
2274                                                       last_len, freezable);
2275 
                        /* iolock is not held here, so leave through "out"
                         * and destroy scm directly; scm_recv() is skipped.
                         */
2276                         if (signal_pending(current)) {
2277                                 err = sock_intr_errno(timeo);
2278                                 scm_destroy(&scm);
2279                                 goto out;
2280                         }
2281 
2282                         mutex_lock(&u->iolock);
2283                         goto redo;
                        /* unlock: shared exit for the paths above that still
                         * hold the state lock; leaves the do/while loop.
                         */
2284 unlock:
2285                         unix_state_unlock(sk);
2286                         break;
2287                 }
2288 
                /* Step over skbs that lie entirely before the peek offset. */
2289                 while (skip >= unix_skb_len(skb)) {
2290                         skip -= unix_skb_len(skb);
2291                         last = skb;
2292                         last_len = skb->len;
2293                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2294                         if (!skb)
2295                                 goto again;
2296                 }
2297 
2298                 unix_state_unlock(sk);
2299 
2300                 if (check_creds) {
2301                         /* Never glue messages from different writers */
2302                         if (!unix_skb_scm_eq(skb, &scm))
2303                                 break;
2304                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2305                         /* Copy credentials */
2306                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2307                         unix_set_secdata(&scm, skb);
2308                         check_creds = true;
2309                 }
2310 
2311                 /* Copy address just once */
2312                 if (state->msg && state->msg->msg_name) {
2313                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2314                                          state->msg->msg_name);
2315                         unix_copy_addr(state->msg, skb->sk);
2316                         sunaddr = NULL;
2317                 }
2318 
2319                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                /* Take an extra reference around the actor call; it is
                 * dropped again by consume_skb() just below.
                 */
2320                 skb_get(skb);
2321                 chunk = state->recv_actor(skb, skip, chunk, state);
2322                 drop_skb = !unix_skb_len(skb);
2323                 /* skb is only safe to use if !drop_skb */
2324                 consume_skb(skb);
2325                 if (chunk < 0) {
2326                         if (copied == 0)
2327                                 copied = -EFAULT;
2328                         break;
2329                 }
2330                 copied += chunk;
2331                 size -= chunk;
2332 
2333                 if (drop_skb) {
2334                         /* the skb was touched by a concurrent reader;
2335                          * we should not expect anything from this skb
2336                          * anymore and assume it invalid - we can be
2337                          * sure it was dropped from the socket queue
2338                          *
2339                          * let's report a short read
2340                          */
2341                         err = 0;
2342                         break;
2343                 }
2344 
2345                 /* Mark read part of skb as used */
2346                 if (!(flags & MSG_PEEK)) {
2347                         UNIXCB(skb).consumed += chunk;
2348 
2349                         sk_peek_offset_bwd(sk, chunk);
2350 
2351                         if (UNIXCB(skb).fp)
2352                                 unix_detach_fds(&scm, skb);
2353 
                        /* Data left in this skb: the request is satisfied. */
2354                         if (unix_skb_len(skb))
2355                                 break;
2356 
2357                         skb_unlink(skb, &sk->sk_receive_queue);
2358                         consume_skb(skb);
2359 
                        /* Stop once file descriptors have been picked up. */
2360                         if (scm.fp)
2361                                 break;
2362                 } else {
2363                         /* It is questionable, see note in unix_dgram_recvmsg.
2364                          */
2365                         if (UNIXCB(skb).fp)
2366                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2367 
2368                         sk_peek_offset_fwd(sk, chunk);
2369 
2370                         if (UNIXCB(skb).fp)
2371                                 break;
2372 
                        /* Peeking: advance to the next skb without removing
                         * anything from the queue.
                         */
2373                         skip = 0;
2374                         last = skb;
2375                         last_len = skb->len;
2376                         unix_state_lock(sk);
2377                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2378                         if (skb)
2379                                 goto again;
2380                         unix_state_unlock(sk);
2381                         break;
2382                 }
2383         } while (size);
2384 
2385         mutex_unlock(&u->iolock);
2386         if (state->msg)
2387                 scm_recv(sock, state->msg, &scm, flags);
2388         else
2389                 scm_destroy(&scm);
2390 out:
2391         return copied ? : err;
2392 }
2393 
2394 static int unix_stream_read_actor(struct sk_buff *skb,
2395                                   int skip, int chunk,
2396                                   struct unix_stream_read_state *state)
2397 {
2398         int ret;
2399 
2400         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2401                                     state->msg, chunk);
2402         return ret ?: chunk;
2403 }
2404 
2405 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2406                                size_t size, int flags)
2407 {
2408         struct unix_stream_read_state state = {
2409                 .recv_actor = unix_stream_read_actor,
2410                 .socket = sock,
2411                 .msg = msg,
2412                 .size = size,
2413                 .flags = flags
2414         };
2415 
2416         return unix_stream_read_generic(&state, true);
2417 }
2418 
2419 static int unix_stream_splice_actor(struct sk_buff *skb,
2420                                     int skip, int chunk,
2421                                     struct unix_stream_read_state *state)
2422 {
2423         return skb_splice_bits(skb, state->socket->sk,
2424                                UNIXCB(skb).consumed + skip,
2425                                state->pipe, chunk, state->splice_flags);
2426 }
2427 
2428 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2429                                        struct pipe_inode_info *pipe,
2430                                        size_t size, unsigned int flags)
2431 {
2432         struct unix_stream_read_state state = {
2433                 .recv_actor = unix_stream_splice_actor,
2434                 .socket = sock,
2435                 .pipe = pipe,
2436                 .size = size,
2437                 .splice_flags = flags,
2438         };
2439 
2440         if (unlikely(*ppos))
2441                 return -ESPIPE;
2442 
2443         if (sock->file->f_flags & O_NONBLOCK ||
2444             flags & SPLICE_F_NONBLOCK)
2445                 state.flags = MSG_DONTWAIT;
2446 
2447         return unix_stream_read_generic(&state, false);
2448 }
2449 
2450 static int unix_shutdown(struct socket *sock, int mode)
2451 {
2452         struct sock *sk = sock->sk;
2453         struct sock *other;
2454 
2455         if (mode < SHUT_RD || mode > SHUT_RDWR)
2456                 return -EINVAL;
2457         /* This maps:
2458          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2459          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2460          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2461          */
2462         ++mode;
2463 
2464         unix_state_lock(sk);
2465         sk->sk_shutdown |= mode;
2466         other = unix_peer(sk);
2467         if (other)
2468                 sock_hold(other);
2469         unix_state_unlock(sk);
2470         sk->sk_state_change(sk);
2471 
2472         if (other &&
2473                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2474 
2475                 int peer_mode = 0;
2476 
2477                 if (mode&RCV_SHUTDOWN)
2478                         peer_mode |= SEND_SHUTDOWN;
2479                 if (mode&SEND_SHUTDOWN)
2480                         peer_mode |= RCV_SHUTDOWN;
2481                 unix_state_lock(other);
2482                 other->sk_shutdown |= peer_mode;
2483                 unix_state_unlock(other);
2484                 other->sk_state_change(other);
2485                 if (peer_mode == SHUTDOWN_MASK)
2486                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2487                 else if (peer_mode & RCV_SHUTDOWN)
2488                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2489         }
2490         if (other)
2491                 sock_put(other);
2492 
2493         return 0;
2494 }
2495 
2496 long unix_inq_len(struct sock *sk)
2497 {
2498         struct sk_buff *skb;
2499         long amount = 0;
2500 
2501         if (sk->sk_state == TCP_LISTEN)
2502                 return -EINVAL;
2503 
2504         spin_lock(&sk->sk_receive_queue.lock);
2505         if (sk->sk_type == SOCK_STREAM ||
2506             sk->sk_type == SOCK_SEQPACKET) {
2507                 skb_queue_walk(&sk->sk_receive_queue, skb)
2508                         amount += unix_skb_len(skb);
2509         } else {
2510                 skb = skb_peek(&sk->sk_receive_queue);
2511                 if (skb)
2512                         amount = skb->len;
2513         }
2514         spin_unlock(&sk->sk_receive_queue.lock);
2515 
2516         return amount;
2517 }
2518 EXPORT_SYMBOL_GPL(unix_inq_len);
2519 
/* Report sk_wmem_alloc_get() for @sk as the length of its output queue. */
long unix_outq_len(struct sock *sk)
{
        long queued = sk_wmem_alloc_get(sk);

        return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2525 
2526 static int unix_open_file(struct sock *sk)
2527 {
2528         struct path path;
2529         struct file *f;
2530         int fd;
2531 
2532         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2533                 return -EPERM;
2534 
2535         if (!smp_load_acquire(&unix_sk(sk)->addr))
2536                 return -ENOENT;
2537 
2538         path = unix_sk(sk)->path;
2539         if (!path.dentry)
2540                 return -ENOENT;
2541 
2542         path_get(&path);
2543 
2544         fd = get_unused_fd_flags(O_CLOEXEC);
2545         if (fd < 0)
2546                 goto out;
2547 
2548         f = dentry_open(&path, O_PATH, current_cred());
2549         if (IS_ERR(f)) {
2550                 put_unused_fd(fd);
2551                 fd = PTR_ERR(f);
2552                 goto out;
2553         }
2554 
2555         fd_install(fd, f);
2556 out:
2557         path_put(&path);
2558 
2559         return fd;
2560 }
2561 
2562 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2563 {
2564         struct sock *sk = sock->sk;
2565         long amount = 0;
2566         int err;
2567 
2568         switch (cmd) {
2569         case SIOCOUTQ:
2570                 amount = unix_outq_len(sk);
2571                 err = put_user(amount, (int __user *)arg);
2572                 break;
2573         case SIOCINQ:
2574                 amount = unix_inq_len(sk);
2575                 if (amount < 0)
2576                         err = amount;
2577                 else
2578                         err = put_user(amount, (int __user *)arg);
2579                 break;
2580         case SIOCUNIXFILE:
2581                 err = unix_open_file(sk);
2582                 break;
2583         default:
2584                 err = -ENOIOCTLCMD;
2585                 break;
2586         }
2587         return err;
2588 }
2589 
2590 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2591 {
2592         struct sock *sk = sock->sk;
2593         __poll_t mask;
2594 
2595         sock_poll_wait(file, sock, wait);
2596         mask = 0;
2597 
2598         /* exceptional events? */
2599         if (sk->sk_err)
2600                 mask |= EPOLLERR;
2601         if (sk->sk_shutdown == SHUTDOWN_MASK)
2602                 mask |= EPOLLHUP;
2603         if (sk->sk_shutdown & RCV_SHUTDOWN)
2604                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2605 
2606         /* readable? */
2607         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2608                 mask |= EPOLLIN | EPOLLRDNORM;
2609 
2610         /* Connection-based need to check for termination and startup */
2611         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2612             sk->sk_state == TCP_CLOSE)
2613                 mask |= EPOLLHUP;
2614 
2615         /*
2616          * we set writable also when the other side has shut down the
2617          * connection. This prevents stuck sockets.
2618          */
2619         if (unix_writable(sk))
2620                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2621 
2622         return mask;
2623 }
2624 
2625 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2626                                     poll_table *wait)
2627 {
2628         struct sock *sk = sock->sk, *other;
2629         unsigned int writable;
2630         __poll_t mask;
2631 
2632         sock_poll_wait(file, sock, wait);
2633         mask = 0;
2634 
2635         /* exceptional events? */
2636         if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2637                 mask |= EPOLLERR |
2638                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2639 
2640         if (sk->sk_shutdown & RCV_SHUTDOWN)
2641                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2642         if (sk->sk_shutdown == SHUTDOWN_MASK)
2643                 mask |= EPOLLHUP;
2644 
2645         /* readable? */
2646         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2647                 mask |= EPOLLIN | EPOLLRDNORM;
2648 
2649         /* Connection-based need to check for termination and startup */
2650         if (sk->sk_type == SOCK_SEQPACKET) {
2651                 if (sk->sk_state == TCP_CLOSE)
2652                         mask |= EPOLLHUP;
2653                 /* connection hasn't started yet? */
2654                 if (sk->sk_state == TCP_SYN_SENT)
2655                         return mask;
2656         }
2657 
2658         /* No write status requested, avoid expensive OUT tests. */
2659         if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2660                 return mask;
2661 
2662         writable = unix_writable(sk);
2663         if (writable) {
2664                 unix_state_lock(sk);
2665 
2666                 other = unix_peer(sk);
2667                 if (other && unix_peer(other) != sk &&
2668                     unix_recvq_full(other) &&
2669                     unix_dgram_peer_wake_me(sk, other))
2670                         writable = 0;
2671 
2672                 unix_state_unlock(sk);
2673         }
2674 
2675         if (writable)
2676                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2677         else
2678                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2679 
2680         return mask;
2681 }
2682 
2683 #ifdef CONFIG_PROC_FS
2684 
/*
 * A seq_file position packs two fields into one value: the index of a
 * hash bucket in the bits at and above BUCKET_SPACE, and an offset
 * within that bucket in the BUCKET_SPACE bits below it.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(pos) ((pos) >> BUCKET_SPACE)
#define get_offset(pos) ((pos) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(bucket, offset) \
        (((bucket) << BUCKET_SPACE) | (offset))
2690 
2691 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2692 {
2693         unsigned long offset = get_offset(*pos);
2694         unsigned long bucket = get_bucket(*pos);
2695         struct sock *sk;
2696         unsigned long count = 0;
2697 
2698         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2699                 if (sock_net(sk) != seq_file_net(seq))
2700                         continue;
2701                 if (++count == offset)
2702                         break;
2703         }
2704 
2705         return sk;
2706 }
2707 
2708 static struct sock *unix_next_socket(struct seq_file *seq,
2709                                      struct sock *sk,
2710                                      loff_t *pos)
2711 {
2712         unsigned long bucket;
2713 
2714         while (sk > (struct sock *)SEQ_START_TOKEN) {
2715                 sk = sk_next(sk);
2716                 if (!sk)
2717                         goto next_bucket;
2718                 if (sock_net(sk) == seq_file_net(seq))
2719                         return sk;
2720         }
2721 
2722         do {
2723                 sk = unix_from_bucket(seq, pos);
2724                 if (sk)
2725                         return sk;
2726 
2727 next_bucket:
2728                 bucket = get_bucket(*pos) + 1;
2729                 *pos = set_bucket_offset(bucket, 1);
2730         } while (bucket < ARRAY_SIZE(unix_socket_table));
2731 
2732         return NULL;
2733 }
2734 
2735 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2736         __acquires(unix_table_lock)
2737 {
2738         spin_lock(&unix_table_lock);
2739 
2740         if (!*pos)
2741                 return SEQ_START_TOKEN;
2742 
2743         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2744                 return NULL;
2745 
2746         return unix_next_socket(seq, NULL, pos);
2747 }
2748 
2749 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2750 {
2751         ++*pos;
2752         return unix_next_socket(seq, v, pos);
2753 }
2754 
2755 static void unix_seq_stop(struct seq_file *seq, void *v)
2756         __releases(unix_table_lock)
2757 {
2758         spin_unlock(&unix_table_lock);
2759 }
2760 
2761 static int unix_seq_show(struct seq_file *seq, void *v)
2762 {
2763 
2764         if (v == SEQ_START_TOKEN)
2765                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2766                          "Inode Path\n");
2767         else {
2768                 struct sock *s = v;
2769                 struct unix_sock *u = unix_sk(s);
2770                 unix_state_lock(s);
2771 
2772                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2773                         s,
2774                         refcount_read(&s->sk_refcnt),
2775                         0,
2776                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2777                         s->sk_type,
2778                         s->sk_socket ?
2779                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2780                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2781                         sock_i_ino(s));
2782 
2783                 if (u->addr) {  // under unix_table_lock here
2784                         int i, len;
2785                         seq_putc(seq, ' ');
2786 
2787                         i = 0;
2788                         len = u->addr->len - sizeof(short);
2789                         if (!UNIX_ABSTRACT(s))
2790                                 len--;
2791                         else {
2792                                 seq_putc(seq, '@');
2793                                 i++;
2794                         }
2795                         for ( ; i < len; i++)
2796                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2797                                          '@');
2798                 }
2799                 unix_state_unlock(s);
2800                 seq_putc(seq, '\n');
2801         }
2802 
2803         return 0;
2804 }
2805 
2806 static const struct seq_operations unix_seq_ops = {
2807         .start  = unix_seq_start,
2808         .next   = unix_seq_next,
2809         .stop   = unix_seq_stop,
2810         .show   = unix_seq_show,
2811 };
2812 #endif
2813 
2814 static const struct net_proto_family unix_family_ops = {
2815         .family = PF_UNIX,
2816         .create = unix_create,
2817         .owner  = THIS_MODULE,
2818 };
2819 
2820 
2821 static int __net_init unix_net_init(struct net *net)
2822 {
2823         int error = -ENOMEM;
2824 
2825         net->unx.sysctl_max_dgram_qlen = 10;
2826         if (unix_sysctl_register(net))
2827                 goto out;
2828 
2829 #ifdef CONFIG_PROC_FS
2830         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2831                         sizeof(struct seq_net_private))) {
2832                 unix_sysctl_unregister(net);
2833                 goto out;
2834         }
2835 #endif
2836         error = 0;
2837 out:
2838         return error;
2839 }
2840 
2841 static void __net_exit unix_net_exit(struct net *net)
2842 {
2843         unix_sysctl_unregister(net);
2844         remove_proc_entry("unix", net->proc_net);
2845 }
2846 
2847 static struct pernet_operations unix_net_ops = {
2848         .init = unix_net_init,
2849         .exit = unix_net_exit,
2850 };
2851 
2852 static int __init af_unix_init(void)
2853 {
2854         int rc = -1;
2855 
2856         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2857 
2858         rc = proto_register(&unix_proto, 1);
2859         if (rc != 0) {
2860                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2861                 goto out;
2862         }
2863 
2864         sock_register(&unix_family_ops);
2865         register_pernet_subsys(&unix_net_ops);
2866 out:
2867         return rc;
2868 }
2869 
2870 static void __exit af_unix_exit(void)
2871 {
2872         sock_unregister(PF_UNIX);
2873         proto_unregister(&unix_proto);
2874         unregister_pernet_subsys(&unix_net_ops);
2875 }
2876 
/*
 * Run earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries to use
 * a UNIX socket, but later than subsys_initcall() because we depend
 * on things initialised there.
 */
2881 fs_initcall(af_unix_init);
2882 module_exit(af_unix_exit);
2883 
2884 MODULE_LICENSE("GPL");
2885 MODULE_ALIAS_NETPROTO(PF_UNIX);
2886 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp