TOMOYO Linux Cross Reference
Linux/net/unix/af_unix.c


  1 /*
  2  * NET4:        Implementation of BSD Unix domain sockets.
  3  *
  4  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
  5  *
  6  *              This program is free software; you can redistribute it and/or
  7  *              modify it under the terms of the GNU General Public License
  8  *              as published by the Free Software Foundation; either version
  9  *              2 of the License, or (at your option) any later version.
 10  *
 11  * Fixes:
 12  *              Linus Torvalds  :       Assorted bug cures.
 13  *              Niibe Yutaka    :       async I/O support.
 14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
 15  *              Alan Cox        :       Limit size of allocated blocks.
 16  *              Alan Cox        :       Fixed the stupid socketpair bug.
 17  *              Alan Cox        :       BSD compatibility fine tuning.
 18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
 19  *              Alan Cox        :       Sorted out a proper draft version of
 20  *                                      file descriptor passing hacked up from
 21  *                                      Mike Shaver's work.
 22  *              Marty Leisner   :       Fixes to fd passing
 23  *              Nick Nevin      :       recvmsg bugfix.
 24  *              Alan Cox        :       Started proper garbage collector
 25  *              Heiko Eißfeldt  :       Missing verify_area check
 26  *              Alan Cox        :       Started POSIXisms
 27  *              Andreas Schwab  :       Replace inode by dentry for proper
 28  *                                      reference counting
 29  *              Kirk Petersen   :       Made this a module
 30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
 31  *                                      Lots of bug fixes.
 32  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
 33  *                                      by the above two patches.
 34  *           Andrea Arcangeli   :       If possible we block in connect(2)
 35  *                                      if the max backlog of the listen socket
 36  *                                      has been reached. This won't break
 37  *                                      old apps and it avoids huge numbers
 38  *                                      of socks being hashed (for unix_gc()
 39  *                                      performance reasons).
 40  *                                      Security fix that limits the max
 41  *                                      number of socks to 2*max_files and
 42  *                                      the number of skbs queueable in the
 43  *                                      dgram receiver.
 44  *              Artur Skawina   :       Hash function optimizations
 45  *           Alexey Kuznetsov   :       Full scale SMP. Lots of bugs introduced 8)
 46  *            Malcolm Beattie   :       Set peercred for socketpair
 47  *           Michal Ostrowski   :       Module initialization cleanup.
 48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
 49  *                                      the core infrastructure is doing that
 50  *                                      for all net proto families now (2.5.69+)
 51  *
 52  *
 53  * Known differences from reference BSD that was tested:
 54  *
 55  *      [TO FIX]
 56  *      ECONNREFUSED is not returned from one end of a connected socket to the
 57  *              other the moment one end closes.
 58  *      fstat() doesn't return st_dev=0, give the blksize as the high water mark,
 59  *              or report a fake inode identifier (nor the BSD first-socket-fstat-twice bug).
 60  *      [NOT TO FIX]
 61  *      accept() returns a path name even if the connecting socket has closed
 62  *              in the meantime (BSD loses the path and gives up).
 63  *      accept() returns a 0-length path for an unbound connector. BSD returns 16
 64  *              and a null first byte in the path (but not for getsockname/getpeername - BSD bug ??)
 65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
 66  *      BSD af_unix apparently has connect forgetting to block properly.
 67  *              (need to check this against the POSIX spec in detail)
 68  *
 69  * Differences from 2.0.0-11-... (ANK)
 70  *      Bug fixes and improvements.
 71  *              - client shutdown killed server socket.
 72  *              - removed all useless cli/sti pairs.
 73  *
 74  *      Semantic changes/extensions.
 75  *              - generic control message passing.
 76  *              - SCM_CREDENTIALS control message.
 77  *              - "Abstract" (not FS-based) socket bindings.
 78  *                Abstract names are sequences of bytes (not zero terminated)
 79  *                starting with 0, so that this name space does not intersect
 80  *                with BSD names.
 81  */
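
/*
 * Illustrative userspace sketch (added for exposition; hypothetical code,
 * not part of the original file): binding a socket in the abstract
 * namespace described above. The name and fd are made up; note that the
 * address length counts the leading 0 byte plus the name bytes, with no
 * trailing terminator.
 */
#if 0
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

static int bind_abstract(int fd, const char *name)
{
	struct sockaddr_un sun;
	size_t n = strlen(name);	/* must be < sizeof(sun.sun_path) */

	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	/* sun_path[0] stays 0: this selects the abstract namespace */
	memcpy(sun.sun_path + 1, name, n);
	return bind(fd, (struct sockaddr *)&sun,
		    offsetof(struct sockaddr_un, sun_path) + 1 + n);
}
#endif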
 82 
 83 #include <linux/module.h>
 84 #include <linux/kernel.h>
 85 #include <linux/signal.h>
 86 #include <linux/sched.h>
 87 #include <linux/errno.h>
 88 #include <linux/string.h>
 89 #include <linux/stat.h>
 90 #include <linux/dcache.h>
 91 #include <linux/namei.h>
 92 #include <linux/socket.h>
 93 #include <linux/un.h>
 94 #include <linux/fcntl.h>
 95 #include <linux/termios.h>
 96 #include <linux/sockios.h>
 97 #include <linux/net.h>
 98 #include <linux/in.h>
 99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
118 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
119 EXPORT_SYMBOL_GPL(unix_socket_table);
120 DEFINE_SPINLOCK(unix_table_lock);
121 EXPORT_SYMBOL_GPL(unix_table_lock);
122 static atomic_long_t unix_nr_socks;
123 
124 
125 static struct hlist_head *unix_sockets_unbound(void *addr)
126 {
127         unsigned long hash = (unsigned long)addr;
128 
129         hash ^= hash >> 16;
130         hash ^= hash >> 8;
131         hash %= UNIX_HASH_SIZE;
132         return &unix_socket_table[UNIX_HASH_SIZE + hash];
133 }
134 
135 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
136 
137 #ifdef CONFIG_SECURITY_NETWORK
138 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
139 {
140         memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
141 }
142 
143 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
144 {
145         scm->secid = *UNIXSID(skb);
146 }
147 #else
148 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
149 { }
150 
151 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
152 { }
153 #endif /* CONFIG_SECURITY_NETWORK */
154 
155 /*
156  *  SMP locking strategy:
157  *    hash table is protected with spinlock unix_table_lock
158  *    each socket state is protected by separate spin lock.
159  */
160 
161 static inline unsigned int unix_hash_fold(__wsum n)
162 {
163         unsigned int hash = (__force unsigned int)csum_fold(n);
164 
165         hash ^= hash>>8;
166         return hash&(UNIX_HASH_SIZE-1);
167 }
168 
169 #define unix_peer(sk) (unix_sk(sk)->peer)
170 
171 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
172 {
173         return unix_peer(osk) == sk;
174 }
175 
176 static inline int unix_may_send(struct sock *sk, struct sock *osk)
177 {
178         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
179 }
180 
181 static inline int unix_recvq_full(struct sock const *sk)
182 {
183         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
184 }
185 
186 struct sock *unix_peer_get(struct sock *s)
187 {
188         struct sock *peer;
189 
190         unix_state_lock(s);
191         peer = unix_peer(s);
192         if (peer)
193                 sock_hold(peer);
194         unix_state_unlock(s);
195         return peer;
196 }
197 EXPORT_SYMBOL_GPL(unix_peer_get);
198 
199 static inline void unix_release_addr(struct unix_address *addr)
200 {
201         if (atomic_dec_and_test(&addr->refcnt))
202                 kfree(addr);
203 }
204 
205 /*
206  *      Check unix socket name:
207  *              - should not be zero length.
208  *              - if it starts with a non-zero byte, it should be NUL terminated (FS object)
209  *              - if it starts with a zero byte, it is an abstract name.
210  */
211 
212 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
213 {
214         if (len <= sizeof(short) || len > sizeof(*sunaddr))
215                 return -EINVAL;
216         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
217                 return -EINVAL;
218         if (sunaddr->sun_path[0]) {
219                 /*
220                  * This may look like an off-by-one error but it is a bit more
221                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
222                  * sun_path[108] doesn't exist as such.  However, in kernel space
223                  * we are guaranteed that it is a valid memory location in our
224                  * kernel address buffer.
225                  */
226                 ((char *)sunaddr)[len] = 0;
227                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
228                 return len;
229         }
230 
231         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
232         return len;
233 }
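
/*
 * Illustrative userspace sketch (hypothetical fd; headers as in the
 * earlier sketch): the caller-side view of a filesystem name as
 * unix_mkname() accepts it. The caller need not NUL-terminate sun_path:
 * unix_mkname() writes the terminator at 'len' itself before taking
 * strlen().
 */
#if 0
static int bind_fs_name(int fd)
{
	struct sockaddr_un sun;

	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	memcpy(sun.sun_path, "/tmp/demo", 9);	/* no trailing NUL required */
	return bind(fd, (struct sockaddr *)&sun,
		    offsetof(struct sockaddr_un, sun_path) + 9);
}
#endif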
234 
235 static void __unix_remove_socket(struct sock *sk)
236 {
237         sk_del_node_init(sk);
238 }
239 
240 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
241 {
242         WARN_ON(!sk_unhashed(sk));
243         sk_add_node(sk, list);
244 }
245 
246 static inline void unix_remove_socket(struct sock *sk)
247 {
248         spin_lock(&unix_table_lock);
249         __unix_remove_socket(sk);
250         spin_unlock(&unix_table_lock);
251 }
252 
253 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
254 {
255         spin_lock(&unix_table_lock);
256         __unix_insert_socket(list, sk);
257         spin_unlock(&unix_table_lock);
258 }
259 
260 static struct sock *__unix_find_socket_byname(struct net *net,
261                                               struct sockaddr_un *sunname,
262                                               int len, int type, unsigned int hash)
263 {
264         struct sock *s;
265 
266         sk_for_each(s, &unix_socket_table[hash ^ type]) {
267                 struct unix_sock *u = unix_sk(s);
268 
269                 if (!net_eq(sock_net(s), net))
270                         continue;
271 
272                 if (u->addr->len == len &&
273                     !memcmp(u->addr->name, sunname, len))
274                         goto found;
275         }
276         s = NULL;
277 found:
278         return s;
279 }
280 
281 static inline struct sock *unix_find_socket_byname(struct net *net,
282                                                    struct sockaddr_un *sunname,
283                                                    int len, int type,
284                                                    unsigned int hash)
285 {
286         struct sock *s;
287 
288         spin_lock(&unix_table_lock);
289         s = __unix_find_socket_byname(net, sunname, len, type, hash);
290         if (s)
291                 sock_hold(s);
292         spin_unlock(&unix_table_lock);
293         return s;
294 }
295 
296 static struct sock *unix_find_socket_byinode(struct inode *i)
297 {
298         struct sock *s;
299 
300         spin_lock(&unix_table_lock);
301         sk_for_each(s,
302                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
303                 struct dentry *dentry = unix_sk(s)->path.dentry;
304 
305                 if (dentry && dentry->d_inode == i) {
306                         sock_hold(s);
307                         goto found;
308                 }
309         }
310         s = NULL;
311 found:
312         spin_unlock(&unix_table_lock);
313         return s;
314 }
315 
316 /* Support code for asymmetrically connected dgram sockets
317  *
318  * If a datagram socket is connected to a socket not itself connected
319  * to the first socket (e.g., /dev/log), clients may only enqueue more
320  * messages if the present receive queue of the server socket is not
321  * "too large". This means there's a second writability condition
322  * poll and sendmsg need to test. The dgram recv code will do a wake
323  * up on the peer_wait wait queue of a socket upon reception of a
324  * datagram which needs to be propagated to sleeping would-be writers
325  * since these might not have sent anything so far. This can't be
326  * accomplished via poll_wait because the lifetime of the server
327  * socket might be less than that of its clients if these break their
328  * association with it or if the server socket is closed while clients
329  * are still connected to it and there's no way to inform "a polling
330  * implementation" that it should let go of a certain wait queue.
331  *
332  * In order to propagate a wake up, a wait_queue_t of the client
333  * socket is enqueued on the peer_wait queue of the server socket
334  * whose wake function does a wake_up on the ordinary client socket
335  * wait queue. This connection is established whenever a write (or
336  * poll for write) hits the flow control condition and is broken when
337  * the association to the server socket is dissolved or after a wake
338  * up was relayed.
339  */
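
/*
 * Illustrative userspace sketch (hypothetical; needs <errno.h> and
 * <poll.h>): the situation the relay below handles. A nonblocking
 * client connected to a busy receiver sees EAGAIN once the receiver's
 * queue is too long; polling for POLLOUT then depends on the peer_wait
 * relay implemented by the functions that follow, not on the client's
 * own send buffer.
 */
#if 0
static void send_or_wait(int cfd, const void *buf, size_t len)
{
	if (send(cfd, buf, len, MSG_DONTWAIT) < 0 && errno == EAGAIN) {
		struct pollfd pfd = { .fd = cfd, .events = POLLOUT };

		/* woken via unix_dgram_peer_wake_relay() once the
		 * receiver drains its queue */
		poll(&pfd, 1, -1);
	}
}
#endif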
340 
341 static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
342                                       void *key)
343 {
344         struct unix_sock *u;
345         wait_queue_head_t *u_sleep;
346 
347         u = container_of(q, struct unix_sock, peer_wake);
348 
349         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
350                             q);
351         u->peer_wake.private = NULL;
352 
353         /* relaying can only happen while the wq still exists */
354         u_sleep = sk_sleep(&u->sk);
355         if (u_sleep)
356                 wake_up_interruptible_poll(u_sleep, key);
357 
358         return 0;
359 }
360 
361 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
362 {
363         struct unix_sock *u, *u_other;
364         int rc;
365 
366         u = unix_sk(sk);
367         u_other = unix_sk(other);
368         rc = 0;
369         spin_lock(&u_other->peer_wait.lock);
370 
371         if (!u->peer_wake.private) {
372                 u->peer_wake.private = other;
373                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
374 
375                 rc = 1;
376         }
377 
378         spin_unlock(&u_other->peer_wait.lock);
379         return rc;
380 }
381 
382 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
383                                             struct sock *other)
384 {
385         struct unix_sock *u, *u_other;
386 
387         u = unix_sk(sk);
388         u_other = unix_sk(other);
389         spin_lock(&u_other->peer_wait.lock);
390 
391         if (u->peer_wake.private == other) {
392                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
393                 u->peer_wake.private = NULL;
394         }
395 
396         spin_unlock(&u_other->peer_wait.lock);
397 }
398 
399 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
400                                                    struct sock *other)
401 {
402         unix_dgram_peer_wake_disconnect(sk, other);
403         wake_up_interruptible_poll(sk_sleep(sk),
404                                    POLLOUT |
405                                    POLLWRNORM |
406                                    POLLWRBAND);
407 }
408 
409 /* preconditions:
410  *      - unix_peer(sk) == other
411  *      - association is stable
412  */
413 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
414 {
415         int connected;
416 
417         connected = unix_dgram_peer_wake_connect(sk, other);
418 
419         if (unix_recvq_full(other))
420                 return 1;
421 
422         if (connected)
423                 unix_dgram_peer_wake_disconnect(sk, other);
424 
425         return 0;
426 }
427 
428 static inline int unix_writable(struct sock *sk)
429 {
430         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
431 }
432 
433 static void unix_write_space(struct sock *sk)
434 {
435         struct socket_wq *wq;
436 
437         rcu_read_lock();
438         if (unix_writable(sk)) {
439                 wq = rcu_dereference(sk->sk_wq);
440                 if (wq_has_sleeper(wq))
441                         wake_up_interruptible_sync_poll(&wq->wait,
442                                 POLLOUT | POLLWRNORM | POLLWRBAND);
443                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
444         }
445         rcu_read_unlock();
446 }
447 
448 /* When a dgram socket disconnects (or changes its peer), we clear its receive
449  * queue of packets that arrived from the previous peer. First, this allows
450  * flow control based only on wmem_alloc; second, an sk connected to a peer
451  * may receive messages only from that peer. */
452 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
453 {
454         if (!skb_queue_empty(&sk->sk_receive_queue)) {
455                 skb_queue_purge(&sk->sk_receive_queue);
456                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
457 
458                 /* If one link of a bidirectional dgram pipe is disconnected,
459                  * we signal an error. Messages are lost. Do not do this
460                  * when the peer was not connected to us.
461                  */
462                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
463                         other->sk_err = ECONNRESET;
464                         other->sk_error_report(other);
465                 }
466         }
467 }
468 
469 static void unix_sock_destructor(struct sock *sk)
470 {
471         struct unix_sock *u = unix_sk(sk);
472 
473         skb_queue_purge(&sk->sk_receive_queue);
474 
475         WARN_ON(atomic_read(&sk->sk_wmem_alloc));
476         WARN_ON(!sk_unhashed(sk));
477         WARN_ON(sk->sk_socket);
478         if (!sock_flag(sk, SOCK_DEAD)) {
479                 printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
480                 return;
481         }
482 
483         if (u->addr)
484                 unix_release_addr(u->addr);
485 
486         atomic_long_dec(&unix_nr_socks);
487         local_bh_disable();
488         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
489         local_bh_enable();
490 #ifdef UNIX_REFCNT_DEBUG
491         printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
492                 atomic_long_read(&unix_nr_socks));
493 #endif
494 }
495 
496 static void unix_release_sock(struct sock *sk, int embrion)
497 {
498         struct unix_sock *u = unix_sk(sk);
499         struct path path;
500         struct sock *skpair;
501         struct sk_buff *skb;
502         int state;
503 
504         unix_remove_socket(sk);
505 
506         /* Clear state */
507         unix_state_lock(sk);
508         sock_orphan(sk);
509         sk->sk_shutdown = SHUTDOWN_MASK;
510         path         = u->path;
511         u->path.dentry = NULL;
512         u->path.mnt = NULL;
513         state = sk->sk_state;
514         sk->sk_state = TCP_CLOSE;
515         unix_state_unlock(sk);
516 
517         wake_up_interruptible_all(&u->peer_wait);
518 
519         skpair = unix_peer(sk);
520 
521         if (skpair != NULL) {
522                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
523                         unix_state_lock(skpair);
524                         /* No more writes */
525                         skpair->sk_shutdown = SHUTDOWN_MASK;
526                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
527                                 skpair->sk_err = ECONNRESET;
528                         unix_state_unlock(skpair);
529                         skpair->sk_state_change(skpair);
530                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
531                 }
532 
533                 unix_dgram_peer_wake_disconnect(sk, skpair);
534                 sock_put(skpair); /* It may now die */
535                 unix_peer(sk) = NULL;
536         }
537 
538         /* Try to flush out this socket. At least throw out the buffers. */
539 
540         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
541                 if (state == TCP_LISTEN)
542                         unix_release_sock(skb->sk, 1);
543                 /* passed fds are erased in the kfree_skb hook        */
544                 kfree_skb(skb);
545         }
546 
547         if (path.dentry)
548                 path_put(&path);
549 
550         sock_put(sk);
551 
552         /* ---- Socket is dead now and most probably destroyed ---- */
553 
554         /*
555          * Fixme: BSD difference: In BSD all sockets connected to us get
556          *        ECONNRESET and we die on the spot. In Linux we behave
557          *        like files and pipes do and wait for the last
558          *        dereference.
559          *
560          * Can't we simply set sock->err?
561          *
562          *        What is the above comment talking about? --ANK(980817)
563          */
564 
565         if (unix_tot_inflight)
566                 unix_gc();              /* Garbage collect fds */
567 }
568 
569 static void init_peercred(struct sock *sk)
570 {
571         put_pid(sk->sk_peer_pid);
572         if (sk->sk_peer_cred)
573                 put_cred(sk->sk_peer_cred);
574         sk->sk_peer_pid  = get_pid(task_tgid(current));
575         sk->sk_peer_cred = get_current_cred();
576 }
577 
578 static void copy_peercred(struct sock *sk, struct sock *peersk)
579 {
580         put_pid(sk->sk_peer_pid);
581         if (sk->sk_peer_cred)
582                 put_cred(sk->sk_peer_cred);
583         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
584         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
585 }
586 
587 static int unix_listen(struct socket *sock, int backlog)
588 {
589         int err;
590         struct sock *sk = sock->sk;
591         struct unix_sock *u = unix_sk(sk);
592         struct pid *old_pid = NULL;
593 
594         err = -EOPNOTSUPP;
595         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
596                 goto out;       /* Only stream/seqpacket sockets accept */
597         err = -EINVAL;
598         if (!u->addr)
599                 goto out;       /* No listens on an unbound socket */
600         unix_state_lock(sk);
601         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
602                 goto out_unlock;
603         if (backlog > sk->sk_max_ack_backlog)
604                 wake_up_interruptible_all(&u->peer_wait);
605         sk->sk_max_ack_backlog  = backlog;
606         sk->sk_state            = TCP_LISTEN;
607         /* set credentials so connect can copy them */
608         init_peercred(sk);
609         err = 0;
610 
611 out_unlock:
612         unix_state_unlock(sk);
613         put_pid(old_pid);
614 out:
615         return err;
616 }
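
/*
 * Illustrative userspace sketch (hypothetical, error handling omitted):
 * the bind/listen/accept sequence unix_listen() serves. Listening on an
 * unbound socket fails with EINVAL, as enforced above.
 */
#if 0
static int serve(const char *path)
{
	struct sockaddr_un sun;
	int fd = socket(AF_UNIX, SOCK_STREAM, 0);

	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	strncpy(sun.sun_path, path, sizeof(sun.sun_path) - 1);
	if (bind(fd, (struct sockaddr *)&sun, sizeof(sun)) || listen(fd, 16))
		return -1;
	return accept(fd, NULL, NULL);	/* serviced by unix_accept() below */
}
#endif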
617 
618 static int unix_release(struct socket *);
619 static int unix_bind(struct socket *, struct sockaddr *, int);
620 static int unix_stream_connect(struct socket *, struct sockaddr *,
621                                int addr_len, int flags);
622 static int unix_socketpair(struct socket *, struct socket *);
623 static int unix_accept(struct socket *, struct socket *, int);
624 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
625 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
626 static unsigned int unix_dgram_poll(struct file *, struct socket *,
627                                     poll_table *);
628 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
629 static int unix_shutdown(struct socket *, int);
630 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
631                                struct msghdr *, size_t);
632 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
633                                struct msghdr *, size_t, int);
634 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
635                               struct msghdr *, size_t);
636 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
637                               struct msghdr *, size_t, int);
638 static int unix_dgram_connect(struct socket *, struct sockaddr *,
639                               int, int);
640 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
641                                   struct msghdr *, size_t);
642 static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
643                                   struct msghdr *, size_t, int);
644 
645 static int unix_set_peek_off(struct sock *sk, int val)
646 {
647         struct unix_sock *u = unix_sk(sk);
648 
649         if (mutex_lock_interruptible(&u->readlock))
650                 return -EINTR;
651 
652         sk->sk_peek_off = val;
653         mutex_unlock(&u->readlock);
654 
655         return 0;
656 }
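
/*
 * Illustrative userspace sketch (hypothetical fd/buf; assumes each recv()
 * returns 16 bytes): the SO_PEEK_OFF behaviour wired up here. With a
 * non-negative peek offset set, successive MSG_PEEK reads walk forward
 * through the queued data instead of re-reading it from the start.
 */
#if 0
	int off = 0;

	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
	recv(fd, buf, 16, MSG_PEEK);	/* peeks bytes 0..15, offset -> 16 */
	recv(fd, buf, 16, MSG_PEEK);	/* peeks bytes 16..31 */
#endif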
657 
658 
659 static const struct proto_ops unix_stream_ops = {
660         .family =       PF_UNIX,
661         .owner =        THIS_MODULE,
662         .release =      unix_release,
663         .bind =         unix_bind,
664         .connect =      unix_stream_connect,
665         .socketpair =   unix_socketpair,
666         .accept =       unix_accept,
667         .getname =      unix_getname,
668         .poll =         unix_poll,
669         .ioctl =        unix_ioctl,
670         .listen =       unix_listen,
671         .shutdown =     unix_shutdown,
672         .setsockopt =   sock_no_setsockopt,
673         .getsockopt =   sock_no_getsockopt,
674         .sendmsg =      unix_stream_sendmsg,
675         .recvmsg =      unix_stream_recvmsg,
676         .mmap =         sock_no_mmap,
677         .sendpage =     sock_no_sendpage,
678         .set_peek_off = unix_set_peek_off,
679 };
680 
681 static const struct proto_ops unix_dgram_ops = {
682         .family =       PF_UNIX,
683         .owner =        THIS_MODULE,
684         .release =      unix_release,
685         .bind =         unix_bind,
686         .connect =      unix_dgram_connect,
687         .socketpair =   unix_socketpair,
688         .accept =       sock_no_accept,
689         .getname =      unix_getname,
690         .poll =         unix_dgram_poll,
691         .ioctl =        unix_ioctl,
692         .listen =       sock_no_listen,
693         .shutdown =     unix_shutdown,
694         .setsockopt =   sock_no_setsockopt,
695         .getsockopt =   sock_no_getsockopt,
696         .sendmsg =      unix_dgram_sendmsg,
697         .recvmsg =      unix_dgram_recvmsg,
698         .mmap =         sock_no_mmap,
699         .sendpage =     sock_no_sendpage,
700         .set_peek_off = unix_set_peek_off,
701 };
702 
703 static const struct proto_ops unix_seqpacket_ops = {
704         .family =       PF_UNIX,
705         .owner =        THIS_MODULE,
706         .release =      unix_release,
707         .bind =         unix_bind,
708         .connect =      unix_stream_connect,
709         .socketpair =   unix_socketpair,
710         .accept =       unix_accept,
711         .getname =      unix_getname,
712         .poll =         unix_dgram_poll,
713         .ioctl =        unix_ioctl,
714         .listen =       unix_listen,
715         .shutdown =     unix_shutdown,
716         .setsockopt =   sock_no_setsockopt,
717         .getsockopt =   sock_no_getsockopt,
718         .sendmsg =      unix_seqpacket_sendmsg,
719         .recvmsg =      unix_seqpacket_recvmsg,
720         .mmap =         sock_no_mmap,
721         .sendpage =     sock_no_sendpage,
722         .set_peek_off = unix_set_peek_off,
723 };
724 
725 static struct proto unix_proto = {
726         .name                   = "UNIX",
727         .owner                  = THIS_MODULE,
728         .obj_size               = sizeof(struct unix_sock),
729 };
730 
731 /*
732  * AF_UNIX sockets do not interact with hardware, hence they
733  * don't trigger interrupts - so it's safe for them to have
734  * bh-unsafe locking for their sk_receive_queue.lock. Split off
735  * this special lock class by reinitializing the spinlock key:
736  */
737 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
738 
739 static struct sock *unix_create1(struct net *net, struct socket *sock)
740 {
741         struct sock *sk = NULL;
742         struct unix_sock *u;
743 
744         atomic_long_inc(&unix_nr_socks);
745         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
746                 goto out;
747 
748         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
749         if (!sk)
750                 goto out;
751 
752         sock_init_data(sock, sk);
753         lockdep_set_class(&sk->sk_receive_queue.lock,
754                                 &af_unix_sk_receive_queue_lock_key);
755 
756         sk->sk_write_space      = unix_write_space;
757         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
758         sk->sk_destruct         = unix_sock_destructor;
759         u         = unix_sk(sk);
760         u->path.dentry = NULL;
761         u->path.mnt = NULL;
762         spin_lock_init(&u->lock);
763         atomic_long_set(&u->inflight, 0);
764         INIT_LIST_HEAD(&u->link);
765         mutex_init(&u->readlock); /* single task reading lock */
766         init_waitqueue_head(&u->peer_wait);
767         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
768         unix_insert_socket(unix_sockets_unbound(sk), sk);
769 out:
770         if (sk == NULL)
771                 atomic_long_dec(&unix_nr_socks);
772         else {
773                 local_bh_disable();
774                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
775                 local_bh_enable();
776         }
777         return sk;
778 }
779 
780 static int unix_create(struct net *net, struct socket *sock, int protocol,
781                        int kern)
782 {
783         if (protocol && protocol != PF_UNIX)
784                 return -EPROTONOSUPPORT;
785 
786         sock->state = SS_UNCONNECTED;
787 
788         switch (sock->type) {
789         case SOCK_STREAM:
790                 sock->ops = &unix_stream_ops;
791                 break;
792                 /*
793                  *      Believe it or not, BSD has AF_UNIX SOCK_RAW, though
794                  *      nothing uses it.
795                  */
796         case SOCK_RAW:
797                 sock->type = SOCK_DGRAM;
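                /* fall through: SOCK_RAW is served by the dgram ops */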
798         case SOCK_DGRAM:
799                 sock->ops = &unix_dgram_ops;
800                 break;
801         case SOCK_SEQPACKET:
802                 sock->ops = &unix_seqpacket_ops;
803                 break;
804         default:
805                 return -ESOCKTNOSUPPORT;
806         }
807 
808         return unix_create1(net, sock) ? 0 : -ENOMEM;
809 }
810 
811 static int unix_release(struct socket *sock)
812 {
813         struct sock *sk = sock->sk;
814 
815         if (!sk)
816                 return 0;
817 
818         unix_release_sock(sk, 0);
819         sock->sk = NULL;
820 
821         return 0;
822 }
823 
824 static int unix_autobind(struct socket *sock)
825 {
826         struct sock *sk = sock->sk;
827         struct net *net = sock_net(sk);
828         struct unix_sock *u = unix_sk(sk);
829         static u32 ordernum = 1;
830         struct unix_address *addr;
831         int err;
832         unsigned int retries = 0;
833 
834         err = mutex_lock_interruptible(&u->readlock);
835         if (err)
836                 return err;
837 
838         err = 0;
839         if (u->addr)
840                 goto out;
841 
842         err = -ENOMEM;
843         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
844         if (!addr)
845                 goto out;
846 
847         addr->name->sun_family = AF_UNIX;
848         atomic_set(&addr->refcnt, 1);
849 
850 retry:
851         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
852         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
853 
854         spin_lock(&unix_table_lock);
855         ordernum = (ordernum+1)&0xFFFFF;
856 
857         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
858                                       addr->hash)) {
859                 spin_unlock(&unix_table_lock);
860                 /*
861                  * __unix_find_socket_byname() may take a long time if many names
862                  * are already in use.
863                  */
864                 cond_resched();
865                 /* Give up if all names seem to be in use. */
866                 if (retries++ == 0xFFFFF) {
867                         err = -ENOSPC;
868                         kfree(addr);
869                         goto out;
870                 }
871                 goto retry;
872         }
873         addr->hash ^= sk->sk_type;
874 
875         __unix_remove_socket(sk);
876         u->addr = addr;
877         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
878         spin_unlock(&unix_table_lock);
879         err = 0;
880 
881 out:    mutex_unlock(&u->readlock);
882         return err;
883 }
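
/*
 * Illustrative userspace sketch (hypothetical fd): triggering the
 * autobind above by passing only the address family to bind(), then
 * reading back the kernel-chosen abstract name: a leading 0 byte
 * followed by five hex digits.
 */
#if 0
	struct sockaddr_un sun = { .sun_family = AF_UNIX };
	socklen_t len = sizeof(sa_family_t);	/* == sizeof(short) here */

	bind(fd, (struct sockaddr *)&sun, len);
	len = sizeof(sun);
	getsockname(fd, (struct sockaddr *)&sun, &len);
	/* sun.sun_path now holds "\0XXXXX"; len covers family + 6 bytes */
#endif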
884 
885 static struct sock *unix_find_other(struct net *net,
886                                     struct sockaddr_un *sunname, int len,
887                                     int type, unsigned int hash, int *error)
888 {
889         struct sock *u;
890         struct path path;
891         int err = 0;
892 
893         if (sunname->sun_path[0]) {
894                 struct inode *inode;
895                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
896                 if (err)
897                         goto fail;
898                 inode = path.dentry->d_inode;
899                 err = inode_permission(inode, MAY_WRITE);
900                 if (err)
901                         goto put_fail;
902 
903                 err = -ECONNREFUSED;
904                 if (!S_ISSOCK(inode->i_mode))
905                         goto put_fail;
906                 u = unix_find_socket_byinode(inode);
907                 if (!u)
908                         goto put_fail;
909 
910                 if (u->sk_type == type)
911                         touch_atime(&path);
912 
913                 path_put(&path);
914 
915                 err = -EPROTOTYPE;
916                 if (u->sk_type != type) {
917                         sock_put(u);
918                         goto fail;
919                 }
920         } else {
921                 err = -ECONNREFUSED;
922                 u = unix_find_socket_byname(net, sunname, len, type, hash);
923                 if (u) {
924                         struct dentry *dentry;
925                         dentry = unix_sk(u)->path.dentry;
926                         if (dentry)
927                                 touch_atime(&unix_sk(u)->path);
928                 } else
929                         goto fail;
930         }
931         return u;
932 
933 put_fail:
934         path_put(&path);
935 fail:
936         *error = err;
937         return NULL;
938 }
939 
940 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
941 {
942         struct dentry *dentry;
943         struct path path;
944         int err = 0;
945         /*
946          * Get the parent directory, calculate the hash for the last
947          * component.
948          */
949         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
950         err = PTR_ERR(dentry);
951         if (IS_ERR(dentry))
952                 return err;
953 
954         /*
955          * All right, let's create it.
956          */
957         err = security_path_mknod(&path, dentry, mode, 0);
958         if (!err) {
959                 err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
960                 if (!err) {
961                         res->mnt = mntget(path.mnt);
962                         res->dentry = dget(dentry);
963                 }
964         }
965         done_path_create(&path, dentry);
966         return err;
967 }
968 
969 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
970 {
971         struct sock *sk = sock->sk;
972         struct net *net = sock_net(sk);
973         struct unix_sock *u = unix_sk(sk);
974         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
975         char *sun_path = sunaddr->sun_path;
976         int err;
977         unsigned int hash;
978         struct unix_address *addr;
979         struct hlist_head *list;
980         struct path path = { NULL, NULL };
981 
982         err = -EINVAL;
983         if (sunaddr->sun_family != AF_UNIX)
984                 goto out;
985 
986         if (addr_len == sizeof(short)) {
987                 err = unix_autobind(sock);
988                 goto out;
989         }
990 
991         err = unix_mkname(sunaddr, addr_len, &hash);
992         if (err < 0)
993                 goto out;
994         addr_len = err;
995 
996         if (sun_path[0]) {
997                 umode_t mode = S_IFSOCK |
998                        (SOCK_INODE(sock)->i_mode & ~current_umask());
999                 err = unix_mknod(sun_path, mode, &path);
1000                 if (err) {
1001                         if (err == -EEXIST)
1002                                 err = -EADDRINUSE;
1003                         goto out;
1004                 }
1005         }
1006 
1007         err = mutex_lock_interruptible(&u->readlock);
1008         if (err)
1009                 goto out_put;
1010 
1011         err = -EINVAL;
1012         if (u->addr)
1013                 goto out_up;
1014 
1015         err = -ENOMEM;
1016         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1017         if (!addr)
1018                 goto out_up;
1019 
1020         memcpy(addr->name, sunaddr, addr_len);
1021         addr->len = addr_len;
1022         addr->hash = hash ^ sk->sk_type;
1023         atomic_set(&addr->refcnt, 1);
1024 
1025         if (sun_path[0]) {
1026                 addr->hash = UNIX_HASH_SIZE;
1027                 hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
1028                 spin_lock(&unix_table_lock);
1029                 u->path = path;
1030                 list = &unix_socket_table[hash];
1031         } else {
1032                 spin_lock(&unix_table_lock);
1033                 err = -EADDRINUSE;
1034                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1035                                               sk->sk_type, hash)) {
1036                         unix_release_addr(addr);
1037                         goto out_unlock;
1038                 }
1039 
1040                 list = &unix_socket_table[addr->hash];
1041         }
1042 
1043         err = 0;
1044         __unix_remove_socket(sk);
1045         u->addr = addr;
1046         __unix_insert_socket(list, sk);
1047 
1048 out_unlock:
1049         spin_unlock(&unix_table_lock);
1050 out_up:
1051         mutex_unlock(&u->readlock);
1052 out_put:
1053         if (err)
1054                 path_put(&path);
1055 out:
1056         return err;
1057 }
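
/*
 * Illustrative userspace sketch (hypothetical path, fd and sun): the
 * socket file created by unix_mknod() above persists after close(), so
 * servers commonly unlink a stale name first; binding over an existing
 * file fails with EADDRINUSE, as mapped from -EEXIST above.
 */
#if 0
	unlink("/tmp/demo.sock");	/* remove a stale file; ignore ENOENT */
	bind(fd, (struct sockaddr *)&sun, sizeof(sun));	/* creates the S_IFSOCK inode */
#endif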
1058 
1059 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1060 {
1061         if (unlikely(sk1 == sk2) || !sk2) {
1062                 unix_state_lock(sk1);
1063                 return;
1064         }
1065         if (sk1 < sk2) {
1066                 unix_state_lock(sk1);
1067                 unix_state_lock_nested(sk2);
1068         } else {
1069                 unix_state_lock(sk2);
1070                 unix_state_lock_nested(sk1);
1071         }
1072 }
1073 
1074 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1075 {
1076         if (unlikely(sk1 == sk2) || !sk2) {
1077                 unix_state_unlock(sk1);
1078                 return;
1079         }
1080         unix_state_unlock(sk1);
1081         unix_state_unlock(sk2);
1082 }
1083 
1084 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1085                               int alen, int flags)
1086 {
1087         struct sock *sk = sock->sk;
1088         struct net *net = sock_net(sk);
1089         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1090         struct sock *other;
1091         unsigned int hash;
1092         int err;
1093 
1094         if (addr->sa_family != AF_UNSPEC) {
1095                 err = unix_mkname(sunaddr, alen, &hash);
1096                 if (err < 0)
1097                         goto out;
1098                 alen = err;
1099 
1100                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1101                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1102                         goto out;
1103 
1104 restart:
1105                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1106                 if (!other)
1107                         goto out;
1108 
1109                 unix_state_double_lock(sk, other);
1110 
1111                 /* Apparently VFS overslept socket death. Retry. */
1112                 if (sock_flag(other, SOCK_DEAD)) {
1113                         unix_state_double_unlock(sk, other);
1114                         sock_put(other);
1115                         goto restart;
1116                 }
1117 
1118                 err = -EPERM;
1119                 if (!unix_may_send(sk, other))
1120                         goto out_unlock;
1121 
1122                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1123                 if (err)
1124                         goto out_unlock;
1125 
1126         } else {
1127                 /*
1128                  *      1003.1g breaking connected state with AF_UNSPEC
1129                  */
1130                 other = NULL;
1131                 unix_state_double_lock(sk, other);
1132         }
1133 
1134         /*
1135          * If it was connected, reconnect.
1136          */
1137         if (unix_peer(sk)) {
1138                 struct sock *old_peer = unix_peer(sk);
1139                 unix_peer(sk) = other;
1140                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1141 
1142                 unix_state_double_unlock(sk, other);
1143 
1144                 if (other != old_peer)
1145                         unix_dgram_disconnected(sk, old_peer);
1146                 sock_put(old_peer);
1147         } else {
1148                 unix_peer(sk) = other;
1149                 unix_state_double_unlock(sk, other);
1150         }
1151         return 0;
1152 
1153 out_unlock:
1154         unix_state_double_unlock(sk, other);
1155         sock_put(other);
1156 out:
1157         return err;
1158 }
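
/*
 * Illustrative userspace sketch (hypothetical dfd/peer): setting and then
 * dissolving a datagram peer, the 1003.1g AF_UNSPEC case handled above.
 */
#if 0
	struct sockaddr unspec = { .sa_family = AF_UNSPEC };

	connect(dfd, (struct sockaddr *)&peer, peer_len);	/* set the peer */
	connect(dfd, &unspec, sizeof(unspec));	/* break the association */
#endif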
1159 
1160 static long unix_wait_for_peer(struct sock *other, long timeo)
1161 {
1162         struct unix_sock *u = unix_sk(other);
1163         int sched;
1164         DEFINE_WAIT(wait);
1165 
1166         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1167 
1168         sched = !sock_flag(other, SOCK_DEAD) &&
1169                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1170                 unix_recvq_full(other);
1171 
1172         unix_state_unlock(other);
1173 
1174         if (sched)
1175                 timeo = schedule_timeout(timeo);
1176 
1177         finish_wait(&u->peer_wait, &wait);
1178         return timeo;
1179 }
1180 
1181 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1182                                int addr_len, int flags)
1183 {
1184         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1185         struct sock *sk = sock->sk;
1186         struct net *net = sock_net(sk);
1187         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1188         struct sock *newsk = NULL;
1189         struct sock *other = NULL;
1190         struct sk_buff *skb = NULL;
1191         unsigned int hash;
1192         int st;
1193         int err;
1194         long timeo;
1195 
1196         err = unix_mkname(sunaddr, addr_len, &hash);
1197         if (err < 0)
1198                 goto out;
1199         addr_len = err;
1200 
1201         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1202             (err = unix_autobind(sock)) != 0)
1203                 goto out;
1204 
1205         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1206 
1207         /* First of all allocate resources.
1208            If we do it only after the state is locked,
1209            we will have to recheck everything again in any case.
1210          */
1211 
1212         err = -ENOMEM;
1213 
1214         /* create new sock for complete connection */
1215         newsk = unix_create1(sock_net(sk), NULL);
1216         if (newsk == NULL)
1217                 goto out;
1218 
1219         /* Allocate skb for sending to listening sock */
1220         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1221         if (skb == NULL)
1222                 goto out;
1223 
1224 restart:
1225         /*  Find listening sock. */
1226         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1227         if (!other)
1228                 goto out;
1229 
1230         /* Latch state of peer */
1231         unix_state_lock(other);
1232 
1233         /* Apparently VFS overslept socket death. Retry. */
1234         if (sock_flag(other, SOCK_DEAD)) {
1235                 unix_state_unlock(other);
1236                 sock_put(other);
1237                 goto restart;
1238         }
1239 
1240         err = -ECONNREFUSED;
1241         if (other->sk_state != TCP_LISTEN)
1242                 goto out_unlock;
1243         if (other->sk_shutdown & RCV_SHUTDOWN)
1244                 goto out_unlock;
1245 
1246         if (unix_recvq_full(other)) {
1247                 err = -EAGAIN;
1248                 if (!timeo)
1249                         goto out_unlock;
1250 
1251                 timeo = unix_wait_for_peer(other, timeo);
1252 
1253                 err = sock_intr_errno(timeo);
1254                 if (signal_pending(current))
1255                         goto out;
1256                 sock_put(other);
1257                 goto restart;
1258         }
1259 
1260         /* Latch our state.
1261 
1262            This is a tricky place. We need to grab our state lock and cannot
1263            drop the lock on the peer. It is dangerous because a deadlock is
1264            possible. The connect-to-self case and simultaneous
1265            connection attempts are eliminated by checking the socket
1266            state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1267            check this before attempting to grab the lock.
1268 
1269            Well, and we have to recheck the state after the socket is locked.
1270          */
1271         st = sk->sk_state;
1272 
1273         switch (st) {
1274         case TCP_CLOSE:
1275                 /* This is ok... continue with connect */
1276                 break;
1277         case TCP_ESTABLISHED:
1278                 /* Socket is already connected */
1279                 err = -EISCONN;
1280                 goto out_unlock;
1281         default:
1282                 err = -EINVAL;
1283                 goto out_unlock;
1284         }
1285 
1286         unix_state_lock_nested(sk);
1287 
1288         if (sk->sk_state != st) {
1289                 unix_state_unlock(sk);
1290                 unix_state_unlock(other);
1291                 sock_put(other);
1292                 goto restart;
1293         }
1294 
1295         err = security_unix_stream_connect(sk, other, newsk);
1296         if (err) {
1297                 unix_state_unlock(sk);
1298                 goto out_unlock;
1299         }
1300 
1301         /* The way is open! Quickly set all the necessary fields... */
1302 
1303         sock_hold(sk);
1304         unix_peer(newsk)        = sk;
1305         newsk->sk_state         = TCP_ESTABLISHED;
1306         newsk->sk_type          = sk->sk_type;
1307         init_peercred(newsk);
1308         newu = unix_sk(newsk);
1309         RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1310         otheru = unix_sk(other);
1311 
1312         /* copy address information from the listening to the new sock */
1313         if (otheru->addr) {
1314                 atomic_inc(&otheru->addr->refcnt);
1315                 newu->addr = otheru->addr;
1316         }
1317         if (otheru->path.dentry) {
1318                 path_get(&otheru->path);
1319                 newu->path = otheru->path;
1320         }
1321 
1322         /* Set credentials */
1323         copy_peercred(sk, other);
1324 
1325         sock->state     = SS_CONNECTED;
1326         sk->sk_state    = TCP_ESTABLISHED;
1327         sock_hold(newsk);
1328 
1329         smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
1330         unix_peer(sk)   = newsk;
1331 
1332         unix_state_unlock(sk);
1333 
1334         /* take ten and send info to the listening sock */
1335         spin_lock(&other->sk_receive_queue.lock);
1336         __skb_queue_tail(&other->sk_receive_queue, skb);
1337         spin_unlock(&other->sk_receive_queue.lock);
1338         unix_state_unlock(other);
1339         other->sk_data_ready(other, 0);
1340         sock_put(other);
1341         return 0;
1342 
1343 out_unlock:
1344         if (other)
1345                 unix_state_unlock(other);
1346 
1347 out:
1348         kfree_skb(skb);
1349         if (newsk)
1350                 unix_release_sock(newsk, 0);
1351         if (other)
1352                 sock_put(other);
1353         return err;
1354 }
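
/*
 * Note the nonblocking behaviour implemented above: unlike TCP, a
 * nonblocking connect() on an AF_UNIX stream socket whose listener
 * backlog is full fails immediately with EAGAIN instead of returning
 * EINPROGRESS. A hedged sketch (hypothetical cfd/sun/len):
 */
#if 0
	fcntl(cfd, F_SETFL, fcntl(cfd, F_GETFL) | O_NONBLOCK);
	if (connect(cfd, (struct sockaddr *)&sun, len) < 0 && errno == EAGAIN) {
		/* the listener's queue is full; back off and retry */
	}
#endif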
1355 
1356 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1357 {
1358         struct sock *ska = socka->sk, *skb = sockb->sk;
1359 
1360         /* Join our sockets back to back */
1361         sock_hold(ska);
1362         sock_hold(skb);
1363         unix_peer(ska) = skb;
1364         unix_peer(skb) = ska;
1365         init_peercred(ska);
1366         init_peercred(skb);
1367 
1368         if (ska->sk_type != SOCK_DGRAM) {
1369                 ska->sk_state = TCP_ESTABLISHED;
1370                 skb->sk_state = TCP_ESTABLISHED;
1371                 socka->state  = SS_CONNECTED;
1372                 sockb->state  = SS_CONNECTED;
1373         }
1374         return 0;
1375 }
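
/*
 * Illustrative userspace sketch (hypothetical): socketpair() is the usual
 * way to obtain a pre-connected pair, e.g. around fork(); both ends also
 * carry peer credentials thanks to init_peercred() above.
 */
#if 0
	int sv[2];

	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
	write(sv[0], "ping", 4);	/* readable on sv[1] */
#endif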
1376 
1377 static void unix_sock_inherit_flags(const struct socket *old,
1378                                     struct socket *new)
1379 {
1380         if (test_bit(SOCK_PASSCRED, &old->flags))
1381                 set_bit(SOCK_PASSCRED, &new->flags);
1382         if (test_bit(SOCK_PASSSEC, &old->flags))
1383                 set_bit(SOCK_PASSSEC, &new->flags);
1384 }
1385 
1386 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1387 {
1388         struct sock *sk = sock->sk;
1389         struct sock *tsk;
1390         struct sk_buff *skb;
1391         int err;
1392 
1393         err = -EOPNOTSUPP;
1394         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1395                 goto out;
1396 
1397         err = -EINVAL;
1398         if (sk->sk_state != TCP_LISTEN)
1399                 goto out;
1400 
1401         /* If the socket state is TCP_LISTEN it cannot change (for now...),
1402          * so no locks are necessary.
1403          */
1404 
1405         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1406         if (!skb) {
1407                 /* This means receive shutdown. */
1408                 if (err == 0)
1409                         err = -EINVAL;
1410                 goto out;
1411         }
1412 
1413         tsk = skb->sk;
1414         skb_free_datagram(sk, skb);
1415         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1416 
1417         /* attach accepted sock to socket */
1418         unix_state_lock(tsk);
1419         newsock->state = SS_CONNECTED;
1420         unix_sock_inherit_flags(sock, newsock);
1421         sock_graft(tsk, newsock);
1422         unix_state_unlock(tsk);
1423         return 0;
1424 
1425 out:
1426         return err;
1427 }
1428 
1429 
1430 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1431 {
1432         struct sock *sk = sock->sk;
1433         struct unix_sock *u;
1434         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1435         int err = 0;
1436 
1437         if (peer) {
1438                 sk = unix_peer_get(sk);
1439 
1440                 err = -ENOTCONN;
1441                 if (!sk)
1442                         goto out;
1443                 err = 0;
1444         } else {
1445                 sock_hold(sk);
1446         }
1447 
1448         u = unix_sk(sk);
1449         unix_state_lock(sk);
1450         if (!u->addr) {
1451                 sunaddr->sun_family = AF_UNIX;
1452                 sunaddr->sun_path[0] = 0;
1453                 *uaddr_len = sizeof(short);
1454         } else {
1455                 struct unix_address *addr = u->addr;
1456 
1457                 *uaddr_len = addr->len;
1458                 memcpy(sunaddr, addr->name, *uaddr_len);
1459         }
1460         unix_state_unlock(sk);
1461         sock_put(sk);
1462 out:
1463         return err;
1464 }
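
/*
 * Illustrative userspace sketch (hypothetical fd): as implemented above,
 * getsockname() on an unbound socket reports only the address family,
 * i.e. a length of sizeof(sa_family_t) and no path bytes.
 */
#if 0
	struct sockaddr_un sun;
	socklen_t len = sizeof(sun);

	getsockname(fd, (struct sockaddr *)&sun, &len);
	if (len == sizeof(sa_family_t))
		;	/* unbound: neither a pathname nor an abstract name */
#endif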
1465 
1466 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1467 {
1468         int i;
1469 
1470         scm->fp = UNIXCB(skb).fp;
1471         UNIXCB(skb).fp = NULL;
1472 
1473         for (i = scm->fp->count-1; i >= 0; i--)
1474                 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1475 }
1476 
1477 static void unix_destruct_scm(struct sk_buff *skb)
1478 {
1479         struct scm_cookie scm;
1480         memset(&scm, 0, sizeof(scm));
1481         scm.pid  = UNIXCB(skb).pid;
1482         if (UNIXCB(skb).fp)
1483                 unix_detach_fds(&scm, skb);
1484 
1485         /* Alas, it calls VFS */
1486         /* So fscking what? fput() had been SMP-safe since the last Summer */
1487         scm_destroy(&scm);
1488         sock_wfree(skb);
1489 }
1490 
1491 /*
1492  * The "user->unix_inflight" variable is protected by the garbage
1493  * collection lock, and we just read it locklessly here. If you go
1494  * over the limit, there might be a tiny race in actually noticing
1495  * it across threads. Tough.
1496  */
1497 static inline bool too_many_unix_fds(struct task_struct *p)
1498 {
1499         struct user_struct *user = current_user();
1500 
1501         if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1502                 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1503         return false;
1504 }
1505 
1506 #define MAX_RECURSION_LEVEL 4
1507 
1508 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1509 {
1510         int i;
1511         unsigned char max_level = 0;
1512         int unix_sock_count = 0;
1513 
1514         if (too_many_unix_fds(current))
1515                 return -ETOOMANYREFS;
1516 
1517         for (i = scm->fp->count - 1; i >= 0; i--) {
1518                 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1519 
1520                 if (sk) {
1521                         unix_sock_count++;
1522                         max_level = max(max_level,
1523                                         unix_sk(sk)->recursion_level);
1524                 }
1525         }
1526         if (unlikely(max_level > MAX_RECURSION_LEVEL))
1527                 return -ETOOMANYREFS;
1528 
1529         /*
1530          * Need to duplicate file references for the sake of garbage
1531          * collection.  Otherwise a socket in the fps might become a
1532          * candidate for GC while the skb is not yet queued.
1533          */
1534         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1535         if (!UNIXCB(skb).fp)
1536                 return -ENOMEM;
1537 
1538         for (i = scm->fp->count - 1; i >= 0; i--)
1539                 unix_inflight(scm->fp->user, scm->fp->fp[i]);
1540         return max_level;
1541 }
1542 
1543 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1544 {
1545         int err = 0;
1546 
1547         UNIXCB(skb).pid  = get_pid(scm->pid);
1548         UNIXCB(skb).uid = scm->creds.uid;
1549         UNIXCB(skb).gid = scm->creds.gid;
1550         UNIXCB(skb).fp = NULL;
1551         if (scm->fp && send_fds)
1552                 err = unix_attach_fds(scm, skb);
1553 
1554         skb->destructor = unix_destruct_scm;
1555         return err;
1556 }
1557 
1558 /*
1559  * Some apps rely on write() giving SCM_CREDENTIALS.
1560  * We include credentials if the source or destination socket
1561  * asserted SOCK_PASSCRED.
1562  */
1563 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1564                             const struct sock *other)
1565 {
1566         if (UNIXCB(skb).pid)
1567                 return;
1568         if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1569             !other->sk_socket ||
1570             test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1571                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1572                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1573         }
1574 }
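     /*
      * Hedged userspace counterpart (placeholder names, not part of this
      * file): a receiver opts in to the credentials attached above with
      * SO_PASSCRED and reads them back as an SCM_CREDENTIALS control
      * message after recvmsg():
      *
      *     int one = 1;
      *     struct cmsghdr *cm;
      *     setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
      *     recvmsg(fd, &msg, 0);
      *     for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
      *             if (cm->cmsg_level == SOL_SOCKET &&
      *                 cm->cmsg_type == SCM_CREDENTIALS) {
      *                     struct ucred *uc = (struct ucred *)CMSG_DATA(cm);
      *                     // uc->pid, uc->uid, uc->gid describe the sender
      *             }
      */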
1575 
1576 /*
1577  *      Send AF_UNIX data.
1578  */
1579 
1580 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1581                               struct msghdr *msg, size_t len)
1582 {
1583         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1584         struct sock *sk = sock->sk;
1585         struct net *net = sock_net(sk);
1586         struct unix_sock *u = unix_sk(sk);
1587         struct sockaddr_un *sunaddr = msg->msg_name;
1588         struct sock *other = NULL;
1589         int namelen = 0; /* dummy init to silence a GCC warning */
1590         int err;
1591         unsigned int hash;
1592         struct sk_buff *skb;
1593         long timeo;
1594         struct scm_cookie tmp_scm;
1595         int max_level;
1596         int data_len = 0;
1597         int sk_locked;
1598 
1599         if (NULL == siocb->scm)
1600                 siocb->scm = &tmp_scm;
1601         wait_for_unix_gc();
1602         err = scm_send(sock, msg, siocb->scm, false);
1603         if (err < 0)
1604                 return err;
1605 
1606         err = -EOPNOTSUPP;
1607         if (msg->msg_flags&MSG_OOB)
1608                 goto out;
1609 
1610         if (msg->msg_namelen) {
1611                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1612                 if (err < 0)
1613                         goto out;
1614                 namelen = err;
1615         } else {
1616                 sunaddr = NULL;
1617                 err = -ENOTCONN;
1618                 other = unix_peer_get(sk);
1619                 if (!other)
1620                         goto out;
1621         }
1622 
1623         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1624             && (err = unix_autobind(sock)) != 0)
1625                 goto out;
1626 
1627         err = -EMSGSIZE;
1628         if (len > sk->sk_sndbuf - 32)
1629                 goto out;
1630 
1631         if (len > SKB_MAX_ALLOC)
1632                 data_len = min_t(size_t,
1633                                  len - SKB_MAX_ALLOC,
1634                                  MAX_SKB_FRAGS * PAGE_SIZE);
1635 
1636         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1637                                    msg->msg_flags & MSG_DONTWAIT, &err);
1638         if (skb == NULL)
1639                 goto out;
1640 
1641         err = unix_scm_to_skb(siocb->scm, skb, true);
1642         if (err < 0)
1643                 goto out_free;
1644         max_level = err + 1;
1645         unix_get_secdata(siocb->scm, skb);
1646 
1647         skb_put(skb, len - data_len);
1648         skb->data_len = data_len;
1649         skb->len = len;
1650         err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len);
1651         if (err)
1652                 goto out_free;
1653 
1654         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1655 
1656 restart:
1657         if (!other) {
1658                 err = -ECONNRESET;
1659                 if (sunaddr == NULL)
1660                         goto out_free;
1661 
1662                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1663                                         hash, &err);
1664                 if (other == NULL)
1665                         goto out_free;
1666         }
1667 
1668         if (sk_filter(other, skb) < 0) {
1669                 /* Toss the packet but do not return any error to the sender */
1670                 err = len;
1671                 goto out_free;
1672         }
1673 
1674         sk_locked = 0;
1675         unix_state_lock(other);
1676 restart_locked:
1677         err = -EPERM;
1678         if (!unix_may_send(sk, other))
1679                 goto out_unlock;
1680 
1681         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1682                 /*
1683                  *      Check with 1003.1g - what should a
1684                  *      datagram error return here?
1685                  */
1686                 unix_state_unlock(other);
1687                 sock_put(other);
1688 
1689                 if (!sk_locked)
1690                         unix_state_lock(sk);
1691 
1692                 err = 0;
1693                 if (unix_peer(sk) == other) {
1694                         unix_peer(sk) = NULL;
1695                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1696 
1697                         unix_state_unlock(sk);
1698 
1699                         unix_dgram_disconnected(sk, other);
1700                         sock_put(other);
1701                         err = -ECONNREFUSED;
1702                 } else {
1703                         unix_state_unlock(sk);
1704                 }
1705 
1706                 other = NULL;
1707                 if (err)
1708                         goto out_free;
1709                 goto restart;
1710         }
1711 
1712         err = -EPIPE;
1713         if (other->sk_shutdown & RCV_SHUTDOWN)
1714                 goto out_unlock;
1715 
1716         if (sk->sk_type != SOCK_SEQPACKET) {
1717                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1718                 if (err)
1719                         goto out_unlock;
1720         }
1721 
1722         if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1723                 if (timeo) {
1724                         timeo = unix_wait_for_peer(other, timeo);
1725 
1726                         err = sock_intr_errno(timeo);
1727                         if (signal_pending(current))
1728                                 goto out_free;
1729 
1730                         goto restart;
1731                 }
1732 
1733                 if (!sk_locked) {
1734                         unix_state_unlock(other);
1735                         unix_state_double_lock(sk, other);
1736                 }
1737 
1738                 if (unix_peer(sk) != other ||
1739                     unix_dgram_peer_wake_me(sk, other)) {
1740                         err = -EAGAIN;
1741                         sk_locked = 1;
1742                         goto out_unlock;
1743                 }
1744 
1745                 if (!sk_locked) {
1746                         sk_locked = 1;
1747                         goto restart_locked;
1748                 }
1749         }
1750 
1751         if (unlikely(sk_locked))
1752                 unix_state_unlock(sk);
1753 
1754         if (sock_flag(other, SOCK_RCVTSTAMP))
1755                 __net_timestamp(skb);
1756         maybe_add_creds(skb, sock, other);
1757         skb_queue_tail(&other->sk_receive_queue, skb);
1758         if (max_level > unix_sk(other)->recursion_level)
1759                 unix_sk(other)->recursion_level = max_level;
1760         unix_state_unlock(other);
1761         other->sk_data_ready(other, len);
1762         sock_put(other);
1763         scm_destroy(siocb->scm);
1764         return len;
1765 
1766 out_unlock:
1767         if (sk_locked)
1768                 unix_state_unlock(sk);
1769         unix_state_unlock(other);
1770 out_free:
1771         kfree_skb(skb);
1772 out:
1773         if (other)
1774                 sock_put(other);
1775         scm_destroy(siocb->scm);
1776         return err;
1777 }
1778 
1779 
1780 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1781                                struct msghdr *msg, size_t len)
1782 {
1783         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1784         struct sock *sk = sock->sk;
1785         struct sock *other = NULL;
1786         int err, size;
1787         struct sk_buff *skb;
1788         int sent = 0;
1789         struct scm_cookie tmp_scm;
1790         bool fds_sent = false;
1791         int max_level;
1792 
1793         if (NULL == siocb->scm)
1794                 siocb->scm = &tmp_scm;
1795         wait_for_unix_gc();
1796         err = scm_send(sock, msg, siocb->scm, false);
1797         if (err < 0)
1798                 return err;
1799 
1800         err = -EOPNOTSUPP;
1801         if (msg->msg_flags&MSG_OOB)
1802                 goto out_err;
1803 
1804         if (msg->msg_namelen) {
1805                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1806                 goto out_err;
1807         } else {
1808                 err = -ENOTCONN;
1809                 other = unix_peer(sk);
1810                 if (!other)
1811                         goto out_err;
1812         }
1813 
1814         if (sk->sk_shutdown & SEND_SHUTDOWN)
1815                 goto pipe_err;
1816 
1817         while (sent < len) {
1818                 /*
1819                  *      Optimisation: in practice fewer than 0.01% of X
1820                  *      messages need to be broken up.
1821                  */
1822 
1823                 size = len-sent;
1824 
1825                 /* Keep two messages in the pipe so it schedules better */
1826                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1827                         size = (sk->sk_sndbuf >> 1) - 64;
1828 
1829                 if (size > SKB_MAX_ALLOC)
1830                         size = SKB_MAX_ALLOC;
1831 
1832                 /*
1833                  *      Grab a buffer
1834                  */
1835 
1836                 skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1837                                           &err);
1838 
1839                 if (skb == NULL)
1840                         goto out_err;
1841 
1842                 /*
1843                  *      If you pass two values to sock_alloc_send_skb
1844                  *      it tries to grab the large buffer with GFP_NOFS
1845                  *      (which can fail easily), and if that fails it grabs
1846                  *      the fallback-size buffer, which is under a page and
1847                  *      will succeed. [Alan]
1848                  */
1849                 size = min_t(int, size, skb_tailroom(skb));
1850 
1851 
1852                 /* Only send the fds in the first buffer */
1853                 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1854                 if (err < 0) {
1855                         kfree_skb(skb);
1856                         goto out_err;
1857                 }
1858                 max_level = err + 1;
1859                 fds_sent = true;
1860 
1861                 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1862                 if (err) {
1863                         kfree_skb(skb);
1864                         goto out_err;
1865                 }
1866 
1867                 unix_state_lock(other);
1868 
1869                 if (sock_flag(other, SOCK_DEAD) ||
1870                     (other->sk_shutdown & RCV_SHUTDOWN))
1871                         goto pipe_err_free;
1872 
1873                 maybe_add_creds(skb, sock, other);
1874                 skb_queue_tail(&other->sk_receive_queue, skb);
1875                 if (max_level > unix_sk(other)->recursion_level)
1876                         unix_sk(other)->recursion_level = max_level;
1877                 unix_state_unlock(other);
1878                 other->sk_data_ready(other, size);
1879                 sent += size;
1880         }
1881 
1882         scm_destroy(siocb->scm);
1883         siocb->scm = NULL;
1884 
1885         return sent;
1886 
1887 pipe_err_free:
1888         unix_state_unlock(other);
1889         kfree_skb(skb);
1890 pipe_err:
1891         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1892                 send_sig(SIGPIPE, current, 0);
1893         err = -EPIPE;
1894 out_err:
1895         scm_destroy(siocb->scm);
1896         siocb->scm = NULL;
1897         return sent ? : err;
1898 }
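     /*
      * Note on the return value above: once any bytes have been queued
      * to the peer, an error or signal makes the call report the partial
      * count ("sent ? : err"); SIGPIPE is raised only when a write on a
      * shut-down connection made no progress at all.
      */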
1899 
1900 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1901                                   struct msghdr *msg, size_t len)
1902 {
1903         int err;
1904         struct sock *sk = sock->sk;
1905 
1906         err = sock_error(sk);
1907         if (err)
1908                 return err;
1909 
1910         if (sk->sk_state != TCP_ESTABLISHED)
1911                 return -ENOTCONN;
1912 
1913         if (msg->msg_namelen)
1914                 msg->msg_namelen = 0;
1915 
1916         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1917 }
1918 
1919 static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1920                               struct msghdr *msg, size_t size,
1921                               int flags)
1922 {
1923         struct sock *sk = sock->sk;
1924 
1925         if (sk->sk_state != TCP_ESTABLISHED)
1926                 return -ENOTCONN;
1927 
1928         return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1929 }
1930 
1931 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1932 {
1933         struct unix_sock *u = unix_sk(sk);
1934 
1935         if (u->addr) {
1936                 msg->msg_namelen = u->addr->len;
1937                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1938         }
1939 }
1940 
1941 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1942                               struct msghdr *msg, size_t size,
1943                               int flags)
1944 {
1945         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1946         struct scm_cookie tmp_scm;
1947         struct sock *sk = sock->sk;
1948         struct unix_sock *u = unix_sk(sk);
1949         int noblock = flags & MSG_DONTWAIT;
1950         struct sk_buff *skb;
1951         int err;
1952         int peeked, skip;
1953 
1954         err = -EOPNOTSUPP;
1955         if (flags&MSG_OOB)
1956                 goto out;
1957 
1958         err = mutex_lock_interruptible(&u->readlock);
1959         if (unlikely(err)) {
1960                 /* recvmsg() in non-blocking mode is supposed to return -EAGAIN;
1961                  * sk_rcvtimeo is not honored by mutex_lock_interruptible()
1962                  */
1963                 err = noblock ? -EAGAIN : -ERESTARTSYS;
1964                 goto out;
1965         }
1966 
1967         skip = sk_peek_offset(sk, flags);
1968 
1969         skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
1970         if (!skb) {
1971                 unix_state_lock(sk);
1972                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1973                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1974                     (sk->sk_shutdown & RCV_SHUTDOWN))
1975                         err = 0;
1976                 unix_state_unlock(sk);
1977                 goto out_unlock;
1978         }
1979 
1980         wake_up_interruptible_sync_poll(&u->peer_wait,
1981                                         POLLOUT | POLLWRNORM | POLLWRBAND);
1982 
1983         if (ccs_socket_post_recvmsg_permission(sk, skb, flags)) {
1984                 err = -EAGAIN; /* Hopefully less harmful than -EPERM. */
1985                 goto out_unlock;
1986         }
1987         if (msg->msg_name)
1988                 unix_copy_addr(msg, skb->sk);
1989 
1990         if (size > skb->len - skip)
1991                 size = skb->len - skip;
1992         else if (size < skb->len - skip)
1993                 msg->msg_flags |= MSG_TRUNC;
1994 
1995         err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
1996         if (err)
1997                 goto out_free;
1998 
1999         if (sock_flag(sk, SOCK_RCVTSTAMP))
2000                 __sock_recv_timestamp(msg, sk, skb);
2001 
2002         if (!siocb->scm) {
2003                 siocb->scm = &tmp_scm;
2004                 memset(&tmp_scm, 0, sizeof(tmp_scm));
2005         }
2006         scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2007         unix_set_secdata(siocb->scm, skb);
2008 
2009         if (!(flags & MSG_PEEK)) {
2010                 if (UNIXCB(skb).fp)
2011                         unix_detach_fds(siocb->scm, skb);
2012 
2013                 sk_peek_offset_bwd(sk, skb->len);
2014         } else {
2015                 /* It is questionable: on PEEK we could:
2016                    - not return fds - good, but too simple 8)
2017                    - return fds, and not return them on read (old strategy,
2018                      apparently wrong)
2019                    - clone fds (I chose this for now; it is the most universal
2020                      solution)
2021 
2022                    POSIX 1003.1g does not actually define this clearly
2023                    at all. POSIX 1003.1g doesn't define a lot of things
2024                    clearly, however!
2025 
2026                 */
2027 
2028                 sk_peek_offset_fwd(sk, size);
2029 
2030                 if (UNIXCB(skb).fp)
2031                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2032         }
2033         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2034 
2035         scm_recv(sock, msg, siocb->scm, flags);
2036 
2037 out_free:
2038         skb_free_datagram(sk, skb);
2039 out_unlock:
2040         mutex_unlock(&u->readlock);
2041 out:
2042         return err;
2043 }
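     /*
      * Illustrative sketch of the peek-offset handling above (assumed
      * usage; "fd" is a placeholder and the queued datagram is assumed
      * to be at least 32 bytes): with SO_PEEK_OFF set, repeated MSG_PEEK
      * reads walk forward through the data instead of re-reading its
      * head, courtesy of sk_peek_offset_fwd():
      *
      *     int off = 0;
      *     setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
      *     recv(fd, buf, 16, MSG_PEEK);  // bytes 0..15
      *     recv(fd, buf, 16, MSG_PEEK);  // bytes 16..31 of the same skb
      */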
2044 
2045 /*
2046  *      Sleep until more data has arrived. But check for races.
2047  */
2048 static long unix_stream_data_wait(struct sock *sk, long timeo,
2049                                   struct sk_buff *last)
2050 {
2051         DEFINE_WAIT(wait);
2052 
2053         unix_state_lock(sk);
2054 
2055         for (;;) {
2056                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2057 
2058                 if (skb_peek_tail(&sk->sk_receive_queue) != last ||
2059                     sk->sk_err ||
2060                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2061                     signal_pending(current) ||
2062                     !timeo)
2063                         break;
2064 
2065                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2066                 unix_state_unlock(sk);
2067                 timeo = schedule_timeout(timeo);
2068                 unix_state_lock(sk);
2069 
2070                 if (sock_flag(sk, SOCK_DEAD))
2071                         break;
2072 
2073                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2074         }
2075 
2076         finish_wait(sk_sleep(sk), &wait);
2077         unix_state_unlock(sk);
2078         return timeo;
2079 }
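     /*
      * The loop above is the usual prepare_to_wait()/recheck pattern:
      * every wakeup condition (new tail skb, pending error, shutdown,
      * signal, expired timeout) is re-tested under unix_state_lock
      * before each sleep, so a wakeup racing with the check cannot be
      * lost.
      */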
2080 
2081 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
2082                                struct msghdr *msg, size_t size,
2083                                int flags)
2084 {
2085         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
2086         struct scm_cookie tmp_scm;
2087         struct sock *sk = sock->sk;
2088         struct unix_sock *u = unix_sk(sk);
2089         struct sockaddr_un *sunaddr = msg->msg_name;
2090         int copied = 0;
2091         int noblock = flags & MSG_DONTWAIT;
2092         int check_creds = 0;
2093         int target;
2094         int err = 0;
2095         long timeo;
2096         int skip;
2097 
2098         err = -EINVAL;
2099         if (sk->sk_state != TCP_ESTABLISHED)
2100                 goto out;
2101 
2102         err = -EOPNOTSUPP;
2103         if (flags&MSG_OOB)
2104                 goto out;
2105 
2106         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
2107         timeo = sock_rcvtimeo(sk, noblock);
2108 
2109         /* Lock the socket to prevent the queue from being reordered
2110          * while we sleep copying data out in memcpy_toiovec
2111          */
2112 
2113         if (!siocb->scm) {
2114                 siocb->scm = &tmp_scm;
2115                 memset(&tmp_scm, 0, sizeof(tmp_scm));
2116         }
2117 
2118         mutex_lock(&u->readlock);
2119 
2120         do {
2121                 int chunk;
2122                 struct sk_buff *skb, *last;
2123 
2124                 unix_state_lock(sk);
2125                 if (sock_flag(sk, SOCK_DEAD)) {
2126                         err = -ECONNRESET;
2127                         goto unlock;
2128                 }
2129                 last = skb = skb_peek(&sk->sk_receive_queue);
2130 again:
2131                 if (skb == NULL) {
2132                         unix_sk(sk)->recursion_level = 0;
2133                         if (copied >= target)
2134                                 goto unlock;
2135 
2136                         /*
2137                          *      POSIX 1003.1g mandates this order.
2138                          */
2139 
2140                         err = sock_error(sk);
2141                         if (err)
2142                                 goto unlock;
2143                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2144                                 goto unlock;
2145 
2146                         unix_state_unlock(sk);
2147                         err = -EAGAIN;
2148                         if (!timeo)
2149                                 break;
2150                         mutex_unlock(&u->readlock);
2151 
2152                         timeo = unix_stream_data_wait(sk, timeo, last);
2153 
2154                         if (signal_pending(current)) {
2155                                 err = sock_intr_errno(timeo);
2156                                 goto out;
2157                         }
2158 
2159                         mutex_lock(&u->readlock);
2160                         continue;
2161  unlock:
2162                         unix_state_unlock(sk);
2163                         break;
2164                 }
2165 
2166                 skip = sk_peek_offset(sk, flags);
2167                 while (skip >= skb->len) {
2168                         skip -= skb->len;
2169                         last = skb;
2170                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2171                         if (!skb)
2172                                 goto again;
2173                 }
2174 
2175                 unix_state_unlock(sk);
2176 
2177                 if (check_creds) {
2178                         /* Never glue messages from different writers */
2179                         if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
2180                             !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) ||
2181                             !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))
2182                                 break;
2183                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2184                         /* Copy credentials */
2185                         scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2186                         check_creds = 1;
2187                 }
2188 
2189                 /* Copy address just once */
2190                 if (sunaddr) {
2191                         unix_copy_addr(msg, skb->sk);
2192                         sunaddr = NULL;
2193                 }
2194 
2195                 chunk = min_t(unsigned int, skb->len - skip, size);
2196                 if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
2197                         if (copied == 0)
2198                                 copied = -EFAULT;
2199                         break;
2200                 }
2201                 copied += chunk;
2202                 size -= chunk;
2203 
2204                 /* Mark read part of skb as used */
2205                 if (!(flags & MSG_PEEK)) {
2206                         skb_pull(skb, chunk);
2207 
2208                         sk_peek_offset_bwd(sk, chunk);
2209 
2210                         if (UNIXCB(skb).fp)
2211                                 unix_detach_fds(siocb->scm, skb);
2212 
2213                         if (skb->len)
2214                                 break;
2215 
2216                         skb_unlink(skb, &sk->sk_receive_queue);
2217                         consume_skb(skb);
2218 
2219                         if (siocb->scm->fp)
2220                                 break;
2221                 } else {
2222                         /* It is questionable, see note in unix_dgram_recvmsg.
2223                          */
2224                         if (UNIXCB(skb).fp)
2225                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2226 
2227                         if (skip) {
2228                                 sk_peek_offset_fwd(sk, chunk);
2229                                 skip -= chunk;
2230                         }
2231 
2232                         if (UNIXCB(skb).fp)
2233                                 break;
2234 
2235                         last = skb;
2236                         unix_state_lock(sk);
2237                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2238                         if (skb)
2239                                 goto again;
2240                         unix_state_unlock(sk);
2241                         break;
2242                 }
2243         } while (size);
2244 
2245         mutex_unlock(&u->readlock);
2246         scm_recv(sock, msg, siocb->scm, flags);
2247 out:
2248         return copied ? : err;
2249 }
2250 
2251 static int unix_shutdown(struct socket *sock, int mode)
2252 {
2253         struct sock *sk = sock->sk;
2254         struct sock *other;
2255 
2256         if (mode < SHUT_RD || mode > SHUT_RDWR)
2257                 return -EINVAL;
2258         /* This maps:
2259          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2260          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2261          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2262          */
2263         ++mode;
2264 
2265         unix_state_lock(sk);
2266         sk->sk_shutdown |= mode;
2267         other = unix_peer(sk);
2268         if (other)
2269                 sock_hold(other);
2270         unix_state_unlock(sk);
2271         sk->sk_state_change(sk);
2272 
2273         if (other &&
2274                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2275 
2276                 int peer_mode = 0;
2277 
2278                 if (mode&RCV_SHUTDOWN)
2279                         peer_mode |= SEND_SHUTDOWN;
2280                 if (mode&SEND_SHUTDOWN)
2281                         peer_mode |= RCV_SHUTDOWN;
2282                 unix_state_lock(other);
2283                 other->sk_shutdown |= peer_mode;
2284                 unix_state_unlock(other);
2285                 other->sk_state_change(other);
2286                 if (peer_mode == SHUTDOWN_MASK)
2287                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2288                 else if (peer_mode & RCV_SHUTDOWN)
2289                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2290         }
2291         if (other)
2292                 sock_put(other);
2293 
2294         return 0;
2295 }
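     /*
      * Hedged userspace view of the half-close above ("fd" is a
      * placeholder): after the writer calls shutdown(fd, SHUT_WR) the
      * peer's sk_shutdown gains RCV_SHUTDOWN, its pollers are woken with
      * POLL_IN, and once the queue drains a read() on the peer returns
      * 0 (EOF):
      *
      *     shutdown(fd, SHUT_WR);  // we keep reading, stop sending
      *     // peer: poll() -> POLLIN | POLLRDHUP, read() -> 0 at EOF
      */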
2296 
2297 long unix_inq_len(struct sock *sk)
2298 {
2299         struct sk_buff *skb;
2300         long amount = 0;
2301 
2302         if (sk->sk_state == TCP_LISTEN)
2303                 return -EINVAL;
2304 
2305         spin_lock(&sk->sk_receive_queue.lock);
2306         if (sk->sk_type == SOCK_STREAM ||
2307             sk->sk_type == SOCK_SEQPACKET) {
2308                 skb_queue_walk(&sk->sk_receive_queue, skb)
2309                         amount += skb->len;
2310         } else {
2311                 skb = skb_peek(&sk->sk_receive_queue);
2312                 if (skb)
2313                         amount = skb->len;
2314         }
2315         spin_unlock(&sk->sk_receive_queue.lock);
2316 
2317         return amount;
2318 }
2319 EXPORT_SYMBOL_GPL(unix_inq_len);
2320 
2321 long unix_outq_len(struct sock *sk)
2322 {
2323         return sk_wmem_alloc_get(sk);
2324 }
2325 EXPORT_SYMBOL_GPL(unix_outq_len);
2326 
2327 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2328 {
2329         struct sock *sk = sock->sk;
2330         long amount = 0;
2331         int err;
2332 
2333         switch (cmd) {
2334         case SIOCOUTQ:
2335                 amount = unix_outq_len(sk);
2336                 err = put_user(amount, (int __user *)arg);
2337                 break;
2338         case SIOCINQ:
2339                 amount = unix_inq_len(sk);
2340                 if (amount < 0)
2341                         err = amount;
2342                 else
2343                         err = put_user(amount, (int __user *)arg);
2344                 break;
2345         default:
2346                 err = -ENOIOCTLCMD;
2347                 break;
2348         }
2349         return err;
2350 }
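     /*
      * Illustrative usage sketch (assumed, not part of this file): the
      * two ioctls above expose queue sizes to userspace.  Per
      * unix_inq_len(), SIOCINQ sums every queued skb on stream and
      * seqpacket sockets but reports only the next datagram on dgram
      * sockets:
      *
      *     int pending;
      *     if (ioctl(fd, SIOCINQ, &pending) == 0)
      *             printf("%d bytes readable\n", pending);
      *     if (ioctl(fd, SIOCOUTQ, &pending) == 0)
      *             printf("%d bytes not yet consumed by the peer\n", pending);
      */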
2351 
2352 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2353 {
2354         struct sock *sk = sock->sk;
2355         unsigned int mask;
2356 
2357         sock_poll_wait(file, sk_sleep(sk), wait);
2358         mask = 0;
2359 
2360         /* exceptional events? */
2361         if (sk->sk_err)
2362                 mask |= POLLERR;
2363         if (sk->sk_shutdown == SHUTDOWN_MASK)
2364                 mask |= POLLHUP;
2365         if (sk->sk_shutdown & RCV_SHUTDOWN)
2366                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2367 
2368         /* readable? */
2369         if (!skb_queue_empty(&sk->sk_receive_queue))
2370                 mask |= POLLIN | POLLRDNORM;
2371 
2372         /* Connection-based sockets need to check for termination and startup */
2373         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2374             sk->sk_state == TCP_CLOSE)
2375                 mask |= POLLHUP;
2376 
2377         /*
2378          * We also mark the socket writable when the other side has shut
2379          * down the connection. This prevents stuck sockets.
2380          */
2381         if (unix_writable(sk))
2382                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2383 
2384         return mask;
2385 }
2386 
2387 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2388                                     poll_table *wait)
2389 {
2390         struct sock *sk = sock->sk, *other;
2391         unsigned int mask, writable;
2392 
2393         sock_poll_wait(file, sk_sleep(sk), wait);
2394         mask = 0;
2395 
2396         /* exceptional events? */
2397         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2398                 mask |= POLLERR |
2399                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2400 
2401         if (sk->sk_shutdown & RCV_SHUTDOWN)
2402                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2403         if (sk->sk_shutdown == SHUTDOWN_MASK)
2404                 mask |= POLLHUP;
2405 
2406         /* readable? */
2407         if (!skb_queue_empty(&sk->sk_receive_queue))
2408                 mask |= POLLIN | POLLRDNORM;
2409 
2410         /* Connection-based sockets need to check for termination and startup */
2411         if (sk->sk_type == SOCK_SEQPACKET) {
2412                 if (sk->sk_state == TCP_CLOSE)
2413                         mask |= POLLHUP;
2414                 /* connection hasn't started yet? */
2415                 if (sk->sk_state == TCP_SYN_SENT)
2416                         return mask;
2417         }
2418 
2419         /* No write status requested, avoid expensive OUT tests. */
2420         if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2421                 return mask;
2422 
2423         writable = unix_writable(sk);
2424         if (writable) {
2425                 unix_state_lock(sk);
2426 
2427                 other = unix_peer(sk);
2428                 if (other && unix_peer(other) != sk &&
2429                     unix_recvq_full(other) &&
2430                     unix_dgram_peer_wake_me(sk, other))
2431                         writable = 0;
2432 
2433                 unix_state_unlock(sk);
2434         }
2435 
2436         if (writable)
2437                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2438         else
2439                 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2440 
2441         return mask;
2442 }
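     /*
      * Why the extra peer check above: a datagram sender whose peer is
      * not connected back to it and whose receive queue is full must not
      * see POLLOUT, or a poll()+sendmsg() loop would spin;
      * unix_dgram_peer_wake_me() also hooks this socket onto the peer's
      * wait queue so a later recvmsg() there wakes the poller.
      */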
2443 
2444 #ifdef CONFIG_PROC_FS
2445 
2446 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2447 
2448 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2449 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2450 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
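     /*
      * Worked example (assuming UNIX_HASH_BITS == 8 on a 64-bit build,
      * so BUCKET_SPACE == 64 - 9 - 1 == 54): the seq_file position packs
      * a (bucket, offset) pair into one loff_t:
      *
      *     pos = set_bucket_offset(3, 2);  // 3UL << 54 | 2
      *     get_bucket(pos);                // -> 3
      *     get_offset(pos);                // -> 2
      *
      * Offsets are 1-based; *pos == 0 is reserved for SEQ_START_TOKEN.
      */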
2451 
2452 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2453 {
2454         unsigned long offset = get_offset(*pos);
2455         unsigned long bucket = get_bucket(*pos);
2456         struct sock *sk;
2457         unsigned long count = 0;
2458 
2459         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2460                 if (sock_net(sk) != seq_file_net(seq))
2461                         continue;
2462                 if (++count == offset)
2463                         break;
2464         }
2465 
2466         return sk;
2467 }
2468 
2469 static struct sock *unix_next_socket(struct seq_file *seq,
2470                                      struct sock *sk,
2471                                      loff_t *pos)
2472 {
2473         unsigned long bucket;
2474 
2475         while (sk > (struct sock *)SEQ_START_TOKEN) {
2476                 sk = sk_next(sk);
2477                 if (!sk)
2478                         goto next_bucket;
2479                 if (sock_net(sk) == seq_file_net(seq))
2480                         return sk;
2481         }
2482 
2483         do {
2484                 sk = unix_from_bucket(seq, pos);
2485                 if (sk)
2486                         return sk;
2487 
2488 next_bucket:
2489                 bucket = get_bucket(*pos) + 1;
2490                 *pos = set_bucket_offset(bucket, 1);
2491         } while (bucket < ARRAY_SIZE(unix_socket_table));
2492 
2493         return NULL;
2494 }
2495 
2496 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2497         __acquires(unix_table_lock)
2498 {
2499         spin_lock(&unix_table_lock);
2500 
2501         if (!*pos)
2502                 return SEQ_START_TOKEN;
2503 
2504         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2505                 return NULL;
2506 
2507         return unix_next_socket(seq, NULL, pos);
2508 }
2509 
2510 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2511 {
2512         ++*pos;
2513         return unix_next_socket(seq, v, pos);
2514 }
2515 
2516 static void unix_seq_stop(struct seq_file *seq, void *v)
2517         __releases(unix_table_lock)
2518 {
2519         spin_unlock(&unix_table_lock);
2520 }
2521 
2522 static int unix_seq_show(struct seq_file *seq, void *v)
2523 {
2524 
2525         if (v == SEQ_START_TOKEN)
2526                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2527                          "Inode Path\n");
2528         else {
2529                 struct sock *s = v;
2530                 struct unix_sock *u = unix_sk(s);
2531                 unix_state_lock(s);
2532 
2533                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2534                         s,
2535                         atomic_read(&s->sk_refcnt),
2536                         0,
2537                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2538                         s->sk_type,
2539                         s->sk_socket ?
2540                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2541                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2542                         sock_i_ino(s));
2543 
2544                 if (u->addr) {
2545                         int i, len;
2546                         seq_putc(seq, ' ');
2547 
2548                         i = 0;
2549                         len = u->addr->len - sizeof(short);
2550                         if (!UNIX_ABSTRACT(s))
2551                                 len--;
2552                         else {
2553                                 seq_putc(seq, '@');
2554                                 i++;
2555                         }
2556                         for ( ; i < len; i++)
2557                                 seq_putc(seq, u->addr->name->sun_path[i]);
2558                 }
2559                 unix_state_unlock(s);
2560                 seq_putc(seq, '\n');
2561         }
2562 
2563         return 0;
2564 }
2565 
2566 static const struct seq_operations unix_seq_ops = {
2567         .start  = unix_seq_start,
2568         .next   = unix_seq_next,
2569         .stop   = unix_seq_stop,
2570         .show   = unix_seq_show,
2571 };
2572 
2573 static int unix_seq_open(struct inode *inode, struct file *file)
2574 {
2575         return seq_open_net(inode, file, &unix_seq_ops,
2576                             sizeof(struct seq_net_private));
2577 }
2578 
2579 static const struct file_operations unix_seq_fops = {
2580         .owner          = THIS_MODULE,
2581         .open           = unix_seq_open,
2582         .read           = seq_read,
2583         .llseek         = seq_lseek,
2584         .release        = seq_release_net,
2585 };
2586 
2587 #endif
2588 
2589 static const struct net_proto_family unix_family_ops = {
2590         .family = PF_UNIX,
2591         .create = unix_create,
2592         .owner  = THIS_MODULE,
2593 };
2594 
2595 
2596 static int __net_init unix_net_init(struct net *net)
2597 {
2598         int error = -ENOMEM;
2599 
2600         net->unx.sysctl_max_dgram_qlen = 10;
2601         if (unix_sysctl_register(net))
2602                 goto out;
2603 
2604 #ifdef CONFIG_PROC_FS
2605         if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2606                 unix_sysctl_unregister(net);
2607                 goto out;
2608         }
2609 #endif
2610         error = 0;
2611 out:
2612         return error;
2613 }
2614 
2615 static void __net_exit unix_net_exit(struct net *net)
2616 {
2617         unix_sysctl_unregister(net);
2618         remove_proc_entry("unix", net->proc_net);
2619 }
2620 
2621 static struct pernet_operations unix_net_ops = {
2622         .init = unix_net_init,
2623         .exit = unix_net_exit,
2624 };
2625 
2626 static int __init af_unix_init(void)
2627 {
2628         int rc = -1;
2629 
2630         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2631 
2632         rc = proto_register(&unix_proto, 1);
2633         if (rc != 0) {
2634                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2635                        __func__);
2636                 goto out;
2637         }
2638 
2639         sock_register(&unix_family_ops);
2640         register_pernet_subsys(&unix_net_ops);
2641 out:
2642         return rc;
2643 }
2644 
2645 static void __exit af_unix_exit(void)
2646 {
2647         sock_unregister(PF_UNIX);
2648         proto_unregister(&unix_proto);
2649         unregister_pernet_subsys(&unix_net_ops);
2650 }
2651 
2652 /* Earlier than device_initcall() so that other drivers invoking
2653    request_module() don't end up in a loop when modprobe tries
2654    to use a UNIX socket. But later than subsys_initcall() because
2655    we depend on stuff initialised there */
2656 fs_initcall(af_unix_init);
2657 module_exit(af_unix_exit);
2658 
2659 MODULE_LICENSE("GPL");
2660 MODULE_ALIAS_NETPROTO(PF_UNIX);
2661 
