~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/pipe.c

Version: ~ [ linux-5.15-rc1 ] ~ [ linux-5.14.5 ] ~ [ linux-5.13.18 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.66 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.147 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.206 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.246 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.282 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.283 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  *  linux/fs/pipe.c
  4  *
  5  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
  6  */
  7 
  8 #include <linux/mm.h>
  9 #include <linux/file.h>
 10 #include <linux/poll.h>
 11 #include <linux/slab.h>
 12 #include <linux/module.h>
 13 #include <linux/init.h>
 14 #include <linux/fs.h>
 15 #include <linux/log2.h>
 16 #include <linux/mount.h>
 17 #include <linux/pseudo_fs.h>
 18 #include <linux/magic.h>
 19 #include <linux/pipe_fs_i.h>
 20 #include <linux/uio.h>
 21 #include <linux/highmem.h>
 22 #include <linux/pagemap.h>
 23 #include <linux/audit.h>
 24 #include <linux/syscalls.h>
 25 #include <linux/fcntl.h>
 26 #include <linux/memcontrol.h>
 27 #include <linux/watch_queue.h>
 28 
 29 #include <linux/uaccess.h>
 30 #include <asm/ioctls.h>
 31 
 32 #include "internal.h"
 33 
 34 /*
 35  * The max size that a non-root user is allowed to grow the pipe. Can
 36  * be set by root in /proc/sys/fs/pipe-max-size
 37  */
 38 unsigned int pipe_max_size = 1048576;
 39 
 40 /* Maximum allocatable pages per user. Hard limit is unset by default, soft
 41  * matches default values.
 42  */
 43 unsigned long pipe_user_pages_hard;
 44 unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
 45 
 46 /*
 47  * We use head and tail indices that aren't masked off, except at the point of
 48  * dereference, but rather they're allowed to wrap naturally.  This means there
 49  * isn't a dead spot in the buffer, but the ring has to be a power of two and
 50  * <= 2^31.
 51  * -- David Howells 2019-09-23.
 52  *
 53  * Reads with count = 0 should always return 0.
 54  * -- Julian Bradfield 1999-06-07.
 55  *
 56  * FIFOs and Pipes now generate SIGIO for both readers and writers.
 57  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 58  *
 59  * pipe_read & write cleanup
 60  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 61  */
 62 
 63 static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
 64 {
 65         if (pipe->files)
 66                 mutex_lock_nested(&pipe->mutex, subclass);
 67 }
 68 
 69 void pipe_lock(struct pipe_inode_info *pipe)
 70 {
 71         /*
 72          * pipe_lock() nests non-pipe inode locks (for writing to a file)
 73          */
 74         pipe_lock_nested(pipe, I_MUTEX_PARENT);
 75 }
 76 EXPORT_SYMBOL(pipe_lock);
 77 
 78 void pipe_unlock(struct pipe_inode_info *pipe)
 79 {
 80         if (pipe->files)
 81                 mutex_unlock(&pipe->mutex);
 82 }
 83 EXPORT_SYMBOL(pipe_unlock);
 84 
 85 static inline void __pipe_lock(struct pipe_inode_info *pipe)
 86 {
 87         mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
 88 }
 89 
 90 static inline void __pipe_unlock(struct pipe_inode_info *pipe)
 91 {
 92         mutex_unlock(&pipe->mutex);
 93 }
 94 
 95 void pipe_double_lock(struct pipe_inode_info *pipe1,
 96                       struct pipe_inode_info *pipe2)
 97 {
 98         BUG_ON(pipe1 == pipe2);
 99 
100         if (pipe1 < pipe2) {
101                 pipe_lock_nested(pipe1, I_MUTEX_PARENT);
102                 pipe_lock_nested(pipe2, I_MUTEX_CHILD);
103         } else {
104                 pipe_lock_nested(pipe2, I_MUTEX_PARENT);
105                 pipe_lock_nested(pipe1, I_MUTEX_CHILD);
106         }
107 }
108 
109 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
110                                   struct pipe_buffer *buf)
111 {
112         struct page *page = buf->page;
113 
114         /*
115          * If nobody else uses this page, and we don't already have a
116          * temporary page, let's keep track of it as a one-deep
117          * allocation cache. (Otherwise just release our reference to it)
118          */
119         if (page_count(page) == 1 && !pipe->tmp_page)
120                 pipe->tmp_page = page;
121         else
122                 put_page(page);
123 }
124 
125 static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
126                 struct pipe_buffer *buf)
127 {
128         struct page *page = buf->page;
129 
130         if (page_count(page) != 1)
131                 return false;
132         memcg_kmem_uncharge_page(page, 0);
133         __SetPageLocked(page);
134         return true;
135 }
136 
137 /**
138  * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer
139  * @pipe:       the pipe that the buffer belongs to
140  * @buf:        the buffer to attempt to steal
141  *
142  * Description:
143  *      This function attempts to steal the &struct page attached to
144  *      @buf. If successful, this function returns 0 and returns with
145  *      the page locked. The caller may then reuse the page for whatever
146  *      he wishes; the typical use is insertion into a different file
147  *      page cache.
148  */
149 bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
150                 struct pipe_buffer *buf)
151 {
152         struct page *page = buf->page;
153 
154         /*
155          * A reference of one is golden, that means that the owner of this
156          * page is the only one holding a reference to it. lock the page
157          * and return OK.
158          */
159         if (page_count(page) == 1) {
160                 lock_page(page);
161                 return true;
162         }
163         return false;
164 }
165 EXPORT_SYMBOL(generic_pipe_buf_try_steal);
166 
167 /**
168  * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
169  * @pipe:       the pipe that the buffer belongs to
170  * @buf:        the buffer to get a reference to
171  *
172  * Description:
173  *      This function grabs an extra reference to @buf. It's used in
174  *      in the tee() system call, when we duplicate the buffers in one
175  *      pipe into another.
176  */
177 bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
178 {
179         return try_get_page(buf->page);
180 }
181 EXPORT_SYMBOL(generic_pipe_buf_get);
182 
183 /**
184  * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
185  * @pipe:       the pipe that the buffer belongs to
186  * @buf:        the buffer to put a reference to
187  *
188  * Description:
189  *      This function releases a reference to @buf.
190  */
191 void generic_pipe_buf_release(struct pipe_inode_info *pipe,
192                               struct pipe_buffer *buf)
193 {
194         put_page(buf->page);
195 }
196 EXPORT_SYMBOL(generic_pipe_buf_release);
197 
198 static const struct pipe_buf_operations anon_pipe_buf_ops = {
199         .release        = anon_pipe_buf_release,
200         .try_steal      = anon_pipe_buf_try_steal,
201         .get            = generic_pipe_buf_get,
202 };
203 
204 /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
205 static inline bool pipe_readable(const struct pipe_inode_info *pipe)
206 {
207         unsigned int head = READ_ONCE(pipe->head);
208         unsigned int tail = READ_ONCE(pipe->tail);
209         unsigned int writers = READ_ONCE(pipe->writers);
210 
211         return !pipe_empty(head, tail) || !writers;
212 }
213 
214 static ssize_t
215 pipe_read(struct kiocb *iocb, struct iov_iter *to)
216 {
217         size_t total_len = iov_iter_count(to);
218         struct file *filp = iocb->ki_filp;
219         struct pipe_inode_info *pipe = filp->private_data;
220         bool was_full, wake_next_reader = false;
221         ssize_t ret;
222 
223         /* Null read succeeds. */
224         if (unlikely(total_len == 0))
225                 return 0;
226 
227         ret = 0;
228         __pipe_lock(pipe);
229 
230         /*
231          * We only wake up writers if the pipe was full when we started
232          * reading in order to avoid unnecessary wakeups.
233          *
234          * But when we do wake up writers, we do so using a sync wakeup
235          * (WF_SYNC), because we want them to get going and generate more
236          * data for us.
237          */
238         was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
239         for (;;) {
240                 unsigned int head = pipe->head;
241                 unsigned int tail = pipe->tail;
242                 unsigned int mask = pipe->ring_size - 1;
243 
244 #ifdef CONFIG_WATCH_QUEUE
245                 if (pipe->note_loss) {
246                         struct watch_notification n;
247 
248                         if (total_len < 8) {
249                                 if (ret == 0)
250                                         ret = -ENOBUFS;
251                                 break;
252                         }
253 
254                         n.type = WATCH_TYPE_META;
255                         n.subtype = WATCH_META_LOSS_NOTIFICATION;
256                         n.info = watch_sizeof(n);
257                         if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
258                                 if (ret == 0)
259                                         ret = -EFAULT;
260                                 break;
261                         }
262                         ret += sizeof(n);
263                         total_len -= sizeof(n);
264                         pipe->note_loss = false;
265                 }
266 #endif
267 
268                 if (!pipe_empty(head, tail)) {
269                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
270                         size_t chars = buf->len;
271                         size_t written;
272                         int error;
273 
274                         if (chars > total_len) {
275                                 if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
276                                         if (ret == 0)
277                                                 ret = -ENOBUFS;
278                                         break;
279                                 }
280                                 chars = total_len;
281                         }
282 
283                         error = pipe_buf_confirm(pipe, buf);
284                         if (error) {
285                                 if (!ret)
286                                         ret = error;
287                                 break;
288                         }
289 
290                         written = copy_page_to_iter(buf->page, buf->offset, chars, to);
291                         if (unlikely(written < chars)) {
292                                 if (!ret)
293                                         ret = -EFAULT;
294                                 break;
295                         }
296                         ret += chars;
297                         buf->offset += chars;
298                         buf->len -= chars;
299 
300                         /* Was it a packet buffer? Clean up and exit */
301                         if (buf->flags & PIPE_BUF_FLAG_PACKET) {
302                                 total_len = chars;
303                                 buf->len = 0;
304                         }
305 
306                         if (!buf->len) {
307                                 pipe_buf_release(pipe, buf);
308                                 spin_lock_irq(&pipe->rd_wait.lock);
309 #ifdef CONFIG_WATCH_QUEUE
310                                 if (buf->flags & PIPE_BUF_FLAG_LOSS)
311                                         pipe->note_loss = true;
312 #endif
313                                 tail++;
314                                 pipe->tail = tail;
315                                 spin_unlock_irq(&pipe->rd_wait.lock);
316                         }
317                         total_len -= chars;
318                         if (!total_len)
319                                 break;  /* common path: read succeeded */
320                         if (!pipe_empty(head, tail))    /* More to do? */
321                                 continue;
322                 }
323 
324                 if (!pipe->writers)
325                         break;
326                 if (ret)
327                         break;
328                 if (filp->f_flags & O_NONBLOCK) {
329                         ret = -EAGAIN;
330                         break;
331                 }
332                 __pipe_unlock(pipe);
333 
334                 /*
335                  * We only get here if we didn't actually read anything.
336                  *
337                  * However, we could have seen (and removed) a zero-sized
338                  * pipe buffer, and might have made space in the buffers
339                  * that way.
340                  *
341                  * You can't make zero-sized pipe buffers by doing an empty
342                  * write (not even in packet mode), but they can happen if
343                  * the writer gets an EFAULT when trying to fill a buffer
344                  * that already got allocated and inserted in the buffer
345                  * array.
346                  *
347                  * So we still need to wake up any pending writers in the
348                  * _very_ unlikely case that the pipe was full, but we got
349                  * no data.
350                  */
351                 if (unlikely(was_full)) {
352                         wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
353                         kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
354                 }
355 
356                 /*
357                  * But because we didn't read anything, at this point we can
358                  * just return directly with -ERESTARTSYS if we're interrupted,
359                  * since we've done any required wakeups and there's no need
360                  * to mark anything accessed. And we've dropped the lock.
361                  */
362                 if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
363                         return -ERESTARTSYS;
364 
365                 __pipe_lock(pipe);
366                 was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
367                 wake_next_reader = true;
368         }
369         if (pipe_empty(pipe->head, pipe->tail))
370                 wake_next_reader = false;
371         __pipe_unlock(pipe);
372 
373         if (was_full) {
374                 wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
375                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
376         }
377         if (wake_next_reader)
378                 wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
379         if (ret > 0)
380                 file_accessed(filp);
381         return ret;
382 }
383 
384 static inline int is_packetized(struct file *file)
385 {
386         return (file->f_flags & O_DIRECT) != 0;
387 }
388 
389 /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
390 static inline bool pipe_writable(const struct pipe_inode_info *pipe)
391 {
392         unsigned int head = READ_ONCE(pipe->head);
393         unsigned int tail = READ_ONCE(pipe->tail);
394         unsigned int max_usage = READ_ONCE(pipe->max_usage);
395 
396         return !pipe_full(head, tail, max_usage) ||
397                 !READ_ONCE(pipe->readers);
398 }
399 
400 static ssize_t
401 pipe_write(struct kiocb *iocb, struct iov_iter *from)
402 {
403         struct file *filp = iocb->ki_filp;
404         struct pipe_inode_info *pipe = filp->private_data;
405         unsigned int head;
406         ssize_t ret = 0;
407         size_t total_len = iov_iter_count(from);
408         ssize_t chars;
409         bool was_empty = false;
410         bool wake_next_writer = false;
411 
412         /* Null write succeeds. */
413         if (unlikely(total_len == 0))
414                 return 0;
415 
416         __pipe_lock(pipe);
417 
418         if (!pipe->readers) {
419                 send_sig(SIGPIPE, current, 0);
420                 ret = -EPIPE;
421                 goto out;
422         }
423 
424 #ifdef CONFIG_WATCH_QUEUE
425         if (pipe->watch_queue) {
426                 ret = -EXDEV;
427                 goto out;
428         }
429 #endif
430 
431         /*
432          * Only wake up if the pipe started out empty, since
433          * otherwise there should be no readers waiting.
434          *
435          * If it wasn't empty we try to merge new data into
436          * the last buffer.
437          *
438          * That naturally merges small writes, but it also
439          * page-aligs the rest of the writes for large writes
440          * spanning multiple pages.
441          */
442         head = pipe->head;
443         was_empty = pipe_empty(head, pipe->tail);
444         chars = total_len & (PAGE_SIZE-1);
445         if (chars && !was_empty) {
446                 unsigned int mask = pipe->ring_size - 1;
447                 struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
448                 int offset = buf->offset + buf->len;
449 
450                 if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
451                     offset + chars <= PAGE_SIZE) {
452                         ret = pipe_buf_confirm(pipe, buf);
453                         if (ret)
454                                 goto out;
455 
456                         ret = copy_page_from_iter(buf->page, offset, chars, from);
457                         if (unlikely(ret < chars)) {
458                                 ret = -EFAULT;
459                                 goto out;
460                         }
461 
462                         buf->len += ret;
463                         if (!iov_iter_count(from))
464                                 goto out;
465                 }
466         }
467 
468         for (;;) {
469                 if (!pipe->readers) {
470                         send_sig(SIGPIPE, current, 0);
471                         if (!ret)
472                                 ret = -EPIPE;
473                         break;
474                 }
475 
476                 head = pipe->head;
477                 if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
478                         unsigned int mask = pipe->ring_size - 1;
479                         struct pipe_buffer *buf = &pipe->bufs[head & mask];
480                         struct page *page = pipe->tmp_page;
481                         int copied;
482 
483                         if (!page) {
484                                 page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
485                                 if (unlikely(!page)) {
486                                         ret = ret ? : -ENOMEM;
487                                         break;
488                                 }
489                                 pipe->tmp_page = page;
490                         }
491 
492                         /* Allocate a slot in the ring in advance and attach an
493                          * empty buffer.  If we fault or otherwise fail to use
494                          * it, either the reader will consume it or it'll still
495                          * be there for the next write.
496                          */
497                         spin_lock_irq(&pipe->rd_wait.lock);
498 
499                         head = pipe->head;
500                         if (pipe_full(head, pipe->tail, pipe->max_usage)) {
501                                 spin_unlock_irq(&pipe->rd_wait.lock);
502                                 continue;
503                         }
504 
505                         pipe->head = head + 1;
506                         spin_unlock_irq(&pipe->rd_wait.lock);
507 
508                         /* Insert it into the buffer array */
509                         buf = &pipe->bufs[head & mask];
510                         buf->page = page;
511                         buf->ops = &anon_pipe_buf_ops;
512                         buf->offset = 0;
513                         buf->len = 0;
514                         if (is_packetized(filp))
515                                 buf->flags = PIPE_BUF_FLAG_PACKET;
516                         else
517                                 buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
518                         pipe->tmp_page = NULL;
519 
520                         copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
521                         if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
522                                 if (!ret)
523                                         ret = -EFAULT;
524                                 break;
525                         }
526                         ret += copied;
527                         buf->offset = 0;
528                         buf->len = copied;
529 
530                         if (!iov_iter_count(from))
531                                 break;
532                 }
533 
534                 if (!pipe_full(head, pipe->tail, pipe->max_usage))
535                         continue;
536 
537                 /* Wait for buffer space to become available. */
538                 if (filp->f_flags & O_NONBLOCK) {
539                         if (!ret)
540                                 ret = -EAGAIN;
541                         break;
542                 }
543                 if (signal_pending(current)) {
544                         if (!ret)
545                                 ret = -ERESTARTSYS;
546                         break;
547                 }
548 
549                 /*
550                  * We're going to release the pipe lock and wait for more
551                  * space. We wake up any readers if necessary, and then
552                  * after waiting we need to re-check whether the pipe
553                  * become empty while we dropped the lock.
554                  */
555                 __pipe_unlock(pipe);
556                 if (was_empty) {
557                         wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
558                         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
559                 }
560                 wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
561                 __pipe_lock(pipe);
562                 was_empty = pipe_empty(pipe->head, pipe->tail);
563                 wake_next_writer = true;
564         }
565 out:
566         if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
567                 wake_next_writer = false;
568         __pipe_unlock(pipe);
569 
570         /*
571          * If we do do a wakeup event, we do a 'sync' wakeup, because we
572          * want the reader to start processing things asap, rather than
573          * leave the data pending.
574          *
575          * This is particularly important for small writes, because of
576          * how (for example) the GNU make jobserver uses small writes to
577          * wake up pending jobs
578          */
579         if (was_empty) {
580                 wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
581                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
582         }
583         if (wake_next_writer)
584                 wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
585         if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
586                 int err = file_update_time(filp);
587                 if (err)
588                         ret = err;
589                 sb_end_write(file_inode(filp)->i_sb);
590         }
591         return ret;
592 }
593 
594 static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
595 {
596         struct pipe_inode_info *pipe = filp->private_data;
597         int count, head, tail, mask;
598 
599         switch (cmd) {
600         case FIONREAD:
601                 __pipe_lock(pipe);
602                 count = 0;
603                 head = pipe->head;
604                 tail = pipe->tail;
605                 mask = pipe->ring_size - 1;
606 
607                 while (tail != head) {
608                         count += pipe->bufs[tail & mask].len;
609                         tail++;
610                 }
611                 __pipe_unlock(pipe);
612 
613                 return put_user(count, (int __user *)arg);
614 
615 #ifdef CONFIG_WATCH_QUEUE
616         case IOC_WATCH_QUEUE_SET_SIZE: {
617                 int ret;
618                 __pipe_lock(pipe);
619                 ret = watch_queue_set_size(pipe, arg);
620                 __pipe_unlock(pipe);
621                 return ret;
622         }
623 
624         case IOC_WATCH_QUEUE_SET_FILTER:
625                 return watch_queue_set_filter(
626                         pipe, (struct watch_notification_filter __user *)arg);
627 #endif
628 
629         default:
630                 return -ENOIOCTLCMD;
631         }
632 }
633 
634 /* No kernel lock held - fine */
635 static __poll_t
636 pipe_poll(struct file *filp, poll_table *wait)
637 {
638         __poll_t mask;
639         struct pipe_inode_info *pipe = filp->private_data;
640         unsigned int head, tail;
641 
642         /*
643          * Reading pipe state only -- no need for acquiring the semaphore.
644          *
645          * But because this is racy, the code has to add the
646          * entry to the poll table _first_ ..
647          */
648         if (filp->f_mode & FMODE_READ)
649                 poll_wait(filp, &pipe->rd_wait, wait);
650         if (filp->f_mode & FMODE_WRITE)
651                 poll_wait(filp, &pipe->wr_wait, wait);
652 
653         /*
654          * .. and only then can you do the racy tests. That way,
655          * if something changes and you got it wrong, the poll
656          * table entry will wake you up and fix it.
657          */
658         head = READ_ONCE(pipe->head);
659         tail = READ_ONCE(pipe->tail);
660 
661         mask = 0;
662         if (filp->f_mode & FMODE_READ) {
663                 if (!pipe_empty(head, tail))
664                         mask |= EPOLLIN | EPOLLRDNORM;
665                 if (!pipe->writers && filp->f_version != pipe->w_counter)
666                         mask |= EPOLLHUP;
667         }
668 
669         if (filp->f_mode & FMODE_WRITE) {
670                 if (!pipe_full(head, tail, pipe->max_usage))
671                         mask |= EPOLLOUT | EPOLLWRNORM;
672                 /*
673                  * Most Unices do not set EPOLLERR for FIFOs but on Linux they
674                  * behave exactly like pipes for poll().
675                  */
676                 if (!pipe->readers)
677                         mask |= EPOLLERR;
678         }
679 
680         return mask;
681 }
682 
683 static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
684 {
685         int kill = 0;
686 
687         spin_lock(&inode->i_lock);
688         if (!--pipe->files) {
689                 inode->i_pipe = NULL;
690                 kill = 1;
691         }
692         spin_unlock(&inode->i_lock);
693 
694         if (kill)
695                 free_pipe_info(pipe);
696 }
697 
698 static int
699 pipe_release(struct inode *inode, struct file *file)
700 {
701         struct pipe_inode_info *pipe = file->private_data;
702 
703         __pipe_lock(pipe);
704         if (file->f_mode & FMODE_READ)
705                 pipe->readers--;
706         if (file->f_mode & FMODE_WRITE)
707                 pipe->writers--;
708 
709         /* Was that the last reader or writer, but not the other side? */
710         if (!pipe->readers != !pipe->writers) {
711                 wake_up_interruptible_all(&pipe->rd_wait);
712                 wake_up_interruptible_all(&pipe->wr_wait);
713                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
714                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
715         }
716         __pipe_unlock(pipe);
717 
718         put_pipe_info(inode, pipe);
719         return 0;
720 }
721 
722 static int
723 pipe_fasync(int fd, struct file *filp, int on)
724 {
725         struct pipe_inode_info *pipe = filp->private_data;
726         int retval = 0;
727 
728         __pipe_lock(pipe);
729         if (filp->f_mode & FMODE_READ)
730                 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
731         if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
732                 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
733                 if (retval < 0 && (filp->f_mode & FMODE_READ))
734                         /* this can happen only if on == T */
735                         fasync_helper(-1, filp, 0, &pipe->fasync_readers);
736         }
737         __pipe_unlock(pipe);
738         return retval;
739 }
740 
741 unsigned long account_pipe_buffers(struct user_struct *user,
742                                    unsigned long old, unsigned long new)
743 {
744         return atomic_long_add_return(new - old, &user->pipe_bufs);
745 }
746 
747 bool too_many_pipe_buffers_soft(unsigned long user_bufs)
748 {
749         unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);
750 
751         return soft_limit && user_bufs > soft_limit;
752 }
753 
754 bool too_many_pipe_buffers_hard(unsigned long user_bufs)
755 {
756         unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);
757 
758         return hard_limit && user_bufs > hard_limit;
759 }
760 
761 bool pipe_is_unprivileged_user(void)
762 {
763         return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
764 }
765 
766 struct pipe_inode_info *alloc_pipe_info(void)
767 {
768         struct pipe_inode_info *pipe;
769         unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
770         struct user_struct *user = get_current_user();
771         unsigned long user_bufs;
772         unsigned int max_size = READ_ONCE(pipe_max_size);
773 
774         pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
775         if (pipe == NULL)
776                 goto out_free_uid;
777 
778         if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
779                 pipe_bufs = max_size >> PAGE_SHIFT;
780 
781         user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
782 
783         if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
784                 user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
785                 pipe_bufs = 1;
786         }
787 
788         if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
789                 goto out_revert_acct;
790 
791         pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
792                              GFP_KERNEL_ACCOUNT);
793 
794         if (pipe->bufs) {
795                 init_waitqueue_head(&pipe->rd_wait);
796                 init_waitqueue_head(&pipe->wr_wait);
797                 pipe->r_counter = pipe->w_counter = 1;
798                 pipe->max_usage = pipe_bufs;
799                 pipe->ring_size = pipe_bufs;
800                 pipe->nr_accounted = pipe_bufs;
801                 pipe->user = user;
802                 mutex_init(&pipe->mutex);
803                 return pipe;
804         }
805 
806 out_revert_acct:
807         (void) account_pipe_buffers(user, pipe_bufs, 0);
808         kfree(pipe);
809 out_free_uid:
810         free_uid(user);
811         return NULL;
812 }
813 
814 void free_pipe_info(struct pipe_inode_info *pipe)
815 {
816         int i;
817 
818 #ifdef CONFIG_WATCH_QUEUE
819         if (pipe->watch_queue) {
820                 watch_queue_clear(pipe->watch_queue);
821                 put_watch_queue(pipe->watch_queue);
822         }
823 #endif
824 
825         (void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
826         free_uid(pipe->user);
827         for (i = 0; i < pipe->ring_size; i++) {
828                 struct pipe_buffer *buf = pipe->bufs + i;
829                 if (buf->ops)
830                         pipe_buf_release(pipe, buf);
831         }
832         if (pipe->tmp_page)
833                 __free_page(pipe->tmp_page);
834         kfree(pipe->bufs);
835         kfree(pipe);
836 }
837 
/*
 * Kernel-internal mount of pipefs, set once by init_pipe_fs().  Its
 * superblock backs the inodes made by get_pipe_inode(), and it is the mount
 * passed to alloc_file_pseudo() in create_pipe_files().
 */
static struct vfsmount *pipe_mnt __read_mostly;
839 
840 /*
841  * pipefs_dname() is called from d_path().
842  */
843 static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
844 {
845         return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
846                                 d_inode(dentry)->i_ino);
847 }
848 
/*
 * Dentry operations for pipefs, installed through pipefs_init_fs_context().
 * Only ->d_dname is provided, so that pipe dentries get a generated
 * "pipe:[ino]" name.
 */
static const struct dentry_operations pipefs_dentry_operations = {
        .d_dname        = pipefs_dname,
};
852 
853 static struct inode * get_pipe_inode(void)
854 {
855         struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
856         struct pipe_inode_info *pipe;
857 
858         if (!inode)
859                 goto fail_inode;
860 
861         inode->i_ino = get_next_ino();
862 
863         pipe = alloc_pipe_info();
864         if (!pipe)
865                 goto fail_iput;
866 
867         inode->i_pipe = pipe;
868         pipe->files = 2;
869         pipe->readers = pipe->writers = 1;
870         inode->i_fop = &pipefifo_fops;
871 
872         /*
873          * Mark the inode dirty from the very beginning,
874          * that way it will never be moved to the dirty
875          * list because "mark_inode_dirty()" will think
876          * that it already _is_ on the dirty list.
877          */
878         inode->i_state = I_DIRTY;
879         inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
880         inode->i_uid = current_fsuid();
881         inode->i_gid = current_fsgid();
882         inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
883 
884         return inode;
885 
886 fail_iput:
887         iput(inode);
888 
889 fail_inode:
890         return NULL;
891 }
892 
893 int create_pipe_files(struct file **res, int flags)
894 {
895         struct inode *inode = get_pipe_inode();
896         struct file *f;
897         int error;
898 
899         if (!inode)
900                 return -ENFILE;
901 
902         if (flags & O_NOTIFICATION_PIPE) {
903                 error = watch_queue_init(inode->i_pipe);
904                 if (error) {
905                         free_pipe_info(inode->i_pipe);
906                         iput(inode);
907                         return error;
908                 }
909         }
910 
911         f = alloc_file_pseudo(inode, pipe_mnt, "",
912                                 O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
913                                 &pipefifo_fops);
914         if (IS_ERR(f)) {
915                 free_pipe_info(inode->i_pipe);
916                 iput(inode);
917                 return PTR_ERR(f);
918         }
919 
920         f->private_data = inode->i_pipe;
921 
922         res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
923                                   &pipefifo_fops);
924         if (IS_ERR(res[0])) {
925                 put_pipe_info(inode, inode->i_pipe);
926                 fput(f);
927                 return PTR_ERR(res[0]);
928         }
929         res[0]->private_data = inode->i_pipe;
930         res[1] = f;
931         stream_open(inode, res[0]);
932         stream_open(inode, res[1]);
933         return 0;
934 }
935 
936 static int __do_pipe_flags(int *fd, struct file **files, int flags)
937 {
938         int error;
939         int fdw, fdr;
940 
941         if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
942                 return -EINVAL;
943 
944         error = create_pipe_files(files, flags);
945         if (error)
946                 return error;
947 
948         error = get_unused_fd_flags(flags);
949         if (error < 0)
950                 goto err_read_pipe;
951         fdr = error;
952 
953         error = get_unused_fd_flags(flags);
954         if (error < 0)
955                 goto err_fdr;
956         fdw = error;
957 
958         audit_fd_pair(fdr, fdw);
959         fd[0] = fdr;
960         fd[1] = fdw;
961         return 0;
962 
963  err_fdr:
964         put_unused_fd(fdr);
965  err_read_pipe:
966         fput(files[0]);
967         fput(files[1]);
968         return error;
969 }
970 
/*
 * Create a pipe and install both ends in the current descriptor table.
 *
 * @fd:    kernel-space array receiving the read (fd[0]) and write (fd[1])
 *         descriptors.
 * @flags: see __do_pipe_flags().
 *
 * Returns 0 on success or the error from __do_pipe_flags().
 */
int do_pipe_flags(int *fd, int flags)
{
        struct file *files[2];
        int error;

        error = __do_pipe_flags(fd, files, flags);
        if (error)
                return error;

        fd_install(fd[0], files[0]);
        fd_install(fd[1], files[1]);
        return 0;
}
981 
982 /*
983  * sys_pipe() is the normal C calling standard for creating
984  * a pipe. It's not the way Unix traditionally does this, though.
985  */
986 static int do_pipe2(int __user *fildes, int flags)
987 {
988         struct file *files[2];
989         int fd[2];
990         int error;
991 
992         error = __do_pipe_flags(fd, files, flags);
993         if (!error) {
994                 if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
995                         fput(files[0]);
996                         fput(files[1]);
997                         put_unused_fd(fd[0]);
998                         put_unused_fd(fd[1]);
999                         error = -EFAULT;
1000                 } else {
1001                         fd_install(fd[0], files[0]);
1002                         fd_install(fd[1], files[1]);
1003                 }
1004         }
1005         return error;
1006 }
1007 
/*
 * pipe2() system call: create a pipe with the given @flags and store its
 * two descriptors in the user array @fildes.  All work is done by
 * do_pipe2(), whose return value is passed straight back.
 */
SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{
        return do_pipe2(fildes, flags);
}
1012 
/*
 * pipe() system call: same as pipe2() with no flags set.  The two
 * descriptors are stored in the user array @fildes by do_pipe2().
 */
SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
        return do_pipe2(fildes, 0);
}
1017 
/*
 * This is the stupid "wait for pipe to be readable or writable"
 * model.
 *
 * See pipe_read/write() for the proper kind of exclusive wait,
 * but that requires that we wake up any other readers/writers
 * if we then do not end up reading everything (ie the whole
 * "wake_next_reader/writer" logic in pipe_read/write()).
 *
 * pipe_wait_readable() is entered and left with the pipe lock held; the
 * lock is dropped only for the duration of the wait on rd_wait.  The
 * result of wait_event_interruptible() is discarded, so this function does
 * not tell the caller whether the wait ended because pipe_readable()
 * became true or for another reason.
 */
void pipe_wait_readable(struct pipe_inode_info *pipe)
{
        pipe_unlock(pipe);
        wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe));
        pipe_lock(pipe);
}
1033 
/*
 * Wait on wr_wait for pipe_writable() to become true.
 *
 * Entered and left with the pipe lock held; the lock is dropped only for
 * the duration of the wait.  The result of wait_event_interruptible() is
 * discarded, so this function does not tell the caller why the wait ended.
 */
void pipe_wait_writable(struct pipe_inode_info *pipe)
{
        pipe_unlock(pipe);
        wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe));
        pipe_lock(pipe);
}
1040 
1041 /*
1042  * This depends on both the wait (here) and the wakeup (wake_up_partner)
1043  * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot
1044  * race with the count check and waitqueue prep.
1045  *
1046  * Normally in order to avoid races, you'd do the prepare_to_wait() first,
1047  * then check the condition you're waiting for, and only then sleep. But
1048  * because of the pipe lock, we can check the condition before being on
1049  * the wait queue.
1050  *
1051  * We use the 'rd_wait' waitqueue for pipe partner waiting.
1052  */
1053 static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
1054 {
1055         DEFINE_WAIT(rdwait);
1056         int cur = *cnt;
1057 
1058         while (cur == *cnt) {
1059                 prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
1060                 pipe_unlock(pipe);
1061                 schedule();
1062                 finish_wait(&pipe->rd_wait, &rdwait);
1063                 pipe_lock(pipe);
1064                 if (signal_pending(current))
1065                         break;
1066         }
1067         return cur == *cnt ? -ERESTARTSYS : 0;
1068 }
1069 
/*
 * Wake all interruptible sleepers on rd_wait, the waitqueue that
 * wait_for_partner() sleeps on.  fifo_open() calls this when the first
 * reader or first writer shows up.
 */
static void wake_up_partner(struct pipe_inode_info *pipe)
{
        wake_up_interruptible_all(&pipe->rd_wait);
}
1074 
/*
 * ->open() for pipes and FIFOs.
 *
 * Finds (or, for the first opener of a FIFO, allocates) the
 * pipe_inode_info attached to @inode, takes a "files" reference on it,
 * stores it in filp->private_data and then registers the opener as a
 * reader and/or writer according to filp->f_mode.  For FIFOs (anything not
 * living on pipefs) a blocking read-only or write-only open waits for a
 * partner on the other end.
 *
 * Returns 0 on success, -ENOMEM if the pipe cannot be allocated, -ENXIO
 * for a non-blocking write-only open of a FIFO with no readers,
 * -ERESTARTSYS if the wait for a partner was interrupted, and -EINVAL if
 * the file is open for neither reading nor writing.  On every error after
 * the pipe was pinned, the "files" reference is dropped again through
 * put_pipe_info().
 */
static int fifo_open(struct inode *inode, struct file *filp)
{
        struct pipe_inode_info *pipe;
        /* Anonymous pipes live on pipefs; everything else is a named FIFO. */
        bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
        int ret;

        filp->f_version = 0;

        /* inode->i_pipe and pipe->files are manipulated under i_lock. */
        spin_lock(&inode->i_lock);
        if (inode->i_pipe) {
                pipe = inode->i_pipe;
                pipe->files++;
                spin_unlock(&inode->i_lock);
        } else {
                /*
                 * No pipe yet: allocate one with i_lock dropped, then
                 * re-check under the lock in case another opener attached
                 * one in the meantime.
                 */
                spin_unlock(&inode->i_lock);
                pipe = alloc_pipe_info();
                if (!pipe)
                        return -ENOMEM;
                pipe->files = 1;
                spin_lock(&inode->i_lock);
                if (unlikely(inode->i_pipe)) {
                        /* Lost the race: use the other pipe, discard ours. */
                        inode->i_pipe->files++;
                        spin_unlock(&inode->i_lock);
                        free_pipe_info(pipe);
                        pipe = inode->i_pipe;
                } else {
                        inode->i_pipe = pipe;
                        spin_unlock(&inode->i_lock);
                }
        }
        filp->private_data = pipe;
        /* OK, we have a pipe and it's pinned down */

        __pipe_lock(pipe);

        /* We can only do regular read/write on fifos */
        stream_open(inode, filp);

        switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
        case FMODE_READ:
        /*
         *  O_RDONLY
         *  POSIX.1 says that O_NONBLOCK means return with the FIFO
         *  opened, even when there is no process writing the FIFO.
         */
                pipe->r_counter++;
                /* First reader: release writers blocked in wait_for_partner(). */
                if (pipe->readers++ == 0)
                        wake_up_partner(pipe);

                if (!is_pipe && !pipe->writers) {
                        if ((filp->f_flags & O_NONBLOCK)) {
                                /* suppress EPOLLHUP until we have
                                 * seen a writer */
                                filp->f_version = pipe->w_counter;
                        } else {
                                /* Block until w_counter moves (a writer opened). */
                                if (wait_for_partner(pipe, &pipe->w_counter))
                                        goto err_rd;
                        }
                }
                break;

        case FMODE_WRITE:
        /*
         *  O_WRONLY
         *  POSIX.1 says that O_NONBLOCK means return -1 with
         *  errno=ENXIO when there is no process reading the FIFO.
         */
                ret = -ENXIO;
                if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
                        goto err;

                pipe->w_counter++;
                /* First writer: release readers blocked in wait_for_partner(). */
                if (!pipe->writers++)
                        wake_up_partner(pipe);

                if (!is_pipe && !pipe->readers) {
                        /* Block until r_counter moves (a reader opened). */
                        if (wait_for_partner(pipe, &pipe->r_counter))
                                goto err_wr;
                }
                break;

        case FMODE_READ | FMODE_WRITE:
        /*
         *  O_RDWR
         *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
         *  This implementation will NEVER block on a O_RDWR open, since
         *  the process can at least talk to itself.
         */

                pipe->readers++;
                pipe->writers++;
                pipe->r_counter++;
                pipe->w_counter++;
                if (pipe->readers == 1 || pipe->writers == 1)
                        wake_up_partner(pipe);
                break;

        default:
                ret = -EINVAL;
                goto err;
        }

        /* Ok! */
        __pipe_unlock(pipe);
        return 0;

err_rd:
        /* Undo the reader registration; wake wr_wait if that was the last reader. */
        if (!--pipe->readers)
                wake_up_interruptible(&pipe->wr_wait);
        ret = -ERESTARTSYS;
        goto err;

err_wr:
        /* Undo the writer registration; wake rd_wait if that was the last writer. */
        if (!--pipe->writers)
                wake_up_interruptible_all(&pipe->rd_wait);
        ret = -ERESTARTSYS;
        goto err;

err:
        __pipe_unlock(pipe);

        /* Drop the "files" reference taken at the top of the function. */
        put_pipe_info(inode, pipe);
        return ret;
}
1199 
/*
 * File operations used for both anonymous pipes (create_pipe_files()) and
 * FIFOs (fifo_open() handles both kinds).  get_pipe_info() compares a
 * file's f_op against this table to decide whether the file is a pipe.
 */
const struct file_operations pipefifo_fops = {
        .open           = fifo_open,
        .llseek         = no_llseek,
        .read_iter      = pipe_read,
        .write_iter     = pipe_write,
        .poll           = pipe_poll,
        .unlocked_ioctl = pipe_ioctl,
        .release        = pipe_release,
        .fasync         = pipe_fasync,
};
1210 
1211 /*
1212  * Currently we rely on the pipe array holding a power-of-2 number
1213  * of pages. Returns 0 on error.
1214  */
1215 unsigned int round_pipe_size(unsigned long size)
1216 {
1217         if (size > (1U << 31))
1218                 return 0;
1219 
1220         /* Minimum pipe size, as required by POSIX */
1221         if (size < PAGE_SIZE)
1222                 return PAGE_SIZE;
1223 
1224         return roundup_pow_of_two(size);
1225 }
1226 
1227 /*
1228  * Resize the pipe ring to a number of slots.
1229  */
1230 int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
1231 {
1232         struct pipe_buffer *bufs;
1233         unsigned int head, tail, mask, n;
1234 
1235         /*
1236          * We can shrink the pipe, if arg is greater than the ring occupancy.
1237          * Since we don't expect a lot of shrink+grow operations, just free and
1238          * allocate again like we would do for growing.  If the pipe currently
1239          * contains more buffers than arg, then return busy.
1240          */
1241         mask = pipe->ring_size - 1;
1242         head = pipe->head;
1243         tail = pipe->tail;
1244         n = pipe_occupancy(pipe->head, pipe->tail);
1245         if (nr_slots < n)
1246                 return -EBUSY;
1247 
1248         bufs = kcalloc(nr_slots, sizeof(*bufs),
1249                        GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
1250         if (unlikely(!bufs))
1251                 return -ENOMEM;
1252 
1253         /*
1254          * The pipe array wraps around, so just start the new one at zero
1255          * and adjust the indices.
1256          */
1257         if (n > 0) {
1258                 unsigned int h = head & mask;
1259                 unsigned int t = tail & mask;
1260                 if (h > t) {
1261                         memcpy(bufs, pipe->bufs + t,
1262                                n * sizeof(struct pipe_buffer));
1263                 } else {
1264                         unsigned int tsize = pipe->ring_size - t;
1265                         if (h > 0)
1266                                 memcpy(bufs + tsize, pipe->bufs,
1267                                        h * sizeof(struct pipe_buffer));
1268                         memcpy(bufs, pipe->bufs + t,
1269                                tsize * sizeof(struct pipe_buffer));
1270                 }
1271         }
1272 
1273         head = n;
1274         tail = 0;
1275 
1276         kfree(pipe->bufs);
1277         pipe->bufs = bufs;
1278         pipe->ring_size = nr_slots;
1279         if (pipe->max_usage > nr_slots)
1280                 pipe->max_usage = nr_slots;
1281         pipe->tail = tail;
1282         pipe->head = head;
1283 
1284         /* This might have made more room for writers */
1285         wake_up_interruptible(&pipe->wr_wait);
1286         return 0;
1287 }
1288 
1289 /*
1290  * Allocate a new array of pipe buffers and copy the info over. Returns the
1291  * pipe size if successful, or return -ERROR on error.
1292  */
1293 static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
1294 {
1295         unsigned long user_bufs;
1296         unsigned int nr_slots, size;
1297         long ret = 0;
1298 
1299 #ifdef CONFIG_WATCH_QUEUE
1300         if (pipe->watch_queue)
1301                 return -EBUSY;
1302 #endif
1303 
1304         size = round_pipe_size(arg);
1305         nr_slots = size >> PAGE_SHIFT;
1306 
1307         if (!nr_slots)
1308                 return -EINVAL;
1309 
1310         /*
1311          * If trying to increase the pipe capacity, check that an
1312          * unprivileged user is not trying to exceed various limits
1313          * (soft limit check here, hard limit check just below).
1314          * Decreasing the pipe capacity is always permitted, even
1315          * if the user is currently over a limit.
1316          */
1317         if (nr_slots > pipe->max_usage &&
1318                         size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
1319                 return -EPERM;
1320 
1321         user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots);
1322 
1323         if (nr_slots > pipe->max_usage &&
1324                         (too_many_pipe_buffers_hard(user_bufs) ||
1325                          too_many_pipe_buffers_soft(user_bufs)) &&
1326                         pipe_is_unprivileged_user()) {
1327                 ret = -EPERM;
1328                 goto out_revert_acct;
1329         }
1330 
1331         ret = pipe_resize_ring(pipe, nr_slots);
1332         if (ret < 0)
1333                 goto out_revert_acct;
1334 
1335         pipe->max_usage = nr_slots;
1336         pipe->nr_accounted = nr_slots;
1337         return pipe->max_usage * PAGE_SIZE;
1338 
1339 out_revert_acct:
1340         (void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted);
1341         return ret;
1342 }
1343 
1344 /*
1345  * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
1346  * location, so checking ->i_pipe is not enough to verify that this is a
1347  * pipe.
1348  */
1349 struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
1350 {
1351         struct pipe_inode_info *pipe = file->private_data;
1352 
1353         if (file->f_op != &pipefifo_fops || !pipe)
1354                 return NULL;
1355 #ifdef CONFIG_WATCH_QUEUE
1356         if (for_splice && pipe->watch_queue)
1357                 return NULL;
1358 #endif
1359         return pipe;
1360 }
1361 
1362 long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1363 {
1364         struct pipe_inode_info *pipe;
1365         long ret;
1366 
1367         pipe = get_pipe_info(file, false);
1368         if (!pipe)
1369                 return -EBADF;
1370 
1371         __pipe_lock(pipe);
1372 
1373         switch (cmd) {
1374         case F_SETPIPE_SZ:
1375                 ret = pipe_set_size(pipe, arg);
1376                 break;
1377         case F_GETPIPE_SZ:
1378                 ret = pipe->max_usage * PAGE_SIZE;
1379                 break;
1380         default:
1381                 ret = -EINVAL;
1382                 break;
1383         }
1384 
1385         __pipe_unlock(pipe);
1386         return ret;
1387 }
1388 
/*
 * Superblock operations for pipefs, installed through
 * pipefs_init_fs_context().  Only inode destruction and statfs are
 * provided.
 */
static const struct super_operations pipefs_ops = {
        .destroy_inode = free_inode_nonrcu,
        .statfs = simple_statfs,
};
1393 
1394 /*
1395  * pipefs should _never_ be mounted by userland - too much of security hassle,
1396  * no real gain from having the whole whorehouse mounted. So we don't need
1397  * any operations on the root directory. However, we need a non-trivial
1398  * d_name - pipe: will go nicely and kill the special-casing in procfs.
1399  */
1400 
1401 static int pipefs_init_fs_context(struct fs_context *fc)
1402 {
1403         struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC);
1404         if (!ctx)
1405                 return -ENOMEM;
1406         ctx->ops = &pipefs_ops;
1407         ctx->dops = &pipefs_dentry_operations;
1408         return 0;
1409 }
1410 
/*
 * Filesystem type for pipefs.  init_pipe_fs() registers it and creates the
 * kernel-internal mount kept in pipe_mnt.
 */
static struct file_system_type pipe_fs_type = {
        .name           = "pipefs",
        .init_fs_context = pipefs_init_fs_context,
        .kill_sb        = kill_anon_super,
};
1416 
1417 static int __init init_pipe_fs(void)
1418 {
1419         int err = register_filesystem(&pipe_fs_type);
1420 
1421         if (!err) {
1422                 pipe_mnt = kern_mount(&pipe_fs_type);
1423                 if (IS_ERR(pipe_mnt)) {
1424                         err = PTR_ERR(pipe_mnt);
1425                         unregister_filesystem(&pipe_fs_type);
1426                 }
1427         }
1428         return err;
1429 }
1430 
1431 fs_initcall(init_pipe_fs);
1432 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp