
TOMOYO Linux Cross Reference
Linux/fs/ext4/file.c

// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/file.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/file.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext4 fs regular file handling primitives
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 *      (jj@sunsite.ms.mff.cuni.cz)
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/mount.h>
#include <linux/path.h>
#include <linux/dax.h>
#include <linux/quotaops.h>
#include <linux/pagevec.h>
#include <linux/uio.h>
#include <linux/mman.h>
#include <linux/backing-dev.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "truncate.h"

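/*
 * Direct I/O is not supported for encrypted, verity, data=journal or
 * inline-data inodes; callers treat a false return as "fall back to
 * buffered I/O" rather than as an error.
 */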
static bool ext4_dio_supported(struct inode *inode)
{
        if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode))
                return false;
        if (fsverity_active(inode))
                return false;
        if (ext4_should_journal_data(inode))
                return false;
        if (ext4_has_inline_data(inode))
                return false;
        return true;
}

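/*
 * Direct read path. The shared inode lock serializes against concurrent
 * writers; with IOCB_NOWAIT (RWF_NOWAIT) the lock is only tried, so the
 * caller gets -EAGAIN instead of blocking.
 */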
static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
        ssize_t ret;
        struct inode *inode = file_inode(iocb->ki_filp);

        if (iocb->ki_flags & IOCB_NOWAIT) {
                if (!inode_trylock_shared(inode))
                        return -EAGAIN;
        } else {
                inode_lock_shared(inode);
        }

        if (!ext4_dio_supported(inode)) {
                inode_unlock_shared(inode);
                /*
                 * Fall back to buffered I/O if the operation being performed
                 * on the inode is not supported by direct I/O. The IOCB_DIRECT
                 * flag needs to be cleared here in order to ensure that the
                 * direct I/O path within generic_file_read_iter() is not
                 * taken.
                 */
                iocb->ki_flags &= ~IOCB_DIRECT;
                return generic_file_read_iter(iocb, to);
        }

        ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL,
                           is_sync_kiocb(iocb));
        inode_unlock_shared(inode);

        file_accessed(iocb->ki_filp);
        return ret;
}

#ifdef CONFIG_FS_DAX
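/* DAX reads bypass the page cache and copy directly from persistent memory. */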
static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
        struct inode *inode = file_inode(iocb->ki_filp);
        ssize_t ret;

        if (iocb->ki_flags & IOCB_NOWAIT) {
                if (!inode_trylock_shared(inode))
                        return -EAGAIN;
        } else {
                inode_lock_shared(inode);
        }
        /*
         * Recheck under the inode lock - at this point we are sure it cannot
         * change anymore.
         */
        if (!IS_DAX(inode)) {
                inode_unlock_shared(inode);
                /* Fall back to buffered I/O in case we cannot support DAX */
                return generic_file_read_iter(iocb, to);
        }
        ret = dax_iomap_rw(iocb, to, &ext4_iomap_ops);
        inode_unlock_shared(inode);

        file_accessed(iocb->ki_filp);
        return ret;
}
#endif

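/*
 * Top-level read_iter: DAX inodes take the DAX path, O_DIRECT takes the
 * iomap direct I/O path, and everything else is served from the page cache.
 */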
static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
        struct inode *inode = file_inode(iocb->ki_filp);

        if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
                return -EIO;

        if (!iov_iter_count(to))
                return 0; /* skip atime */

#ifdef CONFIG_FS_DAX
        if (IS_DAX(inode))
                return ext4_dax_read_iter(iocb, to);
#endif
        if (iocb->ki_flags & IOCB_DIRECT)
                return ext4_dio_read_iter(iocb, to);

        return generic_file_read_iter(iocb, to);
}

/*
 * Called when an inode is released. Note that this is different
 * from ext4_file_open: open gets called at every open, but release
 * gets called only when /all/ the files are closed.
 */
static int ext4_release_file(struct inode *inode, struct file *filp)
{
        if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
                ext4_alloc_da_blocks(inode);
                ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
        }
        /* if we are the last writer on the inode, drop the block reservation */
        if ((filp->f_mode & FMODE_WRITE) &&
                        (atomic_read(&inode->i_writecount) == 1) &&
                        !EXT4_I(inode)->i_reserved_data_blocks)
        {
                down_write(&EXT4_I(inode)->i_data_sem);
                ext4_discard_preallocations(inode);
                up_write(&EXT4_I(inode)->i_data_sem);
        }
        if (is_dx(inode) && filp->private_data)
                ext4_htree_free_dir_info(filp->private_data);

        return 0;
}

/*
 * This tests whether the IO in question is block-aligned or not.
 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
 * are converted to written only after the IO is complete.  Until they are
 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
 * it needs to zero out portions of the start and/or end block.  If two AIO
 * threads are at work on the same unwritten block, they must be synchronized
 * or one thread will zero the other's data, causing corruption.
 */
static int
ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
{
        struct super_block *sb = inode->i_sb;
        int blockmask = sb->s_blocksize - 1;

        if (pos >= ALIGN(i_size_read(inode), sb->s_blocksize))
                return 0;

        if ((pos | iov_iter_alignment(from)) & blockmask)
                return 1;

        return 0;
}

/* Is IO overwriting allocated and initialized blocks? */
static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
{
        struct ext4_map_blocks map;
        unsigned int blkbits = inode->i_blkbits;
        int err, blklen;

        if (pos + len > i_size_read(inode))
                return false;

        map.m_lblk = pos >> blkbits;
        map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
        blklen = map.m_len;

        err = ext4_map_blocks(NULL, inode, &map, 0);
        /*
         * 'err == blklen' means that all of the blocks have been preallocated,
         * regardless of whether they have been initialized or not. To exclude
         * unwritten extents, we need to check m_flags.
         */
        return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
}

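/*
 * Common prelude for all write paths: reject immutable inodes, run the
 * generic VFS write checks, clamp bitmap-mapped (non-extent) files to
 * s_bitmap_maxbytes, and strip privileges / update timestamps via
 * file_modified(). Returns the remaining byte count, 0, or a negative error.
 */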
static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
{
        struct inode *inode = file_inode(iocb->ki_filp);
        ssize_t ret;

        if (unlikely(IS_IMMUTABLE(inode)))
                return -EPERM;

        ret = generic_write_checks(iocb, from);
        if (ret <= 0)
                return ret;

        /*
         * If we have encountered a bitmap-format file, the size limit
         * is smaller than s_maxbytes, which is for extent-mapped files.
         */
        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

                if (iocb->ki_pos >= sbi->s_bitmap_maxbytes)
                        return -EFBIG;
                iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
        }

        ret = file_modified(iocb->ki_filp);
        if (ret)
                return ret;

        return iov_iter_count(from);
}

static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
                                        struct iov_iter *from)
{
        ssize_t ret;
        struct inode *inode = file_inode(iocb->ki_filp);

        if (iocb->ki_flags & IOCB_NOWAIT)
                return -EOPNOTSUPP;

        inode_lock(inode);
        ret = ext4_write_checks(iocb, from);
        if (ret <= 0)
                goto out;

        current->backing_dev_info = inode_to_bdi(inode);
        ret = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos);
        current->backing_dev_info = NULL;

out:
        inode_unlock(inode);
        if (likely(ret > 0)) {
                iocb->ki_pos += ret;
                ret = generic_write_sync(iocb, ret);
        }

        return ret;
}

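/*
 * Finish a size-extending direct or DAX write: update i_size/i_disksize,
 * truncate any blocks that were allocated but not written by a short write,
 * and take the inode off the orphan list once the extension has been safely
 * recorded.
 */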
static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
                                           ssize_t written, size_t count)
{
        handle_t *handle;
        bool truncate = false;
        u8 blkbits = inode->i_blkbits;
        ext4_lblk_t written_blk, end_blk;

        /*
         * Note that EXT4_I(inode)->i_disksize can get extended up to
         * inode->i_size while the I/O was running due to writeback of delalloc
         * blocks. But, the code in ext4_iomap_alloc() is careful to use
         * zeroed/unwritten extents if this is possible; thus we won't leave
         * uninitialized blocks in a file even if we didn't succeed in writing
         * as much as we intended.
         */
        WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
        if (offset + count <= EXT4_I(inode)->i_disksize) {
                /*
                 * We need to ensure that the inode is removed from the orphan
                 * list if it has been added prematurely, due to writeback of
                 * delalloc blocks.
                 */
                if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
                        handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);

                        if (IS_ERR(handle)) {
                                ext4_orphan_del(NULL, inode);
                                return PTR_ERR(handle);
                        }

                        ext4_orphan_del(handle, inode);
                        ext4_journal_stop(handle);
                }

                return written;
        }

        if (written < 0)
                goto truncate;

        handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
        if (IS_ERR(handle)) {
                written = PTR_ERR(handle);
                goto truncate;
        }

        if (ext4_update_inode_size(inode, offset + written))
                ext4_mark_inode_dirty(handle, inode);

        /*
         * We may need to truncate allocated but not written blocks beyond EOF.
         */
        written_blk = ALIGN(offset + written, 1 << blkbits);
        end_blk = ALIGN(offset + count, 1 << blkbits);
        if (written_blk < end_blk && ext4_can_truncate(inode))
                truncate = true;

        /*
         * Remove the inode from the orphan list if it has been extended and
         * everything went OK.
         */
        if (!truncate && inode->i_nlink)
                ext4_orphan_del(handle, inode);
        ext4_journal_stop(handle);

        if (truncate) {
truncate:
                ext4_truncate_failed_write(inode);
                /*
                 * If the truncate operation failed early, then the inode may
                 * still be on the orphan list. In that case, we need to try to
                 * remove the inode from the in-memory linked list.
                 */
                if (inode->i_nlink)
                        ext4_orphan_del(NULL, inode);
        }

        return written;
}

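/*
 * Direct I/O write completion: writes that targeted unwritten extents are
 * converted to written here, once the data is on disk, so that subsequent
 * reads see the new contents.
 */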
static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
                                 int error, unsigned int flags)
{
        loff_t offset = iocb->ki_pos;
        struct inode *inode = file_inode(iocb->ki_filp);

        if (error)
                return error;

        if (size && flags & IOMAP_DIO_UNWRITTEN)
                return ext4_convert_unwritten_extents(NULL, inode,
                                                      offset, size);

        return 0;
}

static const struct iomap_dio_ops ext4_dio_write_ops = {
        .end_io = ext4_dio_write_end_io,
};

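/*
 * Direct write path. Pure overwrites of allocated, initialized blocks may
 * downgrade to a shared lock (the dioread_nolock case); size-extending
 * writes put the inode on the orphan list first so that a crash mid-write
 * cannot leave stale blocks past i_size; whatever the direct path could not
 * finish is completed through the buffered path below.
 */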
static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
        ssize_t ret;
        size_t count;
        loff_t offset;
        handle_t *handle;
        struct inode *inode = file_inode(iocb->ki_filp);
        bool extend = false, overwrite = false, unaligned_aio = false;

        if (iocb->ki_flags & IOCB_NOWAIT) {
                if (!inode_trylock(inode))
                        return -EAGAIN;
        } else {
                inode_lock(inode);
        }

        if (!ext4_dio_supported(inode)) {
                inode_unlock(inode);
                /*
                 * Fall back to buffered I/O if the inode does not support
                 * direct I/O.
                 */
                return ext4_buffered_write_iter(iocb, from);
        }

        ret = ext4_write_checks(iocb, from);
        if (ret <= 0) {
                inode_unlock(inode);
                return ret;
        }

        /*
         * Unaligned asynchronous direct I/O writes must be serialized against
         * each other, as the zeroing of partial blocks by two competing
         * unaligned asynchronous direct I/O writes can result in data
         * corruption.
         */
        offset = iocb->ki_pos;
        count = iov_iter_count(from);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
            !is_sync_kiocb(iocb) && ext4_unaligned_aio(inode, from, offset)) {
                unaligned_aio = true;
                inode_dio_wait(inode);
        }

        /*
         * Determine whether the I/O will overwrite allocated and initialized
         * blocks. If so, check to see whether it is possible to take the
         * dioread_nolock path.
         */
        if (!unaligned_aio && ext4_overwrite_io(inode, offset, count) &&
            ext4_should_dioread_nolock(inode)) {
                overwrite = true;
                downgrade_write(&inode->i_rwsem);
        }

        if (offset + count > EXT4_I(inode)->i_disksize) {
                handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
                        goto out;
                }

                ret = ext4_orphan_add(handle, inode);
                if (ret) {
                        ext4_journal_stop(handle);
                        goto out;
                }

                extend = true;
                ext4_journal_stop(handle);
        }

        ret = iomap_dio_rw(iocb, from, &ext4_iomap_ops, &ext4_dio_write_ops,
                           is_sync_kiocb(iocb) || unaligned_aio || extend);

        if (extend)
                ret = ext4_handle_inode_extension(inode, offset, ret, count);

out:
        if (overwrite)
                inode_unlock_shared(inode);
        else
                inode_unlock(inode);

        if (ret >= 0 && iov_iter_count(from)) {
                ssize_t err;
                loff_t endbyte;

                offset = iocb->ki_pos;
                err = ext4_buffered_write_iter(iocb, from);
                if (err < 0)
                        return err;

                /*
                 * We need to ensure that the pages within the page cache for
                 * the range covered by this I/O are written to disk and
                 * invalidated. This is an attempt to preserve the expected
                 * direct I/O semantics in the case we fall back to buffered
                 * I/O to complete the I/O request.
                 */
                ret += err;
                endbyte = offset + err - 1;
                err = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
                                                   offset, endbyte);
                if (!err)
                        invalidate_mapping_pages(iocb->ki_filp->f_mapping,
                                                 offset >> PAGE_SHIFT,
                                                 endbyte >> PAGE_SHIFT);
        }

        return ret;
}

#ifdef CONFIG_FS_DAX
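/*
 * DAX write path: data is copied synchronously to persistent memory via
 * dax_iomap_rw(). The orphan-list and size-extension handling mirrors the
 * direct I/O path above, but there is no buffered fallback.
 */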
static ssize_t
ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
        ssize_t ret;
        size_t count;
        loff_t offset;
        handle_t *handle;
        bool extend = false;
        struct inode *inode = file_inode(iocb->ki_filp);

        if (iocb->ki_flags & IOCB_NOWAIT) {
                if (!inode_trylock(inode))
                        return -EAGAIN;
        } else {
                inode_lock(inode);
        }

        ret = ext4_write_checks(iocb, from);
        if (ret <= 0)
                goto out;

        offset = iocb->ki_pos;
        count = iov_iter_count(from);

        if (offset + count > EXT4_I(inode)->i_disksize) {
                handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
                        goto out;
                }

                ret = ext4_orphan_add(handle, inode);
                if (ret) {
                        ext4_journal_stop(handle);
                        goto out;
                }

                extend = true;
                ext4_journal_stop(handle);
        }

        ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);

        if (extend)
                ret = ext4_handle_inode_extension(inode, offset, ret, count);
out:
        inode_unlock(inode);
        if (ret > 0)
                ret = generic_write_sync(iocb, ret);
        return ret;
}
#endif

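/*
 * Top-level write_iter: same dispatch order as the read side, i.e. DAX,
 * then O_DIRECT, then buffered.
 */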
static ssize_t
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
        struct inode *inode = file_inode(iocb->ki_filp);

        if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
                return -EIO;

#ifdef CONFIG_FS_DAX
        if (IS_DAX(inode))
                return ext4_dax_write_iter(iocb, from);
#endif
        if (iocb->ki_flags & IOCB_DIRECT)
                return ext4_dio_write_iter(iocb, from);

        return ext4_buffered_write_iter(iocb, from);
}

#ifdef CONFIG_FS_DAX
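/*
 * DAX page-fault handler. Write faults open a journal transaction up front
 * (block allocation may be required) and retry on ENOSPC; i_mmap_sem is
 * held across the fault to serialize against truncate and hole punching.
 */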
static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
                enum page_entry_size pe_size)
{
        int error = 0;
        vm_fault_t result;
        int retries = 0;
        handle_t *handle = NULL;
        struct inode *inode = file_inode(vmf->vma->vm_file);
        struct super_block *sb = inode->i_sb;

        /*
         * We have to distinguish real writes from writes which will result in a
         * COW page; COW writes should *not* poke the journal (the file will not
         * be changed). Doing so would cause unintended failures when mounted
         * read-only.
         *
         * We check for VM_SHARED rather than vmf->cow_page since the latter is
         * unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for
         * other sizes, dax_iomap_fault will handle splitting / fallback so that
         * we eventually come back with a COW page.
         */
        bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
                (vmf->vma->vm_flags & VM_SHARED);
        pfn_t pfn;

        if (write) {
                sb_start_pagefault(sb);
                file_update_time(vmf->vma->vm_file);
                down_read(&EXT4_I(inode)->i_mmap_sem);
retry:
                handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
                                               EXT4_DATA_TRANS_BLOCKS(sb));
                if (IS_ERR(handle)) {
                        up_read(&EXT4_I(inode)->i_mmap_sem);
                        sb_end_pagefault(sb);
                        return VM_FAULT_SIGBUS;
                }
        } else {
                down_read(&EXT4_I(inode)->i_mmap_sem);
        }
        result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
        if (write) {
                ext4_journal_stop(handle);

                if ((result & VM_FAULT_ERROR) && error == -ENOSPC &&
                    ext4_should_retry_alloc(sb, &retries))
                        goto retry;
                /* Handling synchronous page fault? */
                if (result & VM_FAULT_NEEDDSYNC)
                        result = dax_finish_sync_fault(vmf, pe_size, pfn);
                up_read(&EXT4_I(inode)->i_mmap_sem);
                sb_end_pagefault(sb);
        } else {
                up_read(&EXT4_I(inode)->i_mmap_sem);
        }

        return result;
}

static vm_fault_t ext4_dax_fault(struct vm_fault *vmf)
{
        return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
}

static const struct vm_operations_struct ext4_dax_vm_ops = {
        .fault          = ext4_dax_fault,
        .huge_fault     = ext4_dax_huge_fault,
        .page_mkwrite   = ext4_dax_fault,
        .pfn_mkwrite    = ext4_dax_fault,
};
#else
#define ext4_dax_vm_ops ext4_file_vm_ops
#endif

static const struct vm_operations_struct ext4_file_vm_ops = {
        .fault          = ext4_filemap_fault,
        .map_pages      = filemap_map_pages,
        .page_mkwrite   = ext4_page_mkwrite,
};

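/*
 * mmap: DAX mappings get their own vm_ops and are marked VM_HUGEPAGE so
 * that the fault path may install PMD-sized entries; MAP_SYNC mappings
 * are refused unless the backing dax_device supports them.
 */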
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct inode *inode = file->f_mapping->host;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct dax_device *dax_dev = sbi->s_daxdev;

        if (unlikely(ext4_forced_shutdown(sbi)))
                return -EIO;

        /*
         * We don't support synchronous mappings for non-DAX files, nor for
         * DAX files if the underlying dax_device is not synchronous.
         */
        if (!daxdev_mapping_supported(vma, dax_dev))
                return -EOPNOTSUPP;

        file_accessed(file);
        if (IS_DAX(file_inode(file))) {
                vma->vm_ops = &ext4_dax_vm_ops;
                vma->vm_flags |= VM_HUGEPAGE;
        } else {
                vma->vm_ops = &ext4_file_vm_ops;
        }
        return 0;
}

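/*
 * Record the first mount point of this filesystem in the superblock's
 * s_last_mounted field. The EXT4_MF_MNTDIR_SAMPLED flag makes this a
 * once-per-mount operation; read-only mounts are left untouched.
 */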
static int ext4_sample_last_mounted(struct super_block *sb,
                                    struct vfsmount *mnt)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct path path;
        char buf[64], *cp;
        handle_t *handle;
        int err;

        if (likely(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED))
                return 0;

        if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
                return 0;

        sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
        /*
         * Sample where the filesystem has been mounted and
         * store it in the superblock for sysadmin convenience
         * when trying to sort through large numbers of block
         * devices or filesystem images.
         */
        memset(buf, 0, sizeof(buf));
        path.mnt = mnt;
        path.dentry = mnt->mnt_root;
        cp = d_path(&path, buf, sizeof(buf));
        err = 0;
        if (IS_ERR(cp))
                goto out;

        handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
        err = PTR_ERR(handle);
        if (IS_ERR(handle))
                goto out;
        BUFFER_TRACE(sbi->s_sbh, "get_write_access");
        err = ext4_journal_get_write_access(handle, sbi->s_sbh);
        if (err)
                goto out_journal;
        strlcpy(sbi->s_es->s_last_mounted, cp,
                sizeof(sbi->s_es->s_last_mounted));
        ext4_handle_dirty_super(handle, sb);
out_journal:
        ext4_journal_stop(handle);
out:
        sb_end_intwrite(sb);
        return err;
}

static int ext4_file_open(struct inode *inode, struct file *filp)
{
        int ret;

        if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
                return -EIO;

        ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
        if (ret)
                return ret;

        ret = fscrypt_file_open(inode, filp);
        if (ret)
                return ret;

        ret = fsverity_file_open(inode, filp);
        if (ret)
                return ret;

        /*
         * Set up the jbd2_inode if we are opening the inode for
         * writing and the journal is present.
         */
        if (filp->f_mode & FMODE_WRITE) {
                ret = ext4_inode_attach_jinode(inode);
                if (ret < 0)
                        return ret;
        }

        filp->f_mode |= FMODE_NOWAIT;
        return dquot_file_open(inode, filp);
}

/*
 * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
 * by calling generic_file_llseek_size() with the appropriate maxbytes
 * value for each.
 */
loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
{
        struct inode *inode = file->f_mapping->host;
        loff_t maxbytes;

        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
        else
                maxbytes = inode->i_sb->s_maxbytes;

        switch (whence) {
        default:
                return generic_file_llseek_size(file, offset, whence,
                                                maxbytes, i_size_read(inode));
        case SEEK_HOLE:
                inode_lock_shared(inode);
                offset = iomap_seek_hole(inode, offset,
                                         &ext4_iomap_report_ops);
                inode_unlock_shared(inode);
                break;
        case SEEK_DATA:
                inode_lock_shared(inode);
                offset = iomap_seek_data(inode, offset,
                                         &ext4_iomap_report_ops);
                inode_unlock_shared(inode);
                break;
        }

        if (offset < 0)
                return offset;
        return vfs_setpos(file, offset, maxbytes);
}

const struct file_operations ext4_file_operations = {
        .llseek         = ext4_llseek,
        .read_iter      = ext4_file_read_iter,
        .write_iter     = ext4_file_write_iter,
        .unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = ext4_compat_ioctl,
#endif
        .mmap           = ext4_file_mmap,
        .mmap_supported_flags = MAP_SYNC,
        .open           = ext4_file_open,
        .release        = ext4_release_file,
        .fsync          = ext4_sync_file,
        .get_unmapped_area = thp_get_unmapped_area,
        .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .fallocate      = ext4_fallocate,
};

const struct inode_operations ext4_file_inode_operations = {
        .setattr        = ext4_setattr,
        .getattr        = ext4_file_getattr,
        .listxattr      = ext4_listxattr,
        .get_acl        = ext4_get_acl,
        .set_acl        = ext4_set_acl,
        .fiemap         = ext4_fiemap,
};
