~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/cifs/file.c

Version: ~ [ linux-5.16-rc3 ] ~ [ linux-5.15.5 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.82 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.162 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.218 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.256 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.291 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.293 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *   fs/cifs/file.c
  3  *
  4  *   vfs operations that deal with files
  5  *
  6  *   Copyright (C) International Business Machines  Corp., 2002,2010
  7  *   Author(s): Steve French (sfrench@us.ibm.com)
  8  *              Jeremy Allison (jra@samba.org)
  9  *
 10  *   This library is free software; you can redistribute it and/or modify
 11  *   it under the terms of the GNU Lesser General Public License as published
 12  *   by the Free Software Foundation; either version 2.1 of the License, or
 13  *   (at your option) any later version.
 14  *
 15  *   This library is distributed in the hope that it will be useful,
 16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 18  *   the GNU Lesser General Public License for more details.
 19  *
 20  *   You should have received a copy of the GNU Lesser General Public License
 21  *   along with this library; if not, write to the Free Software
 22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 23  */
 24 #include <linux/fs.h>
 25 #include <linux/backing-dev.h>
 26 #include <linux/stat.h>
 27 #include <linux/fcntl.h>
 28 #include <linux/pagemap.h>
 29 #include <linux/pagevec.h>
 30 #include <linux/writeback.h>
 31 #include <linux/task_io_accounting_ops.h>
 32 #include <linux/delay.h>
 33 #include <linux/mount.h>
 34 #include <linux/slab.h>
 35 #include <linux/swap.h>
 36 #include <linux/mm.h>
 37 #include <asm/div64.h>
 38 #include "cifsfs.h"
 39 #include "cifspdu.h"
 40 #include "cifsglob.h"
 41 #include "cifsproto.h"
 42 #include "cifs_unicode.h"
 43 #include "cifs_debug.h"
 44 #include "cifs_fs_sb.h"
 45 #include "fscache.h"
 46 #include "smbdirect.h"
 47 
 48 static inline int cifs_convert_flags(unsigned int flags)
 49 {
 50         if ((flags & O_ACCMODE) == O_RDONLY)
 51                 return GENERIC_READ;
 52         else if ((flags & O_ACCMODE) == O_WRONLY)
 53                 return GENERIC_WRITE;
 54         else if ((flags & O_ACCMODE) == O_RDWR) {
 55                 /* GENERIC_ALL is too much permission to request
 56                    can cause unnecessary access denied on create */
 57                 /* return GENERIC_ALL; */
 58                 return (GENERIC_READ | GENERIC_WRITE);
 59         }
 60 
 61         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
 62                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
 63                 FILE_READ_DATA);
 64 }
 65 
 66 static u32 cifs_posix_convert_flags(unsigned int flags)
 67 {
 68         u32 posix_flags = 0;
 69 
 70         if ((flags & O_ACCMODE) == O_RDONLY)
 71                 posix_flags = SMB_O_RDONLY;
 72         else if ((flags & O_ACCMODE) == O_WRONLY)
 73                 posix_flags = SMB_O_WRONLY;
 74         else if ((flags & O_ACCMODE) == O_RDWR)
 75                 posix_flags = SMB_O_RDWR;
 76 
 77         if (flags & O_CREAT) {
 78                 posix_flags |= SMB_O_CREAT;
 79                 if (flags & O_EXCL)
 80                         posix_flags |= SMB_O_EXCL;
 81         } else if (flags & O_EXCL)
 82                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
 83                          current->comm, current->tgid);
 84 
 85         if (flags & O_TRUNC)
 86                 posix_flags |= SMB_O_TRUNC;
 87         /* be safe and imply O_SYNC for O_DSYNC */
 88         if (flags & O_DSYNC)
 89                 posix_flags |= SMB_O_SYNC;
 90         if (flags & O_DIRECTORY)
 91                 posix_flags |= SMB_O_DIRECTORY;
 92         if (flags & O_NOFOLLOW)
 93                 posix_flags |= SMB_O_NOFOLLOW;
 94         if (flags & O_DIRECT)
 95                 posix_flags |= SMB_O_DIRECT;
 96 
 97         return posix_flags;
 98 }
 99 
100 static inline int cifs_get_disposition(unsigned int flags)
101 {
102         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103                 return FILE_CREATE;
104         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105                 return FILE_OVERWRITE_IF;
106         else if ((flags & O_CREAT) == O_CREAT)
107                 return FILE_OPEN_IF;
108         else if ((flags & O_TRUNC) == O_TRUNC)
109                 return FILE_OVERWRITE;
110         else
111                 return FILE_OPEN;
112 }
113 
/*
 * cifs_posix_open - open/create a file using the SMB unix extensions.
 * @full_path: share-relative path of the file
 * @pinode:    in/out inode pointer, or NULL if the caller needs no inode
 *             info.  If *pinode is NULL a new inode is built from the
 *             returned attributes; otherwise the existing one is refreshed.
 * @sb:        superblock of the mount
 * @mode:      create mode; the current umask is applied below
 * @f_flags:   POSIX open flags, converted to SMB_O_* wire flags
 * @poplock:   out: oplock granted by the server
 * @pnetfid:   out: netfid of the opened file
 * @xid:       transaction id for this operation
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	/* response buffer for the file attributes returned by the server */
	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	/* tlink reference no longer needed once the wire call is done */
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	/* Type of -1 means the server returned no usable attributes */
	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
174 
/*
 * cifs_nt_open - open @full_path via the standard (non-posix) create call
 * and refresh the inode metadata from the result.
 * @full_path: share-relative path
 * @inode:     inode of the file being opened
 * @cifs_sb:   mount superblock info
 * @tcon:      tree connection to issue the open on
 * @f_flags:   POSIX open flags; mapped to desired access and disposition
 * @oplock:    in/out oplock state
 * @fid:       out: server file id of the new handle
 * @xid:       transaction id for this operation
 *
 * Returns 0 on success or a negative errno; if the post-open metadata
 * query fails, the just-opened handle is closed before returning.
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* buffer to receive FILE_ALL_INFO metadata from the open response */
	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		/* metadata refresh failed - don't leak the open handle */
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}
264 
265 static bool
266 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
267 {
268         struct cifs_fid_locks *cur;
269         bool has_locks = false;
270 
271         down_read(&cinode->lock_sem);
272         list_for_each_entry(cur, &cinode->llist, llist) {
273                 if (!list_empty(&cur->locks)) {
274                         has_locks = true;
275                         break;
276                 }
277         }
278         up_read(&cinode->lock_sem);
279         return has_locks;
280 }
281 
/*
 * Acquire @sem for writing by polling with trylock plus a short sleep
 * rather than blocking in down_write().  NOTE(review): presumably this
 * sidesteps a lock-ordering issue with lock_sem - confirm against the
 * commit that introduced it before changing.
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	for (;;) {
		if (down_write_trylock(sem))
			return;
		msleep(10);
	}
}
288 
289 static void cifsFileInfo_put_work(struct work_struct *work);
290 
/*
 * cifs_new_fileinfo - allocate the per-open private data for @file, link
 * it into the tcon and inode open-file lists and stash it in
 * file->private_data.
 * @fid:    server handle from the preceding open; fid->pending_open is
 *          consumed (unlinked from the pending-open list) here
 * @file:   the VFS file being opened
 * @tlink:  tcon link; an extra reference is taken for the cfile lifetime
 * @oplock: oplock state granted by the open
 *
 * Returns the new cifsFileInfo (refcount 1) or NULL on allocation failure.
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	/* per-fid byte-range lock list, cross-linked with the cfile */
	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	/* pin the superblock while this open exists */
	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	/* a break may have changed the oplock while the open was pending */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	/* set_fid() below may turn purge_cache back on */
	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
370 
/*
 * cifsFileInfo_get - take an additional reference on @cifs_file under
 * file_info_lock.  Paired with cifsFileInfo_put().
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
379 
/*
 * Final teardown of a cifsFileInfo once its refcount hit zero: free the
 * per-fid byte-range lock records, then drop the tlink, dentry and
 * superblock references taken in cifs_new_fileinfo() and free the
 * structure.  Called directly or via the cifsFileInfo_put_work item.
 */
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}
406 
/* Workqueue callback: deferred final release of a cifsFileInfo. */
static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}
414 
/**
 * cifsFileInfo_put - release a reference of file priv data
 * @cifs_file: cifs/smb3 file private data to release
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}
424 
425 /**
426  * _cifsFileInfo_put - release a reference of file priv data
427  *
428  * This may involve closing the filehandle @cifs_file out on the
429  * server. Must be called without holding tcon->open_file_lock,
430  * cinode->open_file_lock and cifs_file->file_info_lock.
431  *
432  * If @wait_for_oplock_handler is true and we are releasing the last
433  * reference, wait for any running oplock break handler of the file
434  * and cancel any pending one. If calling this function from the
435  * oplock break handler, you need to pass false.
436  *
437  */
438 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
439                        bool wait_oplock_handler, bool offload)
440 {
441         struct inode *inode = d_inode(cifs_file->dentry);
442         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
443         struct TCP_Server_Info *server = tcon->ses->server;
444         struct cifsInodeInfo *cifsi = CIFS_I(inode);
445         struct super_block *sb = inode->i_sb;
446         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
447         struct cifs_fid fid;
448         struct cifs_pending_open open;
449         bool oplock_break_cancelled;
450 
451         spin_lock(&tcon->open_file_lock);
452         spin_lock(&cifsi->open_file_lock);
453         spin_lock(&cifs_file->file_info_lock);
454         if (--cifs_file->count > 0) {
455                 spin_unlock(&cifs_file->file_info_lock);
456                 spin_unlock(&cifsi->open_file_lock);
457                 spin_unlock(&tcon->open_file_lock);
458                 return;
459         }
460         spin_unlock(&cifs_file->file_info_lock);
461 
462         if (server->ops->get_lease_key)
463                 server->ops->get_lease_key(inode, &fid);
464 
465         /* store open in pending opens to make sure we don't miss lease break */
466         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
467 
468         /* remove it from the lists */
469         list_del(&cifs_file->flist);
470         list_del(&cifs_file->tlist);
471         atomic_dec(&tcon->num_local_opens);
472 
473         if (list_empty(&cifsi->openFileList)) {
474                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
475                          d_inode(cifs_file->dentry));
476                 /*
477                  * In strict cache mode we need invalidate mapping on the last
478                  * close  because it may cause a error when we open this file
479                  * again and get at least level II oplock.
480                  */
481                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
482                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
483                 cifs_set_oplock_level(cifsi, 0);
484         }
485 
486         spin_unlock(&cifsi->open_file_lock);
487         spin_unlock(&tcon->open_file_lock);
488 
489         oplock_break_cancelled = wait_oplock_handler ?
490                 cancel_work_sync(&cifs_file->oplock_break) : false;
491 
492         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
493                 struct TCP_Server_Info *server = tcon->ses->server;
494                 unsigned int xid;
495 
496                 xid = get_xid();
497                 if (server->ops->close_getattr)
498                         server->ops->close_getattr(xid, tcon, cifs_file);
499                 else if (server->ops->close)
500                         server->ops->close(xid, tcon, &cifs_file->fid);
501                 _free_xid(xid);
502         }
503 
504         if (oplock_break_cancelled)
505                 cifs_done_oplock_break(cifsi);
506 
507         cifs_del_pending_open(&open);
508 
509         if (offload)
510                 queue_work(fileinfo_put_wq, &cifs_file->put);
511         else
512                 cifsFileInfo_put_final(cifs_file);
513 }
514 
/*
 * cifs_open - VFS ->open() entry point for cifs regular files.
 *
 * Attempts a posix open first when the server advertises the unix
 * extensions' posix path operations capability, falling back to the
 * standard NT open path otherwise; on success builds the per-open
 * cifsFileInfo and stores it in file->private_data.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* strict-cache O_DIRECT opens get the direct-I/O file operations */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server rejected the call itself - stop trying */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* record as pending so a racing lease break is not missed */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* allocation failed - close the server handle again */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
641 
642 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
643 
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 *
 * Called with the file handle already reopened; takes lock_sem for
 * reading (nested, since the caller's context may already participate
 * in the lock_sem ordering) and pushes cached posix or mandatory locks
 * back to the server.  Returns 0 on success or a negative errno.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	/* posix locks if the server supports them and mount allows, else mandatory */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
673 
674 static int
675 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
676 {
677         int rc = -EACCES;
678         unsigned int xid;
679         __u32 oplock;
680         struct cifs_sb_info *cifs_sb;
681         struct cifs_tcon *tcon;
682         struct TCP_Server_Info *server;
683         struct cifsInodeInfo *cinode;
684         struct inode *inode;
685         char *full_path = NULL;
686         int desired_access;
687         int disposition = FILE_OPEN;
688         int create_options = CREATE_NOT_DIR;
689         struct cifs_open_parms oparms;
690 
691         xid = get_xid();
692         mutex_lock(&cfile->fh_mutex);
693         if (!cfile->invalidHandle) {
694                 mutex_unlock(&cfile->fh_mutex);
695                 rc = 0;
696                 free_xid(xid);
697                 return rc;
698         }
699 
700         inode = d_inode(cfile->dentry);
701         cifs_sb = CIFS_SB(inode->i_sb);
702         tcon = tlink_tcon(cfile->tlink);
703         server = tcon->ses->server;
704 
705         /*
706          * Can not grab rename sem here because various ops, including those
707          * that already have the rename sem can end up causing writepage to get
708          * called and if the server was down that means we end up here, and we
709          * can never tell if the caller already has the rename_sem.
710          */
711         full_path = build_path_from_dentry(cfile->dentry);
712         if (full_path == NULL) {
713                 rc = -ENOMEM;
714                 mutex_unlock(&cfile->fh_mutex);
715                 free_xid(xid);
716                 return rc;
717         }
718 
719         cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
720                  inode, cfile->f_flags, full_path);
721 
722         if (tcon->ses->server->oplocks)
723                 oplock = REQ_OPLOCK;
724         else
725                 oplock = 0;
726 
727         if (tcon->unix_ext && cap_unix(tcon->ses) &&
728             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
729                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
730                 /*
731                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
732                  * original open. Must mask them off for a reopen.
733                  */
734                 unsigned int oflags = cfile->f_flags &
735                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
736 
737                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
738                                      cifs_sb->mnt_file_mode /* ignored */,
739                                      oflags, &oplock, &cfile->fid.netfid, xid);
740                 if (rc == 0) {
741                         cifs_dbg(FYI, "posix reopen succeeded\n");
742                         oparms.reconnect = true;
743                         goto reopen_success;
744                 }
745                 /*
746                  * fallthrough to retry open the old way on errors, especially
747                  * in the reconnect path it is important to retry hard
748                  */
749         }
750 
751         desired_access = cifs_convert_flags(cfile->f_flags);
752 
753         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
754         if (cfile->f_flags & O_SYNC)
755                 create_options |= CREATE_WRITE_THROUGH;
756 
757         if (cfile->f_flags & O_DIRECT)
758                 create_options |= CREATE_NO_BUFFER;
759 
760         if (server->ops->get_lease_key)
761                 server->ops->get_lease_key(inode, &cfile->fid);
762 
763         oparms.tcon = tcon;
764         oparms.cifs_sb = cifs_sb;
765         oparms.desired_access = desired_access;
766         oparms.create_options = cifs_create_options(cifs_sb, create_options);
767         oparms.disposition = disposition;
768         oparms.path = full_path;
769         oparms.fid = &cfile->fid;
770         oparms.reconnect = true;
771 
772         /*
773          * Can not refresh inode by passing in file_info buf to be returned by
774          * ops->open and then calling get_inode_info with returned buf since
775          * file might have write behind data that needs to be flushed and server
776          * version of file size can be stale. If we knew for sure that inode was
777          * not dirty locally we could do this.
778          */
779         rc = server->ops->open(xid, &oparms, &oplock, NULL);
780         if (rc == -ENOENT && oparms.reconnect == false) {
781                 /* durable handle timeout is expired - open the file again */
782                 rc = server->ops->open(xid, &oparms, &oplock, NULL);
783                 /* indicate that we need to relock the file */
784                 oparms.reconnect = true;
785         }
786 
787         if (rc) {
788                 mutex_unlock(&cfile->fh_mutex);
789                 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
790                 cifs_dbg(FYI, "oplock: %d\n", oplock);
791                 goto reopen_error_exit;
792         }
793 
794 reopen_success:
795         cfile->invalidHandle = false;
796         mutex_unlock(&cfile->fh_mutex);
797         cinode = CIFS_I(inode);
798 
799         if (can_flush) {
800                 rc = filemap_write_and_wait(inode->i_mapping);
801                 if (!is_interrupt_error(rc))
802                         mapping_set_error(inode->i_mapping, rc);
803 
804                 if (tcon->posix_extensions)
805                         rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
806                 else if (tcon->unix_ext)
807                         rc = cifs_get_inode_info_unix(&inode, full_path,
808                                                       inode->i_sb, xid);
809                 else
810                         rc = cifs_get_inode_info(&inode, full_path, NULL,
811                                                  inode->i_sb, xid, NULL);
812         }
813         /*
814          * Else we are writing out data to server already and could deadlock if
815          * we tried to flush data, and since we do not know if we have data that
816          * would invalidate the current end of file on the server we can not go
817          * to the server to get the new inode info.
818          */
819 
820         /*
821          * If the server returned a read oplock and we have mandatory brlocks,
822          * set oplock level to None.
823          */
824         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
825                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
826                 oplock = 0;
827         }
828 
829         server->ops->set_fid(cfile, &cfile->fid, oplock);
830         if (oparms.reconnect)
831                 cifs_relock_file(cfile);
832 
833 reopen_error_exit:
834         kfree(full_path);
835         free_xid(xid);
836         return rc;
837 }
838 
839 int cifs_close(struct inode *inode, struct file *file)
840 {
841         if (file->private_data != NULL) {
842                 _cifsFileInfo_put(file->private_data, true, false);
843                 file->private_data = NULL;
844         }
845 
846         /* return code from the ->release op is always ignored */
847         return 0;
848 }
849 
850 void
851 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
852 {
853         struct cifsFileInfo *open_file;
854         struct list_head *tmp;
855         struct list_head *tmp1;
856         struct list_head tmp_list;
857 
858         if (!tcon->use_persistent || !tcon->need_reopen_files)
859                 return;
860 
861         tcon->need_reopen_files = false;
862 
863         cifs_dbg(FYI, "Reopen persistent handles\n");
864         INIT_LIST_HEAD(&tmp_list);
865 
866         /* list all files open on tree connection, reopen resilient handles  */
867         spin_lock(&tcon->open_file_lock);
868         list_for_each(tmp, &tcon->openFileList) {
869                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
870                 if (!open_file->invalidHandle)
871                         continue;
872                 cifsFileInfo_get(open_file);
873                 list_add_tail(&open_file->rlist, &tmp_list);
874         }
875         spin_unlock(&tcon->open_file_lock);
876 
877         list_for_each_safe(tmp, tmp1, &tmp_list) {
878                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
879                 if (cifs_reopen_file(open_file, false /* do not flush */))
880                         tcon->need_reopen_files = true;
881                 list_del_init(&open_file->rlist);
882                 cifsFileInfo_put(open_file);
883         }
884 }
885 
886 int cifs_closedir(struct inode *inode, struct file *file)
887 {
888         int rc = 0;
889         unsigned int xid;
890         struct cifsFileInfo *cfile = file->private_data;
891         struct cifs_tcon *tcon;
892         struct TCP_Server_Info *server;
893         char *buf;
894 
895         cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
896 
897         if (cfile == NULL)
898                 return rc;
899 
900         xid = get_xid();
901         tcon = tlink_tcon(cfile->tlink);
902         server = tcon->ses->server;
903 
904         cifs_dbg(FYI, "Freeing private data in close dir\n");
905         spin_lock(&cfile->file_info_lock);
906         if (server->ops->dir_needs_close(cfile)) {
907                 cfile->invalidHandle = true;
908                 spin_unlock(&cfile->file_info_lock);
909                 if (server->ops->close_dir)
910                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
911                 else
912                         rc = -ENOSYS;
913                 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
914                 /* not much we can do if it fails anyway, ignore rc */
915                 rc = 0;
916         } else
917                 spin_unlock(&cfile->file_info_lock);
918 
919         buf = cfile->srch_inf.ntwrk_buf_start;
920         if (buf) {
921                 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
922                 cfile->srch_inf.ntwrk_buf_start = NULL;
923                 if (cfile->srch_inf.smallBuf)
924                         cifs_small_buf_release(buf);
925                 else
926                         cifs_buf_release(buf);
927         }
928 
929         cifs_put_tlink(cfile->tlink);
930         kfree(file->private_data);
931         file->private_data = NULL;
932         /* BB can we lock the filestruct while this is going on? */
933         free_xid(xid);
934         return rc;
935 }
936 
937 static struct cifsLockInfo *
938 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
939 {
940         struct cifsLockInfo *lock =
941                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
942         if (!lock)
943                 return lock;
944         lock->offset = offset;
945         lock->length = length;
946         lock->type = type;
947         lock->pid = current->tgid;
948         lock->flags = flags;
949         INIT_LIST_HEAD(&lock->blist);
950         init_waitqueue_head(&lock->block_q);
951         return lock;
952 }
953 
954 void
955 cifs_del_lock_waiters(struct cifsLockInfo *lock)
956 {
957         struct cifsLockInfo *li, *tmp;
958         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
959                 list_del_init(&li->blist);
960                 wake_up(&li->block_q);
961         }
962 }
963 
/* rw_check values: which kind of operation is being checked for conflicts */
#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2
967 
/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* ranges that don't overlap [offset, offset+length) can't conflict */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		/*
		 * For read/write checks (not lock-set ops), a lock held by
		 * the same thread group through the same fid is not a
		 * conflict - except that a shared lock still blocks a
		 * CIFS_WRITE_OP through that fid.
		 */
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		/*
		 * A requested shared lock is compatible with an existing
		 * lock of the same type, or with any lock held by the same
		 * tgid through the same fid.
		 */
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		/* OFD locks taken through the same fid do not conflict */
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		/* overlapping and incompatible: report the conflicting lock */
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
1004 
1005 bool
1006 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1007                         __u8 type, __u16 flags,
1008                         struct cifsLockInfo **conf_lock, int rw_check)
1009 {
1010         bool rc = false;
1011         struct cifs_fid_locks *cur;
1012         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1013 
1014         list_for_each_entry(cur, &cinode->llist, llist) {
1015                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1016                                                  flags, cfile, conf_lock,
1017                                                  rw_check);
1018                 if (rc)
1019                         break;
1020         }
1021 
1022         return rc;
1023 }
1024 
1025 /*
1026  * Check if there is another lock that prevents us to set the lock (mandatory
1027  * style). If such a lock exists, update the flock structure with its
1028  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1029  * or leave it the same if we can't. Returns 0 if we don't need to request to
1030  * the server or 1 otherwise.
1031  */
1032 static int
1033 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1034                __u8 type, struct file_lock *flock)
1035 {
1036         int rc = 0;
1037         struct cifsLockInfo *conf_lock;
1038         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1039         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1040         bool exist;
1041 
1042         down_read(&cinode->lock_sem);
1043 
1044         exist = cifs_find_lock_conflict(cfile, offset, length, type,
1045                                         flock->fl_flags, &conf_lock,
1046                                         CIFS_LOCK_OP);
1047         if (exist) {
1048                 flock->fl_start = conf_lock->offset;
1049                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1050                 flock->fl_pid = conf_lock->pid;
1051                 if (conf_lock->type & server->vals->shared_lock_type)
1052                         flock->fl_type = F_RDLCK;
1053                 else
1054                         flock->fl_type = F_WRLCK;
1055         } else if (!cinode->can_cache_brlcks)
1056                 rc = 1;
1057         else
1058                 flock->fl_type = F_UNLCK;
1059 
1060         up_read(&cinode->lock_sem);
1061         return rc;
1062 }
1063 
1064 static void
1065 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1066 {
1067         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1068         cifs_down_write(&cinode->lock_sem);
1069         list_add_tail(&lock->llist, &cfile->llist->locks);
1070         up_write(&cinode->lock_sem);
1071 }
1072 
1073 /*
1074  * Set the byte-range lock (mandatory style). Returns:
1075  * 1) 0, if we set the lock and don't need to request to the server;
1076  * 2) 1, if no locks prevent us but we need to request to the server;
1077  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1078  */
1079 static int
1080 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1081                  bool wait)
1082 {
1083         struct cifsLockInfo *conf_lock;
1084         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1085         bool exist;
1086         int rc = 0;
1087 
1088 try_again:
1089         exist = false;
1090         cifs_down_write(&cinode->lock_sem);
1091 
1092         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1093                                         lock->type, lock->flags, &conf_lock,
1094                                         CIFS_LOCK_OP);
1095         if (!exist && cinode->can_cache_brlcks) {
1096                 list_add_tail(&lock->llist, &cfile->llist->locks);
1097                 up_write(&cinode->lock_sem);
1098                 return rc;
1099         }
1100 
1101         if (!exist)
1102                 rc = 1;
1103         else if (!wait)
1104                 rc = -EACCES;
1105         else {
1106                 list_add_tail(&lock->blist, &conf_lock->blist);
1107                 up_write(&cinode->lock_sem);
1108                 rc = wait_event_interruptible(lock->block_q,
1109                                         (lock->blist.prev == &lock->blist) &&
1110                                         (lock->blist.next == &lock->blist));
1111                 if (!rc)
1112                         goto try_again;
1113                 cifs_down_write(&cinode->lock_sem);
1114                 list_del_init(&lock->blist);
1115         }
1116 
1117         up_write(&cinode->lock_sem);
1118         return rc;
1119 }
1120 
1121 /*
1122  * Check if there is another lock that prevents us to set the lock (posix
1123  * style). If such a lock exists, update the flock structure with its
1124  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1125  * or leave it the same if we can't. Returns 0 if we don't need to request to
1126  * the server or 1 otherwise.
1127  */
1128 static int
1129 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1130 {
1131         int rc = 0;
1132         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1133         unsigned char saved_type = flock->fl_type;
1134 
1135         if ((flock->fl_flags & FL_POSIX) == 0)
1136                 return 1;
1137 
1138         down_read(&cinode->lock_sem);
1139         posix_test_lock(file, flock);
1140 
1141         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1142                 flock->fl_type = saved_type;
1143                 rc = 1;
1144         }
1145 
1146         up_read(&cinode->lock_sem);
1147         return rc;
1148 }
1149 
1150 /*
1151  * Set the byte-range lock (posix style). Returns:
1152  * 1) <0, if the error occurs while setting the lock;
1153  * 2) 0, if we set the lock and don't need to request to the server;
1154  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1155  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1156  */
1157 static int
1158 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1159 {
1160         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1161         int rc = FILE_LOCK_DEFERRED + 1;
1162 
1163         if ((flock->fl_flags & FL_POSIX) == 0)
1164                 return rc;
1165 
1166         cifs_down_write(&cinode->lock_sem);
1167         if (!cinode->can_cache_brlcks) {
1168                 up_write(&cinode->lock_sem);
1169                 return rc;
1170         }
1171 
1172         rc = posix_lock_file(file, flock, NULL);
1173         up_write(&cinode->lock_sem);
1174         return rc;
1175 }
1176 
/*
 * Push all cached mandatory byte-range locks on @cfile to the server via
 * cifs_lockv(), batching as many LOCKING_ANDX ranges per request as fit
 * in one buffer. Returns 0 or the last error encountered.
 */
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	/* pass 0: exclusive locks; pass 1: shared locks - sent separately */
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	/* cap the request buffer at one page */
	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	/* how many lock ranges fit into a single request */
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			/* encode the range in little-endian wire format */
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				/* buffer full - flush this batch to the server */
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;	/* remember error, keep pushing */
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			/* send the final partial batch for this lock type */
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
1253 
1254 static __u32
1255 hash_lockowner(fl_owner_t owner)
1256 {
1257         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1258 }
1259 
/* Snapshot of one POSIX lock, queued for pushing to the server. */
struct lock_to_push {
	struct list_head llist;	/* entry in the locks_to_send list */
	__u64 offset;		/* start of the byte range */
	__u64 length;		/* length of the byte range */
	__u32 pid;		/* hashed lock owner (see hash_lockowner()) */
	__u16 netfid;		/* SMB file id the lock is pushed through */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};
1268 
1269 static int
1270 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1271 {
1272         struct inode *inode = d_inode(cfile->dentry);
1273         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1274         struct file_lock *flock;
1275         struct file_lock_context *flctx = inode->i_flctx;
1276         unsigned int count = 0, i;
1277         int rc = 0, xid, type;
1278         struct list_head locks_to_send, *el;
1279         struct lock_to_push *lck, *tmp;
1280         __u64 length;
1281 
1282         xid = get_xid();
1283 
1284         if (!flctx)
1285                 goto out;
1286 
1287         spin_lock(&flctx->flc_lock);
1288         list_for_each(el, &flctx->flc_posix) {
1289                 count++;
1290         }
1291         spin_unlock(&flctx->flc_lock);
1292 
1293         INIT_LIST_HEAD(&locks_to_send);
1294 
1295         /*
1296          * Allocating count locks is enough because no FL_POSIX locks can be
1297          * added to the list while we are holding cinode->lock_sem that
1298          * protects locking operations of this inode.
1299          */
1300         for (i = 0; i < count; i++) {
1301                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1302                 if (!lck) {
1303                         rc = -ENOMEM;
1304                         goto err_out;
1305                 }
1306                 list_add_tail(&lck->llist, &locks_to_send);
1307         }
1308 
1309         el = locks_to_send.next;
1310         spin_lock(&flctx->flc_lock);
1311         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1312                 if (el == &locks_to_send) {
1313                         /*
1314                          * The list ended. We don't have enough allocated
1315                          * structures - something is really wrong.
1316                          */
1317                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1318                         break;
1319                 }
1320                 length = 1 + flock->fl_end - flock->fl_start;
1321                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1322                         type = CIFS_RDLCK;
1323                 else
1324                         type = CIFS_WRLCK;
1325                 lck = list_entry(el, struct lock_to_push, llist);
1326                 lck->pid = hash_lockowner(flock->fl_owner);
1327                 lck->netfid = cfile->fid.netfid;
1328                 lck->length = length;
1329                 lck->type = type;
1330                 lck->offset = flock->fl_start;
1331         }
1332         spin_unlock(&flctx->flc_lock);
1333 
1334         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1335                 int stored_rc;
1336 
1337                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1338                                              lck->offset, lck->length, NULL,
1339                                              lck->type, 0);
1340                 if (stored_rc)
1341                         rc = stored_rc;
1342                 list_del(&lck->llist);
1343                 kfree(lck);
1344         }
1345 
1346 out:
1347         free_xid(xid);
1348         return rc;
1349 err_out:
1350         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1351                 list_del(&lck->llist);
1352                 kfree(lck);
1353         }
1354         goto out;
1355 }
1356 
1357 static int
1358 cifs_push_locks(struct cifsFileInfo *cfile)
1359 {
1360         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1361         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1362         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1363         int rc = 0;
1364 
1365         /* we are going to update can_cache_brlcks here - need a write access */
1366         cifs_down_write(&cinode->lock_sem);
1367         if (!cinode->can_cache_brlcks) {
1368                 up_write(&cinode->lock_sem);
1369                 return rc;
1370         }
1371 
1372         if (cap_unix(tcon->ses) &&
1373             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1374             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1375                 rc = cifs_push_posix_locks(cfile);
1376         else
1377                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1378 
1379         cinode->can_cache_brlcks = false;
1380         up_write(&cinode->lock_sem);
1381         return rc;
1382 }
1383 
1384 static void
1385 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1386                 bool *wait_flag, struct TCP_Server_Info *server)
1387 {
1388         if (flock->fl_flags & FL_POSIX)
1389                 cifs_dbg(FYI, "Posix\n");
1390         if (flock->fl_flags & FL_FLOCK)
1391                 cifs_dbg(FYI, "Flock\n");
1392         if (flock->fl_flags & FL_SLEEP) {
1393                 cifs_dbg(FYI, "Blocking lock\n");
1394                 *wait_flag = true;
1395         }
1396         if (flock->fl_flags & FL_ACCESS)
1397                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1398         if (flock->fl_flags & FL_LEASE)
1399                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1400         if (flock->fl_flags &
1401             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1402                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1403                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1404 
1405         *type = server->vals->large_lock_type;
1406         if (flock->fl_type == F_WRLCK) {
1407                 cifs_dbg(FYI, "F_WRLCK\n");
1408                 *type |= server->vals->exclusive_lock_type;
1409                 *lock = 1;
1410         } else if (flock->fl_type == F_UNLCK) {
1411                 cifs_dbg(FYI, "F_UNLCK\n");
1412                 *type |= server->vals->unlock_lock_type;
1413                 *unlock = 1;
1414                 /* Check if unlock includes more than one lock range */
1415         } else if (flock->fl_type == F_RDLCK) {
1416                 cifs_dbg(FYI, "F_RDLCK\n");
1417                 *type |= server->vals->shared_lock_type;
1418                 *lock = 1;
1419         } else if (flock->fl_type == F_EXLCK) {
1420                 cifs_dbg(FYI, "F_EXLCK\n");
1421                 *type |= server->vals->exclusive_lock_type;
1422                 *lock = 1;
1423         } else if (flock->fl_type == F_SHLCK) {
1424                 cifs_dbg(FYI, "F_SHLCK\n");
1425                 *type |= server->vals->shared_lock_type;
1426                 *lock = 1;
1427         } else
1428                 cifs_dbg(FYI, "Unknown type of lock\n");
1429 }
1430 
/*
 * F_GETLK-style query: determine whether @flock could be set. Updates
 * @flock with the conflicting lock's details when one is found, or sets
 * fl_type to F_UNLCK when the range is free.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		/* rc == 0: the local cache already answered the query */
		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	/* mandatory style: consult the locally cached locks first */
	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/*
	 * Probe the server by taking the requested lock and immediately
	 * releasing it; success means no conflicting lock exists there.
	 */
	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		/* even a shared probe failed - report an exclusive holder */
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	/* exclusive probe failed: retry shared to distinguish F_RDLCK/F_WRLCK */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
1499 
1500 void
1501 cifs_move_llist(struct list_head *source, struct list_head *dest)
1502 {
1503         struct list_head *li, *tmp;
1504         list_for_each_safe(li, tmp, source)
1505                 list_move(li, dest);
1506 }
1507 
1508 void
1509 cifs_free_llist(struct list_head *llist)
1510 {
1511         struct cifsLockInfo *li, *tmp;
1512         list_for_each_entry_safe(li, tmp, llist, llist) {
1513                 cifs_del_lock_waiters(li);
1514                 list_del(&li->llist);
1515                 kfree(li);
1516         }
1517 }
1518 
1519 int
1520 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1521                   unsigned int xid)
1522 {
1523         int rc = 0, stored_rc;
1524         static const int types[] = {
1525                 LOCKING_ANDX_LARGE_FILES,
1526                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1527         };
1528         unsigned int i;
1529         unsigned int max_num, num, max_buf;
1530         LOCKING_ANDX_RANGE *buf, *cur;
1531         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1532         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1533         struct cifsLockInfo *li, *tmp;
1534         __u64 length = 1 + flock->fl_end - flock->fl_start;
1535         struct list_head tmp_llist;
1536 
1537         INIT_LIST_HEAD(&tmp_llist);
1538 
1539         /*
1540          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1541          * and check it before using.
1542          */
1543         max_buf = tcon->ses->server->maxBuf;
1544         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1545                 return -EINVAL;
1546 
1547         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1548                      PAGE_SIZE);
1549         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1550                         PAGE_SIZE);
1551         max_num = (max_buf - sizeof(struct smb_hdr)) /
1552                                                 sizeof(LOCKING_ANDX_RANGE);
1553         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1554         if (!buf)
1555                 return -ENOMEM;
1556 
1557         cifs_down_write(&cinode->lock_sem);
1558         for (i = 0; i < 2; i++) {
1559                 cur = buf;
1560                 num = 0;
1561                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1562                         if (flock->fl_start > li->offset ||
1563                             (flock->fl_start + length) <
1564                             (li->offset + li->length))
1565                                 continue;
1566                         if (current->tgid != li->pid)
1567                                 continue;
1568                         if (types[i] != li->type)
1569                                 continue;
1570                         if (cinode->can_cache_brlcks) {
1571                                 /*
1572                                  * We can cache brlock requests - simply remove
1573                                  * a lock from the file's list.
1574                                  */
1575                                 list_del(&li->llist);
1576                                 cifs_del_lock_waiters(li);
1577                                 kfree(li);
1578                                 continue;
1579                         }
1580                         cur->Pid = cpu_to_le16(li->pid);
1581                         cur->LengthLow = cpu_to_le32((u32)li->length);
1582                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1583                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1584                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1585                         /*
1586                          * We need to save a lock here to let us add it again to
1587                          * the file's list if the unlock range request fails on
1588                          * the server.
1589                          */
1590                         list_move(&li->llist, &tmp_llist);
1591                         if (++num == max_num) {
1592                                 stored_rc = cifs_lockv(xid, tcon,
1593                                                        cfile->fid.netfid,
1594                                                        li->type, num, 0, buf);
1595                                 if (stored_rc) {
1596                                         /*
1597                                          * We failed on the unlock range
1598                                          * request - add all locks from the tmp
1599                                          * list to the head of the file's list.
1600                                          */
1601                                         cifs_move_llist(&tmp_llist,
1602                                                         &cfile->llist->locks);
1603                                         rc = stored_rc;
1604                                 } else
1605                                         /*
1606                                          * The unlock range request succeed -
1607                                          * free the tmp list.
1608                                          */
1609                                         cifs_free_llist(&tmp_llist);
1610                                 cur = buf;
1611                                 num = 0;
1612                         } else
1613                                 cur++;
1614                 }
1615                 if (num) {
1616                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1617                                                types[i], num, 0, buf);
1618                         if (stored_rc) {
1619                                 cifs_move_llist(&tmp_llist,
1620                                                 &cfile->llist->locks);
1621                                 rc = stored_rc;
1622                         } else
1623                                 cifs_free_llist(&tmp_llist);
1624                 }
1625         }
1626 
1627         up_write(&cinode->lock_sem);
1628         kfree(buf);
1629         return rc;
1630 }
1631 
/*
 * Apply a lock or unlock request for the byte range described by @flock,
 * either as a POSIX lock (when the unix extensions are in use) or as a
 * mandatory/Windows-style lock, and keep the VFS lock state in sync.
 *
 * @type:      protocol lock type, already derived from @flock by the caller
 * @wait_flag: true if the caller may block waiting for the lock
 * @posix_lck: true when POSIX (unix extensions) locking applies
 * @lock, @unlock: which operation is requested
 *
 * Returns 0 on success or a negative error code.
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	/* fl_end is inclusive, so the byte count is end - start + 1 */
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		/* try the local POSIX lock path first; only go to the
		   server if it did not fully handle the request */
		rc = cifs_posix_lock_set(file, flock);
		if (rc <= FILE_LOCK_DEFERRED)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		/*
		 * rc < 0: error; rc == 0: nothing more to send to the server
		 * (presumably handled locally by cifs_lock_add_if - confirm);
		 * rc > 0: lock must be sent to the server below.
		 */
		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapted locks due to
		 * pagereading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		/* server accepted the lock - record it in the local list */
		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		/* mirror the result into the VFS-level lock bookkeeping */
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}
1725 
1726 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1727 {
1728         int rc, xid;
1729         int lock = 0, unlock = 0;
1730         bool wait_flag = false;
1731         bool posix_lck = false;
1732         struct cifs_sb_info *cifs_sb;
1733         struct cifs_tcon *tcon;
1734         struct cifsFileInfo *cfile;
1735         __u32 type;
1736 
1737         rc = -EACCES;
1738         xid = get_xid();
1739 
1740         if (!(fl->fl_flags & FL_FLOCK))
1741                 return -ENOLCK;
1742 
1743         cfile = (struct cifsFileInfo *)file->private_data;
1744         tcon = tlink_tcon(cfile->tlink);
1745 
1746         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1747                         tcon->ses->server);
1748         cifs_sb = CIFS_FILE_SB(file);
1749 
1750         if (cap_unix(tcon->ses) &&
1751             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1752             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1753                 posix_lck = true;
1754 
1755         if (!lock && !unlock) {
1756                 /*
1757                  * if no lock or unlock then nothing to do since we do not
1758                  * know what it is
1759                  */
1760                 free_xid(xid);
1761                 return -EOPNOTSUPP;
1762         }
1763 
1764         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1765                         xid);
1766         free_xid(xid);
1767         return rc;
1768 
1769 
1770 }
1771 
1772 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1773 {
1774         int rc, xid;
1775         int lock = 0, unlock = 0;
1776         bool wait_flag = false;
1777         bool posix_lck = false;
1778         struct cifs_sb_info *cifs_sb;
1779         struct cifs_tcon *tcon;
1780         struct cifsFileInfo *cfile;
1781         __u32 type;
1782 
1783         rc = -EACCES;
1784         xid = get_xid();
1785 
1786         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1787                  cmd, flock->fl_flags, flock->fl_type,
1788                  flock->fl_start, flock->fl_end);
1789 
1790         cfile = (struct cifsFileInfo *)file->private_data;
1791         tcon = tlink_tcon(cfile->tlink);
1792 
1793         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1794                         tcon->ses->server);
1795         cifs_sb = CIFS_FILE_SB(file);
1796 
1797         if (cap_unix(tcon->ses) &&
1798             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1799             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1800                 posix_lck = true;
1801         /*
1802          * BB add code here to normalize offset and length to account for
1803          * negative length which we can not accept over the wire.
1804          */
1805         if (IS_GETLK(cmd)) {
1806                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1807                 free_xid(xid);
1808                 return rc;
1809         }
1810 
1811         if (!lock && !unlock) {
1812                 /*
1813                  * if no lock or unlock then nothing to do since we do not
1814                  * know what it is
1815                  */
1816                 free_xid(xid);
1817                 return -EOPNOTSUPP;
1818         }
1819 
1820         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1821                         xid);
1822         free_xid(xid);
1823         return rc;
1824 }
1825 
1826 /*
1827  * update the file size (if needed) after a write. Should be called with
1828  * the inode->i_lock held
1829  */
1830 void
1831 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1832                       unsigned int bytes_written)
1833 {
1834         loff_t end_of_write = offset + bytes_written;
1835 
1836         if (end_of_write > cifsi->server_eof)
1837                 cifsi->server_eof = end_of_write;
1838 }
1839 
/*
 * Write @write_size bytes from @write_data to the server at file offset
 * *@offset through the handle @open_file, looping until everything has
 * been written or an error occurs.  Each chunk is retried while the
 * transport reports -EAGAIN, reopening an invalidated handle as needed.
 *
 * On success *@offset is advanced past the written bytes, the cached
 * server EOF and the inode size are updated, and the number of bytes
 * written is returned.  If nothing at all could be written, the last
 * error code is returned instead.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms = {0};

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			/* cap each chunk at the server's retry write size */
			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
					&io_parms, &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			/* on a partial write, report what was written so
			   far; otherwise fail with the error code */
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		/* extend the cached inode size if the write went past it */
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}
1921 
/*
 * Find a valid, already-open handle with read access on @cifs_inode and
 * take a reference on it (caller drops it with cifsFileInfo_put()).
 * Returns NULL when no usable read handle is currently open.
 *
 * @fsuid_only: on multiuser mounts, only accept handles opened by the
 *              current fsuid; ignored on other mounts.
 */
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return NULL;
}
1955 
/*
 * Return -EBADF if no handle is found and general rc otherwise.
 *
 * Search @cifs_inode's open-handle list for a writable handle, preferring
 * one opened by the current thread group.  On success the handle is
 * returned in *@ret_file with a reference held (drop it with
 * cifsFileInfo_put()).  If only an invalidated handle is found, up to
 * MAX_REOPEN_ATT attempts are made to reopen it.
 *
 * @flags: FIND_WR_FSUID_ONLY restricts to handles of the current fsuid
 *         (multiuser mounts only); FIND_WR_WITH_DELETE requires the
 *         handle to have been opened with DELETE access.
 */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;
	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
	bool with_delete = flags & FIND_WR_WITH_DELETE;
	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of on oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	/* give up after too many reopen attempts of an invalid handle */
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		/* first pass: only handles of the current thread group */
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (with_delete && !(open_file->fid.access & DELETE))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				/* remember the first invalid handle as a
				   fallback candidate for reopening */
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		/* try to revive the invalidated handle (lock dropped -
		   reopen may block) */
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		/* reopen failed: move it to the list tail so the next scan
		   prefers other handles, then retry the whole search */
		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&cifs_inode->open_file_lock);
		goto refind_writable;
	}

	return rc;
}
2046 
2047 struct cifsFileInfo *
2048 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2049 {
2050         struct cifsFileInfo *cfile;
2051         int rc;
2052 
2053         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2054         if (rc)
2055                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2056 
2057         return cfile;
2058 }
2059 
2060 int
2061 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2062                        int flags,
2063                        struct cifsFileInfo **ret_file)
2064 {
2065         struct list_head *tmp;
2066         struct cifsFileInfo *cfile;
2067         struct cifsInodeInfo *cinode;
2068         char *full_path;
2069 
2070         *ret_file = NULL;
2071 
2072         spin_lock(&tcon->open_file_lock);
2073         list_for_each(tmp, &tcon->openFileList) {
2074                 cfile = list_entry(tmp, struct cifsFileInfo,
2075                              tlist);
2076                 full_path = build_path_from_dentry(cfile->dentry);
2077                 if (full_path == NULL) {
2078                         spin_unlock(&tcon->open_file_lock);
2079                         return -ENOMEM;
2080                 }
2081                 if (strcmp(full_path, name)) {
2082                         kfree(full_path);
2083                         continue;
2084                 }
2085 
2086                 kfree(full_path);
2087                 cinode = CIFS_I(d_inode(cfile->dentry));
2088                 spin_unlock(&tcon->open_file_lock);
2089                 return cifs_get_writable_file(cinode, flags, ret_file);
2090         }
2091 
2092         spin_unlock(&tcon->open_file_lock);
2093         return -ENOENT;
2094 }
2095 
2096 int
2097 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2098                        struct cifsFileInfo **ret_file)
2099 {
2100         struct list_head *tmp;
2101         struct cifsFileInfo *cfile;
2102         struct cifsInodeInfo *cinode;
2103         char *full_path;
2104 
2105         *ret_file = NULL;
2106 
2107         spin_lock(&tcon->open_file_lock);
2108         list_for_each(tmp, &tcon->openFileList) {
2109                 cfile = list_entry(tmp, struct cifsFileInfo,
2110                              tlist);
2111                 full_path = build_path_from_dentry(cfile->dentry);
2112                 if (full_path == NULL) {
2113                         spin_unlock(&tcon->open_file_lock);
2114                         return -ENOMEM;
2115                 }
2116                 if (strcmp(full_path, name)) {
2117                         kfree(full_path);
2118                         continue;
2119                 }
2120 
2121                 kfree(full_path);
2122                 cinode = CIFS_I(d_inode(cfile->dentry));
2123                 spin_unlock(&tcon->open_file_lock);
2124                 *ret_file = find_readable_file(cinode, 0);
2125                 return *ret_file ? 0 : -ENOENT;
2126         }
2127 
2128         spin_unlock(&tcon->open_file_lock);
2129         return -ENOENT;
2130 }
2131 
/*
 * Write the byte range [@from, @to) of @page back to the server via an
 * existing writable handle on the page's inode.  The range is clamped so
 * it never extends the file.  Returns 0 on success (or when the page lies
 * entirely past EOF, e.g. after a racing truncate) or a negative error.
 */
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	/* reject impossible ranges before touching the server */
	if ((to > PAGE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
				    &open_file);
	if (!rc) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_time(inode);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
		else
			rc = -EFAULT;
	} else {
		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
		/* non-retryable failures are reported as generic I/O errors */
		if (!is_retryable_error(rc))
			rc = -EIO;
	}

	kunmap(page);
	return rc;
}
2189 
2190 static struct cifs_writedata *
2191 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2192                           pgoff_t end, pgoff_t *index,
2193                           unsigned int *found_pages)
2194 {
2195         struct cifs_writedata *wdata;
2196 
2197         wdata = cifs_writedata_alloc((unsigned int)tofind,
2198                                      cifs_writev_complete);
2199         if (!wdata)
2200                 return NULL;
2201 
2202         *found_pages = find_get_pages_range_tag(mapping, index, end,
2203                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2204         return wdata;
2205 }
2206 
/*
 * From the @found_pages candidates in @wdata->pages, lock and mark for
 * writeback a run of consecutive dirty pages.  The run ends at the first
 * page that cannot be locked, has been truncated/remapped, is past @end,
 * is not consecutive with the previous one, is already under writeback,
 * or lies beyond EOF.  Pages not kept are released.
 *
 * Returns the number of pages kept (a prefix of wdata->pages).  Sets
 * *@done when writeback of this range should stop, and advances *@next /
 * *@index for the caller's scan.
 */
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither the i_pages lock nor the
		 * page lock: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled
		 * back from swapper_space to tmpfs file mapping
		 */

		/* block for the first page, but never for later ones -
		   a contended page simply ends the run */
		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		put_page(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}
2285 
2286 static int
2287 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2288                  struct address_space *mapping, struct writeback_control *wbc)
2289 {
2290         int rc;
2291 
2292         wdata->sync_mode = wbc->sync_mode;
2293         wdata->nr_pages = nr_pages;
2294         wdata->offset = page_offset(wdata->pages[0]);
2295         wdata->pagesz = PAGE_SIZE;
2296         wdata->tailsz = min(i_size_read(mapping->host) -
2297                         page_offset(wdata->pages[nr_pages - 1]),
2298                         (loff_t)PAGE_SIZE);
2299         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2300         wdata->pid = wdata->cfile->pid;
2301 
2302         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2303         if (rc)
2304                 return rc;
2305 
2306         if (wdata->cfile->invalidHandle)
2307                 rc = -EAGAIN;
2308         else
2309                 rc = wdata->server->ops->async_writev(wdata,
2310                                                       cifs_writedata_release);
2311 
2312         return rc;
2313 }
2314 
/*
 * ->writepages() for cifs: scan the mapping for dirty pages and send them
 * to the server in wsize-sized batches of async writes.
 *
 * Per iteration: grab a writable handle, reserve send credits, gather and
 * lock a run of dirty pages, then submit via wdata_send_pages().  On send
 * failure the pages are either redirtied (retryable error) or marked in
 * error.  -EAGAIN under WB_SYNC_ALL rewinds the index and retries the same
 * range; the first hard error is stashed in saved_rc and returned at the
 * end so the scan can still make progress.
 */
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct TCP_Server_Info *server;
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	struct cifsFileInfo *cfile = NULL;
	int rc = 0;
	int saved_rc = 0;	/* first non-retryable error seen */
	unsigned int xid;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->wsize < PAGE_SIZE)
		return generic_writepages(mapping, wbc);

	xid = get_xid();
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		scanned = true;
	}
	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);

retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages, wsize;
		pgoff_t next = 0, tofind, saved_index = index;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;
		int get_file_rc = 0;

		/* drop the handle from the previous iteration, if any */
		if (cfile)
			cifsFileInfo_put(cfile);

		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);

		/* in case of an error store it to return later */
		if (rc)
			get_file_rc = rc;

		/* block until credits for up to wsize bytes are available */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, credits);
		if (rc != 0) {
			done = true;
			break;
		}

		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;

		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
						  &found_pages);
		if (!wdata) {
			rc = -ENOMEM;
			done = true;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
					       end, &index, &next, &done);

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			continue;
		}

		/* wdata now owns the credits and the file handle reference */
		wdata->credits = credits_on_stack;
		wdata->cfile = cfile;
		wdata->server = server;
		cfile = NULL;

		if (!wdata->cfile) {
			cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
				 get_file_rc);
			if (is_retryable_error(get_file_rc))
				rc = get_file_rc;
			else
				rc = -EBADF;
		} else
			rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);

		for (i = 0; i < nr_pages; ++i)
			unlock_page(wdata->pages[i]);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			add_credits_and_wake_if(server, &wdata->credits, 0);
			for (i = 0; i < nr_pages; ++i) {
				if (is_retryable_error(rc))
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				put_page(wdata->pages[i]);
			}
			if (!is_retryable_error(rc))
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		/* data-integrity sync must retry the exact same range */
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
			index = saved_index;
			continue;
		}

		/* Return immediately if we received a signal during writing */
		if (is_interrupt_error(rc)) {
			done = true;
			break;
		}

		if (rc != 0 && saved_rc == 0)
			saved_rc = rc;

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (saved_rc != 0)
		rc = saved_rc;

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	if (cfile)
		cifsFileInfo_put(cfile);
	free_xid(xid);
	return rc;
}
2477 
/*
 * Write one locked page back to the server synchronously via
 * cifs_partialpagewrite().  The page remains locked on return; the
 * caller (cifs_writepage / launder path) is responsible for unlocking.
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
/* BB add check for wbc flags */
	get_page(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
	if (is_retryable_error(rc)) {
		/* for data-integrity writeback keep retrying on -EAGAIN */
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
			goto retry_write;
		redirty_page_for_writepage(wbc, page);
	} else if (rc != 0) {
		SetPageError(page);
		mapping_set_error(page->mapping, rc);
	} else {
		SetPageUptodate(page);
	}
	end_page_writeback(page);
	put_page(page);
	free_xid(xid);
	return rc;
}
2518 
/* ->writepage(): write the locked page, then release the page lock */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);

	return rc;
}
2525 
/*
 * ->write_end() for cifs: commit @copied bytes prepared by write_begin at
 * @pos.  Uptodate pages are just marked dirty for later writeback; a page
 * that is still not uptodate is written to the server synchronously via
 * cifs_write().  Extends the cached inode size if the write grew the file,
 * and drops the page lock and reference taken by write_begin.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	/*
	 * PG_checked flags a page write_begin did not read in; a copy that
	 * covered the whole requested length makes the page fully valid.
	 */
	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		/* i_lock protects i_size updates against concurrent writers */
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	put_page(page);

	return rc;
}
2586 
/*
 * Strict fsync: flush and wait on the dirty pagecache for [start, end],
 * drop our cached copy if we no longer hold a read cache (oplock/lease)
 * for the inode, then issue a server-side flush of the handle unless the
 * mount disabled it (CIFS_MOUNT_NOSSYNC).
 */
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = file_write_and_wait_range(file, start, end);
	if (rc) {
		trace_cifs_fsync_err(inode->i_ino, rc);
		return rc;
	}

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
		/* without a read cache our pagecache copy may be stale */
		rc = cifs_zap_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	return rc;
}
2629 
2630 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2631 {
2632         unsigned int xid;
2633         int rc = 0;
2634         struct cifs_tcon *tcon;
2635         struct TCP_Server_Info *server;
2636         struct cifsFileInfo *smbfile = file->private_data;
2637         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2638 
2639         rc = file_write_and_wait_range(file, start, end);
2640         if (rc) {
2641                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2642                 return rc;
2643         }
2644 
2645         xid = get_xid();
2646 
2647         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2648                  file, datasync);
2649 
2650         tcon = tlink_tcon(smbfile->tlink);
2651         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2652                 server = tcon->ses->server;
2653                 if (server->ops->flush)
2654                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2655                 else
2656                         rc = -ENOSYS;
2657         }
2658 
2659         free_xid(xid);
2660         return rc;
2661 }
2662 
2663 /*
2664  * As file closes, flush all cached write data for this inode checking
2665  * for write behind errors.
2666  */
2667 int cifs_flush(struct file *file, fl_owner_t id)
2668 {
2669         struct inode *inode = file_inode(file);
2670         int rc = 0;
2671 
2672         if (file->f_mode & FMODE_WRITE)
2673                 rc = filemap_write_and_wait(inode->i_mapping);
2674 
2675         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2676         if (rc)
2677                 trace_cifs_flush_err(inode->i_ino, rc);
2678         return rc;
2679 }
2680 
2681 static int
2682 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2683 {
2684         int rc = 0;
2685         unsigned long i;
2686 
2687         for (i = 0; i < num_pages; i++) {
2688                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2689                 if (!pages[i]) {
2690                         /*
2691                          * save number of pages we have already allocated and
2692                          * return with ENOMEM error
2693                          */
2694                         num_pages = i;
2695                         rc = -ENOMEM;
2696                         break;
2697                 }
2698         }
2699 
2700         if (rc) {
2701                 for (i = 0; i < num_pages; i++)
2702                         put_page(pages[i]);
2703         }
2704         return rc;
2705 }
2706 
2707 static inline
2708 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2709 {
2710         size_t num_pages;
2711         size_t clen;
2712 
2713         clen = min_t(const size_t, len, wsize);
2714         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2715 
2716         if (cur_len)
2717                 *cur_len = clen;
2718 
2719         return num_pages;
2720 }
2721 
/*
 * kref release for an uncached-write wdata: drop the hold on the aio
 * context taken at submission, release the page references held for the
 * write, then free the writedata itself via the common release.
 */
static void
cifs_uncached_writedata_release(struct kref *refcount)
{
	int i;
	struct cifs_writedata *wdata = container_of(refcount,
					struct cifs_writedata, refcount);

	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < wdata->nr_pages; i++)
		put_page(wdata->pages[i]);
	cifs_writedata_release(refcount);
}
2734 
2735 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2736 
/*
 * Work-queue completion handler for an uncached write: advance the cached
 * server EOF (and the in-core inode size) past the written range, signal
 * the wdata as done, then kick collection of the owning aio context.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	/* i_lock serializes i_size/server_eof updates */
	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);
	collect_uncached_write_data(wdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
2756 
/*
 * Copy up to *len bytes from @from into the pre-allocated pages of @wdata.
 *
 * On return *len holds the byte count actually copied and *num_pages the
 * number of pages consumed.  Note: when every page copies fully (no short
 * copy, so no break), i equals nr_pages afterwards and *num_pages becomes
 * nr_pages + 1; the caller only uses it as a lower bound when trimming
 * unused pages, so overstating by one in the all-copied case is harmless.
 *
 * Returns -EFAULT if nothing at all could be copied (bogus user address).
 */
static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
		      size_t *len, unsigned long *num_pages)
{
	size_t save_len, copied, bytes, cur_len = *len;
	unsigned long i, nr_pages = *num_pages;

	save_len = cur_len;
	for (i = 0; i < nr_pages; i++) {
		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
		cur_len -= copied;
		/*
		 * If we didn't copy as much as we expected, then that
		 * may mean we trod into an unmapped area. Stop copying
		 * at that point. On the next pass through the big
		 * loop, we'll likely end up getting a zero-length
		 * write and bailing out of it.
		 */
		if (copied < bytes)
			break;
	}
	cur_len = save_len - cur_len;
	*len = cur_len;

	/*
	 * If we have no data to send, then that probably means that
	 * the copy above failed altogether. That's most likely because
	 * the address in the iovec was bogus. Return -EFAULT and let
	 * the caller free anything we allocated and bail out.
	 */
	if (!cur_len)
		return -EFAULT;

	/*
	 * i + 1 now represents the number of pages we actually used in
	 * the copy phase above.
	 */
	*num_pages = i + 1;
	return 0;
}
2798 
/*
 * Resend a previously failed uncached wdata in one piece: reopen the file
 * handle if it was invalidated, wait (sleeping between attempts) until
 * credits for the full wdata->bytes are available, and reissue the async
 * write.  On success the wdata is moved onto @wdata_list; on any hard
 * failure its reference is dropped here.
 */
static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
	struct cifs_aio_ctx *ctx)
{
	unsigned int wsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server = wdata->server;

	do {
		if (wdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(wdata->cfile, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}


		/*
		 * Wait for credits to resend this wdata.
		 * Note: we are attempting to resend the whole wdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
						&wsize, &credits);
			if (rc)
				goto fail;

			if (wsize < wdata->bytes) {
				/* not enough yet: give them back and nap */
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (wsize < wdata->bytes);
		wdata->credits = credits;

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				/* RDMA MR from the failed send must be re-registered */
				if (wdata->mr) {
					wdata->mr->need_invalidate = true;
					smbd_deregister_mr(wdata->mr);
					wdata->mr = NULL;
				}
#endif
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
			}
		}

		/* If the write was successfully sent, we are done */
		if (!rc) {
			list_add_tail(&wdata->list, wdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &wdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	return rc;
}
2868 
/*
 * Build and issue async write requests covering @len bytes of @from
 * starting at @offset, splitting the I/O into wsize-limited chunks.
 * For direct I/O the caller's user pages are pinned in place; otherwise
 * the data is copied into freshly allocated pages.  Every wdata that is
 * successfully sent is appended to @wdata_list and later reaped by
 * collect_uncached_write_data() when completion fires.
 */
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
{
	int rc = 0;
	size_t cur_len;
	unsigned long nr_pages, num_pages, i;
	struct cifs_writedata *wdata;
	/* snapshots allow rewinding the iterator/offset on -EAGAIN */
	struct iov_iter saved_from = *from;
	loff_t saved_offset = offset;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	unsigned int xid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
	xid = get_xid();

	do {
		unsigned int wsize;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* block until credits for up to wsize bytes are available */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, wsize);

		if (ctx->direct_io) {
			/* pin the user's pages directly; no data copy */
			ssize_t result;

			result = iov_iter_get_pages_alloc(
				from, &pagevec, cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(from),
					 from->iov_offset, from->count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			cur_len = (size_t)result;
			iov_iter_advance(from, cur_len);

			nr_pages =
				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;

			wdata = cifs_writedata_direct_alloc(pagevec,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}


			wdata->page_offset = start;
			/* tail = what remains after the full middle pages */
			wdata->tailsz =
				nr_pages > 1 ?
					cur_len - (PAGE_SIZE - start) -
					(nr_pages - 2) * PAGE_SIZE :
					cur_len;
		} else {
			nr_pages = get_numpages(wsize, len, &cur_len);
			wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
			if (rc) {
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			num_pages = nr_pages;
			rc = wdata_fill_from_iovec(
				wdata, from, &cur_len, &num_pages);
			if (rc) {
				for (i = 0; i < nr_pages; i++)
					put_page(wdata->pages[i]);
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			/*
			 * Bring nr_pages down to the number of pages we
			 * actually used, and free any pages that we didn't use.
			 */
			for ( ; nr_pages > num_pages; nr_pages--)
				put_page(wdata->pages[nr_pages - 1]);

			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		}

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->server = server;
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->credits = credits_on_stack;
		wdata->ctx = ctx;
		/* wdata holds a reference on the aio ctx until released */
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		}

		if (rc) {
			add_credits_and_wake_if(server, &wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN) {
				/* rewind the iterator and retry this chunk */
				*from = saved_from;
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	free_xid(xid);
	return rc;
}
3034 
/*
 * Reap the wdatas queued on ctx->list in offset order, accumulating the
 * number of bytes written, resending any that failed with -EAGAIN, and
 * finally completing the aio/sync caller with either the total byte count
 * or the first hard error.
 *
 * Called both from the submitting task and from completion work; the
 * ctx->aio_mutex serializes collectors, and try_wait_for_completion()
 * lets a collector bail out while sends are still in flight.
 */
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_writedata *wdata, *tmp;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct dentry *dentry = ctx->cfile->dentry;
	int rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* an earlier collector already drained the list */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit, then return without waiting
	 * for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
		if (!rc) {
			if (!try_wait_for_completion(&wdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (wdata->result)
				rc = wdata->result;
			else
				ctx->total_len += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from = ctx->iter;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				if (ctx->direct_io)
					rc = cifs_resend_wdata(
						wdata, &tmp_list, ctx);
				else {
					/* rewind the iterator to this wdata's range */
					iov_iter_advance(&tmp_from,
						 wdata->offset - ctx->pos);

					rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						ctx->cfile, cifs_sb, &tmp_list,
						ctx);

					kref_put(&wdata->refcount,
						cifs_uncached_writedata_release);
				}

				list_splice(&tmp_list, &ctx->list);
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	cifs_stats_bytes_written(tcon, ctx->total_len);
	/* cached reads are stale now; force revalidation from the server */
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3116 
/*
 * Common worker for the uncached write paths (cifs_direct_writev and
 * cifs_user_writev).  Splits the request into one or more async write
 * calls, then either waits for completion (sync kiocb) or returns
 * -EIOCBQUEUED and lets the completion path finish the aio.
 *
 * @iocb:   the kiocb describing file and position
 * @from:   source data
 * @direct: true to send pages straight from the caller's iterator
 *          without first copying into ctx-owned kernel pages
 *
 * Returns the number of bytes written, -EIOCBQUEUED for queued aio,
 * or a negative error if nothing was sent.
 */
static ssize_t __cifs_writev(
	struct kiocb *iocb, struct iov_iter *from, bool direct)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	struct iov_iter saved_from = *from;
	size_t len = iov_iter_count(from);
	int rc;

	/*
	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
	 * In this case, fall back to non-direct write function.
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(from)) {
		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
		direct = false;
	}

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	/* ctx holds its own reference to the open file */
	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;

	if (direct) {
		/* send directly from the caller's iterator, no copy */
		ctx->direct_io = true;
		ctx->iter = *from;
		ctx->len = len;
	} else {
		/* copy user data into ctx-owned pages first */
		rc = setup_aio_ctx_iter(ctx, from, WRITE);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
	}

	/* grab a lock here due to write response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* aio: completion handler releases the last ctx reference */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* interrupted: report -EINTR but keep any partial count */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}
3220 
3221 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3222 {
3223         return __cifs_writev(iocb, from, true);
3224 }
3225 
3226 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3227 {
3228         return __cifs_writev(iocb, from, false);
3229 }
3230 
/*
 * Cached write path used when we hold a write oplock/lease: let the page
 * cache absorb the data via the generic write path, but only after
 * verifying that no mandatory brlock on the range forbids writing.
 *
 * Lock order here is inode_lock -> lock_sem; lock_sem is taken shared
 * so concurrent readers of the lock list are fine.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	inode_lock(inode);
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, 0,
				     NULL, CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	up_read(&cinode->lock_sem);
	inode_unlock(inode);

	/* honor O_SYNC/O_DSYNC after dropping the locks */
	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
	return rc;
}
3266 
/*
 * Write entry point for strict cache mode.  Chooses between three paths:
 *  - generic cached write, if we have a write oplock and POSIX byte-range
 *    lock semantics (unix extensions, no NOPOSIXBRL mount flag);
 *  - cifs_writev(), if we have a write oplock but must honor mandatory
 *    brlocks;
 *  - uncached write otherwise, followed by cache invalidation if we hold
 *    read-level caching (our own write just made the cache stale).
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	/* blocks if an oplock break is being handled; nonzero means bail */
	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause a error with mandatory locks on
	 * these pages but not on the region from pos to ppos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (CIFS_CACHE_READ(cinode)) {
		/*
		 * We have read level caching and we have just sent a write
		 * request to the server thus making data in the cache stale.
		 * Zap the cache and set oplock/lease level to NONE to avoid
		 * reading stale data from the cache. All subsequent read
		 * operations will read new data from the server.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}
3316 
3317 static struct cifs_readdata *
3318 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3319 {
3320         struct cifs_readdata *rdata;
3321 
3322         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3323         if (rdata != NULL) {
3324                 rdata->pages = pages;
3325                 kref_init(&rdata->refcount);
3326                 INIT_LIST_HEAD(&rdata->list);
3327                 init_completion(&rdata->done);
3328                 INIT_WORK(&rdata->work, complete);
3329         }
3330 
3331         return rdata;
3332 }
3333 
3334 static struct cifs_readdata *
3335 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3336 {
3337         struct page **pages =
3338                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3339         struct cifs_readdata *ret = NULL;
3340 
3341         if (pages) {
3342                 ret = cifs_readdata_direct_alloc(pages, complete);
3343                 if (!ret)
3344                         kfree(pages);
3345         }
3346 
3347         return ret;
3348 }
3349 
/*
 * kref release callback: free a readdata descriptor once the last
 * reference is dropped.  Deregisters the SMB Direct memory region (if
 * any) before freeing, drops our hold on the open file, then frees the
 * page-pointer array and the descriptor itself.
 */
void
cifs_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr) {
		smbd_deregister_mr(rdata->mr);
		rdata->mr = NULL;
	}
#endif
	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

	kvfree(rdata->pages);
	kfree(rdata);
}
3367 
3368 static int
3369 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3370 {
3371         int rc = 0;
3372         struct page *page;
3373         unsigned int i;
3374 
3375         for (i = 0; i < nr_pages; i++) {
3376                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3377                 if (!page) {
3378                         rc = -ENOMEM;
3379                         break;
3380                 }
3381                 rdata->pages[i] = page;
3382         }
3383 
3384         if (rc) {
3385                 unsigned int nr_page_failed = i;
3386 
3387                 for (i = 0; i < nr_page_failed; i++) {
3388                         put_page(rdata->pages[i]);
3389                         rdata->pages[i] = NULL;
3390                 }
3391         }
3392         return rc;
3393 }
3394 
/*
 * kref release callback for readdata used by the uncached read path.
 * Drops the reference this rdata held on its aio ctx, releases all
 * data pages, then hands off to cifs_readdata_release() for the rest.
 */
static void
cifs_uncached_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
	unsigned int i;

	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < rdata->nr_pages; i++) {
		put_page(rdata->pages[i]);
	}
	cifs_readdata_release(refcount);
}
3408 
3409 /**
3410  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3411  * @rdata:      the readdata response with list of pages holding data
3412  * @iter:       destination for our data
3413  *
3414  * This function copies data from a list of pages in a readdata response into
3415  * an array of iovecs. It will first calculate where the data should go
3416  * based on the info in the readdata and then copy the data into that spot.
3417  */
3418 static int
3419 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3420 {
3421         size_t remaining = rdata->got_bytes;
3422         unsigned int i;
3423 
3424         for (i = 0; i < rdata->nr_pages; i++) {
3425                 struct page *page = rdata->pages[i];
3426                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3427                 size_t written;
3428 
3429                 if (unlikely(iov_iter_is_pipe(iter))) {
3430                         void *addr = kmap_atomic(page);
3431 
3432                         written = copy_to_iter(addr, copy, iter);
3433                         kunmap_atomic(addr);
3434                 } else
3435                         written = copy_page_to_iter(page, 0, copy, iter);
3436                 remaining -= written;
3437                 if (written < copy && iov_iter_count(iter) > 0)
3438                         break;
3439         }
3440         return remaining ? -EFAULT : 0;
3441 }
3442 
3443 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3444 
/*
 * Work function run when an uncached async read finishes.  Signals the
 * rdata's completion, then kicks collection of any finished reads on
 * the aio ctx.  The kref_put must come last: it may drop the final
 * reference to the rdata (and, transitively, the aio ctx).
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
3456 
3457 static int
3458 uncached_fill_pages(struct TCP_Server_Info *server,
3459                     struct cifs_readdata *rdata, struct iov_iter *iter,
3460                     unsigned int len)
3461 {
3462         int result = 0;
3463         unsigned int i;
3464         unsigned int nr_pages = rdata->nr_pages;
3465         unsigned int page_offset = rdata->page_offset;
3466 
3467         rdata->got_bytes = 0;
3468         rdata->tailsz = PAGE_SIZE;
3469         for (i = 0; i < nr_pages; i++) {
3470                 struct page *page = rdata->pages[i];
3471                 size_t n;
3472                 unsigned int segment_size = rdata->pagesz;
3473 
3474                 if (i == 0)
3475                         segment_size -= page_offset;
3476                 else
3477                         page_offset = 0;
3478 
3479 
3480                 if (len <= 0) {
3481                         /* no need to hold page hostage */
3482                         rdata->pages[i] = NULL;
3483                         rdata->nr_pages--;
3484                         put_page(page);
3485                         continue;
3486                 }
3487 
3488                 n = len;
3489                 if (len >= segment_size)
3490                         /* enough data to fill the page */
3491                         n = segment_size;
3492                 else
3493                         rdata->tailsz = len;
3494                 len -= n;
3495 
3496                 if (iter)
3497                         result = copy_page_from_iter(
3498                                         page, page_offset, n, iter);
3499 #ifdef CONFIG_CIFS_SMB_DIRECT
3500                 else if (rdata->mr)
3501                         result = n;
3502 #endif
3503                 else
3504                         result = cifs_read_page_from_socket(
3505                                         server, page, page_offset, n);
3506                 if (result < 0)
3507                         break;
3508 
3509                 rdata->got_bytes += result;
3510         }
3511 
3512         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3513                                                 rdata->got_bytes : result;
3514 }
3515 
/* Receive @len bytes of read response directly from the socket. */
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata, unsigned int len)
{
	return uncached_fill_pages(server, rdata, NULL, len);
}
3522 
/* Copy an already-received (e.g. decrypted) payload out of @iter. */
static int
cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata,
			      struct iov_iter *iter)
{
	return uncached_fill_pages(server, rdata, iter, iter->count);
}
3530 
/*
 * Resend a whole rdata after a retryable failure (direct I/O path, where
 * the original rdata is reused rather than rebuilt).  Reopens the file
 * handle if needed, waits until enough credits exist to send the request
 * in one piece, then re-issues the async read.
 *
 * On success the rdata is added to @rdata_list and 0 is returned; on any
 * non-retryable error our reference on the rdata is dropped and the
 * error returned.
 */
static int cifs_resend_rdata(struct cifs_readdata *rdata,
			struct list_head *rdata_list,
			struct cifs_aio_ctx *ctx)
{
	unsigned int rsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server;

	/* XXX: should we pick a new channel here? */
	server = rdata->server;

	do {
		if (rdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(rdata->cfile, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this rdata.
		 * Note: we are attempting to resend the whole rdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
						&rsize, &credits);

			if (rc)
				goto fail;

			/* not enough for the full request: give the
			 * credits back and wait before retrying */
			if (rsize < rdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (rsize < rdata->bytes);
		rdata->credits = credits;

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				/* re-register the MR on the next send */
				if (rdata->mr) {
					rdata->mr->need_invalidate = true;
					smbd_deregister_mr(rdata->mr);
					rdata->mr = NULL;
				}
#endif
				rc = server->ops->async_readv(rdata);
			}
		}

		/* If the read was successfully sent, we are done */
		if (!rc) {
			/* Add to aio pending list */
			list_add_tail(&rdata->list, rdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &rdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	return rc;
}
3602 
/*
 * Split an uncached read of @len bytes starting at @offset into one or
 * more async read requests, each sized by the credits the server grants.
 * Successfully sent rdatas are appended to @rdata_list; the caller
 * collects them via collect_uncached_read_data().
 *
 * For direct I/O the user's pages are pinned and used as-is; otherwise
 * kernel pages are allocated and the data copied out later.  Returns 0
 * if everything was sent, or the error from the first failed send.
 */
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize;
	struct cifs_credits credits_on_stack;
	struct cifs_credits *credits = &credits_on_stack;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	struct iov_iter direct_iov = ctx->iter;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	/* position our private copy of the iterator at @offset */
	if (ctx->direct_io)
		iov_iter_advance(&direct_iov, offset - ctx->pos);

	do {
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/* each request covers at most @rsize bytes */
		cur_len = min_t(const size_t, len, rsize);

		if (ctx->direct_io) {
			ssize_t result;

			/* pin the user pages backing this segment */
			result = iov_iter_get_pages_alloc(
					&direct_iov, &pagevec,
					cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(&direct_iov),
					 direct_iov.iov_offset,
					 direct_iov.count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			/* may have pinned fewer bytes than requested */
			cur_len = (size_t)result;
			iov_iter_advance(&direct_iov, cur_len);

			rdata = cifs_readdata_direct_alloc(
					pagevec, cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
			rdata->page_offset = start;
			/* size of the final, possibly partial, page */
			rdata->tailsz = npages > 1 ?
				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
				cur_len;

		} else {

			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
			/* allocate a readdata struct */
			rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			rc = cifs_read_allocate_pages(rdata, npages);
			if (rc) {
				kvfree(rdata->pages);
				kfree(rdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rdata->tailsz = PAGE_SIZE;
		}

		rdata->server = server;
		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
		rdata->credits = credits_on_stack;
		rdata->ctx = ctx;
		/* rdata holds a reference to the aio ctx */
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			kref_put(&rdata->refcount,
				cifs_uncached_readdata_release);
			if (rc == -EAGAIN) {
				/* back up and retry this segment */
				iov_iter_revert(&direct_iov, cur_len);
				continue;
			}
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
3744 
/*
 * Walk the aio ctx's list of outstanding rdatas, in increasing-offset
 * order, collecting completed reads.  Resends any rdata that failed with
 * -EAGAIN (restarting the scan), copies received data into the
 * destination iterator for the non-direct path, and when everything is
 * done reports the result via ki_complete (aio) or ctx->done (sync).
 *
 * Called from each rdata's completion work as well as from __cifs_readv;
 * aio_mutex serializes these callers, and the function simply returns if
 * it encounters an rdata that has not completed yet.
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
	struct cifs_sb_info *cifs_sb;
	int rc;

	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			if (!try_wait_for_completion(&rdata->done)) {
				/* not finished yet; a later completion
				 * will call us again */
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = 0;
					if (!ctx->direct_io)
						rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
							cifs_uncached_readdata_release);
						continue;
					}
				}

				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

					kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
				}

				/* queue the resent requests and rescan */
				list_splice(&tmp_list, &ctx->list);

				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else if (!ctx->direct_io)
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;

			ctx->total_len += rdata->got_bytes;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	/* non-direct: what the iterator consumed is what we delivered */
	if (!ctx->direct_io)
		ctx->total_len = ctx->len - iov_iter_count(to);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3849 
/*
 * Common worker for the uncached read paths (cifs_direct_readv and
 * cifs_user_readv).  Splits the request into async reads, then either
 * waits for completion (sync kiocb) or returns -EIOCBQUEUED (aio).
 *
 * @direct: true to read straight into the caller's pinned pages rather
 *          than bouncing through ctx-owned kernel pages.
 *
 * Returns bytes read, -EIOCBQUEUED for queued aio, or a negative error.
 */
static ssize_t __cifs_readv(
	struct kiocb *iocb, struct iov_iter *to, bool direct)
{
	size_t len;
	struct file *file = iocb->ki_filp;
	struct cifs_sb_info *cifs_sb;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	ssize_t rc, total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_aio_ctx *ctx;

	/*
	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
	 * fall back to data copy read path
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(to)) {
		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
		direct = false;
	}

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	/* ctx holds its own reference to the open file */
	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	/* user-backed pages must be marked dirty after we write to them */
	if (iter_is_iovec(to))
		ctx->should_dirty = true;

	if (direct) {
		ctx->pos = offset;
		ctx->direct_io = true;
		ctx->iter = *to;
		ctx->len = len;
	} else {
		/* allocate kernel pages to receive into */
		rc = setup_aio_ctx_iter(ctx, to, READ);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
		len = ctx->len;
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* aio: completion handler releases the last ctx reference */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* interrupted: report -EINTR but keep any partial count */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}
3952 
3953 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3954 {
3955         return __cifs_readv(iocb, to, true);
3956 }
3957 
3958 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3959 {
3960         return __cifs_readv(iocb, to, false);
3961 }
3962 
/*
 * Read entry point for "strict" cache mode: only read through the page
 * cache when we hold a read (level II) oplock and no mandatory brlock
 * conflicts with the requested range; otherwise fall back to an uncached
 * read from the server.
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	/* POSIX (unix extensions) locking: server enforces, no conflict
	 * check needed here */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     0, NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}
4002 
/*
 * Synchronous read helper used by cifs_readpage_worker(): reads up to
 * @read_size bytes at *@offset into @read_data via the server's sync_read
 * op, in rsize-bounded chunks, retrying on -EAGAIN (session reconnect).
 *
 * Returns the number of bytes read once any data has been transferred,
 * otherwise the error from the first failed chunk. Advances *@offset by
 * the amount read.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms = {0};
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = cifs_pick_channel(tcon->ses);

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	/* forward the opener's pid when the mount requests it (for servers
	 * that enforce per-pid byte-range locks) */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		/* inner loop retries the same chunk while the session is
		 * being reconnected (-EAGAIN) */
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For windows me and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if (!(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			io_parms.server = server;
			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			/* partial success: return what we have; nothing
			 * read yet: propagate the error */
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
4094 
4095 /*
4096  * If the page is mmap'ed into a process' page tables, then we need to make
4097  * sure that it doesn't change while being written back.
4098  */
4099 static vm_fault_t
4100 cifs_page_mkwrite(struct vm_fault *vmf)
4101 {
4102         struct page *page = vmf->page;
4103 
4104         lock_page(page);
4105         return VM_FAULT_LOCKED;
4106 }
4107 
/* vm_ops shared by both cifs mmap variants; reads fault in via the generic
 * filemap paths, writes are intercepted to lock the page (see above). */
static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};
4113 
4114 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4115 {
4116         int xid, rc = 0;
4117         struct inode *inode = file_inode(file);
4118 
4119         xid = get_xid();
4120 
4121         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4122                 rc = cifs_zap_mapping(inode);
4123         if (!rc)
4124                 rc = generic_file_mmap(file, vma);
4125         if (!rc)
4126                 vma->vm_ops = &cifs_file_vm_ops;
4127 
4128         free_xid(xid);
4129         return rc;
4130 }
4131 
4132 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4133 {
4134         int rc, xid;
4135 
4136         xid = get_xid();
4137 
4138         rc = cifs_revalidate_file(file);
4139         if (rc)
4140                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4141                          rc);
4142         if (!rc)
4143                 rc = generic_file_mmap(file, vma);
4144         if (!rc)
4145                 vma->vm_ops = &cifs_file_vm_ops;
4146 
4147         free_xid(xid);
4148         return rc;
4149 }
4150 
/*
 * Work-queue completion for an async readpages request: mark the pages
 * that received data uptodate, hand them to the LRU, push them to fscache,
 * and drop the request's page and rdata references.
 */
static void
cifs_readv_complete(struct work_struct *work)
{
	unsigned int i, got_bytes;
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	got_bytes = rdata->got_bytes;
	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];

		lru_cache_add(page);

		/* a page is uptodate if the whole read succeeded, or if a
		 * reconnect (-EAGAIN) still delivered bytes covering it */
		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes)) {
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		unlock_page(page);

		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes))
			cifs_readpage_to_fscache(rdata->mapping->host, page);

		/* consume up to one page's worth of the received byte count */
		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);

		put_page(page);
		rdata->pages[i] = NULL;
	}
	kref_put(&rdata->refcount, cifs_readdata_release);
}
4183 
/*
 * Fill the pages of @rdata with up to @len bytes of response data, taken
 * either from @iter (copy path) or directly from the server socket.
 * Pages beyond the data length are zero-filled (if past the server's EOF)
 * or released back unread. Returns bytes placed, or a negative error.
 */
static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		unsigned int to_read = rdata->pagesz;
		size_t n;

		/* only the first page may start at a non-zero offset */
		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;

		if (len >= to_read) {
			len -= to_read;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len + page_offset, to_read - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			/* RDMA already placed the data; just account it */
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	/* report partial progress unless the connection died under us */
	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
4269 
/* read_into_pages callback: pull response data straight off the socket */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}
4276 
/* copy_into_pages callback: data already buffered, copy from @iter */
static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}
4284 
/*
 * Take a run of index-contiguous pages off @page_list, insert them into
 * the page cache (locked), and move them onto @tmplist, stopping at a
 * discontinuity or when adding another page would exceed @rsize.
 * On success *offset/*bytes/*nr_pages describe the resulting read request.
 */
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
		if (rc) {
			/* this page failed, but keep what we gathered so far */
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}
4344 
/*
 * ->readpages() implementation: after letting fscache satisfy what it can,
 * repeatedly peel an index-contiguous, rsize-bounded run of pages off
 * @page_list and issue one async read per run, with server credits
 * acquired for each request.
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	int err = 0;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		free_xid(xid);
		return rc;
	}

	/* forward opener's pid when the mount requests it */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list) && !err) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* block until the server grants credits for an rsize read */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		nr_pages = 0;
		err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (!nr_pages) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->server = server;
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		/* tmplist is in increasing index order; keep that order */
		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			/* return credits and release every gathered page */
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop our submission ref; completion holds its own */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}
4499 
4500 /*
4501  * cifs_readpage_worker must be called with the page pinned
4502  */
4503 static int cifs_readpage_worker(struct file *file, struct page *page,
4504         loff_t *poffset)
4505 {
4506         char *read_data;
4507         int rc;
4508 
4509         /* Is the page cached? */
4510         rc = cifs_readpage_from_fscache(file_inode(file), page);
4511         if (rc == 0)
4512                 goto read_complete;
4513 
4514         read_data = kmap(page);
4515         /* for reads over a certain size could initiate async read ahead */
4516 
4517         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4518 
4519         if (rc < 0)
4520                 goto io_error;
4521         else
4522                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4523 
4524         /* we do not want atime to be less than mtime, it broke some apps */
4525         file_inode(file)->i_atime = current_time(file_inode(file));
4526         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4527                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4528         else
4529                 file_inode(file)->i_atime = current_time(file_inode(file));
4530 
4531         if (PAGE_SIZE > rc)
4532                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4533 
4534         flush_dcache_page(page);
4535         SetPageUptodate(page);
4536 
4537         /* send this page to the cache */
4538         cifs_readpage_to_fscache(file_inode(file), page);
4539 
4540         rc = 0;
4541 
4542 io_error:
4543         kunmap(page);
4544         unlock_page(page);
4545 
4546 read_complete:
4547         return rc;
4548 }
4549 
4550 static int cifs_readpage(struct file *file, struct page *page)
4551 {
4552         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4553         int rc = -EACCES;
4554         unsigned int xid;
4555 
4556         xid = get_xid();
4557 
4558         if (file->private_data == NULL) {
4559                 rc = -EBADF;
4560                 free_xid(xid);
4561                 return rc;
4562         }
4563 
4564         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4565                  page, (int)offset, (int)offset);
4566 
4567         rc = cifs_readpage_worker(file, page, &offset);
4568 
4569         free_xid(xid);
4570         return rc;
4571 }
4572 
4573 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4574 {
4575         struct cifsFileInfo *open_file;
4576 
4577         spin_lock(&cifs_inode->open_file_lock);
4578         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4579                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4580                         spin_unlock(&cifs_inode->open_file_lock);
4581                         return 1;
4582                 }
4583         }
4584         spin_unlock(&cifs_inode->open_file_lock);
4585         return 0;
4586 }
4587 
4588 /* We do not want to update the file size from server for inodes
4589    open for write - to avoid races with writepage extending
4590    the file - in the future we could consider allowing
4591    refreshing the inode only on increases in the file size
4592    but this is tricky to do without racing with writebehind
4593    page caching in the current Linux kernel design */
4594 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4595 {
4596         if (!cifsInode)
4597                 return true;
4598 
4599         if (is_inode_writable(cifsInode)) {
4600                 /* This inode is open for write at least once */
4601                 struct cifs_sb_info *cifs_sb;
4602 
4603                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4604                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4605                         /* since no page cache to corrupt on directio
4606                         we can change size safely */
4607                         return true;
4608                 }
4609 
4610                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4611                         return true;
4612 
4613                 return false;
4614         } else
4615                 return true;
4616 }
4617 
/*
 * ->write_begin(): find or create the target page and make sure the parts
 * the caller won't overwrite are accounted for - either already uptodate,
 * zeroed (oplock fast path), or read in from the server. On the read-in
 * path the page is re-grabbed, hence the start/oncethru retry.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		/* only attempt the read-in once, then fall through */
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
4694 
4695 static int cifs_release_page(struct page *page, gfp_t gfp)
4696 {
4697         if (PagePrivate(page))
4698                 return 0;
4699 
4700         return cifs_fscache_release_page(page, gfp);
4701 }
4702 
4703 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4704                                  unsigned int length)
4705 {
4706         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4707 
4708         if (offset == 0 && length == PAGE_SIZE)
4709                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4710 }
4711 
4712 static int cifs_launder_page(struct page *page)
4713 {
4714         int rc = 0;
4715         loff_t range_start = page_offset(page);
4716         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4717         struct writeback_control wbc = {
4718                 .sync_mode = WB_SYNC_ALL,
4719                 .nr_to_write = 0,
4720                 .range_start = range_start,
4721                 .range_end = range_end,
4722         };
4723 
4724         cifs_dbg(FYI, "Launder page: %p\n", page);
4725 
4726         if (clear_page_dirty_for_io(page))
4727                 rc = cifs_writepage_locked(page, &wbc);
4728 
4729         cifs_fscache_invalidate_page(page, page->mapping->host);
4730         return rc;
4731 }
4732 
/*
 * Deferred-work handler run when the server breaks (revokes or
 * downgrades) an oplock on an open file. Downgrades the local caching
 * level, flushes or purges cached data as required, re-registers
 * byte-range locks, and finally acknowledges the break to the server.
 * The statement ordering below is deliberate — do not reorder.
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	bool purge_cache = false;

	/* Let in-flight writers drain before changing the caching level. */
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	/* Protocol-specific downgrade; may ask us to purge cached pages. */
	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	/*
	 * Mandatory byte-range locks cannot coexist with read-only caching,
	 * so drop the oplock entirely in that case.
	 */
	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		/* Propagate the break to any local leases on this inode. */
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			/* Lost read caching: wait for writeback, then zap. */
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		/*
		 * Still holding a write oplock: our cached locks remain
		 * valid, so skip pushing them back to the server.
		 */
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	/* Re-register cached byte-range locks with the server. */
	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	/* Drop our reference on the open file and mark the break done. */
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	cifs_done_oplock_break(cinode);
}
4792 
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() with O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
4802 static ssize_t
4803 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4804 {
4805         /*
4806          * FIXME
4807          * Eventually need to support direct IO for non forcedirectio mounts
4808          */
4809         return -EINVAL;
4810 }
4811 
4812 static int cifs_swap_activate(struct swap_info_struct *sis,
4813                               struct file *swap_file, sector_t *span)
4814 {
4815         struct cifsFileInfo *cfile = swap_file->private_data;
4816         struct inode *inode = swap_file->f_mapping->host;
4817         unsigned long blocks;
4818         long long isize;
4819 
4820         cifs_dbg(FYI, "swap activate\n");
4821 
4822         spin_lock(&inode->i_lock);
4823         blocks = inode->i_blocks;
4824         isize = inode->i_size;
4825         spin_unlock(&inode->i_lock);
4826         if (blocks*512 < isize) {
4827                 pr_warn("swap activate: swapfile has holes\n");
4828                 return -EINVAL;
4829         }
4830         *span = sis->pages;
4831 
4832         pr_warn_once("Swap support over SMB3 is experimental\n");
4833 
4834         /*
4835          * TODO: consider adding ACL (or documenting how) to prevent other
4836          * users (on this or other systems) from reading it
4837          */
4838 
4839 
4840         /* TODO: add sk_set_memalloc(inet) or similar */
4841 
4842         if (cfile)
4843                 cfile->swapfile = true;
4844         /*
4845          * TODO: Since file already open, we can't open with DENY_ALL here
4846          * but we could add call to grab a byte range lock to prevent others
4847          * from reading or writing the file
4848          */
4849 
4850         return 0;
4851 }
4852 
4853 static void cifs_swap_deactivate(struct file *file)
4854 {
4855         struct cifsFileInfo *cfile = file->private_data;
4856 
4857         cifs_dbg(FYI, "swap deactivate\n");
4858 
4859         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4860 
4861         if (cfile)
4862                 cfile->swapfile = false;
4863 
4864         /* do we need to unpin (or unlock) the file */
4865 }
4866 
/*
 * Address space operations for cached (buffered) I/O on CIFS files,
 * used when the server buffer is large enough for full-page reads.
 */
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
	/*
	 * TODO: investigate and if useful we could add an cifs_migratePage
	 * helper (under an CONFIG_MIGRATION) in the future, and also
	 * investigate and add an is_dirty_writeback helper if needed
	 */
	.swap_activate = cifs_swap_activate,
	.swap_deactivate = cifs_swap_deactivate,
};
4887 
/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 *
 * Note: compared to cifs_addr_ops, this table also omits direct_IO and
 * the swap_activate/swap_deactivate hooks.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
4904 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp