~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/cifs/file.c

Version: ~ [ linux-5.10-rc5 ] ~ [ linux-5.9.10 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.79 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.159 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.208 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.245 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.245 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.85 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *   fs/cifs/file.c
  3  *
  4  *   vfs operations that deal with files
  5  *
  6  *   Copyright (C) International Business Machines  Corp., 2002,2010
  7  *   Author(s): Steve French (sfrench@us.ibm.com)
  8  *              Jeremy Allison (jra@samba.org)
  9  *
 10  *   This library is free software; you can redistribute it and/or modify
 11  *   it under the terms of the GNU Lesser General Public License as published
 12  *   by the Free Software Foundation; either version 2.1 of the License, or
 13  *   (at your option) any later version.
 14  *
 15  *   This library is distributed in the hope that it will be useful,
 16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 18  *   the GNU Lesser General Public License for more details.
 19  *
 20  *   You should have received a copy of the GNU Lesser General Public License
 21  *   along with this library; if not, write to the Free Software
 22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 23  */
 24 #include <linux/fs.h>
 25 #include <linux/backing-dev.h>
 26 #include <linux/stat.h>
 27 #include <linux/fcntl.h>
 28 #include <linux/pagemap.h>
 29 #include <linux/pagevec.h>
 30 #include <linux/writeback.h>
 31 #include <linux/task_io_accounting_ops.h>
 32 #include <linux/delay.h>
 33 #include <linux/mount.h>
 34 #include <linux/slab.h>
 35 #include <linux/swap.h>
 36 #include <asm/div64.h>
 37 #include "cifsfs.h"
 38 #include "cifspdu.h"
 39 #include "cifsglob.h"
 40 #include "cifsproto.h"
 41 #include "cifs_unicode.h"
 42 #include "cifs_debug.h"
 43 #include "cifs_fs_sb.h"
 44 #include "fscache.h"
 45 #include "smbdirect.h"
 46 
 47 static inline int cifs_convert_flags(unsigned int flags)
 48 {
 49         if ((flags & O_ACCMODE) == O_RDONLY)
 50                 return GENERIC_READ;
 51         else if ((flags & O_ACCMODE) == O_WRONLY)
 52                 return GENERIC_WRITE;
 53         else if ((flags & O_ACCMODE) == O_RDWR) {
 54                 /* GENERIC_ALL is too much permission to request
 55                    can cause unnecessary access denied on create */
 56                 /* return GENERIC_ALL; */
 57                 return (GENERIC_READ | GENERIC_WRITE);
 58         }
 59 
 60         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
 61                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
 62                 FILE_READ_DATA);
 63 }
 64 
 65 static u32 cifs_posix_convert_flags(unsigned int flags)
 66 {
 67         u32 posix_flags = 0;
 68 
 69         if ((flags & O_ACCMODE) == O_RDONLY)
 70                 posix_flags = SMB_O_RDONLY;
 71         else if ((flags & O_ACCMODE) == O_WRONLY)
 72                 posix_flags = SMB_O_WRONLY;
 73         else if ((flags & O_ACCMODE) == O_RDWR)
 74                 posix_flags = SMB_O_RDWR;
 75 
 76         if (flags & O_CREAT) {
 77                 posix_flags |= SMB_O_CREAT;
 78                 if (flags & O_EXCL)
 79                         posix_flags |= SMB_O_EXCL;
 80         } else if (flags & O_EXCL)
 81                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
 82                          current->comm, current->tgid);
 83 
 84         if (flags & O_TRUNC)
 85                 posix_flags |= SMB_O_TRUNC;
 86         /* be safe and imply O_SYNC for O_DSYNC */
 87         if (flags & O_DSYNC)
 88                 posix_flags |= SMB_O_SYNC;
 89         if (flags & O_DIRECTORY)
 90                 posix_flags |= SMB_O_DIRECTORY;
 91         if (flags & O_NOFOLLOW)
 92                 posix_flags |= SMB_O_NOFOLLOW;
 93         if (flags & O_DIRECT)
 94                 posix_flags |= SMB_O_DIRECT;
 95 
 96         return posix_flags;
 97 }
 98 
 99 static inline int cifs_get_disposition(unsigned int flags)
100 {
101         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
102                 return FILE_CREATE;
103         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104                 return FILE_OVERWRITE_IF;
105         else if ((flags & O_CREAT) == O_CREAT)
106                 return FILE_OPEN_IF;
107         else if ((flags & O_TRUNC) == O_TRUNC)
108                 return FILE_OVERWRITE;
109         else
110                 return FILE_OPEN;
111 }
112 
113 int cifs_posix_open(char *full_path, struct inode **pinode,
114                         struct super_block *sb, int mode, unsigned int f_flags,
115                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
116 {
117         int rc;
118         FILE_UNIX_BASIC_INFO *presp_data;
119         __u32 posix_flags = 0;
120         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
121         struct cifs_fattr fattr;
122         struct tcon_link *tlink;
123         struct cifs_tcon *tcon;
124 
125         cifs_dbg(FYI, "posix open %s\n", full_path);
126 
127         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
128         if (presp_data == NULL)
129                 return -ENOMEM;
130 
131         tlink = cifs_sb_tlink(cifs_sb);
132         if (IS_ERR(tlink)) {
133                 rc = PTR_ERR(tlink);
134                 goto posix_open_ret;
135         }
136 
137         tcon = tlink_tcon(tlink);
138         mode &= ~current_umask();
139 
140         posix_flags = cifs_posix_convert_flags(f_flags);
141         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
142                              poplock, full_path, cifs_sb->local_nls,
143                              cifs_remap(cifs_sb));
144         cifs_put_tlink(tlink);
145 
146         if (rc)
147                 goto posix_open_ret;
148 
149         if (presp_data->Type == cpu_to_le32(-1))
150                 goto posix_open_ret; /* open ok, caller does qpathinfo */
151 
152         if (!pinode)
153                 goto posix_open_ret; /* caller does not need info */
154 
155         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
156 
157         /* get new inode and set it up */
158         if (*pinode == NULL) {
159                 cifs_fill_uniqueid(sb, &fattr);
160                 *pinode = cifs_iget(sb, &fattr);
161                 if (!*pinode) {
162                         rc = -ENOMEM;
163                         goto posix_open_ret;
164                 }
165         } else {
166                 cifs_fattr_to_inode(*pinode, &fattr);
167         }
168 
169 posix_open_ret:
170         kfree(presp_data);
171         return rc;
172 }
173 
174 static int
175 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
176              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
177              struct cifs_fid *fid, unsigned int xid)
178 {
179         int rc;
180         int desired_access;
181         int disposition;
182         int create_options = CREATE_NOT_DIR;
183         FILE_ALL_INFO *buf;
184         struct TCP_Server_Info *server = tcon->ses->server;
185         struct cifs_open_parms oparms;
186 
187         if (!server->ops->open)
188                 return -ENOSYS;
189 
190         desired_access = cifs_convert_flags(f_flags);
191 
192 /*********************************************************************
193  *  open flag mapping table:
194  *
195  *      POSIX Flag            CIFS Disposition
196  *      ----------            ----------------
197  *      O_CREAT               FILE_OPEN_IF
198  *      O_CREAT | O_EXCL      FILE_CREATE
199  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
200  *      O_TRUNC               FILE_OVERWRITE
201  *      none of the above     FILE_OPEN
202  *
203  *      Note that there is not a direct match between disposition
204  *      FILE_SUPERSEDE (ie create whether or not file exists although
205  *      O_CREAT | O_TRUNC is similar but truncates the existing
206  *      file rather than creating a new file as FILE_SUPERSEDE does
207  *      (which uses the attributes / metadata passed in on open call)
208  *?
209  *?  O_SYNC is a reasonable match to CIFS writethrough flag
210  *?  and the read write flags match reasonably.  O_LARGEFILE
211  *?  is irrelevant because largefile support is always used
212  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
213  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
214  *********************************************************************/
215 
216         disposition = cifs_get_disposition(f_flags);
217 
218         /* BB pass O_SYNC flag through on file attributes .. BB */
219 
220         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
221         if (!buf)
222                 return -ENOMEM;
223 
224         if (backup_cred(cifs_sb))
225                 create_options |= CREATE_OPEN_BACKUP_INTENT;
226 
227         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
228         if (f_flags & O_SYNC)
229                 create_options |= CREATE_WRITE_THROUGH;
230 
231         if (f_flags & O_DIRECT)
232                 create_options |= CREATE_NO_BUFFER;
233 
234         oparms.tcon = tcon;
235         oparms.cifs_sb = cifs_sb;
236         oparms.desired_access = desired_access;
237         oparms.create_options = create_options;
238         oparms.disposition = disposition;
239         oparms.path = full_path;
240         oparms.fid = fid;
241         oparms.reconnect = false;
242 
243         rc = server->ops->open(xid, &oparms, oplock, buf);
244 
245         if (rc)
246                 goto out;
247 
248         if (tcon->unix_ext)
249                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
250                                               xid);
251         else
252                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
253                                          xid, fid);
254 
255 out:
256         kfree(buf);
257         return rc;
258 }
259 
260 static bool
261 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
262 {
263         struct cifs_fid_locks *cur;
264         bool has_locks = false;
265 
266         down_read(&cinode->lock_sem);
267         list_for_each_entry(cur, &cinode->llist, llist) {
268                 if (!list_empty(&cur->locks)) {
269                         has_locks = true;
270                         break;
271                 }
272         }
273         up_read(&cinode->lock_sem);
274         return has_locks;
275 }
276 
277 struct cifsFileInfo *
278 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
279                   struct tcon_link *tlink, __u32 oplock)
280 {
281         struct dentry *dentry = file_dentry(file);
282         struct inode *inode = d_inode(dentry);
283         struct cifsInodeInfo *cinode = CIFS_I(inode);
284         struct cifsFileInfo *cfile;
285         struct cifs_fid_locks *fdlocks;
286         struct cifs_tcon *tcon = tlink_tcon(tlink);
287         struct TCP_Server_Info *server = tcon->ses->server;
288 
289         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
290         if (cfile == NULL)
291                 return cfile;
292 
293         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
294         if (!fdlocks) {
295                 kfree(cfile);
296                 return NULL;
297         }
298 
299         INIT_LIST_HEAD(&fdlocks->locks);
300         fdlocks->cfile = cfile;
301         cfile->llist = fdlocks;
302         down_write(&cinode->lock_sem);
303         list_add(&fdlocks->llist, &cinode->llist);
304         up_write(&cinode->lock_sem);
305 
306         cfile->count = 1;
307         cfile->pid = current->tgid;
308         cfile->uid = current_fsuid();
309         cfile->dentry = dget(dentry);
310         cfile->f_flags = file->f_flags;
311         cfile->invalidHandle = false;
312         cfile->tlink = cifs_get_tlink(tlink);
313         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
314         mutex_init(&cfile->fh_mutex);
315         spin_lock_init(&cfile->file_info_lock);
316 
317         cifs_sb_active(inode->i_sb);
318 
319         /*
320          * If the server returned a read oplock and we have mandatory brlocks,
321          * set oplock level to None.
322          */
323         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
324                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
325                 oplock = 0;
326         }
327 
328         spin_lock(&tcon->open_file_lock);
329         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
330                 oplock = fid->pending_open->oplock;
331         list_del(&fid->pending_open->olist);
332 
333         fid->purge_cache = false;
334         server->ops->set_fid(cfile, fid, oplock);
335 
336         list_add(&cfile->tlist, &tcon->openFileList);
337 
338         /* if readable file instance put first in list*/
339         if (file->f_mode & FMODE_READ)
340                 list_add(&cfile->flist, &cinode->openFileList);
341         else
342                 list_add_tail(&cfile->flist, &cinode->openFileList);
343         spin_unlock(&tcon->open_file_lock);
344 
345         if (fid->purge_cache)
346                 cifs_zap_mapping(inode);
347 
348         file->private_data = cfile;
349         return cfile;
350 }
351 
352 struct cifsFileInfo *
353 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
354 {
355         spin_lock(&cifs_file->file_info_lock);
356         cifsFileInfo_get_locked(cifs_file);
357         spin_unlock(&cifs_file->file_info_lock);
358         return cifs_file;
359 }
360 
361 /*
362  * Release a reference on the file private data. This may involve closing
363  * the filehandle out on the server. Must be called without holding
364  * tcon->open_file_lock and cifs_file->file_info_lock.
365  */
366 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
367 {
368         struct inode *inode = d_inode(cifs_file->dentry);
369         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
370         struct TCP_Server_Info *server = tcon->ses->server;
371         struct cifsInodeInfo *cifsi = CIFS_I(inode);
372         struct super_block *sb = inode->i_sb;
373         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
374         struct cifsLockInfo *li, *tmp;
375         struct cifs_fid fid;
376         struct cifs_pending_open open;
377         bool oplock_break_cancelled;
378 
379         spin_lock(&tcon->open_file_lock);
380 
381         spin_lock(&cifs_file->file_info_lock);
382         if (--cifs_file->count > 0) {
383                 spin_unlock(&cifs_file->file_info_lock);
384                 spin_unlock(&tcon->open_file_lock);
385                 return;
386         }
387         spin_unlock(&cifs_file->file_info_lock);
388 
389         if (server->ops->get_lease_key)
390                 server->ops->get_lease_key(inode, &fid);
391 
392         /* store open in pending opens to make sure we don't miss lease break */
393         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
394 
395         /* remove it from the lists */
396         list_del(&cifs_file->flist);
397         list_del(&cifs_file->tlist);
398 
399         if (list_empty(&cifsi->openFileList)) {
400                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
401                          d_inode(cifs_file->dentry));
402                 /*
403                  * In strict cache mode we need invalidate mapping on the last
404                  * close  because it may cause a error when we open this file
405                  * again and get at least level II oplock.
406                  */
407                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
408                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
409                 cifs_set_oplock_level(cifsi, 0);
410         }
411 
412         spin_unlock(&tcon->open_file_lock);
413 
414         oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
415 
416         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
417                 struct TCP_Server_Info *server = tcon->ses->server;
418                 unsigned int xid;
419 
420                 xid = get_xid();
421                 if (server->ops->close)
422                         server->ops->close(xid, tcon, &cifs_file->fid);
423                 _free_xid(xid);
424         }
425 
426         if (oplock_break_cancelled)
427                 cifs_done_oplock_break(cifsi);
428 
429         cifs_del_pending_open(&open);
430 
431         /*
432          * Delete any outstanding lock records. We'll lose them when the file
433          * is closed anyway.
434          */
435         down_write(&cifsi->lock_sem);
436         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
437                 list_del(&li->llist);
438                 cifs_del_lock_waiters(li);
439                 kfree(li);
440         }
441         list_del(&cifs_file->llist->llist);
442         kfree(cifs_file->llist);
443         up_write(&cifsi->lock_sem);
444 
445         cifs_put_tlink(cifs_file->tlink);
446         dput(cifs_file->dentry);
447         cifs_sb_deactive(sb);
448         kfree(cifs_file);
449 }
450 
451 int cifs_open(struct inode *inode, struct file *file)
452 
453 {
454         int rc = -EACCES;
455         unsigned int xid;
456         __u32 oplock;
457         struct cifs_sb_info *cifs_sb;
458         struct TCP_Server_Info *server;
459         struct cifs_tcon *tcon;
460         struct tcon_link *tlink;
461         struct cifsFileInfo *cfile = NULL;
462         char *full_path = NULL;
463         bool posix_open_ok = false;
464         struct cifs_fid fid;
465         struct cifs_pending_open open;
466 
467         xid = get_xid();
468 
469         cifs_sb = CIFS_SB(inode->i_sb);
470         tlink = cifs_sb_tlink(cifs_sb);
471         if (IS_ERR(tlink)) {
472                 free_xid(xid);
473                 return PTR_ERR(tlink);
474         }
475         tcon = tlink_tcon(tlink);
476         server = tcon->ses->server;
477 
478         full_path = build_path_from_dentry(file_dentry(file));
479         if (full_path == NULL) {
480                 rc = -ENOMEM;
481                 goto out;
482         }
483 
484         cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
485                  inode, file->f_flags, full_path);
486 
487         if (file->f_flags & O_DIRECT &&
488             cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
489                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
490                         file->f_op = &cifs_file_direct_nobrl_ops;
491                 else
492                         file->f_op = &cifs_file_direct_ops;
493         }
494 
495         if (server->oplocks)
496                 oplock = REQ_OPLOCK;
497         else
498                 oplock = 0;
499 
500         if (!tcon->broken_posix_open && tcon->unix_ext &&
501             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
502                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
503                 /* can not refresh inode info since size could be stale */
504                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
505                                 cifs_sb->mnt_file_mode /* ignored */,
506                                 file->f_flags, &oplock, &fid.netfid, xid);
507                 if (rc == 0) {
508                         cifs_dbg(FYI, "posix open succeeded\n");
509                         posix_open_ok = true;
510                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
511                         if (tcon->ses->serverNOS)
512                                 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
513                                          tcon->ses->serverName,
514                                          tcon->ses->serverNOS);
515                         tcon->broken_posix_open = true;
516                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
517                          (rc != -EOPNOTSUPP)) /* path not found or net err */
518                         goto out;
519                 /*
520                  * Else fallthrough to retry open the old way on network i/o
521                  * or DFS errors.
522                  */
523         }
524 
525         if (server->ops->get_lease_key)
526                 server->ops->get_lease_key(inode, &fid);
527 
528         cifs_add_pending_open(&fid, tlink, &open);
529 
530         if (!posix_open_ok) {
531                 if (server->ops->get_lease_key)
532                         server->ops->get_lease_key(inode, &fid);
533 
534                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
535                                   file->f_flags, &oplock, &fid, xid);
536                 if (rc) {
537                         cifs_del_pending_open(&open);
538                         goto out;
539                 }
540         }
541 
542         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
543         if (cfile == NULL) {
544                 if (server->ops->close)
545                         server->ops->close(xid, tcon, &fid);
546                 cifs_del_pending_open(&open);
547                 rc = -ENOMEM;
548                 goto out;
549         }
550 
551         cifs_fscache_set_inode_cookie(inode, file);
552 
553         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
554                 /*
555                  * Time to set mode which we can not set earlier due to
556                  * problems creating new read-only files.
557                  */
558                 struct cifs_unix_set_info_args args = {
559                         .mode   = inode->i_mode,
560                         .uid    = INVALID_UID, /* no change */
561                         .gid    = INVALID_GID, /* no change */
562                         .ctime  = NO_CHANGE_64,
563                         .atime  = NO_CHANGE_64,
564                         .mtime  = NO_CHANGE_64,
565                         .device = 0,
566                 };
567                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
568                                        cfile->pid);
569         }
570 
571 out:
572         kfree(full_path);
573         free_xid(xid);
574         cifs_put_tlink(tlink);
575         return rc;
576 }
577 
578 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
579 
580 /*
581  * Try to reacquire byte range locks that were released when session
582  * to server was lost.
583  */
584 static int
585 cifs_relock_file(struct cifsFileInfo *cfile)
586 {
587         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
588         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
589         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
590         int rc = 0;
591 
592         down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
593         if (cinode->can_cache_brlcks) {
594                 /* can cache locks - no need to relock */
595                 up_read(&cinode->lock_sem);
596                 return rc;
597         }
598 
599         if (cap_unix(tcon->ses) &&
600             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
601             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
602                 rc = cifs_push_posix_locks(cfile);
603         else
604                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
605 
606         up_read(&cinode->lock_sem);
607         return rc;
608 }
609 
610 static int
611 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
612 {
613         int rc = -EACCES;
614         unsigned int xid;
615         __u32 oplock;
616         struct cifs_sb_info *cifs_sb;
617         struct cifs_tcon *tcon;
618         struct TCP_Server_Info *server;
619         struct cifsInodeInfo *cinode;
620         struct inode *inode;
621         char *full_path = NULL;
622         int desired_access;
623         int disposition = FILE_OPEN;
624         int create_options = CREATE_NOT_DIR;
625         struct cifs_open_parms oparms;
626 
627         xid = get_xid();
628         mutex_lock(&cfile->fh_mutex);
629         if (!cfile->invalidHandle) {
630                 mutex_unlock(&cfile->fh_mutex);
631                 rc = 0;
632                 free_xid(xid);
633                 return rc;
634         }
635 
636         inode = d_inode(cfile->dentry);
637         cifs_sb = CIFS_SB(inode->i_sb);
638         tcon = tlink_tcon(cfile->tlink);
639         server = tcon->ses->server;
640 
641         /*
642          * Can not grab rename sem here because various ops, including those
643          * that already have the rename sem can end up causing writepage to get
644          * called and if the server was down that means we end up here, and we
645          * can never tell if the caller already has the rename_sem.
646          */
647         full_path = build_path_from_dentry(cfile->dentry);
648         if (full_path == NULL) {
649                 rc = -ENOMEM;
650                 mutex_unlock(&cfile->fh_mutex);
651                 free_xid(xid);
652                 return rc;
653         }
654 
655         cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
656                  inode, cfile->f_flags, full_path);
657 
658         if (tcon->ses->server->oplocks)
659                 oplock = REQ_OPLOCK;
660         else
661                 oplock = 0;
662 
663         if (tcon->unix_ext && cap_unix(tcon->ses) &&
664             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
665                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
666                 /*
667                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
668                  * original open. Must mask them off for a reopen.
669                  */
670                 unsigned int oflags = cfile->f_flags &
671                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
672 
673                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
674                                      cifs_sb->mnt_file_mode /* ignored */,
675                                      oflags, &oplock, &cfile->fid.netfid, xid);
676                 if (rc == 0) {
677                         cifs_dbg(FYI, "posix reopen succeeded\n");
678                         oparms.reconnect = true;
679                         goto reopen_success;
680                 }
681                 /*
682                  * fallthrough to retry open the old way on errors, especially
683                  * in the reconnect path it is important to retry hard
684                  */
685         }
686 
687         desired_access = cifs_convert_flags(cfile->f_flags);
688 
689         if (backup_cred(cifs_sb))
690                 create_options |= CREATE_OPEN_BACKUP_INTENT;
691 
692         if (server->ops->get_lease_key)
693                 server->ops->get_lease_key(inode, &cfile->fid);
694 
695         oparms.tcon = tcon;
696         oparms.cifs_sb = cifs_sb;
697         oparms.desired_access = desired_access;
698         oparms.create_options = create_options;
699         oparms.disposition = disposition;
700         oparms.path = full_path;
701         oparms.fid = &cfile->fid;
702         oparms.reconnect = true;
703 
704         /*
705          * Can not refresh inode by passing in file_info buf to be returned by
706          * ops->open and then calling get_inode_info with returned buf since
707          * file might have write behind data that needs to be flushed and server
708          * version of file size can be stale. If we knew for sure that inode was
709          * not dirty locally we could do this.
710          */
711         rc = server->ops->open(xid, &oparms, &oplock, NULL);
712         if (rc == -ENOENT && oparms.reconnect == false) {
713                 /* durable handle timeout is expired - open the file again */
714                 rc = server->ops->open(xid, &oparms, &oplock, NULL);
715                 /* indicate that we need to relock the file */
716                 oparms.reconnect = true;
717         }
718 
719         if (rc) {
720                 mutex_unlock(&cfile->fh_mutex);
721                 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
722                 cifs_dbg(FYI, "oplock: %d\n", oplock);
723                 goto reopen_error_exit;
724         }
725 
726 reopen_success:
727         cfile->invalidHandle = false;
728         mutex_unlock(&cfile->fh_mutex);
729         cinode = CIFS_I(inode);
730 
731         if (can_flush) {
732                 rc = filemap_write_and_wait(inode->i_mapping);
733                 mapping_set_error(inode->i_mapping, rc);
734 
735                 if (tcon->unix_ext)
736                         rc = cifs_get_inode_info_unix(&inode, full_path,
737                                                       inode->i_sb, xid);
738                 else
739                         rc = cifs_get_inode_info(&inode, full_path, NULL,
740                                                  inode->i_sb, xid, NULL);
741         }
742         /*
743          * Else we are writing out data to server already and could deadlock if
744          * we tried to flush data, and since we do not know if we have data that
745          * would invalidate the current end of file on the server we can not go
746          * to the server to get the new inode info.
747          */
748 
749         /*
750          * If the server returned a read oplock and we have mandatory brlocks,
751          * set oplock level to None.
752          */
753         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
754                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
755                 oplock = 0;
756         }
757 
758         server->ops->set_fid(cfile, &cfile->fid, oplock);
759         if (oparms.reconnect)
760                 cifs_relock_file(cfile);
761 
762 reopen_error_exit:
763         kfree(full_path);
764         free_xid(xid);
765         return rc;
766 }
767 
768 int cifs_close(struct inode *inode, struct file *file)
769 {
770         if (file->private_data != NULL) {
771                 cifsFileInfo_put(file->private_data);
772                 file->private_data = NULL;
773         }
774 
775         /* return code from the ->release op is always ignored */
776         return 0;
777 }
778 
779 void
780 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
781 {
782         struct cifsFileInfo *open_file;
783         struct list_head *tmp;
784         struct list_head *tmp1;
785         struct list_head tmp_list;
786 
787         if (!tcon->use_persistent || !tcon->need_reopen_files)
788                 return;
789 
790         tcon->need_reopen_files = false;
791 
792         cifs_dbg(FYI, "Reopen persistent handles");
793         INIT_LIST_HEAD(&tmp_list);
794 
795         /* list all files open on tree connection, reopen resilient handles  */
796         spin_lock(&tcon->open_file_lock);
797         list_for_each(tmp, &tcon->openFileList) {
798                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
799                 if (!open_file->invalidHandle)
800                         continue;
801                 cifsFileInfo_get(open_file);
802                 list_add_tail(&open_file->rlist, &tmp_list);
803         }
804         spin_unlock(&tcon->open_file_lock);
805 
806         list_for_each_safe(tmp, tmp1, &tmp_list) {
807                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
808                 if (cifs_reopen_file(open_file, false /* do not flush */))
809                         tcon->need_reopen_files = true;
810                 list_del_init(&open_file->rlist);
811                 cifsFileInfo_put(open_file);
812         }
813 }
814 
815 int cifs_closedir(struct inode *inode, struct file *file)
816 {
817         int rc = 0;
818         unsigned int xid;
819         struct cifsFileInfo *cfile = file->private_data;
820         struct cifs_tcon *tcon;
821         struct TCP_Server_Info *server;
822         char *buf;
823 
824         cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
825 
826         if (cfile == NULL)
827                 return rc;
828 
829         xid = get_xid();
830         tcon = tlink_tcon(cfile->tlink);
831         server = tcon->ses->server;
832 
833         cifs_dbg(FYI, "Freeing private data in close dir\n");
834         spin_lock(&cfile->file_info_lock);
835         if (server->ops->dir_needs_close(cfile)) {
836                 cfile->invalidHandle = true;
837                 spin_unlock(&cfile->file_info_lock);
838                 if (server->ops->close_dir)
839                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
840                 else
841                         rc = -ENOSYS;
842                 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
843                 /* not much we can do if it fails anyway, ignore rc */
844                 rc = 0;
845         } else
846                 spin_unlock(&cfile->file_info_lock);
847 
848         buf = cfile->srch_inf.ntwrk_buf_start;
849         if (buf) {
850                 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
851                 cfile->srch_inf.ntwrk_buf_start = NULL;
852                 if (cfile->srch_inf.smallBuf)
853                         cifs_small_buf_release(buf);
854                 else
855                         cifs_buf_release(buf);
856         }
857 
858         cifs_put_tlink(cfile->tlink);
859         kfree(file->private_data);
860         file->private_data = NULL;
861         /* BB can we lock the filestruct while this is going on? */
862         free_xid(xid);
863         return rc;
864 }
865 
866 static struct cifsLockInfo *
867 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
868 {
869         struct cifsLockInfo *lock =
870                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
871         if (!lock)
872                 return lock;
873         lock->offset = offset;
874         lock->length = length;
875         lock->type = type;
876         lock->pid = current->tgid;
877         INIT_LIST_HEAD(&lock->blist);
878         init_waitqueue_head(&lock->block_q);
879         return lock;
880 }
881 
882 void
883 cifs_del_lock_waiters(struct cifsLockInfo *lock)
884 {
885         struct cifsLockInfo *li, *tmp;
886         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
887                 list_del_init(&li->blist);
888                 wake_up(&li->block_q);
889         }
890 }
891 
892 #define CIFS_LOCK_OP    0
893 #define CIFS_READ_OP    1
894 #define CIFS_WRITE_OP   2
895 
896 /* @rw_check : 0 - no op, 1 - read, 2 - write */
897 static bool
898 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
899                             __u64 length, __u8 type, struct cifsFileInfo *cfile,
900                             struct cifsLockInfo **conf_lock, int rw_check)
901 {
902         struct cifsLockInfo *li;
903         struct cifsFileInfo *cur_cfile = fdlocks->cfile;
904         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
905 
906         list_for_each_entry(li, &fdlocks->locks, llist) {
907                 if (offset + length <= li->offset ||
908                     offset >= li->offset + li->length)
909                         continue;
910                 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
911                     server->ops->compare_fids(cfile, cur_cfile)) {
912                         /* shared lock prevents write op through the same fid */
913                         if (!(li->type & server->vals->shared_lock_type) ||
914                             rw_check != CIFS_WRITE_OP)
915                                 continue;
916                 }
917                 if ((type & server->vals->shared_lock_type) &&
918                     ((server->ops->compare_fids(cfile, cur_cfile) &&
919                      current->tgid == li->pid) || type == li->type))
920                         continue;
921                 if (conf_lock)
922                         *conf_lock = li;
923                 return true;
924         }
925         return false;
926 }
927 
928 bool
929 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
930                         __u8 type, struct cifsLockInfo **conf_lock,
931                         int rw_check)
932 {
933         bool rc = false;
934         struct cifs_fid_locks *cur;
935         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
936 
937         list_for_each_entry(cur, &cinode->llist, llist) {
938                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
939                                                  cfile, conf_lock, rw_check);
940                 if (rc)
941                         break;
942         }
943 
944         return rc;
945 }
946 
947 /*
948  * Check if there is another lock that prevents us to set the lock (mandatory
949  * style). If such a lock exists, update the flock structure with its
950  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
951  * or leave it the same if we can't. Returns 0 if we don't need to request to
952  * the server or 1 otherwise.
953  */
954 static int
955 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
956                __u8 type, struct file_lock *flock)
957 {
958         int rc = 0;
959         struct cifsLockInfo *conf_lock;
960         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
961         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
962         bool exist;
963 
964         down_read(&cinode->lock_sem);
965 
966         exist = cifs_find_lock_conflict(cfile, offset, length, type,
967                                         &conf_lock, CIFS_LOCK_OP);
968         if (exist) {
969                 flock->fl_start = conf_lock->offset;
970                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
971                 flock->fl_pid = conf_lock->pid;
972                 if (conf_lock->type & server->vals->shared_lock_type)
973                         flock->fl_type = F_RDLCK;
974                 else
975                         flock->fl_type = F_WRLCK;
976         } else if (!cinode->can_cache_brlcks)
977                 rc = 1;
978         else
979                 flock->fl_type = F_UNLCK;
980 
981         up_read(&cinode->lock_sem);
982         return rc;
983 }
984 
985 static void
986 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
987 {
988         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
989         down_write(&cinode->lock_sem);
990         list_add_tail(&lock->llist, &cfile->llist->locks);
991         up_write(&cinode->lock_sem);
992 }
993 
994 /*
995  * Set the byte-range lock (mandatory style). Returns:
996  * 1) 0, if we set the lock and don't need to request to the server;
997  * 2) 1, if no locks prevent us but we need to request to the server;
998  * 3) -EACCESS, if there is a lock that prevents us and wait is false.
999  */
1000 static int
1001 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1002                  bool wait)
1003 {
1004         struct cifsLockInfo *conf_lock;
1005         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1006         bool exist;
1007         int rc = 0;
1008 
1009 try_again:
1010         exist = false;
1011         down_write(&cinode->lock_sem);
1012 
1013         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1014                                         lock->type, &conf_lock, CIFS_LOCK_OP);
1015         if (!exist && cinode->can_cache_brlcks) {
1016                 list_add_tail(&lock->llist, &cfile->llist->locks);
1017                 up_write(&cinode->lock_sem);
1018                 return rc;
1019         }
1020 
1021         if (!exist)
1022                 rc = 1;
1023         else if (!wait)
1024                 rc = -EACCES;
1025         else {
1026                 list_add_tail(&lock->blist, &conf_lock->blist);
1027                 up_write(&cinode->lock_sem);
1028                 rc = wait_event_interruptible(lock->block_q,
1029                                         (lock->blist.prev == &lock->blist) &&
1030                                         (lock->blist.next == &lock->blist));
1031                 if (!rc)
1032                         goto try_again;
1033                 down_write(&cinode->lock_sem);
1034                 list_del_init(&lock->blist);
1035         }
1036 
1037         up_write(&cinode->lock_sem);
1038         return rc;
1039 }
1040 
1041 /*
1042  * Check if there is another lock that prevents us to set the lock (posix
1043  * style). If such a lock exists, update the flock structure with its
1044  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1045  * or leave it the same if we can't. Returns 0 if we don't need to request to
1046  * the server or 1 otherwise.
1047  */
1048 static int
1049 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1050 {
1051         int rc = 0;
1052         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1053         unsigned char saved_type = flock->fl_type;
1054 
1055         if ((flock->fl_flags & FL_POSIX) == 0)
1056                 return 1;
1057 
1058         down_read(&cinode->lock_sem);
1059         posix_test_lock(file, flock);
1060 
1061         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1062                 flock->fl_type = saved_type;
1063                 rc = 1;
1064         }
1065 
1066         up_read(&cinode->lock_sem);
1067         return rc;
1068 }
1069 
1070 /*
1071  * Set the byte-range lock (posix style). Returns:
1072  * 1) 0, if we set the lock and don't need to request to the server;
1073  * 2) 1, if we need to request to the server;
1074  * 3) <0, if the error occurs while setting the lock.
1075  */
1076 static int
1077 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1078 {
1079         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1080         int rc = 1;
1081 
1082         if ((flock->fl_flags & FL_POSIX) == 0)
1083                 return rc;
1084 
1085 try_again:
1086         down_write(&cinode->lock_sem);
1087         if (!cinode->can_cache_brlcks) {
1088                 up_write(&cinode->lock_sem);
1089                 return rc;
1090         }
1091 
1092         rc = posix_lock_file(file, flock, NULL);
1093         up_write(&cinode->lock_sem);
1094         if (rc == FILE_LOCK_DEFERRED) {
1095                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1096                 if (!rc)
1097                         goto try_again;
1098                 posix_unblock_lock(flock);
1099         }
1100         return rc;
1101 }
1102 
1103 int
1104 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1105 {
1106         unsigned int xid;
1107         int rc = 0, stored_rc;
1108         struct cifsLockInfo *li, *tmp;
1109         struct cifs_tcon *tcon;
1110         unsigned int num, max_num, max_buf;
1111         LOCKING_ANDX_RANGE *buf, *cur;
1112         static const int types[] = {
1113                 LOCKING_ANDX_LARGE_FILES,
1114                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1115         };
1116         int i;
1117 
1118         xid = get_xid();
1119         tcon = tlink_tcon(cfile->tlink);
1120 
1121         /*
1122          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1123          * and check it for zero before using.
1124          */
1125         max_buf = tcon->ses->server->maxBuf;
1126         if (!max_buf) {
1127                 free_xid(xid);
1128                 return -EINVAL;
1129         }
1130 
1131         max_num = (max_buf - sizeof(struct smb_hdr)) /
1132                                                 sizeof(LOCKING_ANDX_RANGE);
1133         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1134         if (!buf) {
1135                 free_xid(xid);
1136                 return -ENOMEM;
1137         }
1138 
1139         for (i = 0; i < 2; i++) {
1140                 cur = buf;
1141                 num = 0;
1142                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1143                         if (li->type != types[i])
1144                                 continue;
1145                         cur->Pid = cpu_to_le16(li->pid);
1146                         cur->LengthLow = cpu_to_le32((u32)li->length);
1147                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1148                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1149                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1150                         if (++num == max_num) {
1151                                 stored_rc = cifs_lockv(xid, tcon,
1152                                                        cfile->fid.netfid,
1153                                                        (__u8)li->type, 0, num,
1154                                                        buf);
1155                                 if (stored_rc)
1156                                         rc = stored_rc;
1157                                 cur = buf;
1158                                 num = 0;
1159                         } else
1160                                 cur++;
1161                 }
1162 
1163                 if (num) {
1164                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1165                                                (__u8)types[i], 0, num, buf);
1166                         if (stored_rc)
1167                                 rc = stored_rc;
1168                 }
1169         }
1170 
1171         kfree(buf);
1172         free_xid(xid);
1173         return rc;
1174 }
1175 
1176 static __u32
1177 hash_lockowner(fl_owner_t owner)
1178 {
1179         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1180 }
1181 
1182 struct lock_to_push {
1183         struct list_head llist;
1184         __u64 offset;
1185         __u64 length;
1186         __u32 pid;
1187         __u16 netfid;
1188         __u8 type;
1189 };
1190 
1191 static int
1192 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1193 {
1194         struct inode *inode = d_inode(cfile->dentry);
1195         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1196         struct file_lock *flock;
1197         struct file_lock_context *flctx = inode->i_flctx;
1198         unsigned int count = 0, i;
1199         int rc = 0, xid, type;
1200         struct list_head locks_to_send, *el;
1201         struct lock_to_push *lck, *tmp;
1202         __u64 length;
1203 
1204         xid = get_xid();
1205 
1206         if (!flctx)
1207                 goto out;
1208 
1209         spin_lock(&flctx->flc_lock);
1210         list_for_each(el, &flctx->flc_posix) {
1211                 count++;
1212         }
1213         spin_unlock(&flctx->flc_lock);
1214 
1215         INIT_LIST_HEAD(&locks_to_send);
1216 
1217         /*
1218          * Allocating count locks is enough because no FL_POSIX locks can be
1219          * added to the list while we are holding cinode->lock_sem that
1220          * protects locking operations of this inode.
1221          */
1222         for (i = 0; i < count; i++) {
1223                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1224                 if (!lck) {
1225                         rc = -ENOMEM;
1226                         goto err_out;
1227                 }
1228                 list_add_tail(&lck->llist, &locks_to_send);
1229         }
1230 
1231         el = locks_to_send.next;
1232         spin_lock(&flctx->flc_lock);
1233         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1234                 if (el == &locks_to_send) {
1235                         /*
1236                          * The list ended. We don't have enough allocated
1237                          * structures - something is really wrong.
1238                          */
1239                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1240                         break;
1241                 }
1242                 length = 1 + flock->fl_end - flock->fl_start;
1243                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1244                         type = CIFS_RDLCK;
1245                 else
1246                         type = CIFS_WRLCK;
1247                 lck = list_entry(el, struct lock_to_push, llist);
1248                 lck->pid = hash_lockowner(flock->fl_owner);
1249                 lck->netfid = cfile->fid.netfid;
1250                 lck->length = length;
1251                 lck->type = type;
1252                 lck->offset = flock->fl_start;
1253         }
1254         spin_unlock(&flctx->flc_lock);
1255 
1256         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1257                 int stored_rc;
1258 
1259                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1260                                              lck->offset, lck->length, NULL,
1261                                              lck->type, 0);
1262                 if (stored_rc)
1263                         rc = stored_rc;
1264                 list_del(&lck->llist);
1265                 kfree(lck);
1266         }
1267 
1268 out:
1269         free_xid(xid);
1270         return rc;
1271 err_out:
1272         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1273                 list_del(&lck->llist);
1274                 kfree(lck);
1275         }
1276         goto out;
1277 }
1278 
1279 static int
1280 cifs_push_locks(struct cifsFileInfo *cfile)
1281 {
1282         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1283         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1284         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1285         int rc = 0;
1286 
1287         /* we are going to update can_cache_brlcks here - need a write access */
1288         down_write(&cinode->lock_sem);
1289         if (!cinode->can_cache_brlcks) {
1290                 up_write(&cinode->lock_sem);
1291                 return rc;
1292         }
1293 
1294         if (cap_unix(tcon->ses) &&
1295             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1296             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1297                 rc = cifs_push_posix_locks(cfile);
1298         else
1299                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1300 
1301         cinode->can_cache_brlcks = false;
1302         up_write(&cinode->lock_sem);
1303         return rc;
1304 }
1305 
1306 static void
1307 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1308                 bool *wait_flag, struct TCP_Server_Info *server)
1309 {
1310         if (flock->fl_flags & FL_POSIX)
1311                 cifs_dbg(FYI, "Posix\n");
1312         if (flock->fl_flags & FL_FLOCK)
1313                 cifs_dbg(FYI, "Flock\n");
1314         if (flock->fl_flags & FL_SLEEP) {
1315                 cifs_dbg(FYI, "Blocking lock\n");
1316                 *wait_flag = true;
1317         }
1318         if (flock->fl_flags & FL_ACCESS)
1319                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1320         if (flock->fl_flags & FL_LEASE)
1321                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1322         if (flock->fl_flags &
1323             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1324                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1325                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1326 
1327         *type = server->vals->large_lock_type;
1328         if (flock->fl_type == F_WRLCK) {
1329                 cifs_dbg(FYI, "F_WRLCK\n");
1330                 *type |= server->vals->exclusive_lock_type;
1331                 *lock = 1;
1332         } else if (flock->fl_type == F_UNLCK) {
1333                 cifs_dbg(FYI, "F_UNLCK\n");
1334                 *type |= server->vals->unlock_lock_type;
1335                 *unlock = 1;
1336                 /* Check if unlock includes more than one lock range */
1337         } else if (flock->fl_type == F_RDLCK) {
1338                 cifs_dbg(FYI, "F_RDLCK\n");
1339                 *type |= server->vals->shared_lock_type;
1340                 *lock = 1;
1341         } else if (flock->fl_type == F_EXLCK) {
1342                 cifs_dbg(FYI, "F_EXLCK\n");
1343                 *type |= server->vals->exclusive_lock_type;
1344                 *lock = 1;
1345         } else if (flock->fl_type == F_SHLCK) {
1346                 cifs_dbg(FYI, "F_SHLCK\n");
1347                 *type |= server->vals->shared_lock_type;
1348                 *lock = 1;
1349         } else
1350                 cifs_dbg(FYI, "Unknown type of lock\n");
1351 }
1352 
1353 static int
1354 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1355            bool wait_flag, bool posix_lck, unsigned int xid)
1356 {
1357         int rc = 0;
1358         __u64 length = 1 + flock->fl_end - flock->fl_start;
1359         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1360         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1361         struct TCP_Server_Info *server = tcon->ses->server;
1362         __u16 netfid = cfile->fid.netfid;
1363 
1364         if (posix_lck) {
1365                 int posix_lock_type;
1366 
1367                 rc = cifs_posix_lock_test(file, flock);
1368                 if (!rc)
1369                         return rc;
1370 
1371                 if (type & server->vals->shared_lock_type)
1372                         posix_lock_type = CIFS_RDLCK;
1373                 else
1374                         posix_lock_type = CIFS_WRLCK;
1375                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1376                                       hash_lockowner(flock->fl_owner),
1377                                       flock->fl_start, length, flock,
1378                                       posix_lock_type, wait_flag);
1379                 return rc;
1380         }
1381 
1382         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1383         if (!rc)
1384                 return rc;
1385 
1386         /* BB we could chain these into one lock request BB */
1387         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1388                                     1, 0, false);
1389         if (rc == 0) {
1390                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1391                                             type, 0, 1, false);
1392                 flock->fl_type = F_UNLCK;
1393                 if (rc != 0)
1394                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1395                                  rc);
1396                 return 0;
1397         }
1398 
1399         if (type & server->vals->shared_lock_type) {
1400                 flock->fl_type = F_WRLCK;
1401                 return 0;
1402         }
1403 
1404         type &= ~server->vals->exclusive_lock_type;
1405 
1406         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1407                                     type | server->vals->shared_lock_type,
1408                                     1, 0, false);
1409         if (rc == 0) {
1410                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1411                         type | server->vals->shared_lock_type, 0, 1, false);
1412                 flock->fl_type = F_RDLCK;
1413                 if (rc != 0)
1414                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1415                                  rc);
1416         } else
1417                 flock->fl_type = F_WRLCK;
1418 
1419         return 0;
1420 }
1421 
1422 void
1423 cifs_move_llist(struct list_head *source, struct list_head *dest)
1424 {
1425         struct list_head *li, *tmp;
1426         list_for_each_safe(li, tmp, source)
1427                 list_move(li, dest);
1428 }
1429 
1430 void
1431 cifs_free_llist(struct list_head *llist)
1432 {
1433         struct cifsLockInfo *li, *tmp;
1434         list_for_each_entry_safe(li, tmp, llist, llist) {
1435                 cifs_del_lock_waiters(li);
1436                 list_del(&li->llist);
1437                 kfree(li);
1438         }
1439 }
1440 
1441 int
1442 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1443                   unsigned int xid)
1444 {
1445         int rc = 0, stored_rc;
1446         static const int types[] = {
1447                 LOCKING_ANDX_LARGE_FILES,
1448                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1449         };
1450         unsigned int i;
1451         unsigned int max_num, num, max_buf;
1452         LOCKING_ANDX_RANGE *buf, *cur;
1453         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1454         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1455         struct cifsLockInfo *li, *tmp;
1456         __u64 length = 1 + flock->fl_end - flock->fl_start;
1457         struct list_head tmp_llist;
1458 
1459         INIT_LIST_HEAD(&tmp_llist);
1460 
1461         /*
1462          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1463          * and check it for zero before using.
1464          */
1465         max_buf = tcon->ses->server->maxBuf;
1466         if (!max_buf)
1467                 return -EINVAL;
1468 
1469         max_num = (max_buf - sizeof(struct smb_hdr)) /
1470                                                 sizeof(LOCKING_ANDX_RANGE);
1471         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1472         if (!buf)
1473                 return -ENOMEM;
1474 
1475         down_write(&cinode->lock_sem);
1476         for (i = 0; i < 2; i++) {
1477                 cur = buf;
1478                 num = 0;
1479                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1480                         if (flock->fl_start > li->offset ||
1481                             (flock->fl_start + length) <
1482                             (li->offset + li->length))
1483                                 continue;
1484                         if (current->tgid != li->pid)
1485                                 continue;
1486                         if (types[i] != li->type)
1487                                 continue;
1488                         if (cinode->can_cache_brlcks) {
1489                                 /*
1490                                  * We can cache brlock requests - simply remove
1491                                  * a lock from the file's list.
1492                                  */
1493                                 list_del(&li->llist);
1494                                 cifs_del_lock_waiters(li);
1495                                 kfree(li);
1496                                 continue;
1497                         }
1498                         cur->Pid = cpu_to_le16(li->pid);
1499                         cur->LengthLow = cpu_to_le32((u32)li->length);
1500                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1501                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1502                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1503                         /*
1504                          * We need to save a lock here to let us add it again to
1505                          * the file's list if the unlock range request fails on
1506                          * the server.
1507                          */
1508                         list_move(&li->llist, &tmp_llist);
1509                         if (++num == max_num) {
1510                                 stored_rc = cifs_lockv(xid, tcon,
1511                                                        cfile->fid.netfid,
1512                                                        li->type, num, 0, buf);
1513                                 if (stored_rc) {
1514                                         /*
1515                                          * We failed on the unlock range
1516                                          * request - add all locks from the tmp
1517                                          * list to the head of the file's list.
1518                                          */
1519                                         cifs_move_llist(&tmp_llist,
1520                                                         &cfile->llist->locks);
1521                                         rc = stored_rc;
1522                                 } else
1523                                         /*
1524                                          * The unlock range request succeed -
1525                                          * free the tmp list.
1526                                          */
1527                                         cifs_free_llist(&tmp_llist);
1528                                 cur = buf;
1529                                 num = 0;
1530                         } else
1531                                 cur++;
1532                 }
1533                 if (num) {
1534                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1535                                                types[i], num, 0, buf);
1536                         if (stored_rc) {
1537                                 cifs_move_llist(&tmp_llist,
1538                                                 &cfile->llist->locks);
1539                                 rc = stored_rc;
1540                         } else
1541                                 cifs_free_llist(&tmp_llist);
1542                 }
1543         }
1544 
1545         up_write(&cinode->lock_sem);
1546         kfree(buf);
1547         return rc;
1548 }
1549 
1550 static int
1551 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1552            bool wait_flag, bool posix_lck, int lock, int unlock,
1553            unsigned int xid)
1554 {
1555         int rc = 0;
1556         __u64 length = 1 + flock->fl_end - flock->fl_start;
1557         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1558         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1559         struct TCP_Server_Info *server = tcon->ses->server;
1560         struct inode *inode = d_inode(cfile->dentry);
1561 
1562         if (posix_lck) {
1563                 int posix_lock_type;
1564 
1565                 rc = cifs_posix_lock_set(file, flock);
1566                 if (!rc || rc < 0)
1567                         return rc;
1568 
1569                 if (type & server->vals->shared_lock_type)
1570                         posix_lock_type = CIFS_RDLCK;
1571                 else
1572                         posix_lock_type = CIFS_WRLCK;
1573 
1574                 if (unlock == 1)
1575                         posix_lock_type = CIFS_UNLCK;
1576 
1577                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1578                                       hash_lockowner(flock->fl_owner),
1579                                       flock->fl_start, length,
1580                                       NULL, posix_lock_type, wait_flag);
1581                 goto out;
1582         }
1583 
1584         if (lock) {
1585                 struct cifsLockInfo *lock;
1586 
1587                 lock = cifs_lock_init(flock->fl_start, length, type);
1588                 if (!lock)
1589                         return -ENOMEM;
1590 
1591                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1592                 if (rc < 0) {
1593                         kfree(lock);
1594                         return rc;
1595                 }
1596                 if (!rc)
1597                         goto out;
1598 
1599                 /*
1600                  * Windows 7 server can delay breaking lease from read to None
1601                  * if we set a byte-range lock on a file - break it explicitly
1602                  * before sending the lock to the server to be sure the next
1603                  * read won't conflict with non-overlapted locks due to
1604                  * pagereading.
1605                  */
1606                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1607                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1608                         cifs_zap_mapping(inode);
1609                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1610                                  inode);
1611                         CIFS_I(inode)->oplock = 0;
1612                 }
1613 
1614                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1615                                             type, 1, 0, wait_flag);
1616                 if (rc) {
1617                         kfree(lock);
1618                         return rc;
1619                 }
1620 
1621                 cifs_lock_add(cfile, lock);
1622         } else if (unlock)
1623                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1624 
1625 out:
1626         if (flock->fl_flags & FL_POSIX && !rc)
1627                 rc = locks_lock_file_wait(file, flock);
1628         return rc;
1629 }
1630 
1631 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1632 {
1633         int rc, xid;
1634         int lock = 0, unlock = 0;
1635         bool wait_flag = false;
1636         bool posix_lck = false;
1637         struct cifs_sb_info *cifs_sb;
1638         struct cifs_tcon *tcon;
1639         struct cifsInodeInfo *cinode;
1640         struct cifsFileInfo *cfile;
1641         __u16 netfid;
1642         __u32 type;
1643 
1644         rc = -EACCES;
1645         xid = get_xid();
1646 
1647         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1648                  cmd, flock->fl_flags, flock->fl_type,
1649                  flock->fl_start, flock->fl_end);
1650 
1651         cfile = (struct cifsFileInfo *)file->private_data;
1652         tcon = tlink_tcon(cfile->tlink);
1653 
1654         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1655                         tcon->ses->server);
1656 
1657         cifs_sb = CIFS_FILE_SB(file);
1658         netfid = cfile->fid.netfid;
1659         cinode = CIFS_I(file_inode(file));
1660 
1661         if (cap_unix(tcon->ses) &&
1662             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1663             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1664                 posix_lck = true;
1665         /*
1666          * BB add code here to normalize offset and length to account for
1667          * negative length which we can not accept over the wire.
1668          */
1669         if (IS_GETLK(cmd)) {
1670                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1671                 free_xid(xid);
1672                 return rc;
1673         }
1674 
1675         if (!lock && !unlock) {
1676                 /*
1677                  * if no lock or unlock then nothing to do since we do not
1678                  * know what it is
1679                  */
1680                 free_xid(xid);
1681                 return -EOPNOTSUPP;
1682         }
1683 
1684         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1685                         xid);
1686         free_xid(xid);
1687         return rc;
1688 }
1689 
1690 /*
1691  * update the file size (if needed) after a write. Should be called with
1692  * the inode->i_lock held
1693  */
1694 void
1695 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1696                       unsigned int bytes_written)
1697 {
1698         loff_t end_of_write = offset + bytes_written;
1699 
1700         if (end_of_write > cifsi->server_eof)
1701                 cifsi->server_eof = end_of_write;
1702 }
1703 
1704 static ssize_t
1705 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1706            size_t write_size, loff_t *offset)
1707 {
1708         int rc = 0;
1709         unsigned int bytes_written = 0;
1710         unsigned int total_written;
1711         struct cifs_sb_info *cifs_sb;
1712         struct cifs_tcon *tcon;
1713         struct TCP_Server_Info *server;
1714         unsigned int xid;
1715         struct dentry *dentry = open_file->dentry;
1716         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1717         struct cifs_io_parms io_parms;
1718 
1719         cifs_sb = CIFS_SB(dentry->d_sb);
1720 
1721         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1722                  write_size, *offset, dentry);
1723 
1724         tcon = tlink_tcon(open_file->tlink);
1725         server = tcon->ses->server;
1726 
1727         if (!server->ops->sync_write)
1728                 return -ENOSYS;
1729 
1730         xid = get_xid();
1731 
1732         for (total_written = 0; write_size > total_written;
1733              total_written += bytes_written) {
1734                 rc = -EAGAIN;
1735                 while (rc == -EAGAIN) {
1736                         struct kvec iov[2];
1737                         unsigned int len;
1738 
1739                         if (open_file->invalidHandle) {
1740                                 /* we could deadlock if we called
1741                                    filemap_fdatawait from here so tell
1742                                    reopen_file not to flush data to
1743                                    server now */
1744                                 rc = cifs_reopen_file(open_file, false);
1745                                 if (rc != 0)
1746                                         break;
1747                         }
1748 
1749                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1750                                   (unsigned int)write_size - total_written);
1751                         /* iov[0] is reserved for smb header */
1752                         iov[1].iov_base = (char *)write_data + total_written;
1753                         iov[1].iov_len = len;
1754                         io_parms.pid = pid;
1755                         io_parms.tcon = tcon;
1756                         io_parms.offset = *offset;
1757                         io_parms.length = len;
1758                         rc = server->ops->sync_write(xid, &open_file->fid,
1759                                         &io_parms, &bytes_written, iov, 1);
1760                 }
1761                 if (rc || (bytes_written == 0)) {
1762                         if (total_written)
1763                                 break;
1764                         else {
1765                                 free_xid(xid);
1766                                 return rc;
1767                         }
1768                 } else {
1769                         spin_lock(&d_inode(dentry)->i_lock);
1770                         cifs_update_eof(cifsi, *offset, bytes_written);
1771                         spin_unlock(&d_inode(dentry)->i_lock);
1772                         *offset += bytes_written;
1773                 }
1774         }
1775 
1776         cifs_stats_bytes_written(tcon, total_written);
1777 
1778         if (total_written > 0) {
1779                 spin_lock(&d_inode(dentry)->i_lock);
1780                 if (*offset > d_inode(dentry)->i_size)
1781                         i_size_write(d_inode(dentry), *offset);
1782                 spin_unlock(&d_inode(dentry)->i_lock);
1783         }
1784         mark_inode_dirty_sync(d_inode(dentry));
1785         free_xid(xid);
1786         return total_written;
1787 }
1788 
1789 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1790                                         bool fsuid_only)
1791 {
1792         struct cifsFileInfo *open_file = NULL;
1793         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1794         struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1795 
1796         /* only filter by fsuid on multiuser mounts */
1797         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1798                 fsuid_only = false;
1799 
1800         spin_lock(&tcon->open_file_lock);
1801         /* we could simply get the first_list_entry since write-only entries
1802            are always at the end of the list but since the first entry might
1803            have a close pending, we go through the whole list */
1804         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1805                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1806                         continue;
1807                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1808                         if (!open_file->invalidHandle) {
1809                                 /* found a good file */
1810                                 /* lock it so it will not be closed on us */
1811                                 cifsFileInfo_get(open_file);
1812                                 spin_unlock(&tcon->open_file_lock);
1813                                 return open_file;
1814                         } /* else might as well continue, and look for
1815                              another, or simply have the caller reopen it
1816                              again rather than trying to fix this handle */
1817                 } else /* write only file */
1818                         break; /* write only files are last so must be done */
1819         }
1820         spin_unlock(&tcon->open_file_lock);
1821         return NULL;
1822 }
1823 
1824 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1825                                         bool fsuid_only)
1826 {
1827         struct cifsFileInfo *open_file, *inv_file = NULL;
1828         struct cifs_sb_info *cifs_sb;
1829         struct cifs_tcon *tcon;
1830         bool any_available = false;
1831         int rc;
1832         unsigned int refind = 0;
1833 
1834         /* Having a null inode here (because mapping->host was set to zero by
1835         the VFS or MM) should not happen but we had reports of on oops (due to
1836         it being zero) during stress testcases so we need to check for it */
1837 
1838         if (cifs_inode == NULL) {
1839                 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1840                 dump_stack();
1841                 return NULL;
1842         }
1843 
1844         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1845         tcon = cifs_sb_master_tcon(cifs_sb);
1846 
1847         /* only filter by fsuid on multiuser mounts */
1848         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1849                 fsuid_only = false;
1850 
1851         spin_lock(&tcon->open_file_lock);
1852 refind_writable:
1853         if (refind > MAX_REOPEN_ATT) {
1854                 spin_unlock(&tcon->open_file_lock);
1855                 return NULL;
1856         }
1857         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1858                 if (!any_available && open_file->pid != current->tgid)
1859                         continue;
1860                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1861                         continue;
1862                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1863                         if (!open_file->invalidHandle) {
1864                                 /* found a good writable file */
1865                                 cifsFileInfo_get(open_file);
1866                                 spin_unlock(&tcon->open_file_lock);
1867                                 return open_file;
1868                         } else {
1869                                 if (!inv_file)
1870                                         inv_file = open_file;
1871                         }
1872                 }
1873         }
1874         /* couldn't find useable FH with same pid, try any available */
1875         if (!any_available) {
1876                 any_available = true;
1877                 goto refind_writable;
1878         }
1879 
1880         if (inv_file) {
1881                 any_available = false;
1882                 cifsFileInfo_get(inv_file);
1883         }
1884 
1885         spin_unlock(&tcon->open_file_lock);
1886 
1887         if (inv_file) {
1888                 rc = cifs_reopen_file(inv_file, false);
1889                 if (!rc)
1890                         return inv_file;
1891                 else {
1892                         spin_lock(&tcon->open_file_lock);
1893                         list_move_tail(&inv_file->flist,
1894                                         &cifs_inode->openFileList);
1895                         spin_unlock(&tcon->open_file_lock);
1896                         cifsFileInfo_put(inv_file);
1897                         ++refind;
1898                         inv_file = NULL;
1899                         spin_lock(&tcon->open_file_lock);
1900                         goto refind_writable;
1901                 }
1902         }
1903 
1904         return NULL;
1905 }
1906 
1907 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1908 {
1909         struct address_space *mapping = page->mapping;
1910         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1911         char *write_data;
1912         int rc = -EFAULT;
1913         int bytes_written = 0;
1914         struct inode *inode;
1915         struct cifsFileInfo *open_file;
1916 
1917         if (!mapping || !mapping->host)
1918                 return -EFAULT;
1919 
1920         inode = page->mapping->host;
1921 
1922         offset += (loff_t)from;
1923         write_data = kmap(page);
1924         write_data += from;
1925 
1926         if ((to > PAGE_SIZE) || (from > to)) {
1927                 kunmap(page);
1928                 return -EIO;
1929         }
1930 
1931         /* racing with truncate? */
1932         if (offset > mapping->host->i_size) {
1933                 kunmap(page);
1934                 return 0; /* don't care */
1935         }
1936 
1937         /* check to make sure that we are not extending the file */
1938         if (mapping->host->i_size - offset < (loff_t)to)
1939                 to = (unsigned)(mapping->host->i_size - offset);
1940 
1941         open_file = find_writable_file(CIFS_I(mapping->host), false);
1942         if (open_file) {
1943                 bytes_written = cifs_write(open_file, open_file->pid,
1944                                            write_data, to - from, &offset);
1945                 cifsFileInfo_put(open_file);
1946                 /* Does mm or vfs already set times? */
1947                 inode->i_atime = inode->i_mtime = current_time(inode);
1948                 if ((bytes_written > 0) && (offset))
1949                         rc = 0;
1950                 else if (bytes_written < 0)
1951                         rc = bytes_written;
1952         } else {
1953                 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1954                 rc = -EIO;
1955         }
1956 
1957         kunmap(page);
1958         return rc;
1959 }
1960 
1961 static struct cifs_writedata *
1962 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1963                           pgoff_t end, pgoff_t *index,
1964                           unsigned int *found_pages)
1965 {
1966         struct cifs_writedata *wdata;
1967 
1968         wdata = cifs_writedata_alloc((unsigned int)tofind,
1969                                      cifs_writev_complete);
1970         if (!wdata)
1971                 return NULL;
1972 
1973         *found_pages = find_get_pages_range_tag(mapping, index, end,
1974                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
1975         return wdata;
1976 }
1977 
1978 static unsigned int
1979 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1980                     struct address_space *mapping,
1981                     struct writeback_control *wbc,
1982                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1983 {
1984         unsigned int nr_pages = 0, i;
1985         struct page *page;
1986 
1987         for (i = 0; i < found_pages; i++) {
1988                 page = wdata->pages[i];
1989                 /*
1990                  * At this point we hold neither the i_pages lock nor the
1991                  * page lock: the page may be truncated or invalidated
1992                  * (changing page->mapping to NULL), or even swizzled
1993                  * back from swapper_space to tmpfs file mapping
1994                  */
1995 
1996                 if (nr_pages == 0)
1997                         lock_page(page);
1998                 else if (!trylock_page(page))
1999                         break;
2000 
2001                 if (unlikely(page->mapping != mapping)) {
2002                         unlock_page(page);
2003                         break;
2004                 }
2005 
2006                 if (!wbc->range_cyclic && page->index > end) {
2007                         *done = true;
2008                         unlock_page(page);
2009                         break;
2010                 }
2011 
2012                 if (*next && (page->index != *next)) {
2013                         /* Not next consecutive page */
2014                         unlock_page(page);
2015                         break;
2016                 }
2017 
2018                 if (wbc->sync_mode != WB_SYNC_NONE)
2019                         wait_on_page_writeback(page);
2020 
2021                 if (PageWriteback(page) ||
2022                                 !clear_page_dirty_for_io(page)) {
2023                         unlock_page(page);
2024                         break;
2025                 }
2026 
2027                 /*
2028                  * This actually clears the dirty bit in the radix tree.
2029                  * See cifs_writepage() for more commentary.
2030                  */
2031                 set_page_writeback(page);
2032                 if (page_offset(page) >= i_size_read(mapping->host)) {
2033                         *done = true;
2034                         unlock_page(page);
2035                         end_page_writeback(page);
2036                         break;
2037                 }
2038 
2039                 wdata->pages[i] = page;
2040                 *next = page->index + 1;
2041                 ++nr_pages;
2042         }
2043 
2044         /* reset index to refind any pages skipped */
2045         if (nr_pages == 0)
2046                 *index = wdata->pages[0]->index + 1;
2047 
2048         /* put any pages we aren't going to use */
2049         for (i = nr_pages; i < found_pages; i++) {
2050                 put_page(wdata->pages[i]);
2051                 wdata->pages[i] = NULL;
2052         }
2053 
2054         return nr_pages;
2055 }
2056 
2057 static int
2058 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2059                  struct address_space *mapping, struct writeback_control *wbc)
2060 {
2061         int rc = 0;
2062         struct TCP_Server_Info *server;
2063         unsigned int i;
2064 
2065         wdata->sync_mode = wbc->sync_mode;
2066         wdata->nr_pages = nr_pages;
2067         wdata->offset = page_offset(wdata->pages[0]);
2068         wdata->pagesz = PAGE_SIZE;
2069         wdata->tailsz = min(i_size_read(mapping->host) -
2070                         page_offset(wdata->pages[nr_pages - 1]),
2071                         (loff_t)PAGE_SIZE);
2072         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2073 
2074         if (wdata->cfile != NULL)
2075                 cifsFileInfo_put(wdata->cfile);
2076         wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2077         if (!wdata->cfile) {
2078                 cifs_dbg(VFS, "No writable handles for inode\n");
2079                 rc = -EBADF;
2080         } else {
2081                 wdata->pid = wdata->cfile->pid;
2082                 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2083                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2084         }
2085 
2086         for (i = 0; i < nr_pages; ++i)
2087                 unlock_page(wdata->pages[i]);
2088 
2089         return rc;
2090 }
2091 
2092 static int cifs_writepages(struct address_space *mapping,
2093                            struct writeback_control *wbc)
2094 {
2095         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2096         struct TCP_Server_Info *server;
2097         bool done = false, scanned = false, range_whole = false;
2098         pgoff_t end, index;
2099         struct cifs_writedata *wdata;
2100         int rc = 0;
2101 
2102         /*
2103          * If wsize is smaller than the page cache size, default to writing
2104          * one page at a time via cifs_writepage
2105          */
2106         if (cifs_sb->wsize < PAGE_SIZE)
2107                 return generic_writepages(mapping, wbc);
2108 
2109         if (wbc->range_cyclic) {
2110                 index = mapping->writeback_index; /* Start from prev offset */
2111                 end = -1;
2112         } else {
2113                 index = wbc->range_start >> PAGE_SHIFT;
2114                 end = wbc->range_end >> PAGE_SHIFT;
2115                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2116                         range_whole = true;
2117                 scanned = true;
2118         }
2119         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2120 retry:
2121         while (!done && index <= end) {
2122                 unsigned int i, nr_pages, found_pages, wsize, credits;
2123                 pgoff_t next = 0, tofind, saved_index = index;
2124 
2125                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2126                                                    &wsize, &credits);
2127                 if (rc)
2128                         break;
2129 
2130                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2131 
2132                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2133                                                   &found_pages);
2134                 if (!wdata) {
2135                         rc = -ENOMEM;
2136                         add_credits_and_wake_if(server, credits, 0);
2137                         break;
2138                 }
2139 
2140                 if (found_pages == 0) {
2141                         kref_put(&wdata->refcount, cifs_writedata_release);
2142                         add_credits_and_wake_if(server, credits, 0);
2143                         break;
2144                 }
2145 
2146                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2147                                                end, &index, &next, &done);
2148 
2149                 /* nothing to write? */
2150                 if (nr_pages == 0) {
2151                         kref_put(&wdata->refcount, cifs_writedata_release);
2152                         add_credits_and_wake_if(server, credits, 0);
2153                         continue;
2154                 }
2155 
2156                 wdata->credits = credits;
2157 
2158                 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2159 
2160                 /* send failure -- clean up the mess */
2161                 if (rc != 0) {
2162                         add_credits_and_wake_if(server, wdata->credits, 0);
2163                         for (i = 0; i < nr_pages; ++i) {
2164                                 if (rc == -EAGAIN)
2165                                         redirty_page_for_writepage(wbc,
2166                                                            wdata->pages[i]);
2167                                 else
2168                                         SetPageError(wdata->pages[i]);
2169                                 end_page_writeback(wdata->pages[i]);
2170                                 put_page(wdata->pages[i]);
2171                         }
2172                         if (rc != -EAGAIN)
2173                                 mapping_set_error(mapping, rc);
2174                 }
2175                 kref_put(&wdata->refcount, cifs_writedata_release);
2176 
2177                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2178                         index = saved_index;
2179                         continue;
2180                 }
2181 
2182                 wbc->nr_to_write -= nr_pages;
2183                 if (wbc->nr_to_write <= 0)
2184                         done = true;
2185 
2186                 index = next;
2187         }
2188 
2189         if (!scanned && !done) {
2190                 /*
2191                  * We hit the last page and there is more work to be done: wrap
2192                  * back to the start of the file
2193                  */
2194                 scanned = true;
2195                 index = 0;
2196                 goto retry;
2197         }
2198 
2199         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2200                 mapping->writeback_index = index;
2201 
2202         return rc;
2203 }
2204 
2205 static int
2206 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2207 {
2208         int rc;
2209         unsigned int xid;
2210 
2211         xid = get_xid();
2212 /* BB add check for wbc flags */
2213         get_page(page);
2214         if (!PageUptodate(page))
2215                 cifs_dbg(FYI, "ppw - page not up to date\n");
2216 
2217         /*
2218          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2219          *
2220          * A writepage() implementation always needs to do either this,
2221          * or re-dirty the page with "redirty_page_for_writepage()" in
2222          * the case of a failure.
2223          *
2224          * Just unlocking the page will cause the radix tree tag-bits
2225          * to fail to update with the state of the page correctly.
2226          */
2227         set_page_writeback(page);
2228 retry_write:
2229         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2230         if (rc == -EAGAIN) {
2231                 if (wbc->sync_mode == WB_SYNC_ALL)
2232                         goto retry_write;
2233                 redirty_page_for_writepage(wbc, page);
2234         } else if (rc != 0) {
2235                 SetPageError(page);
2236                 mapping_set_error(page->mapping, rc);
2237         } else {
2238                 SetPageUptodate(page);
2239         }
2240         end_page_writeback(page);
2241         put_page(page);
2242         free_xid(xid);
2243         return rc;
2244 }
2245 
/*
 * Write @page via cifs_writepage_locked() and then unlock it.
 * Returns the result of the write.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);

	return rc;
}
2252 
2253 static int cifs_write_end(struct file *file, struct address_space *mapping,
2254                         loff_t pos, unsigned len, unsigned copied,
2255                         struct page *page, void *fsdata)
2256 {
2257         int rc;
2258         struct inode *inode = mapping->host;
2259         struct cifsFileInfo *cfile = file->private_data;
2260         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2261         __u32 pid;
2262 
2263         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2264                 pid = cfile->pid;
2265         else
2266                 pid = current->tgid;
2267 
2268         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2269                  page, pos, copied);
2270 
2271         if (PageChecked(page)) {
2272                 if (copied == len)
2273                         SetPageUptodate(page);
2274                 ClearPageChecked(page);
2275         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2276                 SetPageUptodate(page);
2277 
2278         if (!PageUptodate(page)) {
2279                 char *page_data;
2280                 unsigned offset = pos & (PAGE_SIZE - 1);
2281                 unsigned int xid;
2282 
2283                 xid = get_xid();
2284                 /* this is probably better than directly calling
2285                    partialpage_write since in this function the file handle is
2286                    known which we might as well leverage */
2287                 /* BB check if anything else missing out of ppw
2288                    such as updating last write time */
2289                 page_data = kmap(page);
2290                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2291                 /* if (rc < 0) should we set writebehind rc? */
2292                 kunmap(page);
2293 
2294                 free_xid(xid);
2295         } else {
2296                 rc = copied;
2297                 pos += copied;
2298                 set_page_dirty(page);
2299         }
2300 
2301         if (rc > 0) {
2302                 spin_lock(&inode->i_lock);
2303                 if (pos > inode->i_size)
2304                         i_size_write(inode, pos);
2305                 spin_unlock(&inode->i_lock);
2306         }
2307 
2308         unlock_page(page);
2309         put_page(page);
2310 
2311         return rc;
2312 }
2313 
2314 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2315                       int datasync)
2316 {
2317         unsigned int xid;
2318         int rc = 0;
2319         struct cifs_tcon *tcon;
2320         struct TCP_Server_Info *server;
2321         struct cifsFileInfo *smbfile = file->private_data;
2322         struct inode *inode = file_inode(file);
2323         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2324 
2325         rc = file_write_and_wait_range(file, start, end);
2326         if (rc)
2327                 return rc;
2328         inode_lock(inode);
2329 
2330         xid = get_xid();
2331 
2332         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2333                  file, datasync);
2334 
2335         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2336                 rc = cifs_zap_mapping(inode);
2337                 if (rc) {
2338                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2339                         rc = 0; /* don't care about it in fsync */
2340                 }
2341         }
2342 
2343         tcon = tlink_tcon(smbfile->tlink);
2344         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2345                 server = tcon->ses->server;
2346                 if (server->ops->flush)
2347                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2348                 else
2349                         rc = -ENOSYS;
2350         }
2351 
2352         free_xid(xid);
2353         inode_unlock(inode);
2354         return rc;
2355 }
2356 
2357 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2358 {
2359         unsigned int xid;
2360         int rc = 0;
2361         struct cifs_tcon *tcon;
2362         struct TCP_Server_Info *server;
2363         struct cifsFileInfo *smbfile = file->private_data;
2364         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2365         struct inode *inode = file->f_mapping->host;
2366 
2367         rc = file_write_and_wait_range(file, start, end);
2368         if (rc)
2369                 return rc;
2370         inode_lock(inode);
2371 
2372         xid = get_xid();
2373 
2374         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2375                  file, datasync);
2376 
2377         tcon = tlink_tcon(smbfile->tlink);
2378         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2379                 server = tcon->ses->server;
2380                 if (server->ops->flush)
2381                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2382                 else
2383                         rc = -ENOSYS;
2384         }
2385 
2386         free_xid(xid);
2387         inode_unlock(inode);
2388         return rc;
2389 }
2390 
2391 /*
2392  * As file closes, flush all cached write data for this inode checking
2393  * for write behind errors.
2394  */
2395 int cifs_flush(struct file *file, fl_owner_t id)
2396 {
2397         struct inode *inode = file_inode(file);
2398         int rc = 0;
2399 
2400         if (file->f_mode & FMODE_WRITE)
2401                 rc = filemap_write_and_wait(inode->i_mapping);
2402 
2403         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2404 
2405         return rc;
2406 }
2407 
2408 static int
2409 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2410 {
2411         int rc = 0;
2412         unsigned long i;
2413 
2414         for (i = 0; i < num_pages; i++) {
2415                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2416                 if (!pages[i]) {
2417                         /*
2418                          * save number of pages we have already allocated and
2419                          * return with ENOMEM error
2420                          */
2421                         num_pages = i;
2422                         rc = -ENOMEM;
2423                         break;
2424                 }
2425         }
2426 
2427         if (rc) {
2428                 for (i = 0; i < num_pages; i++)
2429                         put_page(pages[i]);
2430         }
2431         return rc;
2432 }
2433 
2434 static inline
2435 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2436 {
2437         size_t num_pages;
2438         size_t clen;
2439 
2440         clen = min_t(const size_t, len, wsize);
2441         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2442 
2443         if (cur_len)
2444                 *cur_len = clen;
2445 
2446         return num_pages;
2447 }
2448 
2449 static void
2450 cifs_uncached_writedata_release(struct kref *refcount)
2451 {
2452         int i;
2453         struct cifs_writedata *wdata = container_of(refcount,
2454                                         struct cifs_writedata, refcount);
2455 
2456         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2457         for (i = 0; i < wdata->nr_pages; i++)
2458                 put_page(wdata->pages[i]);
2459         cifs_writedata_release(refcount);
2460 }
2461 
2462 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2463 
2464 static void
2465 cifs_uncached_writev_complete(struct work_struct *work)
2466 {
2467         struct cifs_writedata *wdata = container_of(work,
2468                                         struct cifs_writedata, work);
2469         struct inode *inode = d_inode(wdata->cfile->dentry);
2470         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2471 
2472         spin_lock(&inode->i_lock);
2473         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2474         if (cifsi->server_eof > inode->i_size)
2475                 i_size_write(inode, cifsi->server_eof);
2476         spin_unlock(&inode->i_lock);
2477 
2478         complete(&wdata->done);
2479         collect_uncached_write_data(wdata->ctx);
2480         /* the below call can possibly free the last ref to aio ctx */
2481         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2482 }
2483 
2484 static int
2485 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2486                       size_t *len, unsigned long *num_pages)
2487 {
2488         size_t save_len, copied, bytes, cur_len = *len;
2489         unsigned long i, nr_pages = *num_pages;
2490 
2491         save_len = cur_len;
2492         for (i = 0; i < nr_pages; i++) {
2493                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2494                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2495                 cur_len -= copied;
2496                 /*
2497                  * If we didn't copy as much as we expected, then that
2498                  * may mean we trod into an unmapped area. Stop copying
2499                  * at that point. On the next pass through the big
2500                  * loop, we'll likely end up getting a zero-length
2501                  * write and bailing out of it.
2502                  */
2503                 if (copied < bytes)
2504                         break;
2505         }
2506         cur_len = save_len - cur_len;
2507         *len = cur_len;
2508 
2509         /*
2510          * If we have no data to send, then that probably means that
2511          * the copy above failed altogether. That's most likely because
2512          * the address in the iovec was bogus. Return -EFAULT and let
2513          * the caller free anything we allocated and bail out.
2514          */
2515         if (!cur_len)
2516                 return -EFAULT;
2517 
2518         /*
2519          * i + 1 now represents the number of pages we actually used in
2520          * the copy phase above.
2521          */
2522         *num_pages = i + 1;
2523         return 0;
2524 }
2525 
2526 static int
2527 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2528                      struct cifsFileInfo *open_file,
2529                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2530                      struct cifs_aio_ctx *ctx)
2531 {
2532         int rc = 0;
2533         size_t cur_len;
2534         unsigned long nr_pages, num_pages, i;
2535         struct cifs_writedata *wdata;
2536         struct iov_iter saved_from = *from;
2537         loff_t saved_offset = offset;
2538         pid_t pid;
2539         struct TCP_Server_Info *server;
2540 
2541         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2542                 pid = open_file->pid;
2543         else
2544                 pid = current->tgid;
2545 
2546         server = tlink_tcon(open_file->tlink)->ses->server;
2547 
2548         do {
2549                 unsigned int wsize, credits;
2550 
2551                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2552                                                    &wsize, &credits);
2553                 if (rc)
2554                         break;
2555 
2556                 nr_pages = get_numpages(wsize, len, &cur_len);
2557                 wdata = cifs_writedata_alloc(nr_pages,
2558                                              cifs_uncached_writev_complete);
2559                 if (!wdata) {
2560                         rc = -ENOMEM;
2561                         add_credits_and_wake_if(server, credits, 0);
2562                         break;
2563                 }
2564 
2565                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2566                 if (rc) {
2567                         kfree(wdata);
2568                         add_credits_and_wake_if(server, credits, 0);
2569                         break;
2570                 }
2571 
2572                 num_pages = nr_pages;
2573                 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2574                 if (rc) {
2575                         for (i = 0; i < nr_pages; i++)
2576                                 put_page(wdata->pages[i]);
2577                         kfree(wdata);
2578                         add_credits_and_wake_if(server, credits, 0);
2579                         break;
2580                 }
2581 
2582                 /*
2583                  * Bring nr_pages down to the number of pages we actually used,
2584                  * and free any pages that we didn't use.
2585                  */
2586                 for ( ; nr_pages > num_pages; nr_pages--)
2587                         put_page(wdata->pages[nr_pages - 1]);
2588 
2589                 wdata->sync_mode = WB_SYNC_ALL;
2590                 wdata->nr_pages = nr_pages;
2591                 wdata->offset = (__u64)offset;
2592                 wdata->cfile = cifsFileInfo_get(open_file);
2593                 wdata->pid = pid;
2594                 wdata->bytes = cur_len;
2595                 wdata->pagesz = PAGE_SIZE;
2596                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2597                 wdata->credits = credits;
2598                 wdata->ctx = ctx;
2599                 kref_get(&ctx->refcount);
2600 
2601                 if (!wdata->cfile->invalidHandle ||
2602                     !(rc = cifs_reopen_file(wdata->cfile, false)))
2603                         rc = server->ops->async_writev(wdata,
2604                                         cifs_uncached_writedata_release);
2605                 if (rc) {
2606                         add_credits_and_wake_if(server, wdata->credits, 0);
2607                         kref_put(&wdata->refcount,
2608                                  cifs_uncached_writedata_release);
2609                         if (rc == -EAGAIN) {
2610                                 *from = saved_from;
2611                                 iov_iter_advance(from, offset - saved_offset);
2612                                 continue;
2613                         }
2614                         break;
2615                 }
2616 
2617                 list_add_tail(&wdata->list, wdata_list);
2618                 offset += cur_len;
2619                 len -= cur_len;
2620         } while (len > 0);
2621 
2622         return rc;
2623 }
2624 
2625 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2626 {
2627         struct cifs_writedata *wdata, *tmp;
2628         struct cifs_tcon *tcon;
2629         struct cifs_sb_info *cifs_sb;
2630         struct dentry *dentry = ctx->cfile->dentry;
2631         unsigned int i;
2632         int rc;
2633 
2634         tcon = tlink_tcon(ctx->cfile->tlink);
2635         cifs_sb = CIFS_SB(dentry->d_sb);
2636 
2637         mutex_lock(&ctx->aio_mutex);
2638 
2639         if (list_empty(&ctx->list)) {
2640                 mutex_unlock(&ctx->aio_mutex);
2641                 return;
2642         }
2643 
2644         rc = ctx->rc;
2645         /*
2646          * Wait for and collect replies for any successful sends in order of
2647          * increasing offset. Once an error is hit, then return without waiting
2648          * for any more replies.
2649          */
2650 restart_loop:
2651         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2652                 if (!rc) {
2653                         if (!try_wait_for_completion(&wdata->done)) {
2654                                 mutex_unlock(&ctx->aio_mutex);
2655                                 return;
2656                         }
2657 
2658                         if (wdata->result)
2659                                 rc = wdata->result;
2660                         else
2661                                 ctx->total_len += wdata->bytes;
2662 
2663                         /* resend call if it's a retryable error */
2664                         if (rc == -EAGAIN) {
2665                                 struct list_head tmp_list;
2666                                 struct iov_iter tmp_from = ctx->iter;
2667 
2668                                 INIT_LIST_HEAD(&tmp_list);
2669                                 list_del_init(&wdata->list);
2670 
2671                                 iov_iter_advance(&tmp_from,
2672                                                  wdata->offset - ctx->pos);
2673 
2674                                 rc = cifs_write_from_iter(wdata->offset,
2675                                                 wdata->bytes, &tmp_from,
2676                                                 ctx->cfile, cifs_sb, &tmp_list,
2677                                                 ctx);
2678 
2679                                 list_splice(&tmp_list, &ctx->list);
2680 
2681                                 kref_put(&wdata->refcount,
2682                                          cifs_uncached_writedata_release);
2683                                 goto restart_loop;
2684                         }
2685                 }
2686                 list_del_init(&wdata->list);
2687                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2688         }
2689 
2690         for (i = 0; i < ctx->npages; i++)
2691                 put_page(ctx->bv[i].bv_page);
2692 
2693         cifs_stats_bytes_written(tcon, ctx->total_len);
2694         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2695 
2696         ctx->rc = (rc == 0) ? ctx->total_len : rc;
2697 
2698         mutex_unlock(&ctx->aio_mutex);
2699 
2700         if (ctx->iocb && ctx->iocb->ki_complete)
2701                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2702         else
2703                 complete(&ctx->done);
2704 }
2705 
2706 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2707 {
2708         struct file *file = iocb->ki_filp;
2709         ssize_t total_written = 0;
2710         struct cifsFileInfo *cfile;
2711         struct cifs_tcon *tcon;
2712         struct cifs_sb_info *cifs_sb;
2713         struct cifs_aio_ctx *ctx;
2714         struct iov_iter saved_from = *from;
2715         int rc;
2716 
2717         /*
2718          * BB - optimize the way when signing is disabled. We can drop this
2719          * extra memory-to-memory copying and use iovec buffers for constructing
2720          * write request.
2721          */
2722 
2723         rc = generic_write_checks(iocb, from);
2724         if (rc <= 0)
2725                 return rc;
2726 
2727         cifs_sb = CIFS_FILE_SB(file);
2728         cfile = file->private_data;
2729         tcon = tlink_tcon(cfile->tlink);
2730 
2731         if (!tcon->ses->server->ops->async_writev)
2732                 return -ENOSYS;
2733 
2734         ctx = cifs_aio_ctx_alloc();
2735         if (!ctx)
2736                 return -ENOMEM;
2737 
2738         ctx->cfile = cifsFileInfo_get(cfile);
2739 
2740         if (!is_sync_kiocb(iocb))
2741                 ctx->iocb = iocb;
2742 
2743         ctx->pos = iocb->ki_pos;
2744 
2745         rc = setup_aio_ctx_iter(ctx, from, WRITE);
2746         if (rc) {
2747                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2748                 return rc;
2749         }
2750 
2751         /* grab a lock here due to read response handlers can access ctx */
2752         mutex_lock(&ctx->aio_mutex);
2753 
2754         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2755                                   cfile, cifs_sb, &ctx->list, ctx);
2756 
2757         /*
2758          * If at least one write was successfully sent, then discard any rc
2759          * value from the later writes. If the other write succeeds, then
2760          * we'll end up returning whatever was written. If it fails, then
2761          * we'll get a new rc value from that.
2762          */
2763         if (!list_empty(&ctx->list))
2764                 rc = 0;
2765 
2766         mutex_unlock(&ctx->aio_mutex);
2767 
2768         if (rc) {
2769                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2770                 return rc;
2771         }
2772 
2773         if (!is_sync_kiocb(iocb)) {
2774                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2775                 return -EIOCBQUEUED;
2776         }
2777 
2778         rc = wait_for_completion_killable(&ctx->done);
2779         if (rc) {
2780                 mutex_lock(&ctx->aio_mutex);
2781                 ctx->rc = rc = -EINTR;
2782                 total_written = ctx->total_len;
2783                 mutex_unlock(&ctx->aio_mutex);
2784         } else {
2785                 rc = ctx->rc;
2786                 total_written = ctx->total_len;
2787         }
2788 
2789         kref_put(&ctx->refcount, cifs_aio_ctx_release);
2790 
2791         if (unlikely(!total_written))
2792                 return rc;
2793 
2794         iocb->ki_pos += total_written;
2795         return total_written;
2796 }
2797 
2798 static ssize_t
2799 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2800 {
2801         struct file *file = iocb->ki_filp;
2802         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2803         struct inode *inode = file->f_mapping->host;
2804         struct cifsInodeInfo *cinode = CIFS_I(inode);
2805         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2806         ssize_t rc;
2807 
2808         inode_lock(inode);
2809         /*
2810          * We need to hold the sem to be sure nobody modifies lock list
2811          * with a brlock that prevents writing.
2812          */
2813         down_read(&cinode->lock_sem);
2814 
2815         rc = generic_write_checks(iocb, from);
2816         if (rc <= 0)
2817                 goto out;
2818 
2819         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2820                                      server->vals->exclusive_lock_type, NULL,
2821                                      CIFS_WRITE_OP))
2822                 rc = __generic_file_write_iter(iocb, from);
2823         else
2824                 rc = -EACCES;
2825 out:
2826         up_read(&cinode->lock_sem);
2827         inode_unlock(inode);
2828 
2829         if (rc > 0)
2830                 rc = generic_write_sync(iocb, rc);
2831         return rc;
2832 }
2833 
2834 ssize_t
2835 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2836 {
2837         struct inode *inode = file_inode(iocb->ki_filp);
2838         struct cifsInodeInfo *cinode = CIFS_I(inode);
2839         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2840         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2841                                                 iocb->ki_filp->private_data;
2842         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2843         ssize_t written;
2844 
2845         written = cifs_get_writer(cinode);
2846         if (written)
2847                 return written;
2848 
2849         if (CIFS_CACHE_WRITE(cinode)) {
2850                 if (cap_unix(tcon->ses) &&
2851                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2852                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2853                         written = generic_file_write_iter(iocb, from);
2854                         goto out;
2855                 }
2856                 written = cifs_writev(iocb, from);
2857                 goto out;
2858         }
2859         /*
2860          * For non-oplocked files in strict cache mode we need to write the data
2861          * to the server exactly from the pos to pos+len-1 rather than flush all
2862          * affected pages because it may cause a error with mandatory locks on
2863          * these pages but not on the region from pos to ppos+len-1.
2864          */
2865         written = cifs_user_writev(iocb, from);
2866         if (written > 0 && CIFS_CACHE_READ(cinode)) {
2867                 /*
2868                  * Windows 7 server can delay breaking level2 oplock if a write
2869                  * request comes - break it on the client to prevent reading
2870                  * an old data.
2871                  */
2872                 cifs_zap_mapping(inode);
2873                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2874                          inode);
2875                 cinode->oplock = 0;
2876         }
2877 out:
2878         cifs_put_writer(cinode);
2879         return written;
2880 }
2881 
2882 static struct cifs_readdata *
2883 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2884 {
2885         struct cifs_readdata *rdata;
2886 
2887         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2888                         GFP_KERNEL);
2889         if (rdata != NULL) {
2890                 kref_init(&rdata->refcount);
2891                 INIT_LIST_HEAD(&rdata->list);
2892                 init_completion(&rdata->done);
2893                 INIT_WORK(&rdata->work, complete);
2894         }
2895 
2896         return rdata;
2897 }
2898 
2899 void
2900 cifs_readdata_release(struct kref *refcount)
2901 {
2902         struct cifs_readdata *rdata = container_of(refcount,
2903                                         struct cifs_readdata, refcount);
2904 #ifdef CONFIG_CIFS_SMB_DIRECT
2905         if (rdata->mr) {
2906                 smbd_deregister_mr(rdata->mr);
2907                 rdata->mr = NULL;
2908         }
2909 #endif
2910         if (rdata->cfile)
2911                 cifsFileInfo_put(rdata->cfile);
2912 
2913         kfree(rdata);
2914 }
2915 
2916 static int
2917 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2918 {
2919         int rc = 0;
2920         struct page *page;
2921         unsigned int i;
2922 
2923         for (i = 0; i < nr_pages; i++) {
2924                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2925                 if (!page) {
2926                         rc = -ENOMEM;
2927                         break;
2928                 }
2929                 rdata->pages[i] = page;
2930         }
2931 
2932         if (rc) {
2933                 for (i = 0; i < nr_pages; i++) {
2934                         put_page(rdata->pages[i]);
2935                         rdata->pages[i] = NULL;
2936                 }
2937         }
2938         return rc;
2939 }
2940 
2941 static void
2942 cifs_uncached_readdata_release(struct kref *refcount)
2943 {
2944         struct cifs_readdata *rdata = container_of(refcount,
2945                                         struct cifs_readdata, refcount);
2946         unsigned int i;
2947 
2948         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
2949         for (i = 0; i < rdata->nr_pages; i++) {
2950                 put_page(rdata->pages[i]);
2951                 rdata->pages[i] = NULL;
2952         }
2953         cifs_readdata_release(refcount);
2954 }
2955 
2956 /**
2957  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2958  * @rdata:      the readdata response with list of pages holding data
2959  * @iter:       destination for our data
2960  *
2961  * This function copies data from a list of pages in a readdata response into
2962  * an array of iovecs. It will first calculate where the data should go
2963  * based on the info in the readdata and then copy the data into that spot.
2964  */
2965 static int
2966 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2967 {
2968         size_t remaining = rdata->got_bytes;
2969         unsigned int i;
2970 
2971         for (i = 0; i < rdata->nr_pages; i++) {
2972                 struct page *page = rdata->pages[i];
2973                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2974                 size_t written;
2975 
2976                 if (unlikely(iter->type & ITER_PIPE)) {
2977                         void *addr = kmap_atomic(page);
2978 
2979                         written = copy_to_iter(addr, copy, iter);
2980                         kunmap_atomic(addr);
2981                 } else
2982                         written = copy_page_to_iter(page, 0, copy, iter);
2983                 remaining -= written;
2984                 if (written < copy && iov_iter_count(iter) > 0)
2985                         break;
2986         }
2987         return remaining ? -EFAULT : 0;
2988 }
2989 
2990 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
2991 
2992 static void
2993 cifs_uncached_readv_complete(struct work_struct *work)
2994 {
2995         struct cifs_readdata *rdata = container_of(work,
2996                                                 struct cifs_readdata, work);
2997 
2998         complete(&rdata->done);
2999         collect_uncached_read_data(rdata->ctx);
3000         /* the below call can possibly free the last ref to aio ctx */
3001         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3002 }
3003 
3004 static int
3005 uncached_fill_pages(struct TCP_Server_Info *server,
3006                     struct cifs_readdata *rdata, struct iov_iter *iter,
3007                     unsigned int len)
3008 {
3009         int result = 0;
3010         unsigned int i;
3011         unsigned int nr_pages = rdata->nr_pages;
3012 
3013         rdata->got_bytes = 0;
3014         rdata->tailsz = PAGE_SIZE;
3015         for (i = 0; i < nr_pages; i++) {
3016                 struct page *page = rdata->pages[i];
3017                 size_t n;
3018 
3019                 if (len <= 0) {
3020                         /* no need to hold page hostage */
3021                         rdata->pages[i] = NULL;
3022                         rdata->nr_pages--;
3023                         put_page(page);
3024                         continue;
3025                 }
3026                 n = len;
3027                 if (len >= PAGE_SIZE) {
3028                         /* enough data to fill the page */
3029                         n = PAGE_SIZE;
3030                         len -= n;
3031                 } else {
3032                         zero_user(page, len, PAGE_SIZE - len);
3033                         rdata->tailsz = len;
3034                         len = 0;
3035                 }
3036                 if (iter)
3037                         result = copy_page_from_iter(page, 0, n, iter);
3038 #ifdef CONFIG_CIFS_SMB_DIRECT
3039                 else if (rdata->mr)
3040                         result = n;
3041 #endif
3042                 else
3043                         result = cifs_read_page_from_socket(server, page, n);
3044                 if (result < 0)
3045                         break;
3046 
3047                 rdata->got_bytes += result;
3048         }
3049 
3050         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3051                                                 rdata->got_bytes : result;
3052 }
3053 
3054 static int
3055 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3056                               struct cifs_readdata *rdata, unsigned int len)
3057 {
3058         return uncached_fill_pages(server, rdata, NULL, len);
3059 }
3060 
3061 static int
3062 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3063                               struct cifs_readdata *rdata,
3064                               struct iov_iter *iter)
3065 {
3066         return uncached_fill_pages(server, rdata, iter, iter->count);
3067 }
3068 
3069 static int
3070 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3071                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3072                      struct cifs_aio_ctx *ctx)
3073 {
3074         struct cifs_readdata *rdata;
3075         unsigned int npages, rsize, credits;
3076         size_t cur_len;
3077         int rc;
3078         pid_t pid;
3079         struct TCP_Server_Info *server;
3080 
3081         server = tlink_tcon(open_file->tlink)->ses->server;
3082 
3083         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3084                 pid = open_file->pid;
3085         else
3086                 pid = current->tgid;
3087 
3088         do {
3089                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3090                                                    &rsize, &credits);
3091                 if (rc)
3092                         break;
3093 
3094                 cur_len = min_t(const size_t, len, rsize);
3095                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3096 
3097                 /* allocate a readdata struct */
3098                 rdata = cifs_readdata_alloc(npages,
3099                                             cifs_uncached_readv_complete);
3100                 if (!rdata) {
3101                         add_credits_and_wake_if(server, credits, 0);
3102                         rc = -ENOMEM;
3103                         break;
3104                 }
3105 
3106                 rc = cifs_read_allocate_pages(rdata, npages);
3107                 if (rc)
3108                         goto error;
3109 
3110                 rdata->cfile = cifsFileInfo_get(open_file);
3111                 rdata->nr_pages = npages;
3112                 rdata->offset = offset;
3113                 rdata->bytes = cur_len;
3114                 rdata->pid = pid;
3115                 rdata->pagesz = PAGE_SIZE;
3116                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3117                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3118                 rdata->credits = credits;
3119                 rdata->ctx = ctx;
3120                 kref_get(&ctx->refcount);
3121 
3122                 if (!rdata->cfile->invalidHandle ||
3123                     !(rc = cifs_reopen_file(rdata->cfile, true)))
3124                         rc = server->ops->async_readv(rdata);
3125 error:
3126                 if (rc) {
3127                         add_credits_and_wake_if(server, rdata->credits, 0);
3128                         kref_put(&rdata->refcount,
3129                                  cifs_uncached_readdata_release);
3130                         if (rc == -EAGAIN)
3131                                 continue;
3132                         break;
3133                 }
3134 
3135                 list_add_tail(&rdata->list, rdata_list);
3136                 offset += cur_len;
3137                 len -= cur_len;
3138         } while (len > 0);
3139 
3140         return rc;
3141 }
3142 
/*
 * collect_uncached_read_data - reap the async reads queued on @ctx and
 * complete the request once all of them have been consumed.
 *
 * Runs under ctx->aio_mutex. Returns early, leaving the list in place, if the
 * list is empty or the next rdata has not completed yet. An rdata whose
 * result is -EAGAIN is resent through cifs_send_async_read() (after any
 * partial data has been copied out) and the walk restarts from the head of
 * the list. When the walk finishes, the pages in ctx->bv are released (and
 * dirtied first if ctx->should_dirty), ctx->total_len and ctx->rc are stored,
 * and the submitter is notified through iocb->ki_complete() or ctx->done.
 */
3143 static void
3144 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3145 {
3146         struct cifs_readdata *rdata, *tmp;
3147         struct iov_iter *to = &ctx->iter;
3148         struct cifs_sb_info *cifs_sb;
3149         struct cifs_tcon *tcon;
3150         unsigned int i;
3151         int rc;
3152 
3153         tcon = tlink_tcon(ctx->cfile->tlink);
3154         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3155 
3156         mutex_lock(&ctx->aio_mutex);
3157 
             /* nothing queued: nothing to collect */
3158         if (list_empty(&ctx->list)) {
3159                 mutex_unlock(&ctx->aio_mutex);
3160                 return;
3161         }
3162 
3163         rc = ctx->rc;
3164         /* the loop below should proceed in the order of increasing offsets */
3165 again:
3166         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
                     /*
                      * Once an error has been recorded the remaining rdatas
                      * are only unlinked and released below.
                      */
3167                 if (!rc) {
                             /* this read is still in flight: bail out */
3168                         if (!try_wait_for_completion(&rdata->done)) {
3169                                 mutex_unlock(&ctx->aio_mutex);
3170                                 return;
3171                         }
3172 
3173                         if (rdata->result == -EAGAIN) {
3174                                 /* resend call if it's a retryable error */
3175                                 struct list_head tmp_list;
3176                                 unsigned int got_bytes = rdata->got_bytes;
3177 
3178                                 list_del_init(&rdata->list);
3179                                 INIT_LIST_HEAD(&tmp_list);
3180 
3181                                 /*
3182                                  * Got a part of data and then reconnect has
3183                                  * happened -- fill the buffer and continue
3184                                  * reading.
3185                                  */
3186                                 if (got_bytes && got_bytes < rdata->bytes) {
3187                                         rc = cifs_readdata_to_iov(rdata, to);
3188                                         if (rc) {
3189                                                 kref_put(&rdata->refcount,
3190                                                 cifs_uncached_readdata_release);
3191                                                 continue;
3192                                         }
3193                                 }
3194 
                                     /* re-issue only the part not yet received */
3195                                 rc = cifs_send_async_read(
3196                                                 rdata->offset + got_bytes,
3197                                                 rdata->bytes - got_bytes,
3198                                                 rdata->cfile, cifs_sb,
3199                                                 &tmp_list, ctx);
3200 
                                     /* new requests go to the head of ctx->list */
3201                                 list_splice(&tmp_list, &ctx->list);
3202 
3203                                 kref_put(&rdata->refcount,
3204                                          cifs_uncached_readdata_release);
3205                                 goto again;
3206                         } else if (rdata->result)
3207                                 rc = rdata->result;
3208                         else
3209                                 rc = cifs_readdata_to_iov(rdata, to);
3210 
3211                         /* if there was a short read -- discard anything left */
3212                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3213                                 rc = -ENODATA;
3214                 }
3215                 list_del_init(&rdata->list);
3216                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3217         }
3218 
3219         for (i = 0; i < ctx->npages; i++) {
3220                 if (ctx->should_dirty)
3221                         set_page_dirty(ctx->bv[i].bv_page);
3222                 put_page(ctx->bv[i].bv_page);
3223         }
3224 
             /* bytes delivered = requested length minus what is left in the iter */
3225         ctx->total_len = ctx->len - iov_iter_count(to);
3226 
3227         cifs_stats_bytes_read(tcon, ctx->total_len);
3228 
3229         /* mask nodata case */
3230         if (rc == -ENODATA)
3231                 rc = 0;
3232 
             /*
              * Keep both arms of the conditional signed: if ctx->total_len is
              * an unsigned type (its declaration is outside this chunk), the
              * usual arithmetic conversions would otherwise turn a negative rc
              * into a large positive value before it is stored in ctx->rc.
              */
3233         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3234 
3235         mutex_unlock(&ctx->aio_mutex);
3236 
             /* async submitter: call its completion; sync submitter: wake it */
3237         if (ctx->iocb && ctx->iocb->ki_complete)
3238                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3239         else
3240                 complete(&ctx->done);
3241 }
3242 
/*
 * cifs_user_readv - uncached read into the iterator @to.
 *
 * Allocates an aio context, issues async reads for the whole iterator via
 * cifs_send_async_read() and, for a synchronous kiocb, waits (killably) on
 * ctx->done.
 *
 * Returns 0 for an empty iterator, -ENOSYS when the server ops provide no
 * async_readv, -ENOMEM when the context cannot be allocated, the error from
 * setup_aio_ctx_iter() or from sending when no request could be queued, and
 * -EIOCBQUEUED for an async kiocb once at least one request was queued.
 * For a sync kiocb it returns the byte count in ctx->total_len (advancing
 * iocb->ki_pos) when that is non-zero, otherwise ctx->rc, or -EINTR if the
 * wait was interrupted.
 */
3243 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3244 {
3245         struct file *file = iocb->ki_filp;
3246         ssize_t rc;
3247         size_t len;
3248         ssize_t total_read = 0;
3249         loff_t offset = iocb->ki_pos;
3250         struct cifs_sb_info *cifs_sb;
3251         struct cifs_tcon *tcon;
3252         struct cifsFileInfo *cfile;
3253         struct cifs_aio_ctx *ctx;
3254 
3255         len = iov_iter_count(to);
3256         if (!len)
3257                 return 0;
3258 
3259         cifs_sb = CIFS_FILE_SB(file);
3260         cfile = file->private_data;
3261         tcon = tlink_tcon(cfile->tlink);
3262 
3263         if (!tcon->ses->server->ops->async_readv)
3264                 return -ENOSYS;
3265 
             /* only logged; the read is still attempted */
3266         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3267                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3268 
3269         ctx = cifs_aio_ctx_alloc();
3270         if (!ctx)
3271                 return -ENOMEM;
3272 
3273         ctx->cfile = cifsFileInfo_get(cfile);
3274 
             /* ctx->iocb is only set for async requests */
3275         if (!is_sync_kiocb(iocb))
3276                 ctx->iocb = iocb;
3277 
3278         if (to->type == ITER_IOVEC)
3279                 ctx->should_dirty = true;
3280 
3281         rc = setup_aio_ctx_iter(ctx, to, READ);
3282         if (rc) {
3283                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3284                 return rc;
3285         }
3286 
3287         len = ctx->len;
3288 
3289         /* grab a lock here due to read response handlers can access ctx */
3290         mutex_lock(&ctx->aio_mutex);
3291 
3292         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3293 
3294         /* if at least one read request send succeeded, then reset rc */
3295         if (!list_empty(&ctx->list))
3296                 rc = 0;
3297 
3298         mutex_unlock(&ctx->aio_mutex);
3299 
3300         if (rc) {
3301                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3302                 return rc;
3303         }
3304 
             /*
              * Async case: drop this function's reference and return; the
              * result is reported later through ctx->iocb.
              */
3305         if (!is_sync_kiocb(iocb)) {
3306                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3307                 return -EIOCBQUEUED;
3308         }
3309 
3310         rc = wait_for_completion_killable(&ctx->done);
3311         if (rc) {
                     /* interrupted: record -EINTR in the context under the lock */
3312                 mutex_lock(&ctx->aio_mutex);
3313                 ctx->rc = rc = -EINTR;
3314                 total_read = ctx->total_len;
3315                 mutex_unlock(&ctx->aio_mutex);
3316         } else {
3317                 rc = ctx->rc;
3318                 total_read = ctx->total_len;
3319         }
3320 
3321         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3322 
             /* a non-zero byte count takes precedence over rc */
3323         if (total_read) {
3324                 iocb->ki_pos += total_read;
3325                 return total_read;
3326         }
3327         return rc;
3328 }
3329 
/*
 * cifs_strict_readv - read_iter used in strict cache mode.
 *
 * If the inode has no read cache (CIFS_CACHE_READ() is false) the read is
 * sent to the server through cifs_user_readv(). If the session is
 * unix-capable, the tcon advertises CIFS_UNIX_FCNTL_CAP and
 * CIFS_MOUNT_NOPOSIXBRL is not set, generic_file_read_iter() is used
 * directly. Otherwise generic_file_read_iter() is used only when
 * cifs_find_lock_conflict() reports no conflict for the range; with a
 * conflict the function returns -EACCES.
 */
3330 ssize_t
3331 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3332 {
3333         struct inode *inode = file_inode(iocb->ki_filp);
3334         struct cifsInodeInfo *cinode = CIFS_I(inode);
3335         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3336         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3337                                                 iocb->ki_filp->private_data;
3338         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
             /* result when a conflicting lock is found below */
3339         int rc = -EACCES;
3340 
3341         /*
3342          * In strict cache mode we need to read from the server all the time
3343          * if we don't have level II oplock because the server can delay mtime
3344          * change - so we can't make a decision about inode invalidating.
3345          * And we can also fail with pagereading if there are mandatory locks
3346          * on pages affected by this read but not on the region from pos to
3347          * pos+len-1.
3348          */
3349         if (!CIFS_CACHE_READ(cinode))
3350                 return cifs_user_readv(iocb, to);
3351 
3352         if (cap_unix(tcon->ses) &&
3353             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3354             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3355                 return generic_file_read_iter(iocb, to);
3356 
3357         /*
3358          * We need to hold the sem to be sure nobody modifies lock list
3359          * with a brlock that prevents reading.
3360          */
3361         down_read(&cinode->lock_sem);
3362         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3363                                      tcon->ses->server->vals->shared_lock_type,
3364                                      NULL, CIFS_READ_OP))
3365                 rc = generic_file_read_iter(iocb, to);
3366         up_read(&cinode->lock_sem);
3367         return rc;
3368 }
3369 
/*
 * cifs_read - synchronously read up to @read_size bytes into @read_data.
 *
 * Data is fetched through server->ops->sync_read in chunks of at most
 * min(cifs_sb->rsize, CIFSMaxBufSize) bytes starting at *@offset; *@offset is
 * advanced by the bytes received for each successful chunk. A chunk that
 * fails with -EAGAIN is retried, and an invalid handle is reopened before the
 * request is sent.
 *
 * Returns -EBADF if file->private_data is NULL and -ENOSYS if the server ops
 * have no sync_read. If the first chunk fails or yields no data, returns that
 * chunk's rc (an error, or 0 when sync_read succeeded with zero bytes).
 * Otherwise returns the total number of bytes read, which may be short.
 */
3370 static ssize_t
3371 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3372 {
3373         int rc = -EACCES;
3374         unsigned int bytes_read = 0;
3375         unsigned int total_read;
3376         unsigned int current_read_size;
3377         unsigned int rsize;
3378         struct cifs_sb_info *cifs_sb;
3379         struct cifs_tcon *tcon;
3380         struct TCP_Server_Info *server;
3381         unsigned int xid;
3382         char *cur_offset;
3383         struct cifsFileInfo *open_file;
3384         struct cifs_io_parms io_parms;
3385         int buf_type = CIFS_NO_BUFFER;
3386         __u32 pid;
3387 
3388         xid = get_xid();
3389         cifs_sb = CIFS_FILE_SB(file);
3390 
3391         /* FIXME: set up handlers for larger reads and/or convert to async */
3392         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3393 
3394         if (file->private_data == NULL) {
3395                 rc = -EBADF;
3396                 free_xid(xid);
3397                 return rc;
3398         }
3399         open_file = file->private_data;
3400         tcon = tlink_tcon(open_file->tlink);
3401         server = tcon->ses->server;
3402 
3403         if (!server->ops->sync_read) {
3404                 free_xid(xid);
3405                 return -ENOSYS;
3406         }
3407 
3408         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3409                 pid = open_file->pid;
3410         else
3411                 pid = current->tgid;
3412 
3413         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3414                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3415 
             /*
              * One iteration per chunk; total_read and cur_offset are advanced
              * by the loop's increment expression, i.e. only after the body.
              */
3416         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3417              total_read += bytes_read, cur_offset += bytes_read) {
3418                 do {
3419                         current_read_size = min_t(uint, read_size - total_read,
3420                                                   rsize);
3421                         /*
3422                          * For windows me and 9x we do not want to request more
3423                          * than it negotiated since it will refuse the read
3424                          * then.
3425                          */
3426                         if ((tcon->ses) && !(tcon->ses->capabilities &
3427                                 tcon->ses->server->vals->cap_large_files)) {
3428                                 current_read_size = min_t(uint,
3429                                         current_read_size, CIFSMaxBufSize);
3430                         }
3431                         if (open_file->invalidHandle) {
3432                                 rc = cifs_reopen_file(open_file, true);
                                     /* leaves the retry loop with rc set */
3433                                 if (rc != 0)
3434                                         break;
3435                         }
3436                         io_parms.pid = pid;
3437                         io_parms.tcon = tcon;
3438                         io_parms.offset = *offset;
3439                         io_parms.length = current_read_size;
3440                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3441                                                     &bytes_read, &cur_offset,
3442                                                     &buf_type);
3443                 } while (rc == -EAGAIN);
3444 
3445                 if (rc || (bytes_read == 0)) {
                             /* keep what was already read; otherwise report rc */
3446                         if (total_read) {
3447                                 break;
3448                         } else {
3449                                 free_xid(xid);
3450                                 return rc;
3451                         }
3452                 } else {
                             /*
                              * Account this chunk only. total_read has not
                              * been advanced yet here, so passing it would
                              * count the bytes of earlier chunks instead
                              * (assumes the helper accumulates -- its
                              * definition is outside this chunk).
                              */
3453                         cifs_stats_bytes_read(tcon, bytes_read);
3454                         *offset += bytes_read;
3455                 }
3456         }
3457         free_xid(xid);
3458         return total_read;
3459 }
3460 
3461 /*
3462  * If the page is mmap'ed into a process' page tables, then we need to make
3463  * sure that it doesn't change while being written back.
3464  */
3465 static vm_fault_t
3466 cifs_page_mkwrite(struct vm_fault *vmf)
3467 {
3468         struct page *page = vmf->page;
3469 
             /* take the page lock and tell the caller the page is returned locked */
3470         lock_page(page);
3471         return VM_FAULT_LOCKED;
3472 }
3473 
/*
 * VM operations installed on a vma by cifs_file_mmap() and
 * cifs_file_strict_mmap(): the generic filemap handlers for faults and
 * map_pages, plus cifs_page_mkwrite() for write faults.
 */
3474 static const struct vm_operations_struct cifs_file_vm_ops = {
3475         .fault = filemap_fault,
3476         .map_pages = filemap_map_pages,
3477         .page_mkwrite = cifs_page_mkwrite,
3478 };
3479 
/*
 * cifs_file_strict_mmap - mmap handler that zaps the mapping first when the
 * inode has no read cache.
 *
 * Calls cifs_zap_mapping() if CIFS_CACHE_READ() is false, then
 * generic_file_mmap(), then installs cifs_file_vm_ops. Each step runs only if
 * the previous one returned 0; the first non-zero rc is returned.
 */
3480 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3481 {
3482         int xid, rc = 0;
3483         struct inode *inode = file_inode(file);
3484 
3485         xid = get_xid();
3486 
3487         if (!CIFS_CACHE_READ(CIFS_I(inode)))
3488                 rc = cifs_zap_mapping(inode);
3489         if (!rc)
3490                 rc = generic_file_mmap(file, vma);
3491         if (!rc)
3492                 vma->vm_ops = &cifs_file_vm_ops;
3493 
3494         free_xid(xid);
3495         return rc;
3496 }
3497 
/*
 * cifs_file_mmap - mmap handler that revalidates the file first.
 *
 * Calls cifs_revalidate_file(); on failure the error is logged and returned
 * without mapping. On success generic_file_mmap() is called and, if that
 * also returns 0, cifs_file_vm_ops is installed on the vma.
 */
3498 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3499 {
3500         int rc, xid;
3501 
3502         xid = get_xid();
3503 
3504         rc = cifs_revalidate_file(file);
3505         if (rc)
3506                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3507                          rc);
3508         if (!rc)
3509                 rc = generic_file_mmap(file, vma);
3510         if (!rc)
3511                 vma->vm_ops = &cifs_file_vm_ops;
3512 
3513         free_xid(xid);
3514         return rc;
3515 }
3516 
/*
 * cifs_readv_complete - work handler that finishes a readpages request.
 *
 * For every page in rdata->pages: add it to the LRU, mark it up to date when
 * the read succeeded (or failed with -EAGAIN while received bytes remain to
 * be accounted to this page), unlock it, hand it to fscache under the same
 * condition, and drop the page reference. Finally drops a reference on the
 * rdata itself.
 */
3517 static void
3518 cifs_readv_complete(struct work_struct *work)
3519 {
3520         unsigned int i, got_bytes;
3521         struct cifs_readdata *rdata = container_of(work,
3522                                                 struct cifs_readdata, work);
3523 
3524         got_bytes = rdata->got_bytes;
3525         for (i = 0; i < rdata->nr_pages; i++) {
3526                 struct page *page = rdata->pages[i];
3527 
3528                 lru_cache_add_file(page);
3529 
3530                 if (rdata->result == 0 ||
3531                     (rdata->result == -EAGAIN && got_bytes)) {
3532                         flush_dcache_page(page);
3533                         SetPageUptodate(page);
3534                 }
3535 
3536                 unlock_page(page);
3537 
3538                 if (rdata->result == 0 ||
3539                     (rdata->result == -EAGAIN && got_bytes))
3540                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3541 
                     /* consume up to one page worth of the received byte count */
3542                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3543 
3544                 put_page(page);
3545                 rdata->pages[i] = NULL;
3546         }
3547         kref_put(&rdata->refcount, cifs_readdata_release);
3548 }
3549 
/*
 * readpages_fill_pages - distribute @len bytes of read data over rdata->pages.
 *
 * Page contents come from @iter when it is non-NULL; otherwise they are
 * counted as already placed when rdata->mr is set (CONFIG_CIFS_SMB_DIRECT
 * only) or read from the socket with cifs_read_page_from_socket(). A final
 * partial page is zero-padded and its length stored in rdata->tailsz. Pages
 * for which no data is left are released: zero-filled and marked up to date
 * when they lie beyond the server EOF, otherwise simply unlocked.
 *
 * Returns rdata->got_bytes when it is positive and the last result is not
 * -ECONNABORTED; otherwise returns the last result.
 */
3550 static int
3551 readpages_fill_pages(struct TCP_Server_Info *server,
3552                      struct cifs_readdata *rdata, struct iov_iter *iter,
3553                      unsigned int len)
3554 {
3555         int result = 0;
3556         unsigned int i;
3557         u64 eof;
3558         pgoff_t eof_index;
             /* snapshot: rdata->nr_pages is decremented inside the loop */
3559         unsigned int nr_pages = rdata->nr_pages;
3560 
3561         /* determine the eof that the server (probably) has */
3562         eof = CIFS_I(rdata->mapping->host)->server_eof;
             /* index of the page holding the last byte before eof (0 if eof is 0) */
3563         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3564         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3565 
3566         rdata->got_bytes = 0;
3567         rdata->tailsz = PAGE_SIZE;
3568         for (i = 0; i < nr_pages; i++) {
3569                 struct page *page = rdata->pages[i];
3570                 size_t n = PAGE_SIZE;
3571 
3572                 if (len >= PAGE_SIZE) {
3573                         len -= PAGE_SIZE;
3574                 } else if (len > 0) {
3575                         /* enough for partial page, fill and zero the rest */
3576                         zero_user(page, len, PAGE_SIZE - len);
3577                         n = rdata->tailsz = len;
3578                         len = 0;
3579                 } else if (page->index > eof_index) {
3580                         /*
3581                          * The VFS will not try to do readahead past the
3582                          * i_size, but it's possible that we have outstanding
3583                          * writes with gaps in the middle and the i_size hasn't
3584                          * caught up yet. Populate those with zeroed out pages
3585                          * to prevent the VFS from repeatedly attempting to
3586                          * fill them until the writes are flushed.
3587                          */
3588                         zero_user(page, 0, PAGE_SIZE);
3589                         lru_cache_add_file(page);
3590                         flush_dcache_page(page);
3591                         SetPageUptodate(page);
3592                         unlock_page(page);
3593                         put_page(page);
3594                         rdata->pages[i] = NULL;
3595                         rdata->nr_pages--;
3596                         continue;
3597                 } else {
3598                         /* no need to hold page hostage */
3599                         lru_cache_add_file(page);
3600                         unlock_page(page);
3601                         put_page(page);
3602                         rdata->pages[i] = NULL;
3603                         rdata->nr_pages--;
3604                         continue;
3605                 }
3606 
                     /* fill n bytes of this page from whichever source applies */
3607                 if (iter)
3608                         result = copy_page_from_iter(page, 0, n, iter);
3609 #ifdef CONFIG_CIFS_SMB_DIRECT
3610                 else if (rdata->mr)
3611                         result = n;
3612 #endif
3613                 else
3614                         result = cifs_read_page_from_socket(server, page, n);
3615                 if (result < 0)
3616                         break;
3617 
3618                 rdata->got_bytes += result;
3619         }
3620 
3621         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3622                                                 rdata->got_bytes : result;
3623 }
3624 
/*
 * read_into_pages callback for readpages requests: fill rdata's pages with
 * @len bytes using readpages_fill_pages() with no iterator.
 */
3625 static int
3626 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3627                                struct cifs_readdata *rdata, unsigned int len)
3628 {
3629         return readpages_fill_pages(server, rdata, NULL, len);
3630 }
3631 
/*
 * copy_into_pages callback for readpages requests: fill rdata's pages from
 * @iter, using iter->count as the number of bytes to copy.
 */
3632 static int
3633 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
3634                                struct cifs_readdata *rdata,
3635                                struct iov_iter *iter)
3636 {
3637         return readpages_fill_pages(server, rdata, iter, iter->count);
3638 }
3639 
/*
 * readpages_get_pages - pull a run of contiguous pages off @page_list.
 *
 * Takes the page at the tail of @page_list, inserts it locked into the page
 * cache of @mapping and moves it to @tmplist. It then keeps taking pages from
 * the tail while their index is the next expected one, the request stays
 * within @rsize bytes and the page cache insertion succeeds.
 *
 * On success *@offset is the file offset of the first page, *@bytes the total
 * size and *@nr_pages the number of pages moved to @tmplist. Returns 0, or
 * the error from inserting the first page (in which case nothing is moved).
 */
3640 static int
3641 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3642                     unsigned int rsize, struct list_head *tmplist,
3643                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3644 {
3645         struct page *page, *tpage;
3646         unsigned int expected_index;
3647         int rc;
3648         gfp_t gfp = readahead_gfp_mask(mapping);
3649 
3650         INIT_LIST_HEAD(tmplist);
3651 
3652         page = list_entry(page_list->prev, struct page, lru);
3653 
3654         /*
3655          * Lock the page and put it in the cache. Since no one else
3656          * should have access to this page, we're safe to simply set
3657          * PG_locked without checking it first.
3658          */
3659         __SetPageLocked(page);
3660         rc = add_to_page_cache_locked(page, mapping,
3661                                       page->index, gfp);
3662 
3663         /* give up if we can't stick it in the cache */
3664         if (rc) {
3665                 __ClearPageLocked(page);
3666                 return rc;
3667         }
3668 
3669         /* move first page to the tmplist */
3670         *offset = (loff_t)page->index << PAGE_SHIFT;
3671         *bytes = PAGE_SIZE;
3672         *nr_pages = 1;
3673         list_move_tail(&page->lru, tmplist);
3674 
3675         /* now try and add more pages onto the request */
3676         expected_index = page->index + 1;
3677         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3678                 /* discontinuity ? */
3679                 if (page->index != expected_index)
3680                         break;
3681 
3682                 /* would this page push the read over the rsize? */
3683                 if (*bytes + PAGE_SIZE > rsize)
3684                         break;
3685 
3686                 __SetPageLocked(page);
                     /* a failed insertion just ends the run; it is not an error */
3687                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3688                         __ClearPageLocked(page);
3689                         break;
3690                 }
3691                 list_move_tail(&page->lru, tmplist);
3692                 (*bytes) += PAGE_SIZE;
3693                 expected_index++;
3694                 (*nr_pages)++;
3695         }
             /* rc is 0 here: a non-zero value returned early above */
3696         return rc;
3697 }
3698 
/*
 * cifs_readpages - readpages handler: issue async reads for @page_list.
 *
 * First offers the pages to fscache; if that returns 0 nothing more is done.
 * Otherwise loops while @page_list is non-empty: obtain credits and an rsize
 * from wait_mtu_credits(), collect a contiguous run of pages with
 * readpages_get_pages(), wrap them in a cifs_readdata and send it with
 * server->ops->async_readv(). Any failure returns the credits, releases the
 * pages already collected for that request and ends the loop.
 *
 * Returns 0 or the error that ended the loop.
 */
3699 static int cifs_readpages(struct file *file, struct address_space *mapping,
3700         struct list_head *page_list, unsigned num_pages)
3701 {
3702         int rc;
3703         struct list_head tmplist;
3704         struct cifsFileInfo *open_file = file->private_data;
3705         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3706         struct TCP_Server_Info *server;
3707         pid_t pid;
3708 
3709         /*
3710          * Reads as many pages as possible from fscache. Returns -ENOBUFS
3711          * immediately if the cookie is negative
3712          *
3713          * After this point, every page in the list might have PG_fscache set,
3714          * so we will need to clean that up off of every page we don't use.
3715          */
3716         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3717                                          &num_pages);
3718         if (rc == 0)
3719                 return rc;
3720 
3721         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3722                 pid = open_file->pid;
3723         else
3724                 pid = current->tgid;
3725 
3726         rc = 0;
3727         server = tlink_tcon(open_file->tlink)->ses->server;
3728 
3729         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3730                  __func__, file, mapping, num_pages);
3731 
3732         /*
3733          * Start with the page at end of list and move it to private
3734          * list. Do the same with any following pages until we hit
3735          * the rsize limit, hit an index discontinuity, or run out of
3736          * pages. Issue the async read and then start the loop again
3737          * until the list is empty.
3738          *
3739          * Note that list order is important. The page_list is in
3740          * the order of declining indexes. When we put the pages in
3741          * the rdata->pages, then we want them in increasing order.
3742          */
3743         while (!list_empty(page_list)) {
3744                 unsigned int i, nr_pages, bytes, rsize;
3745                 loff_t offset;
3746                 struct page *page, *tpage;
3747                 struct cifs_readdata *rdata;
3748                 unsigned credits;
3749 
3750                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3751                                                    &rsize, &credits);
3752                 if (rc)
3753                         break;
3754 
3755                 /*
3756                  * Give up immediately if rsize is too small to read an entire
3757                  * page. The VFS will fall back to readpage. We should never
3758                  * reach this point however since we set ra_pages to 0 when the
3759                  * rsize is smaller than a cache page.
3760                  */
                     /*
                      * NOTE(review): this return bypasses the
                      * cifs_fscache_readpages_cancel() call at the end of the
                      * function -- confirm that is intended.
                      */
3761                 if (unlikely(rsize < PAGE_SIZE)) {
3762                         add_credits_and_wake_if(server, credits, 0);
3763                         return 0;
3764                 }
3765 
3766                 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3767                                          &nr_pages, &offset, &bytes);
3768                 if (rc) {
3769                         add_credits_and_wake_if(server, credits, 0);
3770                         break;
3771                 }
3772 
3773                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3774                 if (!rdata) {
3775                         /* best to give up if we're out of mem */
3776                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3777                                 list_del(&page->lru);
3778                                 lru_cache_add_file(page);
3779                                 unlock_page(page);
3780                                 put_page(page);
3781                         }
3782                         rc = -ENOMEM;
3783                         add_credits_and_wake_if(server, credits, 0);
3784                         break;
3785                 }
3786 
3787                 rdata->cfile = cifsFileInfo_get(open_file);
3788                 rdata->mapping = mapping;
3789                 rdata->offset = offset;
3790                 rdata->bytes = bytes;
3791                 rdata->pid = pid;
3792                 rdata->pagesz = PAGE_SIZE;
3793                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3794                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
3795                 rdata->credits = credits;
3796 
                     /* hand the collected pages over to the rdata, in list order */
3797                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3798                         list_del(&page->lru);
3799                         rdata->pages[rdata->nr_pages++] = page;
3800                 }
3801 
                     /* reopen the handle first if it is marked invalid */
3802                 if (!rdata->cfile->invalidHandle ||
3803                     !(rc = cifs_reopen_file(rdata->cfile, true)))
3804                         rc = server->ops->async_readv(rdata);
3805                 if (rc) {
3806                         add_credits_and_wake_if(server, rdata->credits, 0);
3807                         for (i = 0; i < rdata->nr_pages; i++) {
3808                                 page = rdata->pages[i];
3809                                 lru_cache_add_file(page);
3810                                 unlock_page(page);
3811                                 put_page(page);
3812                         }
3813                         /* Fallback to the readpage in error/reconnect cases */
3814                         kref_put(&rdata->refcount, cifs_readdata_release);
3815                         break;
3816                 }
3817 
                     /*
                      * Drop this function's reference (presumably the in-flight
                      * request holds its own -- verify in async_readv).
                      */
3818                 kref_put(&rdata->refcount, cifs_readdata_release);
3819         }
3820 
3821         /* Any pages that have been shown to fscache but didn't get added to
3822          * the pagecache must be uncached before they get returned to the
3823          * allocator.
3824          */
3825         cifs_fscache_readpages_cancel(mapping->host, page_list);
3826         return rc;
3827 }
3828 
3829 /*
3830  * cifs_readpage_worker must be called with the page pinned
3831  */
/*
 * Fills @page either from fscache or by a synchronous cifs_read() of
 * PAGE_SIZE bytes at *@poffset. After a successful cifs_read() the inode's
 * atime is updated, the unread tail of the page is zeroed, the page is
 * marked up to date and offered to fscache. Returns 0 on success or the
 * negative error from cifs_read(). The page is unlocked here on the
 * cifs_read() paths only.
 */
3832 static int cifs_readpage_worker(struct file *file, struct page *page,
3833         loff_t *poffset)
3834 {
3835         char *read_data;
3836         int rc;
3837 
3838         /* Is the page cached? */
3839         rc = cifs_readpage_from_fscache(file_inode(file), page);
             /*
              * On an fscache hit the page is neither mapped nor unlocked in
              * this function (presumably the fscache path unlocks it --
              * verify).
              */
3840         if (rc == 0)
3841                 goto read_complete;
3842 
3843         read_data = kmap(page);
3844         /* for reads over a certain size could initiate async read ahead */
3845 
3846         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3847 
3848         if (rc < 0)
3849                 goto io_error;
3850         else
3851                 cifs_dbg(FYI, "Bytes read %d\n", rc);
3852 
3853         file_inode(file)->i_atime =
3854                 current_time(file_inode(file));
3855 
             /* short read: zero the rest of the page */
3856         if (PAGE_SIZE > rc)
3857                 memset(read_data + rc, 0, PAGE_SIZE - rc);
3858 
3859         flush_dcache_page(page);
3860         SetPageUptodate(page);
3861 
3862         /* send this page to the cache */
3863         cifs_readpage_to_fscache(file_inode(file), page);
3864 
3865         rc = 0;
3866 
3867 io_error:
3868         kunmap(page);
3869         unlock_page(page);
3870 
3871 read_complete:
3872         return rc;
3873 }
3874 
/*
 * cifs_readpage - readpage handler: read one page at its file offset.
 *
 * Returns -EBADF when file->private_data is NULL, otherwise the result of
 * cifs_readpage_worker() for the offset derived from page->index.
 */
3875 static int cifs_readpage(struct file *file, struct page *page)
3876 {
3877         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
             /* initial value is always overwritten before it is returned */
3878         int rc = -EACCES;
3879         unsigned int xid;
3880 
3881         xid = get_xid();
3882 
3883         if (file->private_data == NULL) {
3884                 rc = -EBADF;
3885                 free_xid(xid);
3886                 return rc;
3887         }
3888 
             /*
              * NOTE(review): offset is cast to int for this debug message, so
              * the printed value is truncated if it does not fit in an int.
              */
3889         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3890                  page, (int)offset, (int)offset);
3891 
3892         rc = cifs_readpage_worker(file, page, &offset);
3893 
3894         free_xid(xid);
3895         return rc;
3896 }
3897 
/*
 * is_inode_writable - report whether any open handle on the inode allows
 * writing.
 *
 * Scans cifs_inode->openFileList under the master tcon's open_file_lock.
 * Returns 1 as soon as an entry whose open flags map to FMODE_WRITE is
 * found, 0 if there is none.
 */
3898 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3899 {
3900         struct cifsFileInfo *open_file;
3901         struct cifs_tcon *tcon =
3902                 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3903 
3904         spin_lock(&tcon->open_file_lock);
3905         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3906                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3907                         spin_unlock(&tcon->open_file_lock);
3908                         return 1;
3909                 }
3910         }
3911         spin_unlock(&tcon->open_file_lock);
3912         return 0;
3913 }
3914 
3915 /* We do not want to update the file size from server for inodes
3916    open for write - to avoid races with writepage extending
3917    the file - in the future we could consider allowing
3918    refreshing the inode only on increases in the file size
3919    but this is tricky to do without racing with writebehind
3920    page caching in the current Linux kernel design */
/*
 * Returns true when @cifsInode is NULL or has no handle open for write.
 * For an inode that is open for write it returns true only on a
 * CIFS_MOUNT_DIRECT_IO mount or when the current i_size is smaller than
 * @end_of_file; otherwise it returns false.
 */
3921 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3922 {
3923         if (!cifsInode)
3924                 return true;
3925 
3926         if (is_inode_writable(cifsInode)) {
3927                 /* This inode is open for write at least once */
3928                 struct cifs_sb_info *cifs_sb;
3929 
3930                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3931                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3932                         /* since no page cache to corrupt on directio
3933                         we can change size safely */
3934                         return true;
3935                 }
3936 
                     /* the new size would grow the file */
3937                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3938                         return true;
3939 
3940                 return false;
3941         } else
3942                 return true;
3943 }
3944 
3945 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3946                         loff_t pos, unsigned len, unsigned flags,
3947                         struct page **pagep, void **fsdata)
3948 {
3949         int oncethru = 0;
3950         pgoff_t index = pos >> PAGE_SHIFT;
3951         loff_t offset = pos & (PAGE_SIZE - 1);
3952         loff_t page_start = pos & PAGE_MASK;
3953         loff_t i_size;
3954         struct page *page;
3955         int rc = 0;
3956 
3957         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3958 
3959 start:
3960         page = grab_cache_page_write_begin(mapping, index, flags);
3961         if (!page) {
3962                 rc = -ENOMEM;
3963                 goto out;
3964         }
3965 
3966         if (PageUptodate(page))
3967                 goto out;
3968 
3969         /*
3970          * If we write a full page it will be up to date, no need to read from
3971          * the server. If the write is short, we'll end up doing a sync write
3972          * instead.
3973          */
3974         if (len == PAGE_SIZE)
3975                 goto out;
3976 
3977         /*
3978          * optimize away the read when we have an oplock, and we're not
3979          * expecting to use any of the data we'd be reading in. That
3980          * is, when the page lies beyond the EOF, or straddles the EOF
3981          * and the write will cover all of the existing data.
3982          */
3983         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3984                 i_size = i_size_read(mapping->host);
3985                 if (page_start >= i_size ||
3986                     (offset == 0 && (pos + len) >= i_size)) {
3987                         zero_user_segments(page, 0, offset,
3988                                            offset + len,
3989                                            PAGE_SIZE);
3990                         /*
3991                          * PageChecked means that the parts of the page
3992                          * to which we're not writing are considered up
3993                          * to date. Once the data is copied to the
3994                          * page, it can be set uptodate.
3995                          */
3996                         SetPageChecked(page);
3997                         goto out;
3998                 }
3999         }
4000 
4001         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4002                 /*
4003                  * might as well read a page, it is fast enough. If we get
4004                  * an error, we don't need to return it. cifs_write_end will
4005                  * do a sync write instead since PG_uptodate isn't set.
4006                  */
4007                 cifs_readpage_worker(file, page, &page_start);
4008                 put_page(page);
4009                 oncethru = 1;
4010                 goto start;
4011         } else {
4012                 /* we could try using another file handle if there is one -
4013                    but how would we lock it to prevent close of that handle
4014                    racing with this read? In any case
4015                    this will be written out by write_end so is fine */
4016         }
4017 out:
4018         *pagep = page;
4019         return rc;
4020 }
4021 
4022 static int cifs_release_page(struct page *page, gfp_t gfp)
4023 {
4024         if (PagePrivate(page))
4025                 return 0;
4026 
4027         return cifs_fscache_release_page(page, gfp);
4028 }
4029 
4030 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4031                                  unsigned int length)
4032 {
4033         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4034 
4035         if (offset == 0 && length == PAGE_SIZE)
4036                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4037 }
4038 
4039 static int cifs_launder_page(struct page *page)
4040 {
4041         int rc = 0;
4042         loff_t range_start = page_offset(page);
4043         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4044         struct writeback_control wbc = {
4045                 .sync_mode = WB_SYNC_ALL,
4046                 .nr_to_write = 0,
4047                 .range_start = range_start,
4048                 .range_end = range_end,
4049         };
4050 
4051         cifs_dbg(FYI, "Launder page: %p\n", page);
4052 
4053         if (clear_page_dirty_for_io(page))
4054                 rc = cifs_writepage_locked(page, &wbc);
4055 
4056         cifs_fscache_invalidate_page(page, page->mapping->host);
4057         return rc;
4058 }
4059 
4060 void cifs_oplock_break(struct work_struct *work)
4061 {
4062         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4063                                                   oplock_break);
4064         struct inode *inode = d_inode(cfile->dentry);
4065         struct cifsInodeInfo *cinode = CIFS_I(inode);
4066         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4067         struct TCP_Server_Info *server = tcon->ses->server;
4068         int rc = 0;
4069 
4070         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4071                         TASK_UNINTERRUPTIBLE);
4072 
4073         server->ops->downgrade_oplock(server, cinode,
4074                 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
4075 
4076         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4077                                                 cifs_has_mand_locks(cinode)) {
4078                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4079                          inode);
4080                 cinode->oplock = 0;
4081         }
4082 
4083         if (inode && S_ISREG(inode->i_mode)) {
4084                 if (CIFS_CACHE_READ(cinode))
4085                         break_lease(inode, O_RDONLY);
4086                 else
4087                         break_lease(inode, O_WRONLY);
4088                 rc = filemap_fdatawrite(inode->i_mapping);
4089                 if (!CIFS_CACHE_READ(cinode)) {
4090                         rc = filemap_fdatawait(inode->i_mapping);
4091                         mapping_set_error(inode->i_mapping, rc);
4092                         cifs_zap_mapping(inode);
4093                 }
4094                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4095         }
4096 
4097         rc = cifs_push_locks(cfile);
4098         if (rc)
4099                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4100 
4101         /*
4102          * releasing stale oplock after recent reconnect of smb session using
4103          * a now incorrect file handle is not a data integrity issue but do
4104          * not bother sending an oplock release if session to server still is
4105          * disconnected since oplock already released by the server
4106          */
4107         if (!cfile->oplock_break_cancelled) {
4108                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4109                                                              cinode);
4110                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4111         }
4112         cifs_done_oplock_break(cinode);
4113 }
4114 
4115 /*
4116  * The presence of cifs_direct_io() in the address space ops vector
4117  * allowes open() O_DIRECT flags which would have failed otherwise.
4118  *
4119  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4120  * so this method should never be called.
4121  *
4122  * Direct IO is not yet supported in the cached mode. 
4123  */
4124 static ssize_t
4125 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4126 {
4127         /*
4128          * FIXME
4129          * Eventually need to support direct IO for non forcedirectio mounts
4130          */
4131         return -EINVAL;
4132 }
4133 
4134 
4135 const struct address_space_operations cifs_addr_ops = {
4136         .readpage = cifs_readpage,
4137         .readpages = cifs_readpages,
4138         .writepage = cifs_writepage,
4139         .writepages = cifs_writepages,
4140         .write_begin = cifs_write_begin,
4141         .write_end = cifs_write_end,
4142         .set_page_dirty = __set_page_dirty_nobuffers,
4143         .releasepage = cifs_release_page,
4144         .direct_IO = cifs_direct_io,
4145         .invalidatepage = cifs_invalidate_page,
4146         .launder_page = cifs_launder_page,
4147 };
4148 
4149 /*
4150  * cifs_readpages requires the server to support a buffer large enough to
4151  * contain the header plus one complete page of data.  Otherwise, we need
4152  * to leave cifs_readpages out of the address space operations.
4153  */
4154 const struct address_space_operations cifs_addr_ops_smallbuf = {
4155         .readpage = cifs_readpage,
4156         .writepage = cifs_writepage,
4157         .writepages = cifs_writepages,
4158         .write_begin = cifs_write_begin,
4159         .write_end = cifs_write_end,
4160         .set_page_dirty = __set_page_dirty_nobuffers,
4161         .releasepage = cifs_release_page,
4162         .invalidatepage = cifs_invalidate_page,
4163         .launder_page = cifs_launder_page,
4164 };
4165 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp