CIFS: Fix a deadlock when a file is reopened
[cascardo/linux.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45
46
47 static inline int cifs_convert_flags(unsigned int flags)
48 {
49         if ((flags & O_ACCMODE) == O_RDONLY)
50                 return GENERIC_READ;
51         else if ((flags & O_ACCMODE) == O_WRONLY)
52                 return GENERIC_WRITE;
53         else if ((flags & O_ACCMODE) == O_RDWR) {
54                 /* GENERIC_ALL is too much permission to request
55                    can cause unnecessary access denied on create */
56                 /* return GENERIC_ALL; */
57                 return (GENERIC_READ | GENERIC_WRITE);
58         }
59
60         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
62                 FILE_READ_DATA);
63 }
64
65 static u32 cifs_posix_convert_flags(unsigned int flags)
66 {
67         u32 posix_flags = 0;
68
69         if ((flags & O_ACCMODE) == O_RDONLY)
70                 posix_flags = SMB_O_RDONLY;
71         else if ((flags & O_ACCMODE) == O_WRONLY)
72                 posix_flags = SMB_O_WRONLY;
73         else if ((flags & O_ACCMODE) == O_RDWR)
74                 posix_flags = SMB_O_RDWR;
75
76         if (flags & O_CREAT) {
77                 posix_flags |= SMB_O_CREAT;
78                 if (flags & O_EXCL)
79                         posix_flags |= SMB_O_EXCL;
80         } else if (flags & O_EXCL)
81                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82                          current->comm, current->tgid);
83
84         if (flags & O_TRUNC)
85                 posix_flags |= SMB_O_TRUNC;
86         /* be safe and imply O_SYNC for O_DSYNC */
87         if (flags & O_DSYNC)
88                 posix_flags |= SMB_O_SYNC;
89         if (flags & O_DIRECTORY)
90                 posix_flags |= SMB_O_DIRECTORY;
91         if (flags & O_NOFOLLOW)
92                 posix_flags |= SMB_O_NOFOLLOW;
93         if (flags & O_DIRECT)
94                 posix_flags |= SMB_O_DIRECT;
95
96         return posix_flags;
97 }
98
99 static inline int cifs_get_disposition(unsigned int flags)
100 {
101         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
102                 return FILE_CREATE;
103         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104                 return FILE_OVERWRITE_IF;
105         else if ((flags & O_CREAT) == O_CREAT)
106                 return FILE_OPEN_IF;
107         else if ((flags & O_TRUNC) == O_TRUNC)
108                 return FILE_OVERWRITE;
109         else
110                 return FILE_OPEN;
111 }
112
/*
 * cifs_posix_open - open/create a file using the SMB POSIX extensions
 * @full_path:	server-relative path of the file
 * @pinode:	in/out inode; NULL if the caller does not want inode info.
 *		If *pinode is NULL a new inode is instantiated from the
 *		attributes the server returned, otherwise the existing inode
 *		is refreshed from them.
 * @sb:		superblock of the mount
 * @mode:	create mode; masked with the caller's umask below
 * @f_flags:	VFS open flags, converted via cifs_posix_convert_flags()
 * @poplock:	out: oplock level granted by the server
 * @pnetfid:	out: network file id of the opened handle
 * @xid:	transaction id for request accounting
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_sb->mnt_cifs_flags &
					CIFS_MOUNT_MAP_SPECIAL_CHR);
	/* tlink reference only needed for the create call itself */
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	/* Type of -1 means the server returned no usable attributes */
	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
174
/*
 * cifs_nt_open - open a file via the regular (non-POSIX) NT create path.
 *
 * Converts the VFS flags into a desired-access mask and create
 * disposition, issues the open through the server's ->open op, and then
 * refreshes the inode metadata from the server.  Returns 0 on success or
 * a negative errno; on success @fid and @oplock are filled in by the
 * server op.
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	/* protocol (e.g. SMB1 vs SMB2) must provide an open op */
	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* buffer receives the file metadata returned by the open call */
	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* refresh inode metadata now that the open succeeded */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, &fid->netfid);

out:
	kfree(buf);
	return rc;
}
253
254 static bool
255 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
256 {
257         struct cifs_fid_locks *cur;
258         bool has_locks = false;
259
260         down_read(&cinode->lock_sem);
261         list_for_each_entry(cur, &cinode->llist, llist) {
262                 if (!list_empty(&cur->locks)) {
263                         has_locks = true;
264                         break;
265                 }
266         }
267         up_read(&cinode->lock_sem);
268         return has_locks;
269 }
270
/*
 * cifs_new_fileinfo - allocate and initialize the per-open-file state.
 *
 * Creates a cifsFileInfo (refcount 1) and its per-fid lock list, links
 * them onto the inode and tcon lists, resolves the final oplock level
 * against any pending open, and stores the result in file->private_data.
 * Returns the new cifsFileInfo or NULL on allocation failure.
 *
 * Lock ordering as used here: cinode->lock_sem (write) is taken alone to
 * publish the fdlocks list; cifs_file_list_lock is taken alone for the
 * pending-open/oplock resolution and list insertion.
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	/* publish this handle's (empty) lock list on the inode */
	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (oplock == server->vals->oplock_read &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&cifs_file_list_lock);
	/* a pending open may carry an updated oplock level - prefer it */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	/* if readable file instance put first in list*/
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cifs_file_list_lock);

	file->private_data = cfile;
	return cfile;
}
340
/*
 * Take an additional reference on an open file instance.  Acquires
 * cifs_file_list_lock itself, so the caller must not already hold it.
 * Returns @cifs_file for caller convenience.
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file_list_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file_list_lock);
	return cifs_file;
}
349
350 /*
351  * Release a reference on the file private data. This may involve closing
352  * the filehandle out on the server. Must be called without holding
353  * cifs_file_list_lock.
354  */
355 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
356 {
357         struct inode *inode = cifs_file->dentry->d_inode;
358         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
359         struct TCP_Server_Info *server = tcon->ses->server;
360         struct cifsInodeInfo *cifsi = CIFS_I(inode);
361         struct super_block *sb = inode->i_sb;
362         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
363         struct cifsLockInfo *li, *tmp;
364         struct cifs_fid fid;
365         struct cifs_pending_open open;
366
367         spin_lock(&cifs_file_list_lock);
368         if (--cifs_file->count > 0) {
369                 spin_unlock(&cifs_file_list_lock);
370                 return;
371         }
372
373         if (server->ops->get_lease_key)
374                 server->ops->get_lease_key(inode, &fid);
375
376         /* store open in pending opens to make sure we don't miss lease break */
377         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
378
379         /* remove it from the lists */
380         list_del(&cifs_file->flist);
381         list_del(&cifs_file->tlist);
382
383         if (list_empty(&cifsi->openFileList)) {
384                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
385                          cifs_file->dentry->d_inode);
386                 /*
387                  * In strict cache mode we need invalidate mapping on the last
388                  * close  because it may cause a error when we open this file
389                  * again and get at least level II oplock.
390                  */
391                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
392                         CIFS_I(inode)->invalid_mapping = true;
393                 cifs_set_oplock_level(cifsi, 0);
394         }
395         spin_unlock(&cifs_file_list_lock);
396
397         cancel_work_sync(&cifs_file->oplock_break);
398
399         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
400                 struct TCP_Server_Info *server = tcon->ses->server;
401                 unsigned int xid;
402
403                 xid = get_xid();
404                 if (server->ops->close)
405                         server->ops->close(xid, tcon, &cifs_file->fid);
406                 _free_xid(xid);
407         }
408
409         cifs_del_pending_open(&open);
410
411         /*
412          * Delete any outstanding lock records. We'll lose them when the file
413          * is closed anyway.
414          */
415         down_write(&cifsi->lock_sem);
416         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
417                 list_del(&li->llist);
418                 cifs_del_lock_waiters(li);
419                 kfree(li);
420         }
421         list_del(&cifs_file->llist->llist);
422         kfree(cifs_file->llist);
423         up_write(&cifsi->lock_sem);
424
425         cifs_put_tlink(cifs_file->tlink);
426         dput(cifs_file->dentry);
427         cifs_sb_deactive(sb);
428         kfree(cifs_file);
429 }
430
/*
 * cifs_open - the VFS ->open handler for regular files.
 *
 * Tries the SMB POSIX-extension open first when the tcon supports it;
 * otherwise (or on fallback-worthy errors) opens via the regular NT
 * path.  A pending open is registered before the server open so that a
 * lease break arriving in the window is not missed.  On success the new
 * cifsFileInfo is stored in file->private_data by cifs_new_fileinfo().
 * Returns 0 or a negative errno.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file->f_path.dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	/* prefer the POSIX-extension open when the server advertises it */
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server mishandles posix open - stop trying it */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* register before the server open so a lease break is not missed */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* undo the server open and pending-open registration */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
549
550 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
551
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 *
 * Called from the reopen path after a reconnect.  Pushes cached locks
 * back to the server - via the POSIX lock path when the tcon supports
 * it and posix brlocks are not disabled by mount option, otherwise via
 * the protocol's mandatory-lock op.
 *
 * NOTE(review): lock_sem is taken for read here, not write - confirm
 * this is intentional for avoiding lock-ordering problems with other
 * lock_sem writers during reopen.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read(&cinode->lock_sem);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
581
/*
 * cifs_reopen_file - reopen a file handle that was invalidated
 * (typically by a reconnect after the session to the server was lost).
 * @cfile:	the open file instance whose handle must be re-established
 * @can_flush:	true if it is safe to flush dirty pages and refresh inode
 *		metadata from the server; false when called from a write
 *		path where flushing could deadlock
 *
 * Serialized against other reopens of the same handle via fh_mutex.
 * Tries a POSIX-extension reopen first when supported, otherwise the
 * regular NT open with FILE_OPEN disposition.  Returns 0 or a negative
 * errno.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* another thread already reopened the handle - nothing to do */
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = cfile->dentry->d_inode;
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * CIFSSMBOpen and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
729
730 int cifs_close(struct inode *inode, struct file *file)
731 {
732         if (file->private_data != NULL) {
733                 cifsFileInfo_put(file->private_data);
734                 file->private_data = NULL;
735         }
736
737         /* return code from the ->release op is always ignored */
738         return 0;
739 }
740
/*
 * cifs_closedir - VFS ->release handler for directories.
 *
 * Closes an unfinished search on the server (if one is in progress and
 * the handle is still valid), releases the search response buffer, and
 * frees the per-open private data.  Server-side close failures are
 * logged and ignored; always returns 0 for a valid handle.
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cifs_file_list_lock);
	if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
		/* mark invalid under the lock, then close outside it */
		cfile->invalidHandle = true;
		spin_unlock(&cifs_file_list_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cifs_file_list_lock);

	/* release the search response buffer, if still held */
	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
791
792 static struct cifsLockInfo *
793 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
794 {
795         struct cifsLockInfo *lock =
796                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
797         if (!lock)
798                 return lock;
799         lock->offset = offset;
800         lock->length = length;
801         lock->type = type;
802         lock->pid = current->tgid;
803         INIT_LIST_HEAD(&lock->blist);
804         init_waitqueue_head(&lock->block_q);
805         return lock;
806 }
807
/*
 * Wake up every lock request blocked on @lock: each waiter is unlinked
 * from @lock's blocked list and its block_q is signalled so the waiter
 * can retry acquiring the range.
 */
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}
817
818 #define CIFS_LOCK_OP    0
819 #define CIFS_READ_OP    1
820 #define CIFS_WRITE_OP   2
821
/* @rw_check : 0 - no op, 1 - read, 2 - write */
/*
 * Check one handle's lock list for a lock conflicting with the range
 * [offset, offset+length) of @type requested through @cfile.
 *
 * A lock in the list conflicts unless:
 *  - it does not overlap the requested range; or
 *  - (for read/write checks) it is owned by the same thread group on a
 *    fid the server considers equal, except that a shared lock still
 *    blocks a write through the same fid; or
 *  - the request is for a shared lock and the existing lock is either
 *    our own (same tgid, comparable fid) or also shared of equal type.
 *
 * On conflict, stores the offending lock in *conf_lock (if non-NULL)
 * and returns true.  Caller is responsible for holding lock_sem.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* skip locks that do not overlap the requested range */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
853
854 bool
855 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
856                         __u8 type, struct cifsLockInfo **conf_lock,
857                         int rw_check)
858 {
859         bool rc = false;
860         struct cifs_fid_locks *cur;
861         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
862
863         list_for_each_entry(cur, &cinode->llist, llist) {
864                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
865                                                  cfile, conf_lock, rw_check);
866                 if (rc)
867                         break;
868         }
869
870         return rc;
871 }
872
/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        /* read access suffices - we only inspect the cached lock lists */
        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                /* report the conflicting lock back through flock */
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                /* cache is not authoritative - must ask the server */
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}
910
911 static void
912 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
913 {
914         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
915         down_write(&cinode->lock_sem);
916         list_add_tail(&lock->llist, &cfile->llist->locks);
917         up_write(&cinode->lock_sem);
918 }
919
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                /* no conflict and the cache is authoritative - done */
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                /*
                 * Queue ourselves on the conflicting lock's blocked list and
                 * sleep (with lock_sem dropped) until cifs_del_lock_waiters
                 * unlinks us, which makes our blist empty again; then retry.
                 */
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                /* interrupted by a signal - dequeue ourselves and bail out */
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}
966
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        /* posix_test_lock may overwrite fl_type - remember what was asked */
        unsigned char saved_type = flock->fl_type;

        /* only FL_POSIX locks appear in the VFS posix lock list */
        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                /* no local conflict, but the server must still be asked */
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}
995
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if the error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                /* cache is not authoritative - caller must ask the server */
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                /*
                 * A conflicting lock is held - wait for it to go away and
                 * retry, unless the sleep was interrupted by a signal.
                 */
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}
1028
1029 int
1030 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1031 {
1032         unsigned int xid;
1033         int rc = 0, stored_rc;
1034         struct cifsLockInfo *li, *tmp;
1035         struct cifs_tcon *tcon;
1036         unsigned int num, max_num, max_buf;
1037         LOCKING_ANDX_RANGE *buf, *cur;
1038         int types[] = {LOCKING_ANDX_LARGE_FILES,
1039                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1040         int i;
1041
1042         xid = get_xid();
1043         tcon = tlink_tcon(cfile->tlink);
1044
1045         /*
1046          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1047          * and check it for zero before using.
1048          */
1049         max_buf = tcon->ses->server->maxBuf;
1050         if (!max_buf) {
1051                 free_xid(xid);
1052                 return -EINVAL;
1053         }
1054
1055         max_num = (max_buf - sizeof(struct smb_hdr)) /
1056                                                 sizeof(LOCKING_ANDX_RANGE);
1057         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1058         if (!buf) {
1059                 free_xid(xid);
1060                 return -ENOMEM;
1061         }
1062
1063         for (i = 0; i < 2; i++) {
1064                 cur = buf;
1065                 num = 0;
1066                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1067                         if (li->type != types[i])
1068                                 continue;
1069                         cur->Pid = cpu_to_le16(li->pid);
1070                         cur->LengthLow = cpu_to_le32((u32)li->length);
1071                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1072                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1073                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1074                         if (++num == max_num) {
1075                                 stored_rc = cifs_lockv(xid, tcon,
1076                                                        cfile->fid.netfid,
1077                                                        (__u8)li->type, 0, num,
1078                                                        buf);
1079                                 if (stored_rc)
1080                                         rc = stored_rc;
1081                                 cur = buf;
1082                                 num = 0;
1083                         } else
1084                                 cur++;
1085                 }
1086
1087                 if (num) {
1088                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1089                                                (__u8)types[i], 0, num, buf);
1090                         if (stored_rc)
1091                                 rc = stored_rc;
1092                 }
1093         }
1094
1095         kfree(buf);
1096         free_xid(xid);
1097         return rc;
1098 }
1099
/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
        for (lockp = &inode->i_flock; *lockp != NULL; \
             lockp = &(*lockp)->fl_next)

/*
 * Scratch descriptor used to carry one POSIX byte-range lock from the VFS
 * lock list (walked under inode->i_lock) to the server, since network I/O
 * cannot be done while holding that spinlock.
 */
struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};
1113
/*
 * Push every cached POSIX byte-range lock on this file to the server.
 *
 * The VFS lock list is walked under inode->i_lock, which cannot be held
 * across network I/O, so the work is done in three phases:
 *   1) count the FL_POSIX locks and preallocate that many lock_to_push
 *      descriptors (allocation cannot be done under the spinlock);
 *   2) re-walk the list under i_lock and copy each lock's properties
 *      into a descriptor;
 *   3) send each descriptor to the server with CIFSSMBPosixLock.
 * Returns 0 on success, the last server error, or -ENOMEM.
 */
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = cfile->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock, **before;
        unsigned int count = 0, i = 0;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        /* phase 1: count the POSIX locks we will have to push */
        spin_lock(&inode->i_lock);
        cifs_for_each_lock(inode, before) {
                if ((*before)->fl_flags & FL_POSIX)
                        count++;
        }
        spin_unlock(&inode->i_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        /* phase 2: copy lock properties into the preallocated descriptors */
        el = locks_to_send.next;
        spin_lock(&inode->i_lock);
        cifs_for_each_lock(inode, before) {
                flock = *before;
                if ((flock->fl_flags & FL_POSIX) == 0)
                        continue;
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        spin_unlock(&inode->i_lock);

        /* phase 3: send each lock to the server, free descriptors as we go */
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        /* allocation failed part-way - release what we already allocated */
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}
1202
/*
 * Push all locally cached byte-range locks for @cfile to the server,
 * choosing posix or mandatory semantics to match the mount, and then
 * mark the inode's brlock cache as no longer authoritative. A no-op if
 * the cache has already been flushed. Returns the push result or 0.
 */
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need a write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                /* someone else already pushed the locks - nothing to do */
                up_write(&cinode->lock_sem);
                return rc;
        }

        /* posix brlocks are used only on unix-capable, non-NOPOSIXBRL mounts */
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}
1229
1230 static void
1231 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1232                 bool *wait_flag, struct TCP_Server_Info *server)
1233 {
1234         if (flock->fl_flags & FL_POSIX)
1235                 cifs_dbg(FYI, "Posix\n");
1236         if (flock->fl_flags & FL_FLOCK)
1237                 cifs_dbg(FYI, "Flock\n");
1238         if (flock->fl_flags & FL_SLEEP) {
1239                 cifs_dbg(FYI, "Blocking lock\n");
1240                 *wait_flag = true;
1241         }
1242         if (flock->fl_flags & FL_ACCESS)
1243                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1244         if (flock->fl_flags & FL_LEASE)
1245                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1246         if (flock->fl_flags &
1247             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1248                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1249                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1250
1251         *type = server->vals->large_lock_type;
1252         if (flock->fl_type == F_WRLCK) {
1253                 cifs_dbg(FYI, "F_WRLCK\n");
1254                 *type |= server->vals->exclusive_lock_type;
1255                 *lock = 1;
1256         } else if (flock->fl_type == F_UNLCK) {
1257                 cifs_dbg(FYI, "F_UNLCK\n");
1258                 *type |= server->vals->unlock_lock_type;
1259                 *unlock = 1;
1260                 /* Check if unlock includes more than one lock range */
1261         } else if (flock->fl_type == F_RDLCK) {
1262                 cifs_dbg(FYI, "F_RDLCK\n");
1263                 *type |= server->vals->shared_lock_type;
1264                 *lock = 1;
1265         } else if (flock->fl_type == F_EXLCK) {
1266                 cifs_dbg(FYI, "F_EXLCK\n");
1267                 *type |= server->vals->exclusive_lock_type;
1268                 *lock = 1;
1269         } else if (flock->fl_type == F_SHLCK) {
1270                 cifs_dbg(FYI, "F_SHLCK\n");
1271                 *type |= server->vals->shared_lock_type;
1272                 *lock = 1;
1273         } else
1274                 cifs_dbg(FYI, "Unknown type of lock\n");
1275 }
1276
/*
 * Handle F_GETLK: determine whether the requested range could be locked.
 *
 * For posix mounts, check the local cache and then ask the server.
 * For mandatory locking there is no "test" operation on the wire, so a
 * negative cache result is confirmed by actually taking (then releasing)
 * the lock on the server: success means no conflict (fl_type = F_UNLCK);
 * failure of an exclusive probe is retried as shared to distinguish a
 * read-lock conflict (F_RDLCK) from a write-lock conflict (F_WRLCK).
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                /* rc == 0: local cache answered - no need to ask the server */
                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        /* rc == 0: conflict found in the local cache, flock updated */
        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                /* probe succeeded - undo it and report the range as free */
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        /* a shared probe that failed can only mean a write-lock conflict */
        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        /* exclusive probe failed - retry as shared to classify the conflict */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}
1344
1345 void
1346 cifs_move_llist(struct list_head *source, struct list_head *dest)
1347 {
1348         struct list_head *li, *tmp;
1349         list_for_each_safe(li, tmp, source)
1350                 list_move(li, dest);
1351 }
1352
1353 void
1354 cifs_free_llist(struct list_head *llist)
1355 {
1356         struct cifsLockInfo *li, *tmp;
1357         list_for_each_entry_safe(li, tmp, llist, llist) {
1358                 cifs_del_lock_waiters(li);
1359                 list_del(&li->llist);
1360                 kfree(li);
1361         }
1362 }
1363
1364 int
1365 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1366                   unsigned int xid)
1367 {
1368         int rc = 0, stored_rc;
1369         int types[] = {LOCKING_ANDX_LARGE_FILES,
1370                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1371         unsigned int i;
1372         unsigned int max_num, num, max_buf;
1373         LOCKING_ANDX_RANGE *buf, *cur;
1374         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1375         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1376         struct cifsLockInfo *li, *tmp;
1377         __u64 length = 1 + flock->fl_end - flock->fl_start;
1378         struct list_head tmp_llist;
1379
1380         INIT_LIST_HEAD(&tmp_llist);
1381
1382         /*
1383          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1384          * and check it for zero before using.
1385          */
1386         max_buf = tcon->ses->server->maxBuf;
1387         if (!max_buf)
1388                 return -EINVAL;
1389
1390         max_num = (max_buf - sizeof(struct smb_hdr)) /
1391                                                 sizeof(LOCKING_ANDX_RANGE);
1392         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1393         if (!buf)
1394                 return -ENOMEM;
1395
1396         down_write(&cinode->lock_sem);
1397         for (i = 0; i < 2; i++) {
1398                 cur = buf;
1399                 num = 0;
1400                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1401                         if (flock->fl_start > li->offset ||
1402                             (flock->fl_start + length) <
1403                             (li->offset + li->length))
1404                                 continue;
1405                         if (current->tgid != li->pid)
1406                                 continue;
1407                         if (types[i] != li->type)
1408                                 continue;
1409                         if (cinode->can_cache_brlcks) {
1410                                 /*
1411                                  * We can cache brlock requests - simply remove
1412                                  * a lock from the file's list.
1413                                  */
1414                                 list_del(&li->llist);
1415                                 cifs_del_lock_waiters(li);
1416                                 kfree(li);
1417                                 continue;
1418                         }
1419                         cur->Pid = cpu_to_le16(li->pid);
1420                         cur->LengthLow = cpu_to_le32((u32)li->length);
1421                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1422                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1423                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1424                         /*
1425                          * We need to save a lock here to let us add it again to
1426                          * the file's list if the unlock range request fails on
1427                          * the server.
1428                          */
1429                         list_move(&li->llist, &tmp_llist);
1430                         if (++num == max_num) {
1431                                 stored_rc = cifs_lockv(xid, tcon,
1432                                                        cfile->fid.netfid,
1433                                                        li->type, num, 0, buf);
1434                                 if (stored_rc) {
1435                                         /*
1436                                          * We failed on the unlock range
1437                                          * request - add all locks from the tmp
1438                                          * list to the head of the file's list.
1439                                          */
1440                                         cifs_move_llist(&tmp_llist,
1441                                                         &cfile->llist->locks);
1442                                         rc = stored_rc;
1443                                 } else
1444                                         /*
1445                                          * The unlock range request succeed -
1446                                          * free the tmp list.
1447                                          */
1448                                         cifs_free_llist(&tmp_llist);
1449                                 cur = buf;
1450                                 num = 0;
1451                         } else
1452                                 cur++;
1453                 }
1454                 if (num) {
1455                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1456                                                types[i], num, 0, buf);
1457                         if (stored_rc) {
1458                                 cifs_move_llist(&tmp_llist,
1459                                                 &cfile->llist->locks);
1460                                 rc = stored_rc;
1461                         } else
1462                                 cifs_free_llist(&tmp_llist);
1463                 }
1464         }
1465
1466         up_write(&cinode->lock_sem);
1467         kfree(buf);
1468         return rc;
1469 }
1470
/*
 * Handle F_SETLK/F_SETLKW: set or clear a byte-range lock.
 *
 * Posix mounts first try the local cache, then send the request to the
 * server. For mandatory locking, a lock is checked against the cache
 * (cifs_lock_add_if), sent to the server if needed, and only added to
 * the cached list once the server accepts it; unlocks are delegated to
 * the server's mand_unlock_range operation.
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, int lock, int unlock,
           unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct inode *inode = cfile->dentry->d_inode;

        if (posix_lck) {
                int posix_lock_type;

                /* rc == 0: satisfied from cache; rc < 0: error - both final */
                rc = cifs_posix_lock_set(file, flock);
                if (!rc || rc < 0)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;

                if (unlock == 1)
                        posix_lock_type = CIFS_UNLCK;

                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
                                      current->tgid, flock->fl_start, length,
                                      NULL, posix_lock_type, wait_flag);
                goto out;
        }

        if (lock) {
                struct cifsLockInfo *lock;

                lock = cifs_lock_init(flock->fl_start, length, type);
                if (!lock)
                        return -ENOMEM;

                /* rc == 0: lock cached locally; rc < 0: conflict, no wait */
                rc = cifs_lock_add_if(cfile, lock, wait_flag);
                if (rc < 0) {
                        kfree(lock);
                        return rc;
                }
                if (!rc)
                        goto out;

                /*
                 * Windows 7 server can delay breaking lease from read to None
                 * if we set a byte-range lock on a file - break it explicitly
                 * before sending the lock to the server to be sure the next
                 * read won't conflict with non-overlapted locks due to
                 * pagereading.
                 */
                if (!CIFS_I(inode)->clientCanCacheAll &&
                                        CIFS_I(inode)->clientCanCacheRead) {
                        cifs_invalidate_mapping(inode);
                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
                                 inode);
                        CIFS_I(inode)->clientCanCacheRead = false;
                }

                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 1, 0, wait_flag);
                if (rc) {
                        /* server refused - do not cache the lock */
                        kfree(lock);
                        return rc;
                }

                cifs_lock_add(cfile, lock);
        } else if (unlock)
                rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
        /* keep the VFS posix lock state in sync with what we did */
        if (flock->fl_flags & FL_POSIX)
                posix_lock_file_wait(file, flock);
        return rc;
}
1550
1551 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1552 {
1553         int rc, xid;
1554         int lock = 0, unlock = 0;
1555         bool wait_flag = false;
1556         bool posix_lck = false;
1557         struct cifs_sb_info *cifs_sb;
1558         struct cifs_tcon *tcon;
1559         struct cifsInodeInfo *cinode;
1560         struct cifsFileInfo *cfile;
1561         __u16 netfid;
1562         __u32 type;
1563
1564         rc = -EACCES;
1565         xid = get_xid();
1566
1567         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1568                  cmd, flock->fl_flags, flock->fl_type,
1569                  flock->fl_start, flock->fl_end);
1570
1571         cfile = (struct cifsFileInfo *)file->private_data;
1572         tcon = tlink_tcon(cfile->tlink);
1573
1574         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1575                         tcon->ses->server);
1576
1577         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1578         netfid = cfile->fid.netfid;
1579         cinode = CIFS_I(file_inode(file));
1580
1581         if (cap_unix(tcon->ses) &&
1582             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1583             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1584                 posix_lck = true;
1585         /*
1586          * BB add code here to normalize offset and length to account for
1587          * negative length which we can not accept over the wire.
1588          */
1589         if (IS_GETLK(cmd)) {
1590                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1591                 free_xid(xid);
1592                 return rc;
1593         }
1594
1595         if (!lock && !unlock) {
1596                 /*
1597                  * if no lock or unlock then nothing to do since we do not
1598                  * know what it is
1599                  */
1600                 free_xid(xid);
1601                 return -EOPNOTSUPP;
1602         }
1603
1604         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1605                         xid);
1606         free_xid(xid);
1607         return rc;
1608 }
1609
1610 /*
1611  * update the file size (if needed) after a write. Should be called with
1612  * the inode->i_lock held
1613  */
1614 void
1615 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1616                       unsigned int bytes_written)
1617 {
1618         loff_t end_of_write = offset + bytes_written;
1619
1620         if (end_of_write > cifsi->server_eof)
1621                 cifsi->server_eof = end_of_write;
1622 }
1623
/*
 * Synchronously write @write_size bytes from @write_data to the server
 * through @open_file, starting at *@offset.  Retries each chunk on
 * -EAGAIN, reopening an invalidated handle first.  On success *@offset
 * is advanced and the cached server EOF / i_size are updated under
 * inode->i_lock.  Returns the number of bytes written, or a negative
 * errno only if nothing at all could be sent.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n",
		 write_size, *offset, dentry->d_name.name);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	/* protocol variant must provide a synchronous write op */
	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			/* never send more than one wsize per request */
			len = min((size_t)cifs_sb->wsize,
				  write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, open_file, &io_parms,
						     &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			/* report a short write if anything already went out,
			   otherwise propagate the error to the caller */
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			/* grow the cached server EOF under i_lock, then
			   advance the caller's offset */
			spin_lock(&dentry->d_inode->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&dentry->d_inode->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		/* extend i_size if this write grew the file */
		spin_lock(&dentry->d_inode->i_lock);
		if (*offset > dentry->d_inode->i_size)
			i_size_write(dentry->d_inode, *offset);
		spin_unlock(&dentry->d_inode->i_lock);
	}
	mark_inode_dirty_sync(dentry->d_inode);
	free_xid(xid);
	return total_written;
}
1708
1709 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1710                                         bool fsuid_only)
1711 {
1712         struct cifsFileInfo *open_file = NULL;
1713         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1714
1715         /* only filter by fsuid on multiuser mounts */
1716         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1717                 fsuid_only = false;
1718
1719         spin_lock(&cifs_file_list_lock);
1720         /* we could simply get the first_list_entry since write-only entries
1721            are always at the end of the list but since the first entry might
1722            have a close pending, we go through the whole list */
1723         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1724                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1725                         continue;
1726                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1727                         if (!open_file->invalidHandle) {
1728                                 /* found a good file */
1729                                 /* lock it so it will not be closed on us */
1730                                 cifsFileInfo_get_locked(open_file);
1731                                 spin_unlock(&cifs_file_list_lock);
1732                                 return open_file;
1733                         } /* else might as well continue, and look for
1734                              another, or simply have the caller reopen it
1735                              again rather than trying to fix this handle */
1736                 } else /* write only file */
1737                         break; /* write only files are last so must be done */
1738         }
1739         spin_unlock(&cifs_file_list_lock);
1740         return NULL;
1741 }
1742
1743 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1744                                         bool fsuid_only)
1745 {
1746         struct cifsFileInfo *open_file, *inv_file = NULL;
1747         struct cifs_sb_info *cifs_sb;
1748         bool any_available = false;
1749         int rc;
1750         unsigned int refind = 0;
1751
1752         /* Having a null inode here (because mapping->host was set to zero by
1753         the VFS or MM) should not happen but we had reports of on oops (due to
1754         it being zero) during stress testcases so we need to check for it */
1755
1756         if (cifs_inode == NULL) {
1757                 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1758                 dump_stack();
1759                 return NULL;
1760         }
1761
1762         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1763
1764         /* only filter by fsuid on multiuser mounts */
1765         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1766                 fsuid_only = false;
1767
1768         spin_lock(&cifs_file_list_lock);
1769 refind_writable:
1770         if (refind > MAX_REOPEN_ATT) {
1771                 spin_unlock(&cifs_file_list_lock);
1772                 return NULL;
1773         }
1774         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1775                 if (!any_available && open_file->pid != current->tgid)
1776                         continue;
1777                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1778                         continue;
1779                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1780                         if (!open_file->invalidHandle) {
1781                                 /* found a good writable file */
1782                                 cifsFileInfo_get_locked(open_file);
1783                                 spin_unlock(&cifs_file_list_lock);
1784                                 return open_file;
1785                         } else {
1786                                 if (!inv_file)
1787                                         inv_file = open_file;
1788                         }
1789                 }
1790         }
1791         /* couldn't find useable FH with same pid, try any available */
1792         if (!any_available) {
1793                 any_available = true;
1794                 goto refind_writable;
1795         }
1796
1797         if (inv_file) {
1798                 any_available = false;
1799                 cifsFileInfo_get_locked(inv_file);
1800         }
1801
1802         spin_unlock(&cifs_file_list_lock);
1803
1804         if (inv_file) {
1805                 rc = cifs_reopen_file(inv_file, false);
1806                 if (!rc)
1807                         return inv_file;
1808                 else {
1809                         spin_lock(&cifs_file_list_lock);
1810                         list_move_tail(&inv_file->flist,
1811                                         &cifs_inode->openFileList);
1812                         spin_unlock(&cifs_file_list_lock);
1813                         cifsFileInfo_put(inv_file);
1814                         spin_lock(&cifs_file_list_lock);
1815                         ++refind;
1816                         goto refind_writable;
1817                 }
1818         }
1819
1820         return NULL;
1821 }
1822
1823 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1824 {
1825         struct address_space *mapping = page->mapping;
1826         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1827         char *write_data;
1828         int rc = -EFAULT;
1829         int bytes_written = 0;
1830         struct inode *inode;
1831         struct cifsFileInfo *open_file;
1832
1833         if (!mapping || !mapping->host)
1834                 return -EFAULT;
1835
1836         inode = page->mapping->host;
1837
1838         offset += (loff_t)from;
1839         write_data = kmap(page);
1840         write_data += from;
1841
1842         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1843                 kunmap(page);
1844                 return -EIO;
1845         }
1846
1847         /* racing with truncate? */
1848         if (offset > mapping->host->i_size) {
1849                 kunmap(page);
1850                 return 0; /* don't care */
1851         }
1852
1853         /* check to make sure that we are not extending the file */
1854         if (mapping->host->i_size - offset < (loff_t)to)
1855                 to = (unsigned)(mapping->host->i_size - offset);
1856
1857         open_file = find_writable_file(CIFS_I(mapping->host), false);
1858         if (open_file) {
1859                 bytes_written = cifs_write(open_file, open_file->pid,
1860                                            write_data, to - from, &offset);
1861                 cifsFileInfo_put(open_file);
1862                 /* Does mm or vfs already set times? */
1863                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1864                 if ((bytes_written > 0) && (offset))
1865                         rc = 0;
1866                 else if (bytes_written < 0)
1867                         rc = bytes_written;
1868         } else {
1869                 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1870                 rc = -EIO;
1871         }
1872
1873         kunmap(page);
1874         return rc;
1875 }
1876
/*
 * ->writepages: gather runs of contiguous dirty pages (up to one wsize
 * worth per request), flip them to writeback state and hand each run to
 * the server via a single async write.  Falls back to the generic
 * one-page-at-a-time path when wsize is smaller than a page.
 */
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	struct TCP_Server_Info *server;
	struct page *page;
	int rc = 0;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->wsize < PAGE_CACHE_SIZE)
		return generic_writepages(mapping, wbc);

	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		scanned = true;
	}
retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages;
		pgoff_t next = 0, tofind;
		struct page **pages;

		/* look for at most one wsize worth of pages this round */
		tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
				end - index) + 1;

		wdata = cifs_writedata_alloc((unsigned int)tofind,
					     cifs_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			break;
		}

		/*
		 * find_get_pages_tag seems to return a max of 256 on each
		 * iteration, so we must call it several times in order to
		 * fill the array or the wsize is effectively limited to
		 * 256 * PAGE_CACHE_SIZE.
		 */
		found_pages = 0;
		pages = wdata->pages;
		do {
			nr_pages = find_get_pages_tag(mapping, &index,
							PAGECACHE_TAG_DIRTY,
							tofind, pages);
			found_pages += nr_pages;
			tofind -= nr_pages;
			pages += nr_pages;
		} while (nr_pages && tofind && index <= end);

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			break;
		}

		/* keep only the leading run of contiguous, lockable pages */
		nr_pages = 0;
		for (i = 0; i < found_pages; i++) {
			page = wdata->pages[i];
			/*
			 * At this point we hold neither mapping->tree_lock nor
			 * lock on the page itself: the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or even
			 * swizzled back from swapper_space to tmpfs file
			 * mapping
			 */

			if (nr_pages == 0)
				lock_page(page);
			else if (!trylock_page(page))
				break;

			if (unlikely(page->mapping != mapping)) {
				unlock_page(page);
				break;
			}

			if (!wbc->range_cyclic && page->index > end) {
				done = true;
				unlock_page(page);
				break;
			}

			if (next && (page->index != next)) {
				/* Not next consecutive page */
				unlock_page(page);
				break;
			}

			if (wbc->sync_mode != WB_SYNC_NONE)
				wait_on_page_writeback(page);

			if (PageWriteback(page) ||
					!clear_page_dirty_for_io(page)) {
				unlock_page(page);
				break;
			}

			/*
			 * This actually clears the dirty bit in the radix tree.
			 * See cifs_writepage() for more commentary.
			 */
			set_page_writeback(page);

			/* page entirely beyond EOF: nothing to send */
			if (page_offset(page) >= i_size_read(mapping->host)) {
				done = true;
				unlock_page(page);
				end_page_writeback(page);
				break;
			}

			wdata->pages[i] = page;
			next = page->index + 1;
			++nr_pages;
		}

		/* reset index to refind any pages skipped */
		if (nr_pages == 0)
			index = wdata->pages[0]->index + 1;

		/* put any pages we aren't going to use */
		for (i = nr_pages; i < found_pages; i++) {
			page_cache_release(wdata->pages[i]);
			wdata->pages[i] = NULL;
		}

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			continue;
		}

		/* last page may straddle EOF, so trim tailsz accordingly */
		wdata->sync_mode = wbc->sync_mode;
		wdata->nr_pages = nr_pages;
		wdata->offset = page_offset(wdata->pages[0]);
		wdata->pagesz = PAGE_CACHE_SIZE;
		wdata->tailsz =
			min(i_size_read(mapping->host) -
			    page_offset(wdata->pages[nr_pages - 1]),
			    (loff_t)PAGE_CACHE_SIZE);
		wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
					wdata->tailsz;

		/*
		 * Grab a writable handle for the send; for data-integrity
		 * (WB_SYNC_ALL) writeback keep retrying with a fresh handle
		 * while the async write reports -EAGAIN.
		 */
		do {
			if (wdata->cfile != NULL)
				cifsFileInfo_put(wdata->cfile);
			wdata->cfile = find_writable_file(CIFS_I(mapping->host),
							  false);
			if (!wdata->cfile) {
				cifs_dbg(VFS, "No writable handles for inode\n");
				rc = -EBADF;
				break;
			}
			wdata->pid = wdata->cfile->pid;
			server = tlink_tcon(wdata->cfile->tlink)->ses->server;
			rc = server->ops->async_writev(wdata);
		} while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);

		for (i = 0; i < nr_pages; ++i)
			unlock_page(wdata->pages[i]);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			for (i = 0; i < nr_pages; ++i) {
				if (rc == -EAGAIN)
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				page_cache_release(wdata->pages[i]);
			}
			if (rc != -EAGAIN)
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	return rc;
}
2086
/*
 * Write one (already locked) page back to the server.  For
 * data-integrity writeback (WB_SYNC_ALL) the write is retried until it
 * stops returning -EAGAIN; otherwise an -EAGAIN result just redirties
 * the page for a later pass.  The caller retains the page lock.
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
/* BB add check for wbc flags */
	page_cache_get(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
	if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
		goto retry_write;
	else if (rc == -EAGAIN)
		redirty_page_for_writepage(wbc, page);
	else if (rc != 0)
		SetPageError(page);
	else
		SetPageUptodate(page);
	end_page_writeback(page);
	page_cache_release(page);
	free_xid(xid);
	return rc;
}
2125
/* ->writepage: write the page out, then drop the page lock we were given. */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);

	return rc;
}
2132
/*
 * ->write_end: commit @copied bytes at @pos.  An uptodate page is just
 * dirtied for later writeback; a non-uptodate page is written straight
 * to the server via cifs_write() using this file's handle.  Updates
 * i_size when the write grew the file and releases the page lock and
 * the reference taken by write_begin.  Returns bytes accepted or a
 * negative errno.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	/* forward the opener's pid when mounted with rwpidforward */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	/* PageChecked flags a page write_begin left not-read-in; it is
	   uptodate now only if the whole requested range was copied */
	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		/* extend the cached file size under i_lock if we grew it */
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	page_cache_release(page);

	return rc;
}
2193
/*
 * fsync for "strict" cache mode.  Dirty pages in the range are written
 * back and waited on before i_mutex is taken; the pagecache is then
 * invalidated when the client may no longer cache reads
 * (clientCanCacheRead clear), and finally the server is asked to flush
 * the file unless the mount set CIFS_MOUNT_NOSSYNC.  Invalidate-phase
 * errors are deliberately not propagated.
 */
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
		 file->f_path.dentry->d_name.name, datasync);

	if (!CIFS_I(inode)->clientCanCacheRead) {
		rc = cifs_invalidate_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}
2236
2237 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2238 {
2239         unsigned int xid;
2240         int rc = 0;
2241         struct cifs_tcon *tcon;
2242         struct TCP_Server_Info *server;
2243         struct cifsFileInfo *smbfile = file->private_data;
2244         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2245         struct inode *inode = file->f_mapping->host;
2246
2247         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2248         if (rc)
2249                 return rc;
2250         mutex_lock(&inode->i_mutex);
2251
2252         xid = get_xid();
2253
2254         cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
2255                  file->f_path.dentry->d_name.name, datasync);
2256
2257         tcon = tlink_tcon(smbfile->tlink);
2258         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2259                 server = tcon->ses->server;
2260                 if (server->ops->flush)
2261                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2262                 else
2263                         rc = -ENOSYS;
2264         }
2265
2266         free_xid(xid);
2267         mutex_unlock(&inode->i_mutex);
2268         return rc;
2269 }
2270
2271 /*
2272  * As file closes, flush all cached write data for this inode checking
2273  * for write behind errors.
2274  */
2275 int cifs_flush(struct file *file, fl_owner_t id)
2276 {
2277         struct inode *inode = file_inode(file);
2278         int rc = 0;
2279
2280         if (file->f_mode & FMODE_WRITE)
2281                 rc = filemap_write_and_wait(inode->i_mapping);
2282
2283         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2284
2285         return rc;
2286 }
2287
2288 static int
2289 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2290 {
2291         int rc = 0;
2292         unsigned long i;
2293
2294         for (i = 0; i < num_pages; i++) {
2295                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2296                 if (!pages[i]) {
2297                         /*
2298                          * save number of pages we have already allocated and
2299                          * return with ENOMEM error
2300                          */
2301                         num_pages = i;
2302                         rc = -ENOMEM;
2303                         break;
2304                 }
2305         }
2306
2307         if (rc) {
2308                 for (i = 0; i < num_pages; i++)
2309                         put_page(pages[i]);
2310         }
2311         return rc;
2312 }
2313
2314 static inline
2315 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2316 {
2317         size_t num_pages;
2318         size_t clen;
2319
2320         clen = min_t(const size_t, len, wsize);
2321         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2322
2323         if (cur_len)
2324                 *cur_len = clen;
2325
2326         return num_pages;
2327 }
2328
/*
 * Completion work for an uncached async write: update the cached server
 * EOF and i_size under inode->i_lock, signal any waiter via wdata->done,
 * then drop the page references and the wdata reference.  Pages are
 * kept when the result is -EAGAIN so the write can be resent.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	int i;
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = wdata->cfile->dentry->d_inode;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);

	/* on -EAGAIN the pages are still needed for the retry */
	if (wdata->result != -EAGAIN) {
		for (i = 0; i < wdata->nr_pages; i++)
			put_page(wdata->pages[i]);
	}

	kref_put(&wdata->refcount, cifs_writedata_release);
}
2353
/* attempt to send write to server, retry on any -EAGAIN errors */
static int
cifs_uncached_retry_writev(struct cifs_writedata *wdata)
{
	int rc;
	struct TCP_Server_Info *server;

	server = tlink_tcon(wdata->cfile->tlink)->ses->server;

	do {
		if (wdata->cfile->invalidHandle) {
			/*
			 * Reopen without flushing cached data (can_flush ==
			 * false) - flushing from the write path can
			 * deadlock; the read-side helper
			 * cifs_retry_async_readv passes true instead.
			 */
			rc = cifs_reopen_file(wdata->cfile, false);
			if (rc != 0)
				continue;
		}
		rc = server->ops->async_writev(wdata);
	} while (rc == -EAGAIN);

	return rc;
}
2374
/*
 * Send the data in the passed iovec directly to the server, bypassing the
 * page cache: split the request into wsize-sized chunks, copy the user
 * data into freshly allocated pages, issue each chunk via the async
 * writev op, then wait for the replies in order of increasing offset.
 *
 * Returns the number of bytes written (and advances *poffset), or a
 * negative error code if nothing was written.
 */
static ssize_t
cifs_iovec_write(struct file *file, const struct iovec *iov,
		 unsigned long nr_segs, loff_t *poffset)
{
	unsigned long nr_pages, i;
	size_t copied, len, cur_len;
	ssize_t total_written = 0;
	loff_t offset;
	struct iov_iter it;
	struct cifsFileInfo *open_file;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_writedata *wdata, *tmp;
	struct list_head wdata_list;
	int rc;
	pid_t pid;

	len = iov_length(iov, nr_segs);
	if (!len)
		return 0;

	rc = generic_write_checks(file, poffset, &len, 0);
	if (rc)
		return rc;

	INIT_LIST_HEAD(&wdata_list);
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	offset = *poffset;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	iov_iter_init(&it, iov, nr_segs, len, 0);
	do {
		size_t save_len;

		nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
		wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			break;
		}

		rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
		if (rc) {
			kfree(wdata);
			break;
		}

		/* copy this chunk of user data into the pages */
		save_len = cur_len;
		for (i = 0; i < nr_pages; i++) {
			copied = min_t(const size_t, cur_len, PAGE_SIZE);
			copied = iov_iter_copy_from_user(wdata->pages[i], &it,
							 0, copied);
			cur_len -= copied;
			iov_iter_advance(&it, copied);
		}
		/* cur_len now holds the number of bytes actually copied */
		cur_len = save_len - cur_len;

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		/*
		 * NOTE(review): nr_pages was sized from the requested
		 * length; if the user copy above came up short, this tailsz
		 * computation can underflow - verify short-copy handling.
		 */
		wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		rc = cifs_uncached_retry_writev(wdata);
		if (rc) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			break;
		}

		list_add_tail(&wdata->list, &wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&wdata_list))
		rc = 0;

	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit or we get a fatal signal
	 * while waiting, then return without waiting for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
		if (!rc) {
			/* FIXME: freezable too? */
			rc = wait_for_completion_killable(&wdata->done);
			if (rc)
				rc = -EINTR;
			else if (wdata->result)
				rc = wdata->result;
			else
				total_written += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				rc = cifs_uncached_retry_writev(wdata);
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_writedata_release);
	}

	if (total_written > 0)
		*poffset += total_written;

	cifs_stats_bytes_written(tcon, total_written);
	return total_written ? total_written : (ssize_t)rc;
}
2504
2505 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2506                                 unsigned long nr_segs, loff_t pos)
2507 {
2508         ssize_t written;
2509         struct inode *inode;
2510
2511         inode = file_inode(iocb->ki_filp);
2512
2513         /*
2514          * BB - optimize the way when signing is disabled. We can drop this
2515          * extra memory-to-memory copying and use iovec buffers for constructing
2516          * write request.
2517          */
2518
2519         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2520         if (written > 0) {
2521                 CIFS_I(inode)->invalid_mapping = true;
2522                 iocb->ki_pos = pos;
2523         }
2524
2525         return written;
2526 }
2527
/*
 * Write through the page cache after verifying that no mandatory
 * byte-range lock forbids writing to this range. lock_sem is held in read
 * mode across the whole operation (including the sync below) so nobody can
 * add a conflicting brlock mid-write.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, const struct iovec *iov,
	    unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc = -EACCES;

	BUG_ON(iocb->ki_pos != pos);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
				     server->vals->exclusive_lock_type, NULL,
				     CIFS_WRITE_OP)) {
		mutex_lock(&inode->i_mutex);
		rc = __generic_file_aio_write(iocb, iov, nr_segs,
					       &iocb->ki_pos);
		mutex_unlock(&inode->i_mutex);
	}

	/* for O_SYNC-style writes, flush the written range to the server */
	if (rc > 0 || rc == -EIOCBQUEUED) {
		ssize_t err;

		err = generic_write_sync(file, pos, rc);
		if (err < 0 && rc > 0)
			rc = err;
	}

	up_read(&cinode->lock_sem);
	return rc;
}
2566
/*
 * Strict cache mode write entry point. With an exclusive (cache-all)
 * oplock we may write through the page cache; otherwise the data is sent
 * straight to the server via cifs_user_writev(), and any level II
 * (read-caching) oplock is dropped so stale cached data is not read back.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
		   unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	if (cinode->clientCanCacheAll) {
		/*
		 * With POSIX (unix extension) locking there are no mandatory
		 * brlocks to check, so the generic path is safe.
		 */
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		    && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
			return generic_file_aio_write(iocb, iov, nr_segs, pos);
		return cifs_writev(iocb, iov, nr_segs, pos);
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause a error with mandatory locks on
	 * these pages but not on the region from pos to ppos+len-1.
	 */
	written = cifs_user_writev(iocb, iov, nr_segs, pos);
	if (written > 0 && cinode->clientCanCacheRead) {
		/*
		 * Windows 7 server can delay breaking level2 oplock if a write
		 * request comes - break it on the client to prevent reading
		 * an old data.
		 */
		cifs_invalidate_mapping(inode);
		cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
			 inode);
		cinode->clientCanCacheRead = false;
	}
	return written;
}
2606
2607 static struct cifs_readdata *
2608 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2609 {
2610         struct cifs_readdata *rdata;
2611
2612         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2613                         GFP_KERNEL);
2614         if (rdata != NULL) {
2615                 kref_init(&rdata->refcount);
2616                 INIT_LIST_HEAD(&rdata->list);
2617                 init_completion(&rdata->done);
2618                 INIT_WORK(&rdata->work, complete);
2619         }
2620
2621         return rdata;
2622 }
2623
2624 void
2625 cifs_readdata_release(struct kref *refcount)
2626 {
2627         struct cifs_readdata *rdata = container_of(refcount,
2628                                         struct cifs_readdata, refcount);
2629
2630         if (rdata->cfile)
2631                 cifsFileInfo_put(rdata->cfile);
2632
2633         kfree(rdata);
2634 }
2635
2636 static int
2637 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2638 {
2639         int rc = 0;
2640         struct page *page;
2641         unsigned int i;
2642
2643         for (i = 0; i < nr_pages; i++) {
2644                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2645                 if (!page) {
2646                         rc = -ENOMEM;
2647                         break;
2648                 }
2649                 rdata->pages[i] = page;
2650         }
2651
2652         if (rc) {
2653                 for (i = 0; i < nr_pages; i++) {
2654                         put_page(rdata->pages[i]);
2655                         rdata->pages[i] = NULL;
2656                 }
2657         }
2658         return rc;
2659 }
2660
2661 static void
2662 cifs_uncached_readdata_release(struct kref *refcount)
2663 {
2664         struct cifs_readdata *rdata = container_of(refcount,
2665                                         struct cifs_readdata, refcount);
2666         unsigned int i;
2667
2668         for (i = 0; i < rdata->nr_pages; i++) {
2669                 put_page(rdata->pages[i]);
2670                 rdata->pages[i] = NULL;
2671         }
2672         cifs_readdata_release(refcount);
2673 }
2674
/* issue an async read, reopening invalid handles and retrying on -EAGAIN */
static int
cifs_retry_async_readv(struct cifs_readdata *rdata)
{
	int rc;
	struct TCP_Server_Info *server;

	server = tlink_tcon(rdata->cfile->tlink)->ses->server;

	do {
		if (rdata->cfile->invalidHandle) {
			/*
			 * Unlike the write-side helper, reopen with
			 * can_flush == true here.
			 */
			rc = cifs_reopen_file(rdata->cfile, true);
			if (rc != 0)
				continue;
		}
		rc = server->ops->async_readv(rdata);
	} while (rc == -EAGAIN);

	return rc;
}
2694
2695 /**
2696  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2697  * @rdata:      the readdata response with list of pages holding data
2698  * @iov:        vector in which we should copy the data
2699  * @nr_segs:    number of segments in vector
2700  * @offset:     offset into file of the first iovec
2701  * @copied:     used to return the amount of data copied to the iov
2702  *
2703  * This function copies data from a list of pages in a readdata response into
2704  * an array of iovecs. It will first calculate where the data should go
2705  * based on the info in the readdata and then copy the data into that spot.
2706  */
static ssize_t
cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
			unsigned long nr_segs, loff_t offset, ssize_t *copied)
{
	int rc = 0;
	struct iov_iter ii;
	/* where this rdata's chunk lands within the overall request */
	size_t pos = rdata->offset - offset;
	ssize_t remaining = rdata->bytes;
	unsigned char *pdata;
	unsigned int i;

	/* set up iov_iter and advance to the correct offset */
	iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
	iov_iter_advance(&ii, pos);

	*copied = 0;
	for (i = 0; i < rdata->nr_pages; i++) {
		ssize_t copy;
		struct page *page = rdata->pages[i];

		/* copy a whole page or whatever's left */
		copy = min_t(ssize_t, remaining, PAGE_SIZE);

		/* ...but limit it to whatever space is left in the iov */
		copy = min_t(ssize_t, copy, iov_iter_count(&ii));

		/* go while there's data to be copied and no errors */
		if (copy && !rc) {
			pdata = kmap(page);
			/* a fault here sets rc and stops further copying */
			rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
						(int)copy);
			kunmap(page);
			if (!rc) {
				*copied += copy;
				remaining -= copy;
				iov_iter_advance(&ii, copy);
			}
		}
	}

	return rc;
}
2749
/*
 * Work handler run when an uncached read finishes: wake the waiter in
 * cifs_iovec_read(), then drop this work item's reference (the pages are
 * freed only with the last reference).
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
2759
/*
 * Receive @len bytes from the server socket into rdata's pages for an
 * uncached read. A final partial page is zero-filled past the data and
 * its valid length recorded in rdata->tailsz; pages beyond the data are
 * released and dropped from rdata. Returns the number of bytes read, or
 * the socket error if nothing was read.
 */
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			struct cifs_readdata *rdata, unsigned int len)
{
	int total_read = 0, result = 0;
	unsigned int i;
	unsigned int nr_pages = rdata->nr_pages;
	struct kvec iov;

	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];

		if (len >= PAGE_SIZE) {
			/* enough data to fill the page */
			iov.iov_base = kmap(page);
			iov.iov_len = PAGE_SIZE;
			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
				 i, iov.iov_base, iov.iov_len);
			len -= PAGE_SIZE;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			iov.iov_base = kmap(page);
			iov.iov_len = len;
			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
				 i, iov.iov_base, iov.iov_len);
			memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
			rdata->tailsz = len;
			len = 0;
		} else {
			/* no need to hold page hostage */
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			put_page(page);
			continue;
		}

		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
		kunmap(page);
		if (result < 0)
			break;

		total_read += result;
	}

	return total_read > 0 ? total_read : result;
}
2807
2808 static ssize_t
2809 cifs_iovec_read(struct file *file, const struct iovec *iov,
2810                  unsigned long nr_segs, loff_t *poffset)
2811 {
2812         ssize_t rc;
2813         size_t len, cur_len;
2814         ssize_t total_read = 0;
2815         loff_t offset = *poffset;
2816         unsigned int npages;
2817         struct cifs_sb_info *cifs_sb;
2818         struct cifs_tcon *tcon;
2819         struct cifsFileInfo *open_file;
2820         struct cifs_readdata *rdata, *tmp;
2821         struct list_head rdata_list;
2822         pid_t pid;
2823
2824         if (!nr_segs)
2825                 return 0;
2826
2827         len = iov_length(iov, nr_segs);
2828         if (!len)
2829                 return 0;
2830
2831         INIT_LIST_HEAD(&rdata_list);
2832         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2833         open_file = file->private_data;
2834         tcon = tlink_tcon(open_file->tlink);
2835
2836         if (!tcon->ses->server->ops->async_readv)
2837                 return -ENOSYS;
2838
2839         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2840                 pid = open_file->pid;
2841         else
2842                 pid = current->tgid;
2843
2844         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2845                 cifs_dbg(FYI, "attempting read on write only file instance\n");
2846
2847         do {
2848                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2849                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2850
2851                 /* allocate a readdata struct */
2852                 rdata = cifs_readdata_alloc(npages,
2853                                             cifs_uncached_readv_complete);
2854                 if (!rdata) {
2855                         rc = -ENOMEM;
2856                         goto error;
2857                 }
2858
2859                 rc = cifs_read_allocate_pages(rdata, npages);
2860                 if (rc)
2861                         goto error;
2862
2863                 rdata->cfile = cifsFileInfo_get(open_file);
2864                 rdata->nr_pages = npages;
2865                 rdata->offset = offset;
2866                 rdata->bytes = cur_len;
2867                 rdata->pid = pid;
2868                 rdata->pagesz = PAGE_SIZE;
2869                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2870
2871                 rc = cifs_retry_async_readv(rdata);
2872 error:
2873                 if (rc) {
2874                         kref_put(&rdata->refcount,
2875                                  cifs_uncached_readdata_release);
2876                         break;
2877                 }
2878
2879                 list_add_tail(&rdata->list, &rdata_list);
2880                 offset += cur_len;
2881                 len -= cur_len;
2882         } while (len > 0);
2883
2884         /* if at least one read request send succeeded, then reset rc */
2885         if (!list_empty(&rdata_list))
2886                 rc = 0;
2887
2888         /* the loop below should proceed in the order of increasing offsets */
2889 restart_loop:
2890         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2891                 if (!rc) {
2892                         ssize_t copied;
2893
2894                         /* FIXME: freezable sleep too? */
2895                         rc = wait_for_completion_killable(&rdata->done);
2896                         if (rc)
2897                                 rc = -EINTR;
2898                         else if (rdata->result)
2899                                 rc = rdata->result;
2900                         else {
2901                                 rc = cifs_readdata_to_iov(rdata, iov,
2902                                                         nr_segs, *poffset,
2903                                                         &copied);
2904                                 total_read += copied;
2905                         }
2906
2907                         /* resend call if it's a retryable error */
2908                         if (rc == -EAGAIN) {
2909                                 rc = cifs_retry_async_readv(rdata);
2910                                 goto restart_loop;
2911                         }
2912                 }
2913                 list_del_init(&rdata->list);
2914                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2915         }
2916
2917         cifs_stats_bytes_read(tcon, total_read);
2918         *poffset += total_read;
2919
2920         /* mask nodata case */
2921         if (rc == -ENODATA)
2922                 rc = 0;
2923
2924         return total_read ? total_read : rc;
2925 }
2926
2927 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2928                                unsigned long nr_segs, loff_t pos)
2929 {
2930         ssize_t read;
2931
2932         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2933         if (read > 0)
2934                 iocb->ki_pos = pos;
2935
2936         return read;
2937 }
2938
/*
 * Strict cache mode read entry point. Without a level II (read-caching)
 * oplock the data is read straight from the server; with one, we check
 * for conflicting mandatory brlocks under lock_sem before reading through
 * the page cache.
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
		  unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!cinode->clientCanCacheRead)
		return cifs_user_readv(iocb, iov, nr_segs, pos);

	/* POSIX (unix extension) locking: no mandatory brlocks to check */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_aio_read(iocb, iov, nr_segs, pos);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
				     tcon->ses->server->vals->shared_lock_type,
				     NULL, CIFS_READ_OP))
		rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
	up_read(&cinode->lock_sem);
	return rc;
}
2979
/*
 * Synchronous, uncached read into a kernel buffer. Splits the request
 * into rsize-sized chunks and issues each via the server's sync_read op,
 * reopening the file handle if it was invalidated. Advances *offset by
 * the bytes read. Returns the total number of bytes read, or a negative
 * error code if nothing could be read.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		current_read_size = min_t(uint, read_size - total_read, rsize);
		/*
		 * For windows me and 9x we do not want to request more than it
		 * negotiated since it will refuse the read then.
		 */
		if ((tcon->ses) && !(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
			current_read_size = min_t(uint, current_read_size,
					CIFSMaxBufSize);
		}
		/* retry the chunk, reopening the handle if it went stale */
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			rc = server->ops->sync_read(xid, open_file, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		}
		if (rc || (bytes_read == 0)) {
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			/*
			 * NOTE(review): this feeds the running total_read
			 * (not this iteration's bytes_read) into the stats
			 * on every pass - verify whether bytes_read was
			 * intended.
			 */
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
3068
3069 /*
3070  * If the page is mmap'ed into a process' page tables, then we need to make
3071  * sure that it doesn't change while being written back.
3072  */
static int
cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;

	/* return with the page locked, as VM_FAULT_LOCKED promises */
	lock_page(page);
	return VM_FAULT_LOCKED;
}
3081
/* vm ops for CIFS mmaps: generic page-cache faults plus our mkwrite hook */
static struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.page_mkwrite = cifs_page_mkwrite,
	.remap_pages = generic_file_remap_pages,
};
3087
3088 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3089 {
3090         int rc, xid;
3091         struct inode *inode = file_inode(file);
3092
3093         xid = get_xid();
3094
3095         if (!CIFS_I(inode)->clientCanCacheRead) {
3096                 rc = cifs_invalidate_mapping(inode);
3097                 if (rc)
3098                         return rc;
3099         }
3100
3101         rc = generic_file_mmap(file, vma);
3102         if (rc == 0)
3103                 vma->vm_ops = &cifs_file_vm_ops;
3104         free_xid(xid);
3105         return rc;
3106 }
3107
3108 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3109 {
3110         int rc, xid;
3111
3112         xid = get_xid();
3113         rc = cifs_revalidate_file(file);
3114         if (rc) {
3115                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3116                          rc);
3117                 free_xid(xid);
3118                 return rc;
3119         }
3120         rc = generic_file_mmap(file, vma);
3121         if (rc == 0)
3122                 vma->vm_ops = &cifs_file_vm_ops;
3123         free_xid(xid);
3124         return rc;
3125 }
3126
/*
 * Work handler for a readpages completion: add each page to the file LRU,
 * mark it uptodate (and push it to fscache) if the read succeeded, unlock
 * it for waiting faulters, and drop the page reference. Finally drop this
 * work item's reference on the readdata.
 */
static void
cifs_readv_complete(struct work_struct *work)
{
	unsigned int i;
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];

		lru_cache_add_file(page);

		if (rdata->result == 0) {
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		unlock_page(page);

		if (rdata->result == 0)
			cifs_readpage_to_fscache(rdata->mapping->host, page);

		page_cache_release(page);
		rdata->pages[i] = NULL;
	}
	kref_put(&rdata->refcount, cifs_readdata_release);
}
3154
/*
 * Pull the payload of a readpages response off the socket into the
 * pages of @rdata.
 *
 * @len is the number of data bytes the server actually returned for
 * this request.  Full pages are read directly; the final partial page
 * is read and zero-padded.  Pages for which no data arrived are dropped
 * from the request here: pages beyond the server's EOF are zero-filled
 * and marked uptodate (so the VFS stops re-requesting them), the rest
 * are simply released.
 *
 * Returns the total number of bytes read, or a negative error code if
 * nothing was read before the socket read failed.
 */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			struct cifs_readdata *rdata, unsigned int len)
{
	int total_read = 0, result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	struct kvec iov;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->tailsz = PAGE_CACHE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];

		if (len >= PAGE_CACHE_SIZE) {
			/* enough data to fill the page */
			iov.iov_base = kmap(page);
			iov.iov_len = PAGE_CACHE_SIZE;
			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
				 i, page->index, iov.iov_base, iov.iov_len);
			len -= PAGE_CACHE_SIZE;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			iov.iov_base = kmap(page);
			iov.iov_len = len;
			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
				 i, page->index, iov.iov_base, iov.iov_len);
			memset(iov.iov_base + len,
				'\0', PAGE_CACHE_SIZE - len);
			rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_CACHE_SIZE);
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			page_cache_release(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add_file(page);
			unlock_page(page);
			page_cache_release(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		/* page stays kmapped across the socket read, unmapped below */
		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
		kunmap(page);
		if (result < 0)
			break;

		total_read += result;
	}

	return total_read > 0 ? total_read : result;
}
3230
/*
 * ->readpages: batch contiguous pages from @page_list into rsize-bounded
 * async read requests.
 *
 * Pages are inserted into the page cache locked here; on the success
 * path they are unlocked, LRU-added and released by cifs_readv_complete
 * when the response arrives.  On any failure the pages of the current
 * batch are unwound here instead.  Returns 0, or the first error hit.
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	unsigned int rsize = cifs_sb->rsize;
	pid_t pid;

	/*
	 * Give up immediately if rsize is too small to read an entire page.
	 * The VFS will fall back to readpage. We should never reach this
	 * point however since we set ra_pages to 0 when the rsize is smaller
	 * than a cache page.
	 */
	if (unlikely(rsize < PAGE_CACHE_SIZE))
		return 0;

	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0)
		return rc;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	INIT_LIST_HEAD(&tmplist);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i;
		unsigned int bytes = PAGE_CACHE_SIZE;
		unsigned int expected_index;
		unsigned int nr_pages = 1;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;

		page = list_entry(page_list->prev, struct page, lru);

		/*
		 * Lock the page and put it in the cache. Since no one else
		 * should have access to this page, we're safe to simply set
		 * PG_locked without checking it first.
		 */
		__set_page_locked(page);
		rc = add_to_page_cache_locked(page, mapping,
					      page->index, GFP_KERNEL);

		/* give up if we can't stick it in the cache */
		if (rc) {
			__clear_page_locked(page);
			break;
		}

		/* move first page to the tmplist */
		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
		list_move_tail(&page->lru, &tmplist);

		/* now try and add more pages onto the request */
		expected_index = page->index + 1;
		list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
			/* discontinuity ? */
			if (page->index != expected_index)
				break;

			/* would this page push the read over the rsize? */
			if (bytes + PAGE_CACHE_SIZE > rsize)
				break;

			__set_page_locked(page);
			if (add_to_page_cache_locked(page, mapping,
						page->index, GFP_KERNEL)) {
				__clear_page_locked(page);
				break;
			}
			list_move_tail(&page->lru, &tmplist);
			bytes += PAGE_CACHE_SIZE;
			expected_index++;
			nr_pages++;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			rc = -ENOMEM;
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_CACHE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;

		/* tmplist is in increasing index order; keep that order */
		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = cifs_retry_async_readv(rdata);
		if (rc != 0) {
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop our alloc ref; completion work holds its own */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	return rc;
}
3377
3378 static int cifs_readpage_worker(struct file *file, struct page *page,
3379         loff_t *poffset)
3380 {
3381         char *read_data;
3382         int rc;
3383
3384         /* Is the page cached? */
3385         rc = cifs_readpage_from_fscache(file_inode(file), page);
3386         if (rc == 0)
3387                 goto read_complete;
3388
3389         page_cache_get(page);
3390         read_data = kmap(page);
3391         /* for reads over a certain size could initiate async read ahead */
3392
3393         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3394
3395         if (rc < 0)
3396                 goto io_error;
3397         else
3398                 cifs_dbg(FYI, "Bytes read %d\n", rc);
3399
3400         file_inode(file)->i_atime =
3401                 current_fs_time(file_inode(file)->i_sb);
3402
3403         if (PAGE_CACHE_SIZE > rc)
3404                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3405
3406         flush_dcache_page(page);
3407         SetPageUptodate(page);
3408
3409         /* send this page to the cache */
3410         cifs_readpage_to_fscache(file_inode(file), page);
3411
3412         rc = 0;
3413
3414 io_error:
3415         kunmap(page);
3416         page_cache_release(page);
3417
3418 read_complete:
3419         return rc;
3420 }
3421
3422 static int cifs_readpage(struct file *file, struct page *page)
3423 {
3424         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3425         int rc = -EACCES;
3426         unsigned int xid;
3427
3428         xid = get_xid();
3429
3430         if (file->private_data == NULL) {
3431                 rc = -EBADF;
3432                 free_xid(xid);
3433                 return rc;
3434         }
3435
3436         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3437                  page, (int)offset, (int)offset);
3438
3439         rc = cifs_readpage_worker(file, page, &offset);
3440
3441         unlock_page(page);
3442
3443         free_xid(xid);
3444         return rc;
3445 }
3446
3447 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3448 {
3449         struct cifsFileInfo *open_file;
3450
3451         spin_lock(&cifs_file_list_lock);
3452         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3453                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3454                         spin_unlock(&cifs_file_list_lock);
3455                         return 1;
3456                 }
3457         }
3458         spin_unlock(&cifs_file_list_lock);
3459         return 0;
3460 }
3461
3462 /* We do not want to update the file size from server for inodes
3463    open for write - to avoid races with writepage extending
3464    the file - in the future we could consider allowing
3465    refreshing the inode only on increases in the file size
3466    but this is tricky to do without racing with writebehind
3467    page caching in the current Linux kernel design */
3468 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3469 {
3470         if (!cifsInode)
3471                 return true;
3472
3473         if (is_inode_writable(cifsInode)) {
3474                 /* This inode is open for write at least once */
3475                 struct cifs_sb_info *cifs_sb;
3476
3477                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3478                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3479                         /* since no page cache to corrupt on directio
3480                         we can change size safely */
3481                         return true;
3482                 }
3483
3484                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3485                         return true;
3486
3487                 return false;
3488         } else
3489                 return true;
3490 }
3491
3492 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3493                         loff_t pos, unsigned len, unsigned flags,
3494                         struct page **pagep, void **fsdata)
3495 {
3496         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3497         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3498         loff_t page_start = pos & PAGE_MASK;
3499         loff_t i_size;
3500         struct page *page;
3501         int rc = 0;
3502
3503         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3504
3505         page = grab_cache_page_write_begin(mapping, index, flags);
3506         if (!page) {
3507                 rc = -ENOMEM;
3508                 goto out;
3509         }
3510
3511         if (PageUptodate(page))
3512                 goto out;
3513
3514         /*
3515          * If we write a full page it will be up to date, no need to read from
3516          * the server. If the write is short, we'll end up doing a sync write
3517          * instead.
3518          */
3519         if (len == PAGE_CACHE_SIZE)
3520                 goto out;
3521
3522         /*
3523          * optimize away the read when we have an oplock, and we're not
3524          * expecting to use any of the data we'd be reading in. That
3525          * is, when the page lies beyond the EOF, or straddles the EOF
3526          * and the write will cover all of the existing data.
3527          */
3528         if (CIFS_I(mapping->host)->clientCanCacheRead) {
3529                 i_size = i_size_read(mapping->host);
3530                 if (page_start >= i_size ||
3531                     (offset == 0 && (pos + len) >= i_size)) {
3532                         zero_user_segments(page, 0, offset,
3533                                            offset + len,
3534                                            PAGE_CACHE_SIZE);
3535                         /*
3536                          * PageChecked means that the parts of the page
3537                          * to which we're not writing are considered up
3538                          * to date. Once the data is copied to the
3539                          * page, it can be set uptodate.
3540                          */
3541                         SetPageChecked(page);
3542                         goto out;
3543                 }
3544         }
3545
3546         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3547                 /*
3548                  * might as well read a page, it is fast enough. If we get
3549                  * an error, we don't need to return it. cifs_write_end will
3550                  * do a sync write instead since PG_uptodate isn't set.
3551                  */
3552                 cifs_readpage_worker(file, page, &page_start);
3553         } else {
3554                 /* we could try using another file handle if there is one -
3555                    but how would we lock it to prevent close of that handle
3556                    racing with this read? In any case
3557                    this will be written out by write_end so is fine */
3558         }
3559 out:
3560         *pagep = page;
3561         return rc;
3562 }
3563
3564 static int cifs_release_page(struct page *page, gfp_t gfp)
3565 {
3566         if (PagePrivate(page))
3567                 return 0;
3568
3569         return cifs_fscache_release_page(page, gfp);
3570 }
3571
3572 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3573                                  unsigned int length)
3574 {
3575         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3576
3577         if (offset == 0 && length == PAGE_CACHE_SIZE)
3578                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3579 }
3580
/*
 * ->launder_page: synchronously write back a dirty page before it is
 * detached from the mapping, then invalidate any fscache copy.
 * Returns the writeback result (0 if the page was already clean).
 */
static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	/* WB_SYNC_ALL: wait for the write to complete before returning */
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}
3601
/*
 * Work item run when the server breaks our oplock: flush cached writes,
 * and if read caching was lost wait for writeback and invalidate the
 * pagecache; push any cached byte-range locks to the server; finally
 * acknowledge the break unless it was cancelled (e.g. after a
 * reconnect, when the server has already dropped the oplock).
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = cfile->dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/*
	 * Mandatory locks cannot be cached; drop read caching entirely
	 * when we would otherwise keep only a read oplock.
	 */
	if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->clientCanCacheRead = false;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		/* tell any local lease holders the level we still cache at */
		if (cinode->clientCanCacheRead)
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (cinode->clientCanCacheRead == 0) {
			/* read caching lost: wait and drop stale pages */
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_invalidate_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
}
3648
/*
 * Address space operations used when the negotiated buffer size can
 * hold a full page of data, enabling the batched ->readpages path.
 */
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
3661
/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	/* no ->readpages: buffer too small for a full page (see above) */
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};