/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

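/*
 * Map the O_ACCMODE bits of the POSIX open flags onto the NT/CIFS
 * desired-access bits that are requested from the server.  Note that
 * O_RDWR deliberately asks for GENERIC_READ | GENERIC_WRITE rather
 * than GENERIC_ALL (see the comment in the function body).
 */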
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause an unnecessary access-denied error on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

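/*
 * Map POSIX open flags onto the SMB_O_* flags defined by the CIFS
 * POSIX extensions.  O_EXCL is honoured only in combination with
 * O_CREAT, and O_DSYNC is conservatively widened to SMB_O_SYNC.
 */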
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

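/*
 * Choose the CIFS create disposition implied by the POSIX open flags
 * (see the open flag mapping table in cifs_nt_open() below).
 */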
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

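/*
 * Open a file via the CIFS POSIX extensions (CIFSPOSIXCreate).  On
 * success, *pnetfid and *poplock are filled in.  If @pinode is
 * non-NULL, the inode is instantiated or refreshed from the returned
 * FILE_UNIX_BASIC_INFO - unless the server reported an unknown file
 * type, in which case the caller is expected to do a qpathinfo.
 */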
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

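/*
 * Open a file with a traditional NT-style open via server->ops->open,
 * used when the POSIX open path is unavailable or has failed, then
 * refresh the inode from the metadata the server returned.
 */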
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that no POSIX flag combination maps directly to the
 *      FILE_SUPERSEDE disposition (ie create whether or not the file
 *      exists).  O_CREAT | O_TRUNC is similar, but it truncates an
 *      existing file rather than creating a new one as FILE_SUPERSEDE
 *      does (FILE_SUPERSEDE also uses the attributes / metadata passed
 *      in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read/write flags match reasonably.  O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, &fid->netfid);

out:
        kfree(buf);
        return rc;
}

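/*
 * Return true if any open instance of this inode holds at least one
 * cached mandatory byte-range lock.  Walks every per-fid lock list
 * while holding lock_sem for reading.
 */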
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

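/*
 * Allocate and initialize the per-open cifsFileInfo, link it into the
 * inode and tcon open-file lists, and hand the (possibly downgraded)
 * oplock to the fid via server->ops->set_fid().  Also consumes the
 * pending open that the caller registered for this fid.
 */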
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (oplock == server->vals->oplock_read &&
                                                cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&cifs_file_list_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        /* if this file instance is readable, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         cifs_file->dentry->d_inode);
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        CIFS_I(inode)->invalid_mapping = true;
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

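/*
 * ->open() for regular files.  A POSIX-extensions open is attempted
 * first when the server advertises CIFS_UNIX_POSIX_PATH_OPS_CAP, with
 * a fallback to cifs_nt_open() on network i/o or DFS errors.  A
 * pending open is registered around the request so that a lease break
 * arriving before cifs_new_fileinfo() completes is not missed.
 */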
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on network
                 * i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the
 * session to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to push them */
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_write(&cinode->lock_sem);
        return rc;
}

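/*
 * Reopen a file whose handle has been invalidated, e.g. after a
 * reconnect.  If @can_flush is set, write back dirty pages and refresh
 * the inode metadata; otherwise skip that step, since flushing from
 * the writeback path could deadlock.  Byte-range locks are reacquired
 * via cifs_relock_file() when the open was a reconnect.
 */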
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = cfile->dentry->d_inode;
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Cannot grab the rename sem here, because various ops, including
         * those that already hold it, can end up causing writepage to get
         * called, and if the server was down that means we end up here.
         * We can never tell whether the caller already holds rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Cannot refresh the inode by passing in a file_info buf to be
         * returned by CIFSSMBOpen and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty locally
         * we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data, and since we do not know if we
         * have data that would invalidate the current end of file on the
         * server we can not go to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cifs_file_list_lock);
        if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

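/*
 * Allocate a cifsLockInfo describing a byte-range lock owned by the
 * current thread group, with an empty list of blocked waiters.
 */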
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : CIFS_LOCK_OP (no op), CIFS_READ_OP (read) or CIFS_WRITE_OP (write) */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if we need to send a request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}

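/*
 * Send all cached mandatory byte-range locks on this fid to the
 * server, batching up to max_num LOCKING_ANDX ranges per request and
 * making one pass per lock type (exclusive, then shared).
 */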
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
        for (lockp = &inode->i_flock; *lockp != NULL; \
             lockp = &(*lockp)->fl_next)

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

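/*
 * Push cached POSIX (fcntl) locks to the server.  The inode's lock
 * list is walked twice: once under i_lock to count the FL_POSIX locks
 * and preallocate lock_to_push entries, and again to fill them in,
 * since the blocking CIFSSMBPosixLock() call cannot be issued while
 * holding i_lock.
 */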
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = cfile->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock, **before;
        unsigned int count = 0, i = 0;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        spin_lock(&inode->i_lock);
        cifs_for_each_lock(inode, before) {
                if ((*before)->fl_flags & FL_POSIX)
                        count++;
        }
        spin_unlock(&inode->i_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&inode->i_lock);
        cifs_for_each_lock(inode, before) {
                flock = *before;
                if ((flock->fl_flags & FL_POSIX) == 0)
                        continue;
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        spin_unlock(&inode->i_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

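/*
 * Decode a struct file_lock into the server's lock-type bits and the
 * lock/unlock/wait flags that drive cifs_getlk() and cifs_setlk().
 */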
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

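/*
 * Handle F_GETLK.  For mandatory-style locks, a conflict is probed by
 * actually acquiring (and then immediately releasing) a server-side
 * lock, first exclusive and then shared, and flock->fl_type is
 * narrowed accordingly.
 */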
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error %d unlocking previously locked range during lock test\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error %d unlocking previously locked range during lock test\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

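/*
 * Remove all of this fid's cached locks that fall entirely within the
 * unlock range and, unless brlocks are still cacheable, send batched
 * LOCKING_ANDX unlock requests.  If a request fails, the affected
 * locks are restored to the fid's list from tmp_llist.
 */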
1365 int
1366 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1367                   unsigned int xid)
1368 {
1369         int rc = 0, stored_rc;
1370         int types[] = {LOCKING_ANDX_LARGE_FILES,
1371                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1372         unsigned int i;
1373         unsigned int max_num, num, max_buf;
1374         LOCKING_ANDX_RANGE *buf, *cur;
1375         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1376         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1377         struct cifsLockInfo *li, *tmp;
1378         __u64 length = 1 + flock->fl_end - flock->fl_start;
1379         struct list_head tmp_llist;
1380
1381         INIT_LIST_HEAD(&tmp_llist);
1382
1383         /*
1384          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1385          * and check it for zero before using.
1386          */
1387         max_buf = tcon->ses->server->maxBuf;
1388         if (!max_buf)
1389                 return -EINVAL;
1390
1391         max_num = (max_buf - sizeof(struct smb_hdr)) /
1392                                                 sizeof(LOCKING_ANDX_RANGE);
1393         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1394         if (!buf)
1395                 return -ENOMEM;
1396
1397         down_write(&cinode->lock_sem);
1398         for (i = 0; i < 2; i++) {
1399                 cur = buf;
1400                 num = 0;
1401                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1402                         if (flock->fl_start > li->offset ||
1403                             (flock->fl_start + length) <
1404                             (li->offset + li->length))
1405                                 continue;
1406                         if (current->tgid != li->pid)
1407                                 continue;
1408                         if (types[i] != li->type)
1409                                 continue;
1410                         if (cinode->can_cache_brlcks) {
1411                                 /*
1412                                  * We can cache brlock requests - simply remove
1413                                  * a lock from the file's list.
1414                                  */
1415                                 list_del(&li->llist);
1416                                 cifs_del_lock_waiters(li);
1417                                 kfree(li);
1418                                 continue;
1419                         }
1420                         cur->Pid = cpu_to_le16(li->pid);
1421                         cur->LengthLow = cpu_to_le32((u32)li->length);
1422                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1423                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1424                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1425                         /*
1426                          * We need to save the lock here so we can add it back to
1427                          * the file's list if the unlock range request fails on
1428                          * the server.
1429                          */
1430                         list_move(&li->llist, &tmp_llist);
1431                         if (++num == max_num) {
1432                                 stored_rc = cifs_lockv(xid, tcon,
1433                                                        cfile->fid.netfid,
1434                                                        li->type, num, 0, buf);
1435                                 if (stored_rc) {
1436                                         /*
1437                                          * We failed on the unlock range
1438                                          * request - add all locks from the tmp
1439                                          * list to the head of the file's list.
1440                                          */
1441                                         cifs_move_llist(&tmp_llist,
1442                                                         &cfile->llist->locks);
1443                                         rc = stored_rc;
1444                                 } else
1445                                         /*
1446                          * The unlock range request succeeded -
1447                          * free the tmp list.
1448                                          */
1449                                         cifs_free_llist(&tmp_llist);
1450                                 cur = buf;
1451                                 num = 0;
1452                         } else
1453                                 cur++;
1454                 }
1455                 if (num) {
1456                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1457                                                types[i], num, 0, buf);
1458                         if (stored_rc) {
1459                                 cifs_move_llist(&tmp_llist,
1460                                                 &cfile->llist->locks);
1461                                 rc = stored_rc;
1462                         } else
1463                                 cifs_free_llist(&tmp_llist);
1464                 }
1465         }
1466
1467         up_write(&cinode->lock_sem);
1468         kfree(buf);
1469         return rc;
1470 }
1471
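/*
 * Set or clear a byte-range lock. On POSIX-capable mounts this is a single
 * CIFSSMBPosixLock call; otherwise the request is checked against the local
 * lock list first, sent to the server via server->ops->mand_lock or
 * mand_unlock_range, and recorded locally on success.
 */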
1472 static int
1473 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1474            bool wait_flag, bool posix_lck, int lock, int unlock,
1475            unsigned int xid)
1476 {
1477         int rc = 0;
1478         __u64 length = 1 + flock->fl_end - flock->fl_start;
1479         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1480         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1481         struct TCP_Server_Info *server = tcon->ses->server;
1482         struct inode *inode = cfile->dentry->d_inode;
1483
1484         if (posix_lck) {
1485                 int posix_lock_type;
1486
1487                 rc = cifs_posix_lock_set(file, flock);
1488                 if (rc <= 0)
1489                         return rc;
1490
1491                 if (type & server->vals->shared_lock_type)
1492                         posix_lock_type = CIFS_RDLCK;
1493                 else
1494                         posix_lock_type = CIFS_WRLCK;
1495
1496                 if (unlock == 1)
1497                         posix_lock_type = CIFS_UNLCK;
1498
1499                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1500                                       current->tgid, flock->fl_start, length,
1501                                       NULL, posix_lock_type, wait_flag);
1502                 goto out;
1503         }
1504
1505         if (lock) {
1506                 struct cifsLockInfo *lock;
1507
1508                 lock = cifs_lock_init(flock->fl_start, length, type);
1509                 if (!lock)
1510                         return -ENOMEM;
1511
1512                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1513                 if (rc < 0) {
1514                         kfree(lock);
1515                         return rc;
1516                 }
1517                 if (!rc)
1518                         goto out;
1519
1520                 /*
1521                  * A Windows 7 server can delay breaking a lease from read to None
1522                  * when we set a byte-range lock on a file - break it explicitly
1523                  * before sending the lock to the server, to be sure the next
1524                  * read won't conflict with non-overlapping locks due to
1525                  * page reading.
1526                  */
1527                 if (!CIFS_I(inode)->clientCanCacheAll &&
1528                                         CIFS_I(inode)->clientCanCacheRead) {
1529                         cifs_invalidate_mapping(inode);
1530                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1531                                  inode);
1532                         CIFS_I(inode)->clientCanCacheRead = false;
1533                 }
1534
1535                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1536                                             type, 1, 0, wait_flag);
1537                 if (rc) {
1538                         kfree(lock);
1539                         return rc;
1540                 }
1541
1542                 cifs_lock_add(cfile, lock);
1543         } else if (unlock)
1544                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1545
1546 out:
1547         if (flock->fl_flags & FL_POSIX)
1548                 posix_lock_file_wait(file, flock);
1549         return rc;
1550 }
1551
1552 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1553 {
1554         int rc, xid;
1555         int lock = 0, unlock = 0;
1556         bool wait_flag = false;
1557         bool posix_lck = false;
1558         struct cifs_sb_info *cifs_sb;
1559         struct cifs_tcon *tcon;
1560         struct cifsInodeInfo *cinode;
1561         struct cifsFileInfo *cfile;
1562         __u16 netfid;
1563         __u32 type;
1564
1565         rc = -EACCES;
1566         xid = get_xid();
1567
1568         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1569                  cmd, flock->fl_flags, flock->fl_type,
1570                  flock->fl_start, flock->fl_end);
1571
1572         cfile = (struct cifsFileInfo *)file->private_data;
1573         tcon = tlink_tcon(cfile->tlink);
1574
1575         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1576                         tcon->ses->server);
1577
1578         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1579         netfid = cfile->fid.netfid;
1580         cinode = CIFS_I(file_inode(file));
1581
1582         if (cap_unix(tcon->ses) &&
1583             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1584             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1585                 posix_lck = true;
1586         /*
1587          * BB add code here to normalize offset and length to account for
1588          * negative length, which we cannot accept over the wire.
1589          */
1590         if (IS_GETLK(cmd)) {
1591                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1592                 free_xid(xid);
1593                 return rc;
1594         }
1595
1596         if (!lock && !unlock) {
1597                 /*
1598                  * if this is neither a lock nor an unlock request, there is
1599                  * nothing to do since we do not know what it is
1600                  */
1601                 free_xid(xid);
1602                 return -EOPNOTSUPP;
1603         }
1604
1605         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1606                         xid);
1607         free_xid(xid);
1608         return rc;
1609 }
1610
1611 /*
1612  * Update the file size (if needed) after a write. Should be called with
1613  * the inode->i_lock held.
1614  */
1615 void
1616 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1617                       unsigned int bytes_written)
1618 {
1619         loff_t end_of_write = offset + bytes_written;
1620
1621         if (end_of_write > cifsi->server_eof)
1622                 cifsi->server_eof = end_of_write;
1623 }
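
/*
 * A minimal usage sketch (mirroring the callers below), showing the
 * locking the comment above requires:
 *
 *	spin_lock(&inode->i_lock);
 *	cifs_update_eof(CIFS_I(inode), offset, bytes_written);
 *	spin_unlock(&inode->i_lock);
 */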
1624
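/*
 * Synchronous write helper: send @write_size bytes in chunks of at most
 * wsize via server->ops->sync_write, reopening an invalidated handle and
 * retrying on -EAGAIN, and advance *offset and the cached EOF as data is
 * acknowledged.
 */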
1625 static ssize_t
1626 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1627            size_t write_size, loff_t *offset)
1628 {
1629         int rc = 0;
1630         unsigned int bytes_written = 0;
1631         unsigned int total_written;
1632         struct cifs_sb_info *cifs_sb;
1633         struct cifs_tcon *tcon;
1634         struct TCP_Server_Info *server;
1635         unsigned int xid;
1636         struct dentry *dentry = open_file->dentry;
1637         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1638         struct cifs_io_parms io_parms;
1639
1640         cifs_sb = CIFS_SB(dentry->d_sb);
1641
1642         cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n",
1643                  write_size, *offset, dentry->d_name.name);
1644
1645         tcon = tlink_tcon(open_file->tlink);
1646         server = tcon->ses->server;
1647
1648         if (!server->ops->sync_write)
1649                 return -ENOSYS;
1650
1651         xid = get_xid();
1652
1653         for (total_written = 0; write_size > total_written;
1654              total_written += bytes_written) {
1655                 rc = -EAGAIN;
1656                 while (rc == -EAGAIN) {
1657                         struct kvec iov[2];
1658                         unsigned int len;
1659
1660                         if (open_file->invalidHandle) {
1661                                 /* we could deadlock if we called
1662                                    filemap_fdatawait from here, so tell
1663                                    cifs_reopen_file not to flush data to
1664                                    the server now */
1665                                 rc = cifs_reopen_file(open_file, false);
1666                                 if (rc != 0)
1667                                         break;
1668                         }
1669
1670                         len = min((size_t)cifs_sb->wsize,
1671                                   write_size - total_written);
1672                         /* iov[0] is reserved for smb header */
1673                         iov[1].iov_base = (char *)write_data + total_written;
1674                         iov[1].iov_len = len;
1675                         io_parms.pid = pid;
1676                         io_parms.tcon = tcon;
1677                         io_parms.offset = *offset;
1678                         io_parms.length = len;
1679                         rc = server->ops->sync_write(xid, open_file, &io_parms,
1680                                                      &bytes_written, iov, 1);
1681                 }
1682                 if (rc || (bytes_written == 0)) {
1683                         if (total_written)
1684                                 break;
1685                         else {
1686                                 free_xid(xid);
1687                                 return rc;
1688                         }
1689                 } else {
1690                         spin_lock(&dentry->d_inode->i_lock);
1691                         cifs_update_eof(cifsi, *offset, bytes_written);
1692                         spin_unlock(&dentry->d_inode->i_lock);
1693                         *offset += bytes_written;
1694                 }
1695         }
1696
1697         cifs_stats_bytes_written(tcon, total_written);
1698
1699         if (total_written > 0) {
1700                 spin_lock(&dentry->d_inode->i_lock);
1701                 if (*offset > dentry->d_inode->i_size)
1702                         i_size_write(dentry->d_inode, *offset);
1703                 spin_unlock(&dentry->d_inode->i_lock);
1704         }
1705         mark_inode_dirty_sync(dentry->d_inode);
1706         free_xid(xid);
1707         return total_written;
1708 }
1709
1710 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1711                                         bool fsuid_only)
1712 {
1713         struct cifsFileInfo *open_file = NULL;
1714         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1715
1716         /* only filter by fsuid on multiuser mounts */
1717         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1718                 fsuid_only = false;
1719
1720         spin_lock(&cifs_file_list_lock);
1721         /* we could simply take the first list entry since write-only
1722            entries are always at the end of the list, but since the first
1723            entry might have a close pending, we go through the whole list */
1724         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1725                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1726                         continue;
1727                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1728                         if (!open_file->invalidHandle) {
1729                                 /* found a good file */
1730                                 /* lock it so it will not be closed on us */
1731                                 cifsFileInfo_get_locked(open_file);
1732                                 spin_unlock(&cifs_file_list_lock);
1733                                 return open_file;
1734                         } /* else might as well continue, and look for
1735                              another, or simply have the caller reopen it
1736                              again rather than trying to fix this handle */
1737                 } else /* write-only file */
1738                         break; /* write-only files are last, so we must be done */
1739         }
1740         spin_unlock(&cifs_file_list_lock);
1741         return NULL;
1742 }
1743
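/*
 * Find a writable handle for this inode. Handles opened by the calling
 * task group are preferred; failing that, any usable handle is taken, and
 * an invalidated one is reopened up to MAX_REOPEN_ATT times before giving
 * up.
 */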
1744 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1745                                         bool fsuid_only)
1746 {
1747         struct cifsFileInfo *open_file, *inv_file = NULL;
1748         struct cifs_sb_info *cifs_sb;
1749         bool any_available = false;
1750         int rc;
1751         unsigned int refind = 0;
1752
1753         /* Having a null inode here (because mapping->host was set to zero by
1754         the VFS or MM) should not happen, but we had reports of an oops (due to
1755         it being zero) during stress testcases, so we need to check for it */
1756
1757         if (cifs_inode == NULL) {
1758                 cifs_dbg(VFS, "Null inode passed to find_writable_file\n");
1759                 dump_stack();
1760                 return NULL;
1761         }
1762
1763         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1764
1765         /* only filter by fsuid on multiuser mounts */
1766         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1767                 fsuid_only = false;
1768
1769         spin_lock(&cifs_file_list_lock);
1770 refind_writable:
1771         if (refind > MAX_REOPEN_ATT) {
1772                 spin_unlock(&cifs_file_list_lock);
1773                 return NULL;
1774         }
1775         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1776                 if (!any_available && open_file->pid != current->tgid)
1777                         continue;
1778                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1779                         continue;
1780                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1781                         if (!open_file->invalidHandle) {
1782                                 /* found a good writable file */
1783                                 cifsFileInfo_get_locked(open_file);
1784                                 spin_unlock(&cifs_file_list_lock);
1785                                 return open_file;
1786                         } else {
1787                                 if (!inv_file)
1788                                         inv_file = open_file;
1789                         }
1790                 }
1791         }
1792         /* couldn't find a usable FH with the same pid, try any available */
1793         if (!any_available) {
1794                 any_available = true;
1795                 goto refind_writable;
1796         }
1797
1798         if (inv_file) {
1799                 any_available = false;
1800                 cifsFileInfo_get_locked(inv_file);
1801         }
1802
1803         spin_unlock(&cifs_file_list_lock);
1804
1805         if (inv_file) {
1806                 rc = cifs_reopen_file(inv_file, false);
1807                 if (!rc)
1808                         return inv_file;
1809                 else {
1810                         spin_lock(&cifs_file_list_lock);
1811                         list_move_tail(&inv_file->flist,
1812                                         &cifs_inode->openFileList);
1813                         spin_unlock(&cifs_file_list_lock);
1814                         cifsFileInfo_put(inv_file);
1815                         spin_lock(&cifs_file_list_lock);
1816                         ++refind;
1817                         goto refind_writable;
1818                 }
1819         }
1820
1821         return NULL;
1822 }
1823
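/*
 * Write the byte range [from, to) of a page back to the server through any
 * writable handle, trimming the range first so it never extends the file.
 */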
1824 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1825 {
1826         struct address_space *mapping = page->mapping;
1827         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1828         char *write_data;
1829         int rc = -EFAULT;
1830         int bytes_written = 0;
1831         struct inode *inode;
1832         struct cifsFileInfo *open_file;
1833
1834         if (!mapping || !mapping->host)
1835                 return -EFAULT;
1836
1837         inode = page->mapping->host;
1838
1839         offset += (loff_t)from;
1840         write_data = kmap(page);
1841         write_data += from;
1842
1843         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1844                 kunmap(page);
1845                 return -EIO;
1846         }
1847
1848         /* racing with truncate? */
1849         if (offset > mapping->host->i_size) {
1850                 kunmap(page);
1851                 return 0; /* don't care */
1852         }
1853
1854         /* check to make sure that we are not extending the file */
1855         if (mapping->host->i_size - offset < (loff_t)to)
1856                 to = (unsigned)(mapping->host->i_size - offset);
1857
1858         open_file = find_writable_file(CIFS_I(mapping->host), false);
1859         if (open_file) {
1860                 bytes_written = cifs_write(open_file, open_file->pid,
1861                                            write_data, to - from, &offset);
1862                 cifsFileInfo_put(open_file);
1863                 /* Does mm or vfs already set times? */
1864                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1865                 if ((bytes_written > 0) && (offset))
1866                         rc = 0;
1867                 else if (bytes_written < 0)
1868                         rc = bytes_written;
1869         } else {
1870                 cifs_dbg(FYI, "No writable filehandles for inode\n");
1871                 rc = -EIO;
1872         }
1873
1874         kunmap(page);
1875         return rc;
1876 }
1877
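/*
 * Gather runs of consecutive dirty pages (up to wsize bytes per batch),
 * wrap each run in a cifs_writedata and push it to the server with
 * server->ops->async_writev.
 */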
1878 static int cifs_writepages(struct address_space *mapping,
1879                            struct writeback_control *wbc)
1880 {
1881         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1882         bool done = false, scanned = false, range_whole = false;
1883         pgoff_t end, index;
1884         struct cifs_writedata *wdata;
1885         struct TCP_Server_Info *server;
1886         struct page *page;
1887         int rc = 0;
1888
1889         /*
1890          * If wsize is smaller than the page cache size, default to writing
1891          * one page at a time via cifs_writepage
1892          */
1893         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1894                 return generic_writepages(mapping, wbc);
1895
1896         if (wbc->range_cyclic) {
1897                 index = mapping->writeback_index; /* Start from prev offset */
1898                 end = -1;
1899         } else {
1900                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1901                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1902                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1903                         range_whole = true;
1904                 scanned = true;
1905         }
1906 retry:
1907         while (!done && index <= end) {
1908                 unsigned int i, nr_pages, found_pages;
1909                 pgoff_t next = 0, tofind;
1910                 struct page **pages;
1911
1912                 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1913                                 end - index) + 1;
1914
1915                 wdata = cifs_writedata_alloc((unsigned int)tofind,
1916                                              cifs_writev_complete);
1917                 if (!wdata) {
1918                         rc = -ENOMEM;
1919                         break;
1920                 }
1921
1922                 /*
1923                  * find_get_pages_tag seems to return a max of 256 on each
1924                  * iteration, so we must call it several times in order to
1925                  * fill the array or the wsize is effectively limited to
1926                  * 256 * PAGE_CACHE_SIZE.
1927                  */
1928                 found_pages = 0;
1929                 pages = wdata->pages;
1930                 do {
1931                         nr_pages = find_get_pages_tag(mapping, &index,
1932                                                         PAGECACHE_TAG_DIRTY,
1933                                                         tofind, pages);
1934                         found_pages += nr_pages;
1935                         tofind -= nr_pages;
1936                         pages += nr_pages;
1937                 } while (nr_pages && tofind && index <= end);
1938
1939                 if (found_pages == 0) {
1940                         kref_put(&wdata->refcount, cifs_writedata_release);
1941                         break;
1942                 }
1943
1944                 nr_pages = 0;
1945                 for (i = 0; i < found_pages; i++) {
1946                         page = wdata->pages[i];
1947                         /*
1948                          * At this point we hold neither mapping->tree_lock nor
1949                          * lock on the page itself: the page may be truncated or
1950                          * invalidated (changing page->mapping to NULL), or even
1951                          * swizzled back from swapper_space to tmpfs file
1952                          * mapping
1953                          */
1954
1955                         if (nr_pages == 0)
1956                                 lock_page(page);
1957                         else if (!trylock_page(page))
1958                                 break;
1959
1960                         if (unlikely(page->mapping != mapping)) {
1961                                 unlock_page(page);
1962                                 break;
1963                         }
1964
1965                         if (!wbc->range_cyclic && page->index > end) {
1966                                 done = true;
1967                                 unlock_page(page);
1968                                 break;
1969                         }
1970
1971                         if (next && (page->index != next)) {
1972                                 /* Not next consecutive page */
1973                                 unlock_page(page);
1974                                 break;
1975                         }
1976
1977                         if (wbc->sync_mode != WB_SYNC_NONE)
1978                                 wait_on_page_writeback(page);
1979
1980                         if (PageWriteback(page) ||
1981                                         !clear_page_dirty_for_io(page)) {
1982                                 unlock_page(page);
1983                                 break;
1984                         }
1985
1986                         /*
1987                          * This actually clears the dirty bit in the radix tree.
1988                          * See cifs_writepage() for more commentary.
1989                          */
1990                         set_page_writeback(page);
1991
1992                         if (page_offset(page) >= i_size_read(mapping->host)) {
1993                                 done = true;
1994                                 unlock_page(page);
1995                                 end_page_writeback(page);
1996                                 break;
1997                         }
1998
1999                         wdata->pages[i] = page;
2000                         next = page->index + 1;
2001                         ++nr_pages;
2002                 }
2003
2004                 /* reset index to refind any pages skipped */
2005                 if (nr_pages == 0)
2006                         index = wdata->pages[0]->index + 1;
2007
2008                 /* put any pages we aren't going to use */
2009                 for (i = nr_pages; i < found_pages; i++) {
2010                         page_cache_release(wdata->pages[i]);
2011                         wdata->pages[i] = NULL;
2012                 }
2013
2014                 /* nothing to write? */
2015                 if (nr_pages == 0) {
2016                         kref_put(&wdata->refcount, cifs_writedata_release);
2017                         continue;
2018                 }
2019
2020                 wdata->sync_mode = wbc->sync_mode;
2021                 wdata->nr_pages = nr_pages;
2022                 wdata->offset = page_offset(wdata->pages[0]);
2023                 wdata->pagesz = PAGE_CACHE_SIZE;
2024                 wdata->tailsz =
2025                         min(i_size_read(mapping->host) -
2026                             page_offset(wdata->pages[nr_pages - 1]),
2027                             (loff_t)PAGE_CACHE_SIZE);
2028                 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
2029                                         wdata->tailsz;
2030
2031                 do {
2032                         if (wdata->cfile != NULL)
2033                                 cifsFileInfo_put(wdata->cfile);
2034                         wdata->cfile = find_writable_file(CIFS_I(mapping->host),
2035                                                           false);
2036                         if (!wdata->cfile) {
2037                                 cifs_dbg(VFS, "No writable handles for inode\n");
2038                                 rc = -EBADF;
2039                                 break;
2040                         }
2041                         wdata->pid = wdata->cfile->pid;
2042                         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2043                         rc = server->ops->async_writev(wdata);
2044                 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
2045
2046                 for (i = 0; i < nr_pages; ++i)
2047                         unlock_page(wdata->pages[i]);
2048
2049                 /* send failure -- clean up the mess */
2050                 if (rc != 0) {
2051                         for (i = 0; i < nr_pages; ++i) {
2052                                 if (rc == -EAGAIN)
2053                                         redirty_page_for_writepage(wbc,
2054                                                            wdata->pages[i]);
2055                                 else
2056                                         SetPageError(wdata->pages[i]);
2057                                 end_page_writeback(wdata->pages[i]);
2058                                 page_cache_release(wdata->pages[i]);
2059                         }
2060                         if (rc != -EAGAIN)
2061                                 mapping_set_error(mapping, rc);
2062                 }
2063                 kref_put(&wdata->refcount, cifs_writedata_release);
2064
2065                 wbc->nr_to_write -= nr_pages;
2066                 if (wbc->nr_to_write <= 0)
2067                         done = true;
2068
2069                 index = next;
2070         }
2071
2072         if (!scanned && !done) {
2073                 /*
2074                  * We hit the last page and there is more work to be done: wrap
2075                  * back to the start of the file
2076                  */
2077                 scanned = true;
2078                 index = 0;
2079                 goto retry;
2080         }
2081
2082         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2083                 mapping->writeback_index = index;
2084
2085         return rc;
2086 }
2087
2088 static int
2089 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2090 {
2091         int rc;
2092         unsigned int xid;
2093
2094         xid = get_xid();
2095 /* BB add check for wbc flags */
2096         page_cache_get(page);
2097         if (!PageUptodate(page))
2098                 cifs_dbg(FYI, "ppw - page not up to date\n");
2099
2100         /*
2101          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2102          *
2103          * A writepage() implementation always needs to do either this,
2104          * or re-dirty the page with "redirty_page_for_writepage()" in
2105          * the case of a failure.
2106          *
2107          * Just unlocking the page will cause the radix tree tag-bits
2108          * to fail to update with the state of the page correctly.
2109          */
2110         set_page_writeback(page);
2111 retry_write:
2112         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2113         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2114                 goto retry_write;
2115         else if (rc == -EAGAIN)
2116                 redirty_page_for_writepage(wbc, page);
2117         else if (rc != 0)
2118                 SetPageError(page);
2119         else
2120                 SetPageUptodate(page);
2121         end_page_writeback(page);
2122         page_cache_release(page);
2123         free_xid(xid);
2124         return rc;
2125 }
2126
2127 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2128 {
2129         int rc = cifs_writepage_locked(page, wbc);
2130         unlock_page(page);
2131         return rc;
2132 }
2133
2134 static int cifs_write_end(struct file *file, struct address_space *mapping,
2135                         loff_t pos, unsigned len, unsigned copied,
2136                         struct page *page, void *fsdata)
2137 {
2138         int rc;
2139         struct inode *inode = mapping->host;
2140         struct cifsFileInfo *cfile = file->private_data;
2141         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2142         __u32 pid;
2143
2144         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2145                 pid = cfile->pid;
2146         else
2147                 pid = current->tgid;
2148
2149         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2150                  page, pos, copied);
2151
2152         if (PageChecked(page)) {
2153                 if (copied == len)
2154                         SetPageUptodate(page);
2155                 ClearPageChecked(page);
2156         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2157                 SetPageUptodate(page);
2158
2159         if (!PageUptodate(page)) {
2160                 char *page_data;
2161                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2162                 unsigned int xid;
2163
2164                 xid = get_xid();
2165                 /* this is probably better than directly calling
2166                    partialpage_write, since in this function the file handle
2167                    is known and we might as well leverage it */
2168                 /* BB check if anything else is missing from ppw,
2169                    such as updating the last write time */
2170                 page_data = kmap(page);
2171                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2172                 /* if (rc < 0) should we set writebehind rc? */
2173                 kunmap(page);
2174
2175                 free_xid(xid);
2176         } else {
2177                 rc = copied;
2178                 pos += copied;
2179                 set_page_dirty(page);
2180         }
2181
2182         if (rc > 0) {
2183                 spin_lock(&inode->i_lock);
2184                 if (pos > inode->i_size)
2185                         i_size_write(inode, pos);
2186                 spin_unlock(&inode->i_lock);
2187         }
2188
2189         unlock_page(page);
2190         page_cache_release(page);
2191
2192         return rc;
2193 }
2194
2195 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2196                       int datasync)
2197 {
2198         unsigned int xid;
2199         int rc = 0;
2200         struct cifs_tcon *tcon;
2201         struct TCP_Server_Info *server;
2202         struct cifsFileInfo *smbfile = file->private_data;
2203         struct inode *inode = file_inode(file);
2204         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2205
2206         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2207         if (rc)
2208                 return rc;
2209         mutex_lock(&inode->i_mutex);
2210
2211         xid = get_xid();
2212
2213         cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
2214                  file->f_path.dentry->d_name.name, datasync);
2215
2216         if (!CIFS_I(inode)->clientCanCacheRead) {
2217                 rc = cifs_invalidate_mapping(inode);
2218                 if (rc) {
2219                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2220                         rc = 0; /* don't care about it in fsync */
2221                 }
2222         }
2223
2224         tcon = tlink_tcon(smbfile->tlink);
2225         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2226                 server = tcon->ses->server;
2227                 if (server->ops->flush)
2228                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2229                 else
2230                         rc = -ENOSYS;
2231         }
2232
2233         free_xid(xid);
2234         mutex_unlock(&inode->i_mutex);
2235         return rc;
2236 }
2237
2238 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2239 {
2240         unsigned int xid;
2241         int rc = 0;
2242         struct cifs_tcon *tcon;
2243         struct TCP_Server_Info *server;
2244         struct cifsFileInfo *smbfile = file->private_data;
2245         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2246         struct inode *inode = file->f_mapping->host;
2247
2248         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2249         if (rc)
2250                 return rc;
2251         mutex_lock(&inode->i_mutex);
2252
2253         xid = get_xid();
2254
2255         cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
2256                  file->f_path.dentry->d_name.name, datasync);
2257
2258         tcon = tlink_tcon(smbfile->tlink);
2259         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2260                 server = tcon->ses->server;
2261                 if (server->ops->flush)
2262                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2263                 else
2264                         rc = -ENOSYS;
2265         }
2266
2267         free_xid(xid);
2268         mutex_unlock(&inode->i_mutex);
2269         return rc;
2270 }
2271
2272 /*
2273  * As the file closes, flush all cached write data for this inode, checking
2274  * for write-behind errors.
2275  */
2276 int cifs_flush(struct file *file, fl_owner_t id)
2277 {
2278         struct inode *inode = file_inode(file);
2279         int rc = 0;
2280
2281         if (file->f_mode & FMODE_WRITE)
2282                 rc = filemap_write_and_wait(inode->i_mapping);
2283
2284         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2285
2286         return rc;
2287 }
2288
2289 static int
2290 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2291 {
2292         int rc = 0;
2293         unsigned long i;
2294
2295         for (i = 0; i < num_pages; i++) {
2296                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2297                 if (!pages[i]) {
2298                         /*
2299                          * save the number of pages we have already allocated and
2300                          * return with an ENOMEM error
2301                          */
2302                         num_pages = i;
2303                         rc = -ENOMEM;
2304                         break;
2305                 }
2306         }
2307
2308         if (rc) {
2309                 for (i = 0; i < num_pages; i++)
2310                         put_page(pages[i]);
2311         }
2312         return rc;
2313 }
2314
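/*
 * Return the number of pages needed to back min(len, wsize) bytes, and the
 * clamped length itself via @cur_len. For example, assuming 4 KB pages, a
 * 64 KB wsize and len = 100000 yield *cur_len = 65536 and 16 pages.
 */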
2315 static inline
2316 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2317 {
2318         size_t num_pages;
2319         size_t clen;
2320
2321         clen = min_t(const size_t, len, wsize);
2322         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2323
2324         if (cur_len)
2325                 *cur_len = clen;
2326
2327         return num_pages;
2328 }
2329
2330 static void
2331 cifs_uncached_writev_complete(struct work_struct *work)
2332 {
2333         int i;
2334         struct cifs_writedata *wdata = container_of(work,
2335                                         struct cifs_writedata, work);
2336         struct inode *inode = wdata->cfile->dentry->d_inode;
2337         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2338
2339         spin_lock(&inode->i_lock);
2340         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2341         if (cifsi->server_eof > inode->i_size)
2342                 i_size_write(inode, cifsi->server_eof);
2343         spin_unlock(&inode->i_lock);
2344
2345         complete(&wdata->done);
2346
2347         if (wdata->result != -EAGAIN) {
2348                 for (i = 0; i < wdata->nr_pages; i++)
2349                         put_page(wdata->pages[i]);
2350         }
2351
2352         kref_put(&wdata->refcount, cifs_writedata_release);
2353 }
2354
2355 /* attempt to send the write to the server, retrying on any -EAGAIN errors */
2356 static int
2357 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2358 {
2359         int rc;
2360         struct TCP_Server_Info *server;
2361
2362         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2363
2364         do {
2365                 if (wdata->cfile->invalidHandle) {
2366                         rc = cifs_reopen_file(wdata->cfile, false);
2367                         if (rc != 0)
2368                                 continue;
2369                 }
2370                 rc = server->ops->async_writev(wdata);
2371         } while (rc == -EAGAIN);
2372
2373         return rc;
2374 }
2375
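/*
 * Uncached write path: copy the caller's iovec data into freshly allocated
 * pages, issue an asynchronous write of up to wsize bytes per iteration,
 * then collect the completions in order of increasing offset.
 */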
2376 static ssize_t
2377 cifs_iovec_write(struct file *file, const struct iovec *iov,
2378                  unsigned long nr_segs, loff_t *poffset)
2379 {
2380         unsigned long nr_pages, i;
2381         size_t copied, len, cur_len;
2382         ssize_t total_written = 0;
2383         loff_t offset;
2384         struct iov_iter it;
2385         struct cifsFileInfo *open_file;
2386         struct cifs_tcon *tcon;
2387         struct cifs_sb_info *cifs_sb;
2388         struct cifs_writedata *wdata, *tmp;
2389         struct list_head wdata_list;
2390         int rc;
2391         pid_t pid;
2392
2393         len = iov_length(iov, nr_segs);
2394         if (!len)
2395                 return 0;
2396
2397         rc = generic_write_checks(file, poffset, &len, 0);
2398         if (rc)
2399                 return rc;
2400
2401         INIT_LIST_HEAD(&wdata_list);
2402         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2403         open_file = file->private_data;
2404         tcon = tlink_tcon(open_file->tlink);
2405
2406         if (!tcon->ses->server->ops->async_writev)
2407                 return -ENOSYS;
2408
2409         offset = *poffset;
2410
2411         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2412                 pid = open_file->pid;
2413         else
2414                 pid = current->tgid;
2415
2416         iov_iter_init(&it, iov, nr_segs, len, 0);
2417         do {
2418                 size_t save_len;
2419
2420                 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2421                 wdata = cifs_writedata_alloc(nr_pages,
2422                                              cifs_uncached_writev_complete);
2423                 if (!wdata) {
2424                         rc = -ENOMEM;
2425                         break;
2426                 }
2427
2428                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2429                 if (rc) {
2430                         kfree(wdata);
2431                         break;
2432                 }
2433
2434                 save_len = cur_len;
2435                 for (i = 0; i < nr_pages; i++) {
2436                         copied = min_t(const size_t, cur_len, PAGE_SIZE);
2437                         copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2438                                                          0, copied);
2439                         cur_len -= copied;
2440                         iov_iter_advance(&it, copied);
2441                 }
2442                 cur_len = save_len - cur_len;
2443
2444                 wdata->sync_mode = WB_SYNC_ALL;
2445                 wdata->nr_pages = nr_pages;
2446                 wdata->offset = (__u64)offset;
2447                 wdata->cfile = cifsFileInfo_get(open_file);
2448                 wdata->pid = pid;
2449                 wdata->bytes = cur_len;
2450                 wdata->pagesz = PAGE_SIZE;
2451                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2452                 rc = cifs_uncached_retry_writev(wdata);
2453                 if (rc) {
2454                         kref_put(&wdata->refcount, cifs_writedata_release);
2455                         break;
2456                 }
2457
2458                 list_add_tail(&wdata->list, &wdata_list);
2459                 offset += cur_len;
2460                 len -= cur_len;
2461         } while (len > 0);
2462
2463         /*
2464          * If at least one write was successfully sent, then discard any rc
2465          * value from the later sends. If the other writes succeed, then
2466          * we'll end up returning whatever was written. If one fails, then
2467          * we'll get a new rc value from that.
2468          */
2469         if (!list_empty(&wdata_list))
2470                 rc = 0;
2471
2472         /*
2473          * Wait for and collect replies for any successful sends in order of
2474          * increasing offset. Once an error is hit or we get a fatal signal
2475          * while waiting, then return without waiting for any more replies.
2476          */
2477 restart_loop:
2478         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2479                 if (!rc) {
2480                         /* FIXME: freezable too? */
2481                         rc = wait_for_completion_killable(&wdata->done);
2482                         if (rc)
2483                                 rc = -EINTR;
2484                         else if (wdata->result)
2485                                 rc = wdata->result;
2486                         else
2487                                 total_written += wdata->bytes;
2488
2489                         /* resend call if it's a retryable error */
2490                         if (rc == -EAGAIN) {
2491                                 rc = cifs_uncached_retry_writev(wdata);
2492                                 goto restart_loop;
2493                         }
2494                 }
2495                 list_del_init(&wdata->list);
2496                 kref_put(&wdata->refcount, cifs_writedata_release);
2497         }
2498
2499         if (total_written > 0)
2500                 *poffset += total_written;
2501
2502         cifs_stats_bytes_written(tcon, total_written);
2503         return total_written ? total_written : (ssize_t)rc;
2504 }
2505
2506 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2507                                 unsigned long nr_segs, loff_t pos)
2508 {
2509         ssize_t written;
2510         struct inode *inode;
2511
2512         inode = file_inode(iocb->ki_filp);
2513
2514         /*
2515          * BB - optimize the path taken when signing is disabled. We can drop
2516          * this extra memory-to-memory copy and use the iovec buffers directly
2517          * to construct the write request.
2518          */
2519
2520         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2521         if (written > 0) {
2522                 CIFS_I(inode)->invalid_mapping = true;
2523                 iocb->ki_pos = pos;
2524         }
2525
2526         return written;
2527 }
2528
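/*
 * Oplocked write path: take lock_sem shared so the brlock list stays
 * stable, refuse the write if it collides with a mandatory lock, and
 * otherwise hand off to the generic page-cache write.
 */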
2529 static ssize_t
2530 cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2531             unsigned long nr_segs, loff_t pos)
2532 {
2533         struct file *file = iocb->ki_filp;
2534         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2535         struct inode *inode = file->f_mapping->host;
2536         struct cifsInodeInfo *cinode = CIFS_I(inode);
2537         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2538         ssize_t rc = -EACCES;
2539
2540         BUG_ON(iocb->ki_pos != pos);
2541
2542         /*
2543          * We need to hold the semaphore to be sure nobody modifies the lock
2544          * list with a brlock that prevents writing.
2545          */
2546         down_read(&cinode->lock_sem);
2547         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2548                                      server->vals->exclusive_lock_type, NULL,
2549                                      CIFS_WRITE_OP)) {
2550                 mutex_lock(&inode->i_mutex);
2551                 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2552                                                &iocb->ki_pos);
2553                 mutex_unlock(&inode->i_mutex);
2554         }
2555
2556         if (rc > 0 || rc == -EIOCBQUEUED) {
2557                 ssize_t err;
2558
2559                 err = generic_write_sync(file, pos, rc);
2560                 if (err < 0 && rc > 0)
2561                         rc = err;
2562         }
2563
2564         up_read(&cinode->lock_sem);
2565         return rc;
2566 }
2567
2568 ssize_t
2569 cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2570                    unsigned long nr_segs, loff_t pos)
2571 {
2572         struct inode *inode = file_inode(iocb->ki_filp);
2573         struct cifsInodeInfo *cinode = CIFS_I(inode);
2574         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2575         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2576                                                 iocb->ki_filp->private_data;
2577         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2578         ssize_t written;
2579
2580         if (cinode->clientCanCacheAll) {
2581                 if (cap_unix(tcon->ses) &&
2582                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2583                     && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2584                         return generic_file_aio_write(iocb, iov, nr_segs, pos);
2585                 return cifs_writev(iocb, iov, nr_segs, pos);
2586         }
2587         /*
2588          * For non-oplocked files in strict cache mode we need to write the data
2589          * to the server exactly from pos to pos+len-1 rather than flush all
2590          * affected pages, because flushing may cause an error with mandatory
2591          * locks on these pages but not on the region from pos to pos+len-1.
2592          */
2593         written = cifs_user_writev(iocb, iov, nr_segs, pos);
2594         if (written > 0 && cinode->clientCanCacheRead) {
2595                 /*
2596                  * A Windows 7 server can delay breaking a level2 oplock when a
2597                  * write request comes in - break it on the client to prevent
2598                  * reading stale data.
2599                  */
2600                 cifs_invalidate_mapping(inode);
2601                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2602                          inode);
2603                 cinode->clientCanCacheRead = false;
2604         }
2605         return written;
2606 }
2607
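/*
 * Allocate a cifs_readdata with room for @nr_pages page pointers and set
 * up its refcount, completion and work item.
 */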
2608 static struct cifs_readdata *
2609 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2610 {
2611         struct cifs_readdata *rdata;
2612
2613         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2614                         GFP_KERNEL);
2615         if (rdata != NULL) {
2616                 kref_init(&rdata->refcount);
2617                 INIT_LIST_HEAD(&rdata->list);
2618                 init_completion(&rdata->done);
2619                 INIT_WORK(&rdata->work, complete);
2620         }
2621
2622         return rdata;
2623 }
2624
2625 void
2626 cifs_readdata_release(struct kref *refcount)
2627 {
2628         struct cifs_readdata *rdata = container_of(refcount,
2629                                         struct cifs_readdata, refcount);
2630
2631         if (rdata->cfile)
2632                 cifsFileInfo_put(rdata->cfile);
2633
2634         kfree(rdata);
2635 }
2636
2637 static int
2638 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2639 {
2640         int rc = 0;
2641         struct page *page;
2642         unsigned int i;
2643
2644         for (i = 0; i < nr_pages; i++) {
2645                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2646                 if (!page) {
2647                         rc = -ENOMEM;
2648                         break;
2649                 }
2650                 rdata->pages[i] = page;
2651         }
2652
2653         if (rc) {
2654                 for (i = 0; i < nr_pages; i++) {
2655                         put_page(rdata->pages[i]);
2656                         rdata->pages[i] = NULL;
2657                 }
2658         }
2659         return rc;
2660 }
2661
2662 static void
2663 cifs_uncached_readdata_release(struct kref *refcount)
2664 {
2665         struct cifs_readdata *rdata = container_of(refcount,
2666                                         struct cifs_readdata, refcount);
2667         unsigned int i;
2668
2669         for (i = 0; i < rdata->nr_pages; i++) {
2670                 put_page(rdata->pages[i]);
2671                 rdata->pages[i] = NULL;
2672         }
2673         cifs_readdata_release(refcount);
2674 }
2675
2676 static int
2677 cifs_retry_async_readv(struct cifs_readdata *rdata)
2678 {
2679         int rc;
2680         struct TCP_Server_Info *server;
2681
2682         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2683
2684         do {
2685                 if (rdata->cfile->invalidHandle) {
2686                         rc = cifs_reopen_file(rdata->cfile, true);
2687                         if (rc != 0)
2688                                 continue;
2689                 }
2690                 rc = server->ops->async_readv(rdata);
2691         } while (rc == -EAGAIN);
2692
2693         return rc;
2694 }
2695
2696 /**
2697  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2698  * @rdata:      the readdata response with list of pages holding data
2699  * @iov:        vector in which we should copy the data
2700  * @nr_segs:    number of segments in vector
2701  * @offset:     offset into file of the first iovec
2702  * @copied:     used to return the amount of data copied to the iov
2703  *
2704  * This function copies data from a list of pages in a readdata response into
2705  * an array of iovecs. It will first calculate where the data should go
2706  * based on the info in the readdata and then copy the data into that spot.
2707  */
2708 static ssize_t
2709 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2710                         unsigned long nr_segs, loff_t offset, ssize_t *copied)
2711 {
2712         int rc = 0;
2713         struct iov_iter ii;
2714         size_t pos = rdata->offset - offset;
2715         ssize_t remaining = rdata->bytes;
2716         unsigned char *pdata;
2717         unsigned int i;
2718
2719         /* set up iov_iter and advance to the correct offset */
2720         iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2721         iov_iter_advance(&ii, pos);
2722
2723         *copied = 0;
2724         for (i = 0; i < rdata->nr_pages; i++) {
2725                 ssize_t copy;
2726                 struct page *page = rdata->pages[i];
2727
2728                 /* copy a whole page or whatever's left */
2729                 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2730
2731                 /* ...but limit it to whatever space is left in the iov */
2732                 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2733
2734                 /* go while there's data to be copied and no errors */
2735                 if (copy && !rc) {
2736                         pdata = kmap(page);
2737                         rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2738                                                 (int)copy);
2739                         kunmap(page);
2740                         if (!rc) {
2741                                 *copied += copy;
2742                                 remaining -= copy;
2743                                 iov_iter_advance(&ii, copy);
2744                         }
2745                 }
2746         }
2747
2748         return rc;
2749 }
2750
2751 static void
2752 cifs_uncached_readv_complete(struct work_struct *work)
2753 {
2754         struct cifs_readdata *rdata = container_of(work,
2755                                                 struct cifs_readdata, work);
2756
2757         complete(&rdata->done);
2758         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2759 }
2760
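/*
 * Receive @len bytes from the socket into rdata's page array. A trailing
 * partial page is zero-filled beyond @len and its size recorded in tailsz;
 * pages past the end of the data are released immediately.
 */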
2761 static int
2762 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2763                         struct cifs_readdata *rdata, unsigned int len)
2764 {
2765         int total_read = 0, result = 0;
2766         unsigned int i;
2767         unsigned int nr_pages = rdata->nr_pages;
2768         struct kvec iov;
2769
2770         rdata->tailsz = PAGE_SIZE;
2771         for (i = 0; i < nr_pages; i++) {
2772                 struct page *page = rdata->pages[i];
2773
2774                 if (len >= PAGE_SIZE) {
2775                         /* enough data to fill the page */
2776                         iov.iov_base = kmap(page);
2777                         iov.iov_len = PAGE_SIZE;
2778                         cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2779                                  i, iov.iov_base, iov.iov_len);
2780                         len -= PAGE_SIZE;
2781                 } else if (len > 0) {
2782                         /* enough for partial page, fill and zero the rest */
2783                         iov.iov_base = kmap(page);
2784                         iov.iov_len = len;
2785                         cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2786                                  i, iov.iov_base, iov.iov_len);
2787                         memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2788                         rdata->tailsz = len;
2789                         len = 0;
2790                 } else {
2791                         /* no need to hold page hostage */
2792                         rdata->pages[i] = NULL;
2793                         rdata->nr_pages--;
2794                         put_page(page);
2795                         continue;
2796                 }
2797
2798                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2799                 kunmap(page);
2800                 if (result < 0)
2801                         break;
2802
2803                 total_read += result;
2804         }
2805
2806         return total_read > 0 ? total_read : result;
2807 }
2808
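/*
 * Uncached read path: issue asynchronous reads of up to rsize bytes each,
 * then wait for the replies in offset order and copy the returned pages
 * into the caller's iovec.
 */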
static ssize_t
cifs_iovec_read(struct file *file, const struct iovec *iov,
		 unsigned long nr_segs, loff_t *poffset)
{
	ssize_t rc;
	size_t len, cur_len;
	ssize_t total_read = 0;
	loff_t offset = *poffset;
	unsigned int npages;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *open_file;
	struct cifs_readdata *rdata, *tmp;
	struct list_head rdata_list;
	pid_t pid;

	if (!nr_segs)
		return 0;

	len = iov_length(iov, nr_segs);
	if (!len)
		return 0;

	INIT_LIST_HEAD(&rdata_list);
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	do {
		cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
		npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);

		/* allocate a readdata struct */
		rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
		if (!rdata) {
			/* rdata is NULL here, so don't jump to the "error:"
			   label below, which would kref_put() it */
			rc = -ENOMEM;
			break;
		}

		rc = cifs_read_allocate_pages(rdata, npages);
		if (rc)
			goto error;

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;

		rc = cifs_retry_async_readv(rdata);
error:
		if (rc) {
			kref_put(&rdata->refcount,
				 cifs_uncached_readdata_release);
			break;
		}

		list_add_tail(&rdata->list, &rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	/* if at least one read request was sent successfully, reset rc */
	if (!list_empty(&rdata_list))
		rc = 0;

	/* the loop below should proceed in the order of increasing offsets */
restart_loop:
	list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
		if (!rc) {
			ssize_t copied;

			/* FIXME: freezable sleep too? */
			rc = wait_for_completion_killable(&rdata->done);
			if (rc)
				rc = -EINTR;
			else if (rdata->result)
				rc = rdata->result;
			else {
				rc = cifs_readdata_to_iov(rdata, iov,
							nr_segs, *poffset,
							&copied);
				total_read += copied;
			}

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				rc = cifs_retry_async_readv(rdata);
				goto restart_loop;
			}
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	cifs_stats_bytes_read(tcon, total_read);
	*poffset += total_read;

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	return total_read ? total_read : rc;
}

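/*
 * Entry point for uncached reads: read from the server directly into
 * the user's buffers, bypassing the page cache, and advance ki_pos by
 * the number of bytes read.
 */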
ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
			       unsigned long nr_segs, loff_t pos)
{
	ssize_t read;

	read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
	if (read > 0)
		iocb->ki_pos = pos;

	return read;
}

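/*
 * Read entry point for strict cache mode: use the page cache only when
 * we hold a read oplock and no mandatory byte-range lock conflicts with
 * the request; otherwise read from the server.
 */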
ssize_t
cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
		  unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server every time
	 * if we don't have a level II oplock, because the server can delay
	 * mtime changes and so we cannot decide whether to invalidate the
	 * inode. A cached read can also fail if there are mandatory locks
	 * on pages affected by this read, even when the region from pos to
	 * pos+len-1 itself is unlocked.
	 */
	if (!cinode->clientCanCacheRead)
		return cifs_user_readv(iocb, iov, nr_segs, pos);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_aio_read(iocb, iov, nr_segs, pos);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
				     tcon->ses->server->vals->shared_lock_type,
				     NULL, CIFS_READ_OP))
		rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
	up_read(&cinode->lock_sem);
	return rc;
}

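/*
 * Synchronous read helper: read up to read_size bytes at *offset into
 * read_data via the server's sync_read op, reopening an invalidated
 * handle and retrying on -EAGAIN. Advances *offset as bytes arrive.
 */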
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		current_read_size = min_t(uint, read_size - total_read, rsize);
		/*
		 * For Windows ME and 9x we do not want to request more than
		 * the size the server negotiated, since it will refuse the
		 * read then.
		 */
		if ((tcon->ses) && !(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
			current_read_size = min_t(uint, current_read_size,
					CIFSMaxBufSize);
		}
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			rc = server->ops->sync_read(xid, open_file, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		}
		if (rc || (bytes_read == 0)) {
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}

/*
 * If the page is mmap'ed into a process' page tables, then we need to make
 * sure that it doesn't change while being written back.
 */
static int
cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;

	lock_page(page);
	return VM_FAULT_LOCKED;
}

static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.page_mkwrite = cifs_page_mkwrite,
	.remap_pages = generic_file_remap_pages,
};

int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;
	struct inode *inode = file_inode(file);

	xid = get_xid();

	if (!CIFS_I(inode)->clientCanCacheRead) {
		rc = cifs_invalidate_mapping(inode);
		if (rc) {
			/* don't leak the xid on the early error return */
			free_xid(xid);
			return rc;
		}
	}

	rc = generic_file_mmap(file, vma);
	if (rc == 0)
		vma->vm_ops = &cifs_file_vm_ops;
	free_xid(xid);
	return rc;
}

int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;

	xid = get_xid();
	rc = cifs_revalidate_file(file);
	if (rc) {
		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
			 rc);
		free_xid(xid);
		return rc;
	}
	rc = generic_file_mmap(file, vma);
	if (rc == 0)
		vma->vm_ops = &cifs_file_vm_ops;
	free_xid(xid);
	return rc;
}

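/*
 * Completion work for a readahead request: on success mark each page
 * uptodate and hand it to fscache; in all cases add the pages to the
 * LRU, unlock them, and drop the readdata reference.
 */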
static void
cifs_readv_complete(struct work_struct *work)
{
	unsigned int i;
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];

		lru_cache_add_file(page);

		if (rdata->result == 0) {
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		unlock_page(page);

		if (rdata->result == 0)
			cifs_readpage_to_fscache(rdata->mapping->host, page);

		page_cache_release(page);
		rdata->pages[i] = NULL;
	}
	kref_put(&rdata->refcount, cifs_readdata_release);
}

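/*
 * Like cifs_uncached_read_into_pages(), but for the readahead path:
 * pages past the server's EOF are zero-filled and marked uptodate so
 * the VFS does not keep retrying them.
 */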
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			struct cifs_readdata *rdata, unsigned int len)
{
	int total_read = 0, result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	struct kvec iov;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->tailsz = PAGE_CACHE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];

		if (len >= PAGE_CACHE_SIZE) {
			/* enough data to fill the page */
			iov.iov_base = kmap(page);
			iov.iov_len = PAGE_CACHE_SIZE;
			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
				 i, page->index, iov.iov_base, iov.iov_len);
			len -= PAGE_CACHE_SIZE;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			iov.iov_base = kmap(page);
			iov.iov_len = len;
			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
				 i, page->index, iov.iov_base, iov.iov_len);
			memset(iov.iov_base + len,
				'\0', PAGE_CACHE_SIZE - len);
			rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_CACHE_SIZE);
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			page_cache_release(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add_file(page);
			unlock_page(page);
			page_cache_release(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
		kunmap(page);
		if (result < 0)
			break;

		total_read += result;
	}

	return total_read > 0 ? total_read : result;
}

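/*
 * ->readpages() for cifs: try fscache first, then batch contiguous
 * pages from the VFS-supplied list into rsize-sized async read
 * requests.
 */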
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	unsigned int rsize = cifs_sb->rsize;
	pid_t pid;

	/*
	 * Give up immediately if rsize is too small to read an entire page.
	 * The VFS will fall back to readpage. We should never reach this
	 * point however since we set ra_pages to 0 when the rsize is smaller
	 * than a cache page.
	 */
	if (unlikely(rsize < PAGE_CACHE_SIZE))
		return 0;

	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0)
		return rc;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	INIT_LIST_HEAD(&tmplist);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i;
		unsigned int bytes = PAGE_CACHE_SIZE;
		unsigned int expected_index;
		unsigned int nr_pages = 1;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;

		page = list_entry(page_list->prev, struct page, lru);

		/*
		 * Lock the page and put it in the cache. Since no one else
		 * should have access to this page, we're safe to simply set
		 * PG_locked without checking it first.
		 */
		__set_page_locked(page);
		rc = add_to_page_cache_locked(page, mapping,
					      page->index, GFP_KERNEL);

		/* give up if we can't stick it in the cache */
		if (rc) {
			__clear_page_locked(page);
			break;
		}

		/* move first page to the tmplist */
		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
		list_move_tail(&page->lru, &tmplist);

		/* now try and add more pages onto the request */
		expected_index = page->index + 1;
		list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
			/* discontinuity ? */
			if (page->index != expected_index)
				break;

			/* would this page push the read over the rsize? */
			if (bytes + PAGE_CACHE_SIZE > rsize)
				break;

			__set_page_locked(page);
			if (add_to_page_cache_locked(page, mapping,
						page->index, GFP_KERNEL)) {
				__clear_page_locked(page);
				break;
			}
			list_move_tail(&page->lru, &tmplist);
			bytes += PAGE_CACHE_SIZE;
			expected_index++;
			nr_pages++;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			rc = -ENOMEM;
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_CACHE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = cifs_retry_async_readv(rdata);
		if (rc != 0) {
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	return rc;
}

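/*
 * Read one page at *poffset: try fscache first, then fall back to a
 * synchronous read from the server, zeroing the tail of the page and
 * marking it uptodate on success.
 */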
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	page_cache_get(page);
	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	file_inode(file)->i_atime =
		current_fs_time(file_inode(file)->i_sb);

	if (PAGE_CACHE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	rc = 0;

io_error:
	kunmap(page);
	page_cache_release(page);

read_complete:
	return rc;
}

static int cifs_readpage(struct file *file, struct page *page)
{
	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	int rc = -EACCES;
	unsigned int xid;

	xid = get_xid();

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
		 page, (int)offset, (int)offset);

	rc = cifs_readpage_worker(file, page, &offset);

	unlock_page(page);

	free_xid(xid);
	return rc;
}

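/* Return 1 if any open handle on this inode has write access. */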
static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;

	spin_lock(&cifs_file_list_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			spin_unlock(&cifs_file_list_lock);
			return 1;
		}
	}
	spin_unlock(&cifs_file_list_lock);
	return 0;
}

/*
 * We do not want to update the file size from the server for inodes open
 * for write, to avoid races with writepage extending the file. In the
 * future we could consider allowing refreshing the inode only on
 * increases in the file size, but this is tricky to do without racing
 * with writebehind page caching in the current Linux kernel design.
 */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
	if (!cifsInode)
		return true;

	if (is_inode_writable(cifsInode)) {
		/* This inode is open for write at least once */
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
			/* since there is no page cache to corrupt with
			   direct I/O, we can change the size safely */
			return true;
		}

		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
			return true;

		return false;
	} else
		return true;
}

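/*
 * ->write_begin(): grab and prepare the page that will receive the
 * copied data, reading or zeroing it first when the write does not
 * cover the whole page.
 */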
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_CACHE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_I(mapping->host)->clientCanCacheRead) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_CACHE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
	} else {
		/*
		 * We could try using another file handle if there is one,
		 * but how would we lock it to prevent a close of that
		 * handle racing with this read? In any case the page will
		 * be written out by write_end, so this is fine.
		 */
	}
out:
	*pagep = page;
	return rc;
}

static int cifs_release_page(struct page *page, gfp_t gfp)
{
	if (PagePrivate(page))
		return 0;

	return cifs_fscache_release_page(page, gfp);
}

static void cifs_invalidate_page(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

	if (offset == 0 && length == PAGE_CACHE_SIZE)
		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}

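/*
 * Write a dirty page back synchronously before it is invalidated, and
 * drop any copy that fscache holds for it.
 */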
static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}

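/*
 * Work handler run when the server breaks our oplock: drop cached read
 * state if mandatory locks forbid it, flush (and if needed invalidate)
 * the page cache, push cached byte-range locks to the server, and
 * acknowledge the break unless it was cancelled by a reconnect.
 */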
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = cfile->dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->clientCanCacheRead = false;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (cinode->clientCanCacheRead)
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!cinode->clientCanCacheRead) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_invalidate_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

	/*
	 * Releasing a stale oplock after a recent reconnect of the SMB
	 * session (using a now-incorrect file handle) is not a data
	 * integrity issue, but do not bother sending an oplock release
	 * if the session to the server is still disconnected, since the
	 * server has already released the oplock.
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
}

const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};

/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};