CIFS: Move oplock break to ops struct
/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause an unnecessary access-denied error on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}
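
/*
 * Quick sanity sketch (illustrative, not compiled in): only the O_ACCMODE
 * bits are inspected here, so e.g.
 *
 *	cifs_convert_flags(O_RDWR | O_CREAT) == (GENERIC_READ | GENERIC_WRITE)
 *
 * Creation intent is handled separately by cifs_get_disposition() below.
 */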

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT)
                posix_flags |= SMB_O_CREAT;
        if (flags & O_EXCL)
                posix_flags |= SMB_O_EXCL;
        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}
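
/*
 * Worked examples of the disposition mapping (see also the table in
 * cifs_nt_open() below):
 *
 *	cifs_get_disposition(O_CREAT | O_EXCL)  == FILE_CREATE
 *	cifs_get_disposition(O_CREAT | O_TRUNC) == FILE_OVERWRITE_IF
 *	cifs_get_disposition(O_CREAT)           == FILE_OPEN_IF
 *	cifs_get_disposition(O_TRUNC)           == FILE_OVERWRITE
 *	cifs_get_disposition(0)                 == FILE_OPEN
 */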

int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cFYI(1, "posix open %s", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
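
/*
 * Usage note: cifs_posix_open() has two call sites in this file --
 * cifs_open(), which passes a pinode so the inode can be instantiated
 * from the returned FILE_UNIX_BASIC_INFO, and cifs_reopen_file(), which
 * passes NULL since the inode already exists.
 */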

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;

        if (!tcon->ses->server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct POSIX match for the disposition
 *      FILE_SUPERSEDE (i.e. create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing file
 *      rather than replacing it with a new file as FILE_SUPERSEDE does
 *      (FILE_SUPERSEDE uses the attributes/metadata passed in on the
 *      open call).
 *
 *      O_SYNC is a reasonable match for the CIFS writethrough flag,
 *      and the read/write flags match reasonably. O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client. The flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        rc = tcon->ses->server->ops->open(xid, tcon, full_path, disposition,
                                          desired_access, create_options, fid,
                                          oplock, buf, cifs_sb);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, &fid->netfid);

out:
        kfree(buf);
        return rc;
}

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        mutex_init(&cfile->fh_mutex);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_LIST_HEAD(&cfile->llist);
        tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);

        spin_lock(&cifs_file_list_lock);
        list_add(&cfile->tlist, &(tlink_tcon(tlink)->openFileList));
        /* if this is a readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        file->private_data = cfile;
        return cfile;
}

static void cifs_del_lock_waiters(struct cifsLockInfo *lock);

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifsLockInfo *li, *tmp;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cFYI(1, "closing last open instance for inode %p",
                        cifs_file->dentry->d_inode);
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        CIFS_I(inode)->invalid_mapping = true;
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;
                int rc = -ENOSYS;

                xid = get_xid();
                if (server->ops->close)
                        rc = server->ops->close(xid, tcon, &cifs_file->fid);
                free_xid(xid);
        }

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        mutex_lock(&cifsi->lock_mutex);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        mutex_unlock(&cifsi->lock_mutex);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        kfree(cifs_file);
}
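
/*
 * Reference-counting sketch: cifs_new_fileinfo() creates the structure
 * with count == 1, cifsFileInfo_get() bumps the count under
 * cifs_file_list_lock, and the final cifsFileInfo_put() closes the
 * handle on the server and frees the structure. Every get must
 * therefore be paired with a put.
 */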

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
                 inode, file->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cFYI(1, "posix open succeeded");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cERROR(1, "server %s of type %s returned"
                                           " unexpected error on SMB posix open"
                                           ", disabling posix open support."
                                           " Check if server update available.",
                                           tcon->ses->serverName,
                                           tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

        if (!posix_open_ok) {
                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc)
                        goto out;
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (tcon->ses->server->ops->close)
                        tcon->ses->server->ops->close(xid, tcon, &fid);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = NO_CHANGE_64,
                        .gid    = NO_CHANGE_64,
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost
 */
static int cifs_relock_file(struct cifsFileInfo *cifsFile)
{
        int rc = 0;

        /* BB list all locks open on this file and relock */

        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_fid fid;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = cfile->dentry->d_inode;
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already hold the rename sem, can end up causing
         * writepage to get called, and if the server was down that means
         * we end up here. We can never tell whether the caller already
         * holds the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
             full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cFYI(1, "posix reopen succeeded");
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * CIFSSMBOpen and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, &fid, &oplock,
                               NULL, cifs_sb);
        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cFYI(1, "cifs_reopen returned 0x%x", rc);
                cFYI(1, "oplock: %d", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &fid, oplock);
        cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cFYI(1, "Closedir inode = 0x%p", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cFYI(1, "Freeing private data in close dir");
        spin_lock(&cifs_file_list_lock);
        if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cFYI(1, "Closing uncompleted readdir with rc %d", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cFYI(1, "closedir free smb buf in srch struct");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}
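
/*
 * Ownership note: the caller owns the cifsLockInfo returned by
 * cifs_lock_init() until it is linked into a file's llist; on failure
 * paths it must be freed with kfree() (see the error handling in
 * cifs_setlk() below).
 */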

static void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

static bool
cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cur,
                            struct cifsLockInfo **conf_lock)
{
        struct cifsLockInfo *li;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &cfile->llist, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                else if ((type & server->vals->shared_lock_type) &&
                         ((server->ops->compare_fids(cur, cfile) &&
                           current->tgid == li->pid) || type == li->type))
                        continue;
                else {
                        *conf_lock = li;
                        return true;
                }
        }
        return false;
}

static bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock)
{
        bool rc = false;
        struct cifsFileInfo *fid, *tmp;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

        spin_lock(&cifs_file_list_lock);
        list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) {
                rc = cifs_find_fid_lock_conflict(fid, offset, length, type,
                                                 cfile, conf_lock);
                if (rc)
                        break;
        }
        spin_unlock(&cifs_file_list_lock);

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        mutex_lock(&cinode->lock_mutex);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        mutex_unlock(&cinode->lock_mutex);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        mutex_lock(&cinode->lock_mutex);
        list_add_tail(&lock->llist, &cfile->llist);
        mutex_unlock(&cinode->lock_mutex);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        mutex_lock(&cinode->lock_mutex);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist);
                mutex_unlock(&cinode->lock_mutex);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                mutex_unlock(&cinode->lock_mutex);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                mutex_lock(&cinode->lock_mutex);
                list_del_init(&lock->blist);
        }

        mutex_unlock(&cinode->lock_mutex);
        return rc;
}
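
/*
 * Typical caller pattern for the tri-state return (see cifs_setlk()
 * below):
 *
 *	rc = cifs_lock_add_if(cfile, lock, wait_flag);
 *	rc < 0:  error (e.g. -EACCES), the caller frees the lock
 *	rc == 0: lock was cached locally, nothing to send
 *	rc == 1: no local conflict; send the request on the wire and
 *	         then cifs_lock_add() the lock on success
 */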

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        mutex_lock(&cinode->lock_mutex);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        mutex_unlock(&cinode->lock_mutex);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        mutex_lock(&cinode->lock_mutex);
        if (!cinode->can_cache_brlcks) {
                mutex_unlock(&cinode->lock_mutex);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        mutex_unlock(&cinode->lock_mutex);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                locks_delete_block(flock);
        }
        return rc;
}

static int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        mutex_lock(&cinode->lock_mutex);
        if (!cinode->can_cache_brlcks) {
                mutex_unlock(&cinode->lock_mutex);
                free_xid(xid);
                return rc;
        }

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                mutex_unlock(&cinode->lock_mutex);
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
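        /*
         * Illustrative arithmetic (a sketch; exact structure sizes are
         * protocol-defined): with a maxBuf of roughly 16 KB, subtracting
         * the SMB header and dividing by sizeof(LOCKING_ANDX_RANGE)
         * leaves room for several hundred ranges per request, so long
         * lock lists are pushed in batches of max_num below.
         */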
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                mutex_unlock(&cinode->lock_mutex);
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        cinode->can_cache_brlcks = false;
        mutex_unlock(&cinode->lock_mutex);

        kfree(buf);
        free_xid(xid);
        return rc;
}

/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
        for (lockp = &inode->i_flock; *lockp != NULL; \
             lockp = &(*lockp)->fl_next)
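
/*
 * Like its fs/locks.c counterpart, this walks the inode->i_flock list;
 * callers must hold the file-lock lock (lock_flocks()/unlock_flocks())
 * across the traversal, as cifs_push_posix_locks() does below.
 */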

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock, **before;
        unsigned int count = 0, i = 0;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        mutex_lock(&cinode->lock_mutex);
        if (!cinode->can_cache_brlcks) {
                mutex_unlock(&cinode->lock_mutex);
                free_xid(xid);
                return rc;
        }

        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
                if ((*before)->fl_flags & FL_POSIX)
                        count++;
        }
        unlock_flocks();

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we hold cinode->lock_mutex, which protects
         * the locking operations on this inode.
         */
        for (; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
                flock = *before;
                if ((flock->fl_flags & FL_POSIX) == 0)
                        continue;
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cERROR(1, "Can't push all brlocks!");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        unlock_flocks();

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        cinode->can_cache_brlcks = false;
        mutex_unlock(&cinode->lock_mutex);

        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                return cifs_push_posix_locks(cfile);

        return cifs_push_mandatory_locks(cfile);
}
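
/*
 * Dispatch note: cached byte-range locks are pushed via the POSIX path
 * only when the server advertises CIFS_UNIX_FCNTL_CAP and the mount did
 * not disable it via CIFS_MOUNT_NOPOSIXBRL; everything else goes through
 * the mandatory-style path. cifs_lock() below uses the same test to set
 * posix_lck.
 */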

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cFYI(1, "Posix");
        if (flock->fl_flags & FL_FLOCK)
                cFYI(1, "Flock");
        if (flock->fl_flags & FL_SLEEP) {
                cFYI(1, "Blocking lock");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cFYI(1, "Process suspended by mandatory locking - "
                        "not implemented yet");
        if (flock->fl_flags & FL_LEASE)
                cFYI(1, "Lease on file - not implemented yet");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
                cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cFYI(1, "F_WRLCK");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cFYI(1, "F_UNLCK");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cFYI(1, "F_RDLCK");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cFYI(1, "F_EXLCK");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cFYI(1, "F_SHLCK");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cFYI(1, "Unknown type of lock");
}

static int
cifs_mandatory_lock(unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
                    __u64 length, __u32 type, int lock, int unlock, bool wait)
{
        return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->fid.netfid,
                           current->tgid, length, offset, unlock, lock,
                           (__u8)type, wait, 0);
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type,
                                 1, 0, false);
        if (rc == 0) {
                rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
                                         type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cERROR(1, "Error %d unlocking previously locked "
                                  "range during test of lock", rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
                                 type | server->vals->shared_lock_type, 1, 0,
                                 false);
        if (rc == 0) {
                rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
                                         type | server->vals->shared_lock_type,
                                         0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cERROR(1, "Error %d unlocking previously locked "
                                  "range during test of lock", rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

static void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

static void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

static int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
{
        int rc = 0, stored_rc;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;

        INIT_LIST_HEAD(&tmp_llist);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf)
                return -EINVAL;

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        mutex_lock(&cinode->lock_mutex);
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
                        if (flock->fl_start > li->offset ||
                            (flock->fl_start + length) <
                            (li->offset + li->length))
                                continue;
                        if (current->tgid != li->pid)
                                continue;
                        if (types[i] != li->type)
                                continue;
                        if (cinode->can_cache_brlcks) {
                                /*
                                 * We can cache brlock requests - simply remove
                                 * a lock from the file's list.
                                 */
                                list_del(&li->llist);
                                cifs_del_lock_waiters(li);
                                kfree(li);
                                continue;
                        }
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        /*
                         * We need to save the lock here so we can add it back
                         * to the file's list if the unlock range request fails
                         * on the server.
                         */
                        list_move(&li->llist, &tmp_llist);
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       li->type, num, 0, buf);
                                if (stored_rc) {
                                        /*
                                         * We failed on the unlock range
                                         * request - add all locks from the tmp
                                         * list to the head of the file's list.
                                         */
                                        cifs_move_llist(&tmp_llist,
                                                        &cfile->llist);
                                        rc = stored_rc;
                                } else
                                        /*
                                         * The unlock range request succeeded -
                                         * free the tmp list.
                                         */
                                        cifs_free_llist(&tmp_llist);
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }
                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               types[i], num, 0, buf);
                        if (stored_rc) {
                                cifs_move_llist(&tmp_llist, &cfile->llist);
                                rc = stored_rc;
                        } else
                                cifs_free_llist(&tmp_llist);
                }
        }

        mutex_unlock(&cinode->lock_mutex);
        kfree(buf);
        return rc;
}

static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, int lock, int unlock,
           unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_set(file, flock);
                if (!rc || rc < 0)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;

                if (unlock == 1)
                        posix_lock_type = CIFS_UNLCK;

                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, NULL,
                                      posix_lock_type, wait_flag);
                goto out;
        }

        if (lock) {
                struct cifsLockInfo *lock;

                lock = cifs_lock_init(flock->fl_start, length, type);
                if (!lock)
                        return -ENOMEM;

                rc = cifs_lock_add_if(cfile, lock, wait_flag);
                if (rc < 0)
                        kfree(lock);
                if (rc <= 0)
                        goto out;

                rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
                                         type, 1, 0, wait_flag);
                if (rc) {
                        kfree(lock);
                        goto out;
                }

                cifs_lock_add(cfile, lock);
        } else if (unlock)
                rc = cifs_unlock_range(cfile, flock, xid);

out:
        if (flock->fl_flags & FL_POSIX)
                posix_lock_file_wait(file, flock);
        return rc;
}
1414
1415 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1416 {
1417         int rc, xid;
1418         int lock = 0, unlock = 0;
1419         bool wait_flag = false;
1420         bool posix_lck = false;
1421         struct cifs_sb_info *cifs_sb;
1422         struct cifs_tcon *tcon;
1423         struct cifsInodeInfo *cinode;
1424         struct cifsFileInfo *cfile;
1425         __u16 netfid;
1426         __u32 type;
1427
1428         rc = -EACCES;
1429         xid = get_xid();
1430
1431         cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1432                 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1433                 flock->fl_start, flock->fl_end);
1434
1435         cfile = (struct cifsFileInfo *)file->private_data;
1436         tcon = tlink_tcon(cfile->tlink);
1437
1438         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1439                         tcon->ses->server);
1440
1441         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1442         netfid = cfile->fid.netfid;
1443         cinode = CIFS_I(file->f_path.dentry->d_inode);
1444
1445         if (cap_unix(tcon->ses) &&
1446             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1447             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1448                 posix_lck = true;
1449         /*
1450          * BB add code here to normalize offset and length to account for
1451          * negative length, which we cannot accept over the wire.
1452          */
1453         if (IS_GETLK(cmd)) {
1454                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1455                 free_xid(xid);
1456                 return rc;
1457         }
1458
1459         if (!lock && !unlock) {
1460                 /*
1461                  * if no lock or unlock then nothing to do since we do not
1462                  * know what it is
1463                  */
1464                 free_xid(xid);
1465                 return -EOPNOTSUPP;
1466         }
1467
1468         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1469                         xid);
1470         free_xid(xid);
1471         return rc;
1472 }
1473
1474 /*
1475  * update the file size (if needed) after a write. Should be called with
1476  * the inode->i_lock held
1477  */
1478 void
1479 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1480                       unsigned int bytes_written)
1481 {
1482         loff_t end_of_write = offset + bytes_written;
1483
1484         if (end_of_write > cifsi->server_eof)
1485                 cifsi->server_eof = end_of_write;
1486 }
1487
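/*
 * Synchronous write helper: loop until write_size bytes have gone out,
 * reopening an invalidated handle and retrying on -EAGAIN. Each chunk
 * is capped at wsize; iov[0] stays reserved for the SMB header and the
 * data rides in iov[1]. server_eof and i_size are updated under i_lock
 * as bytes are acknowledged.
 */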
1488 static ssize_t
1489 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1490            size_t write_size, loff_t *offset)
1491 {
1492         int rc = 0;
1493         unsigned int bytes_written = 0;
1494         unsigned int total_written;
1495         struct cifs_sb_info *cifs_sb;
1496         struct cifs_tcon *tcon;
1497         struct TCP_Server_Info *server;
1498         unsigned int xid;
1499         struct dentry *dentry = open_file->dentry;
1500         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1501         struct cifs_io_parms io_parms;
1502
1503         cifs_sb = CIFS_SB(dentry->d_sb);
1504
1505         cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1506              *offset, dentry->d_name.name);
1507
1508         tcon = tlink_tcon(open_file->tlink);
1509         server = tcon->ses->server;
1510
1511         if (!server->ops->sync_write)
1512                 return -ENOSYS;
1513
1514         xid = get_xid();
1515
1516         for (total_written = 0; write_size > total_written;
1517              total_written += bytes_written) {
1518                 rc = -EAGAIN;
1519                 while (rc == -EAGAIN) {
1520                         struct kvec iov[2];
1521                         unsigned int len;
1522
1523                         if (open_file->invalidHandle) {
1524                                 /* we could deadlock if we called
1525                                    filemap_fdatawait from here so tell
1526                                    reopen_file not to flush data to
1527                                    server now */
1528                                 rc = cifs_reopen_file(open_file, false);
1529                                 if (rc != 0)
1530                                         break;
1531                         }
1532
1533                         len = min((size_t)cifs_sb->wsize,
1534                                   write_size - total_written);
1535                         /* iov[0] is reserved for smb header */
1536                         iov[1].iov_base = (char *)write_data + total_written;
1537                         iov[1].iov_len = len;
1538                         io_parms.pid = pid;
1539                         io_parms.tcon = tcon;
1540                         io_parms.offset = *offset;
1541                         io_parms.length = len;
1542                         rc = server->ops->sync_write(xid, open_file, &io_parms,
1543                                                      &bytes_written, iov, 1);
1544                 }
1545                 if (rc || (bytes_written == 0)) {
1546                         if (total_written)
1547                                 break;
1548                         else {
1549                                 free_xid(xid);
1550                                 return rc;
1551                         }
1552                 } else {
1553                         spin_lock(&dentry->d_inode->i_lock);
1554                         cifs_update_eof(cifsi, *offset, bytes_written);
1555                         spin_unlock(&dentry->d_inode->i_lock);
1556                         *offset += bytes_written;
1557                 }
1558         }
1559
1560         cifs_stats_bytes_written(tcon, total_written);
1561
1562         if (total_written > 0) {
1563                 spin_lock(&dentry->d_inode->i_lock);
1564                 if (*offset > dentry->d_inode->i_size)
1565                         i_size_write(dentry->d_inode, *offset);
1566                 spin_unlock(&dentry->d_inode->i_lock);
1567         }
1568         mark_inode_dirty_sync(dentry->d_inode);
1569         free_xid(xid);
1570         return total_written;
1571 }
1572
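/*
 * Find a valid handle for this inode that was opened with FMODE_READ.
 * Write-only handles sort to the tail of the list, so the scan can stop
 * at the first one. Returns a referenced cifsFileInfo (drop it with
 * cifsFileInfo_put()) or NULL.
 */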
1573 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1574                                         bool fsuid_only)
1575 {
1576         struct cifsFileInfo *open_file = NULL;
1577         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1578
1579         /* only filter by fsuid on multiuser mounts */
1580         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1581                 fsuid_only = false;
1582
1583         spin_lock(&cifs_file_list_lock);
1584         /* we could simply get the first_list_entry since write-only entries
1585            are always at the end of the list but since the first entry might
1586            have a close pending, we go through the whole list */
1587         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1588                 if (fsuid_only && open_file->uid != current_fsuid())
1589                         continue;
1590                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1591                         if (!open_file->invalidHandle) {
1592                                 /* found a good file */
1593                                 /* lock it so it will not be closed on us */
1594                                 cifsFileInfo_get_locked(open_file);
1595                                 spin_unlock(&cifs_file_list_lock);
1596                                 return open_file;
1597                         } /* else might as well continue, and look for
1598                              another, or simply have the caller reopen it
1599                              again rather than trying to fix this handle */
1600                 } else /* write only file */
1601                         break; /* write only files are last so must be done */
1602         }
1603         spin_unlock(&cifs_file_list_lock);
1604         return NULL;
1605 }
1606
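/*
 * Like find_readable_file() but for FMODE_WRITE, with two extra twists:
 * handles belonging to the current tgid are preferred before falling
 * back to any available one, and an invalidated handle is remembered
 * and reopened (up to MAX_REOPEN_ATT attempts) if nothing better turns
 * up.
 */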
1607 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1608                                         bool fsuid_only)
1609 {
1610         struct cifsFileInfo *open_file, *inv_file = NULL;
1611         struct cifs_sb_info *cifs_sb;
1612         bool any_available = false;
1613         int rc;
1614         unsigned int refind = 0;
1615
1616         /* Having a null inode here (because mapping->host was set to zero by
1617         the VFS or MM) should not happen, but we had reports of an oops (due
1618         to it being zero) during stress testcases, so we need to check for it */
1619
1620         if (cifs_inode == NULL) {
1621                 cERROR(1, "Null inode passed to find_writable_file");
1622                 dump_stack();
1623                 return NULL;
1624         }
1625
1626         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1627
1628         /* only filter by fsuid on multiuser mounts */
1629         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1630                 fsuid_only = false;
1631
1632         spin_lock(&cifs_file_list_lock);
1633 refind_writable:
1634         if (refind > MAX_REOPEN_ATT) {
1635                 spin_unlock(&cifs_file_list_lock);
1636                 return NULL;
1637         }
1638         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1639                 if (!any_available && open_file->pid != current->tgid)
1640                         continue;
1641                 if (fsuid_only && open_file->uid != current_fsuid())
1642                         continue;
1643                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1644                         if (!open_file->invalidHandle) {
1645                                 /* found a good writable file */
1646                                 cifsFileInfo_get_locked(open_file);
1647                                 spin_unlock(&cifs_file_list_lock);
1648                                 return open_file;
1649                         } else {
1650                                 if (!inv_file)
1651                                         inv_file = open_file;
1652                         }
1653                 }
1654         }
1655         /* couldn't find usable FH with same pid, try any available */
1656         if (!any_available) {
1657                 any_available = true;
1658                 goto refind_writable;
1659         }
1660
1661         if (inv_file) {
1662                 any_available = false;
1663                 cifsFileInfo_get_locked(inv_file);
1664         }
1665
1666         spin_unlock(&cifs_file_list_lock);
1667
1668         if (inv_file) {
1669                 rc = cifs_reopen_file(inv_file, false);
1670                 if (!rc)
1671                         return inv_file;
1672                 else {
1673                         spin_lock(&cifs_file_list_lock);
1674                         list_move_tail(&inv_file->flist,
1675                                         &cifs_inode->openFileList);
1676                         spin_unlock(&cifs_file_list_lock);
1677                         cifsFileInfo_put(inv_file);
1678                         spin_lock(&cifs_file_list_lock);
1679                         ++refind;
1680                         goto refind_writable;
1681                 }
1682         }
1683
1684         return NULL;
1685 }
1686
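/*
 * Write the byte range [from, to) of a single pagecache page back to
 * the server through any writable handle for the inode. Returns 0 on
 * success or a negative errno; a range that has already been truncated
 * away is quietly treated as success.
 */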
1687 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1688 {
1689         struct address_space *mapping = page->mapping;
1690         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1691         char *write_data;
1692         int rc = -EFAULT;
1693         int bytes_written = 0;
1694         struct inode *inode;
1695         struct cifsFileInfo *open_file;
1696
1697         if (!mapping || !mapping->host)
1698                 return -EFAULT;
1699
1700         inode = page->mapping->host;
1701
1702         offset += (loff_t)from;
1703         write_data = kmap(page);
1704         write_data += from;
1705
1706         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1707                 kunmap(page);
1708                 return -EIO;
1709         }
1710
1711         /* racing with truncate? */
1712         if (offset > mapping->host->i_size) {
1713                 kunmap(page);
1714                 return 0; /* don't care */
1715         }
1716
1717         /* check to make sure that we are not extending the file */
1718         if (mapping->host->i_size - offset < (loff_t)to)
1719                 to = (unsigned)(mapping->host->i_size - offset);
1720
1721         open_file = find_writable_file(CIFS_I(mapping->host), false);
1722         if (open_file) {
1723                 bytes_written = cifs_write(open_file, open_file->pid,
1724                                            write_data, to - from, &offset);
1725                 cifsFileInfo_put(open_file);
1726                 /* Does mm or vfs already set times? */
1727                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1728                 if ((bytes_written > 0) && (offset))
1729                         rc = 0;
1730                 else if (bytes_written < 0)
1731                         rc = bytes_written;
1732         } else {
1733                 cFYI(1, "No writable filehandles for inode");
1734                 rc = -EIO;
1735         }
1736
1737         kunmap(page);
1738         return rc;
1739 }
1740
1741 /*
1742  * Marshal up the iov array, reserving the first one for the header. Also,
1743  * set wdata->bytes.
1744  */
1745 static void
1746 cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
1747 {
1748         int i;
1749         struct inode *inode = wdata->cfile->dentry->d_inode;
1750         loff_t size = i_size_read(inode);
1751
1752         /* marshal up the pages into iov array */
1753         wdata->bytes = 0;
1754         for (i = 0; i < wdata->nr_pages; i++) {
1755                 iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]),
1756                                         (loff_t)PAGE_CACHE_SIZE);
1757                 iov[i + 1].iov_base = kmap(wdata->pages[i]);
1758                 wdata->bytes += iov[i + 1].iov_len;
1759         }
1760 }
1761
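/*
 * Writeback for a whole mapping: gather up to wsize worth of contiguous
 * dirty pages into one cifs_writedata and hand it to ->async_writev().
 * The page gathering below broadly mirrors write_cache_pages(), but is
 * open-coded so that runs of pages can be coalesced into large writes.
 */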
1762 static int cifs_writepages(struct address_space *mapping,
1763                            struct writeback_control *wbc)
1764 {
1765         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1766         bool done = false, scanned = false, range_whole = false;
1767         pgoff_t end, index;
1768         struct cifs_writedata *wdata;
1769         struct TCP_Server_Info *server;
1770         struct page *page;
1771         int rc = 0;
1772
1773         /*
1774          * If wsize is smaller than the page cache size, default to writing
1775          * one page at a time via cifs_writepage
1776          */
1777         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1778                 return generic_writepages(mapping, wbc);
1779
1780         if (wbc->range_cyclic) {
1781                 index = mapping->writeback_index; /* Start from prev offset */
1782                 end = -1;
1783         } else {
1784                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1785                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1786                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1787                         range_whole = true;
1788                 scanned = true;
1789         }
1790 retry:
1791         while (!done && index <= end) {
1792                 unsigned int i, nr_pages, found_pages;
1793                 pgoff_t next = 0, tofind;
1794                 struct page **pages;
1795
1796                 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1797                                 end - index) + 1;
1798
1799                 wdata = cifs_writedata_alloc((unsigned int)tofind,
1800                                              cifs_writev_complete);
1801                 if (!wdata) {
1802                         rc = -ENOMEM;
1803                         break;
1804                 }
1805
1806                 /*
1807                  * find_get_pages_tag seems to return a max of 256 on each
1808                  * iteration, so we must call it several times in order to
1809                  * fill the array or the wsize is effectively limited to
1810                  * 256 * PAGE_CACHE_SIZE.
1811                  */
1812                 found_pages = 0;
1813                 pages = wdata->pages;
1814                 do {
1815                         nr_pages = find_get_pages_tag(mapping, &index,
1816                                                         PAGECACHE_TAG_DIRTY,
1817                                                         tofind, pages);
1818                         found_pages += nr_pages;
1819                         tofind -= nr_pages;
1820                         pages += nr_pages;
1821                 } while (nr_pages && tofind && index <= end);
1822
1823                 if (found_pages == 0) {
1824                         kref_put(&wdata->refcount, cifs_writedata_release);
1825                         break;
1826                 }
1827
1828                 nr_pages = 0;
1829                 for (i = 0; i < found_pages; i++) {
1830                         page = wdata->pages[i];
1831                         /*
1832                          * At this point we hold neither mapping->tree_lock nor
1833                          * lock on the page itself: the page may be truncated or
1834                          * invalidated (changing page->mapping to NULL), or even
1835                          * swizzled back from swapper_space to tmpfs file
1836                          * mapping
1837                          */
1838
1839                         if (nr_pages == 0)
1840                                 lock_page(page);
1841                         else if (!trylock_page(page))
1842                                 break;
1843
1844                         if (unlikely(page->mapping != mapping)) {
1845                                 unlock_page(page);
1846                                 break;
1847                         }
1848
1849                         if (!wbc->range_cyclic && page->index > end) {
1850                                 done = true;
1851                                 unlock_page(page);
1852                                 break;
1853                         }
1854
1855                         if (next && (page->index != next)) {
1856                                 /* Not next consecutive page */
1857                                 unlock_page(page);
1858                                 break;
1859                         }
1860
1861                         if (wbc->sync_mode != WB_SYNC_NONE)
1862                                 wait_on_page_writeback(page);
1863
1864                         if (PageWriteback(page) ||
1865                                         !clear_page_dirty_for_io(page)) {
1866                                 unlock_page(page);
1867                                 break;
1868                         }
1869
1870                         /*
1871                          * This actually clears the dirty bit in the radix tree.
1872                          * See cifs_writepage() for more commentary.
1873                          */
1874                         set_page_writeback(page);
1875
1876                         if (page_offset(page) >= mapping->host->i_size) {
1877                                 done = true;
1878                                 unlock_page(page);
1879                                 end_page_writeback(page);
1880                                 break;
1881                         }
1882
1883                         wdata->pages[i] = page;
1884                         next = page->index + 1;
1885                         ++nr_pages;
1886                 }
1887
1888                 /* reset index to refind any pages skipped */
1889                 if (nr_pages == 0)
1890                         index = wdata->pages[0]->index + 1;
1891
1892                 /* put any pages we aren't going to use */
1893                 for (i = nr_pages; i < found_pages; i++) {
1894                         page_cache_release(wdata->pages[i]);
1895                         wdata->pages[i] = NULL;
1896                 }
1897
1898                 /* nothing to write? */
1899                 if (nr_pages == 0) {
1900                         kref_put(&wdata->refcount, cifs_writedata_release);
1901                         continue;
1902                 }
1903
1904                 wdata->sync_mode = wbc->sync_mode;
1905                 wdata->nr_pages = nr_pages;
1906                 wdata->offset = page_offset(wdata->pages[0]);
1907                 wdata->marshal_iov = cifs_writepages_marshal_iov;
1908
1909                 do {
1910                         if (wdata->cfile != NULL)
1911                                 cifsFileInfo_put(wdata->cfile);
1912                         wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1913                                                           false);
1914                         if (!wdata->cfile) {
1915                                 cERROR(1, "No writable handles for inode");
1916                                 rc = -EBADF;
1917                                 break;
1918                         }
1919                         wdata->pid = wdata->cfile->pid;
1920                         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
1921                         rc = server->ops->async_writev(wdata);
1922                 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1923
1924                 for (i = 0; i < nr_pages; ++i)
1925                         unlock_page(wdata->pages[i]);
1926
1927                 /* send failure -- clean up the mess */
1928                 if (rc != 0) {
1929                         for (i = 0; i < nr_pages; ++i) {
1930                                 if (rc == -EAGAIN)
1931                                         redirty_page_for_writepage(wbc,
1932                                                            wdata->pages[i]);
1933                                 else
1934                                         SetPageError(wdata->pages[i]);
1935                                 end_page_writeback(wdata->pages[i]);
1936                                 page_cache_release(wdata->pages[i]);
1937                         }
1938                         if (rc != -EAGAIN)
1939                                 mapping_set_error(mapping, rc);
1940                 }
1941                 kref_put(&wdata->refcount, cifs_writedata_release);
1942
1943                 wbc->nr_to_write -= nr_pages;
1944                 if (wbc->nr_to_write <= 0)
1945                         done = true;
1946
1947                 index = next;
1948         }
1949
1950         if (!scanned && !done) {
1951                 /*
1952                  * We hit the last page and there is more work to be done: wrap
1953                  * back to the start of the file
1954                  */
1955                 scanned = true;
1956                 index = 0;
1957                 goto retry;
1958         }
1959
1960         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1961                 mapping->writeback_index = index;
1962
1963         return rc;
1964 }
1965
1966 static int
1967 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1968 {
1969         int rc;
1970         unsigned int xid;
1971
1972         xid = get_xid();
1973 /* BB add check for wbc flags */
1974         page_cache_get(page);
1975         if (!PageUptodate(page))
1976                 cFYI(1, "ppw - page not up to date");
1977
1978         /*
1979          * Set the "writeback" flag, and clear "dirty" in the radix tree.
1980          *
1981          * A writepage() implementation always needs to do either this,
1982          * or re-dirty the page with "redirty_page_for_writepage()" in
1983          * the case of a failure.
1984          *
1985          * Just unlocking the page will cause the radix tree tag-bits
1986          * to fail to update with the state of the page correctly.
1987          */
1988         set_page_writeback(page);
1989 retry_write:
1990         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1991         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1992                 goto retry_write;
1993         else if (rc == -EAGAIN)
1994                 redirty_page_for_writepage(wbc, page);
1995         else if (rc != 0)
1996                 SetPageError(page);
1997         else
1998                 SetPageUptodate(page);
1999         end_page_writeback(page);
2000         page_cache_release(page);
2001         free_xid(xid);
2002         return rc;
2003 }
2004
2005 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2006 {
2007         int rc = cifs_writepage_locked(page, wbc);
2008         unlock_page(page);
2009         return rc;
2010 }
2011
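/*
 * ->write_end(): called with the page still locked after ->write_begin()
 * and the copy from userspace. If the page never became uptodate, the
 * copied bytes are pushed out synchronously via cifs_write(); otherwise
 * the page is simply marked dirty and left for writeback.
 */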
2012 static int cifs_write_end(struct file *file, struct address_space *mapping,
2013                         loff_t pos, unsigned len, unsigned copied,
2014                         struct page *page, void *fsdata)
2015 {
2016         int rc;
2017         struct inode *inode = mapping->host;
2018         struct cifsFileInfo *cfile = file->private_data;
2019         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2020         __u32 pid;
2021
2022         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2023                 pid = cfile->pid;
2024         else
2025                 pid = current->tgid;
2026
2027         cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2028                  page, pos, copied);
2029
2030         if (PageChecked(page)) {
2031                 if (copied == len)
2032                         SetPageUptodate(page);
2033                 ClearPageChecked(page);
2034         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2035                 SetPageUptodate(page);
2036
2037         if (!PageUptodate(page)) {
2038                 char *page_data;
2039                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2040                 unsigned int xid;
2041
2042                 xid = get_xid();
2043                 /* this is probably better than directly calling
2044                    partialpage_write since in this function the file handle is
2045                    known, which we might as well leverage */
2046                 /* BB check if anything else missing out of ppw
2047                    such as updating last write time */
2048                 page_data = kmap(page);
2049                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2050                 /* if (rc < 0) should we set writebehind rc? */
2051                 kunmap(page);
2052
2053                 free_xid(xid);
2054         } else {
2055                 rc = copied;
2056                 pos += copied;
2057                 set_page_dirty(page);
2058         }
2059
2060         if (rc > 0) {
2061                 spin_lock(&inode->i_lock);
2062                 if (pos > inode->i_size)
2063                         i_size_write(inode, pos);
2064                 spin_unlock(&inode->i_lock);
2065         }
2066
2067         unlock_page(page);
2068         page_cache_release(page);
2069
2070         return rc;
2071 }
2072
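/*
 * Strict-cache fsync: flush and wait on the dirty pagecache range, drop
 * the cached pages if the read oplock has been lost, then ask the server
 * to flush its buffers via ->flush() unless CIFS_MOUNT_NOSSYNC is set.
 */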
2073 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2074                       int datasync)
2075 {
2076         unsigned int xid;
2077         int rc = 0;
2078         struct cifs_tcon *tcon;
2079         struct TCP_Server_Info *server;
2080         struct cifsFileInfo *smbfile = file->private_data;
2081         struct inode *inode = file->f_path.dentry->d_inode;
2082         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2083
2084         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2085         if (rc)
2086                 return rc;
2087         mutex_lock(&inode->i_mutex);
2088
2089         xid = get_xid();
2090
2091         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2092                 file->f_path.dentry->d_name.name, datasync);
2093
2094         if (!CIFS_I(inode)->clientCanCacheRead) {
2095                 rc = cifs_invalidate_mapping(inode);
2096                 if (rc) {
2097                         cFYI(1, "rc: %d during invalidate phase", rc);
2098                         rc = 0; /* don't care about it in fsync */
2099                 }
2100         }
2101
2102         tcon = tlink_tcon(smbfile->tlink);
2103         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2104                 server = tcon->ses->server;
2105                 if (server->ops->flush)
2106                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2107                 else
2108                         rc = -ENOSYS;
2109         }
2110
2111         free_xid(xid);
2112         mutex_unlock(&inode->i_mutex);
2113         return rc;
2114 }
2115
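/*
 * Non-strict fsync: the same as cifs_strict_fsync() except that the
 * pagecache is never invalidated, even without a read oplock.
 */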
2116 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2117 {
2118         unsigned int xid;
2119         int rc = 0;
2120         struct cifs_tcon *tcon;
2121         struct TCP_Server_Info *server;
2122         struct cifsFileInfo *smbfile = file->private_data;
2123         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2124         struct inode *inode = file->f_mapping->host;
2125
2126         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2127         if (rc)
2128                 return rc;
2129         mutex_lock(&inode->i_mutex);
2130
2131         xid = get_xid();
2132
2133         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2134                 file->f_path.dentry->d_name.name, datasync);
2135
2136         tcon = tlink_tcon(smbfile->tlink);
2137         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2138                 server = tcon->ses->server;
2139                 if (server->ops->flush)
2140                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2141                 else
2142                         rc = -ENOSYS;
2143         }
2144
2145         free_xid(xid);
2146         mutex_unlock(&inode->i_mutex);
2147         return rc;
2148 }
2149
2150 /*
2151  * As the file closes, flush all cached write data for this inode, checking
2152  * for write-behind errors.
2153  */
2154 int cifs_flush(struct file *file, fl_owner_t id)
2155 {
2156         struct inode *inode = file->f_path.dentry->d_inode;
2157         int rc = 0;
2158
2159         if (file->f_mode & FMODE_WRITE)
2160                 rc = filemap_write_and_wait(inode->i_mapping);
2161
2162         cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
2163
2164         return rc;
2165 }
2166
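/* Allocate num_pages pages, releasing them all again if any allocation fails. */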
2167 static int
2168 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2169 {
2170         int rc = 0;
2171         unsigned long i;
2172
2173         for (i = 0; i < num_pages; i++) {
2174                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2175                 if (!pages[i]) {
2176                         /*
2177                          * save number of pages we have already allocated and
2178                          * return with ENOMEM error
2179                          */
2180                         num_pages = i;
2181                         rc = -ENOMEM;
2182                         break;
2183                 }
2184         }
2185
2186         if (rc) {
2187                 for (i = 0; i < num_pages; i++)
2188                         put_page(pages[i]);
2189         }
2190         return rc;
2191 }
2192
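/*
 * Map a requested write length onto a page count, capping the length at
 * wsize. Worked example (assuming PAGE_SIZE == 4096): wsize = 65536 and
 * len = 10000 gives *cur_len = 10000 and num_pages = 3.
 */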
2193 static inline
2194 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2195 {
2196         size_t num_pages;
2197         size_t clen;
2198
2199         clen = min_t(const size_t, len, wsize);
2200         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2201
2202         if (cur_len)
2203                 *cur_len = clen;
2204
2205         return num_pages;
2206 }
2207
2208 static void
2209 cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
2210 {
2211         int i;
2212         size_t bytes = wdata->bytes;
2213
2214         /* marshal up the pages into iov array */
2215         for (i = 0; i < wdata->nr_pages; i++) {
2216                 iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE);
2217                 iov[i + 1].iov_base = kmap(wdata->pages[i]);
2218                 bytes -= iov[i + 1].iov_len;
2219         }
2220 }
2221
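/*
 * Completion work for uncached writes: update the cached EOF, wake the
 * issuer, then release the data pages (kept only on -EAGAIN, so the
 * request can be resent).
 */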
2222 static void
2223 cifs_uncached_writev_complete(struct work_struct *work)
2224 {
2225         int i;
2226         struct cifs_writedata *wdata = container_of(work,
2227                                         struct cifs_writedata, work);
2228         struct inode *inode = wdata->cfile->dentry->d_inode;
2229         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2230
2231         spin_lock(&inode->i_lock);
2232         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2233         if (cifsi->server_eof > inode->i_size)
2234                 i_size_write(inode, cifsi->server_eof);
2235         spin_unlock(&inode->i_lock);
2236
2237         complete(&wdata->done);
2238
2239         if (wdata->result != -EAGAIN) {
2240                 for (i = 0; i < wdata->nr_pages; i++)
2241                         put_page(wdata->pages[i]);
2242         }
2243
2244         kref_put(&wdata->refcount, cifs_writedata_release);
2245 }
2246
2247 /* attempt to send the write to the server, retrying on any -EAGAIN errors */
2248 static int
2249 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2250 {
2251         int rc;
2252         struct TCP_Server_Info *server;
2253
2254         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2255
2256         do {
2257                 if (wdata->cfile->invalidHandle) {
2258                         rc = cifs_reopen_file(wdata->cfile, false);
2259                         if (rc != 0)
2260                                 continue;
2261                 }
2262                 rc = server->ops->async_writev(wdata);
2263         } while (rc == -EAGAIN);
2264
2265         return rc;
2266 }
2267
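/*
 * Uncached write path: copy the user iovec into freshly allocated pages
 * in wsize-sized slices, fire each slice off with
 * cifs_uncached_retry_writev(), then collect the completions in offset
 * order below.
 */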
2268 static ssize_t
2269 cifs_iovec_write(struct file *file, const struct iovec *iov,
2270                  unsigned long nr_segs, loff_t *poffset)
2271 {
2272         unsigned long nr_pages, i;
2273         size_t copied, len, cur_len;
2274         ssize_t total_written = 0;
2275         loff_t offset;
2276         struct iov_iter it;
2277         struct cifsFileInfo *open_file;
2278         struct cifs_tcon *tcon;
2279         struct cifs_sb_info *cifs_sb;
2280         struct cifs_writedata *wdata, *tmp;
2281         struct list_head wdata_list;
2282         int rc;
2283         pid_t pid;
2284
2285         len = iov_length(iov, nr_segs);
2286         if (!len)
2287                 return 0;
2288
2289         rc = generic_write_checks(file, poffset, &len, 0);
2290         if (rc)
2291                 return rc;
2292
2293         INIT_LIST_HEAD(&wdata_list);
2294         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2295         open_file = file->private_data;
2296         tcon = tlink_tcon(open_file->tlink);
2297
2298         if (!tcon->ses->server->ops->async_writev)
2299                 return -ENOSYS;
2300
2301         offset = *poffset;
2302
2303         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2304                 pid = open_file->pid;
2305         else
2306                 pid = current->tgid;
2307
2308         iov_iter_init(&it, iov, nr_segs, len, 0);
2309         do {
2310                 size_t save_len;
2311
2312                 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2313                 wdata = cifs_writedata_alloc(nr_pages,
2314                                              cifs_uncached_writev_complete);
2315                 if (!wdata) {
2316                         rc = -ENOMEM;
2317                         break;
2318                 }
2319
2320                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2321                 if (rc) {
2322                         kfree(wdata);
2323                         break;
2324                 }
2325
2326                 save_len = cur_len;
2327                 for (i = 0; i < nr_pages; i++) {
2328                         copied = min_t(const size_t, cur_len, PAGE_SIZE);
2329                         copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2330                                                          0, copied);
2331                         cur_len -= copied;
2332                         iov_iter_advance(&it, copied);
2333                 }
2334                 cur_len = save_len - cur_len;
2335
2336                 wdata->sync_mode = WB_SYNC_ALL;
2337                 wdata->nr_pages = nr_pages;
2338                 wdata->offset = (__u64)offset;
2339                 wdata->cfile = cifsFileInfo_get(open_file);
2340                 wdata->pid = pid;
2341                 wdata->bytes = cur_len;
2342                 wdata->marshal_iov = cifs_uncached_marshal_iov;
2343                 rc = cifs_uncached_retry_writev(wdata);
2344                 if (rc) {
2345                         kref_put(&wdata->refcount, cifs_writedata_release);
2346                         break;
2347                 }
2348
2349                 list_add_tail(&wdata->list, &wdata_list);
2350                 offset += cur_len;
2351                 len -= cur_len;
2352         } while (len > 0);
2353
2354         /*
2355          * If at least one write was successfully sent, then discard any rc
2356          * value from the later writes. If a later write succeeds, then
2357          * we'll end up returning whatever was written. If it fails, then
2358          * we'll get a new rc value from that.
2359          */
2360         if (!list_empty(&wdata_list))
2361                 rc = 0;
2362
2363         /*
2364          * Wait for and collect replies for any successful sends in order of
2365          * increasing offset. Once an error is hit or we get a fatal signal
2366          * while waiting, then return without waiting for any more replies.
2367          */
2368 restart_loop:
2369         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2370                 if (!rc) {
2371                         /* FIXME: freezable too? */
2372                         rc = wait_for_completion_killable(&wdata->done);
2373                         if (rc)
2374                                 rc = -EINTR;
2375                         else if (wdata->result)
2376                                 rc = wdata->result;
2377                         else
2378                                 total_written += wdata->bytes;
2379
2380                         /* resend call if it's a retryable error */
2381                         if (rc == -EAGAIN) {
2382                                 rc = cifs_uncached_retry_writev(wdata);
2383                                 goto restart_loop;
2384                         }
2385                 }
2386                 list_del_init(&wdata->list);
2387                 kref_put(&wdata->refcount, cifs_writedata_release);
2388         }
2389
2390         if (total_written > 0)
2391                 *poffset += total_written;
2392
2393         cifs_stats_bytes_written(tcon, total_written);
2394         return total_written ? total_written : (ssize_t)rc;
2395 }
2396
2397 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2398                                 unsigned long nr_segs, loff_t pos)
2399 {
2400         ssize_t written;
2401         struct inode *inode;
2402
2403         inode = iocb->ki_filp->f_path.dentry->d_inode;
2404
2405         /*
2406          * BB - optimize for the case when signing is disabled. We can drop this
2407          * extra memory-to-memory copying and use iovec buffers for constructing
2408          * the write request.
2409          */
2410
2411         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2412         if (written > 0) {
2413                 CIFS_I(inode)->invalid_mapping = true;
2414                 iocb->ki_pos = pos;
2415         }
2416
2417         return written;
2418 }
2419
2420 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2421                            unsigned long nr_segs, loff_t pos)
2422 {
2423         struct inode *inode;
2424
2425         inode = iocb->ki_filp->f_path.dentry->d_inode;
2426
2427         if (CIFS_I(inode)->clientCanCacheAll)
2428                 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2429
2430         /*
2431          * In strict cache mode we need to write the data to the server exactly
2432          * from the pos to pos+len-1 rather than flush all affected pages
2433          * because it may cause an error with mandatory locks on these pages but
2434          * not on the region from pos to pos+len-1.
2435          */
2436
2437         return cifs_user_writev(iocb, iov, nr_segs, pos);
2438 }
2439
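/*
 * Allocate a cifs_readdata with room for nr_vecs kvecs tacked onto the
 * end, wired up so that @complete runs when the read finishes.
 */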
2440 static struct cifs_readdata *
2441 cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete)
2442 {
2443         struct cifs_readdata *rdata;
2444
2445         rdata = kzalloc(sizeof(*rdata) +
2446                         sizeof(struct kvec) * nr_vecs, GFP_KERNEL);
2447         if (rdata != NULL) {
2448                 kref_init(&rdata->refcount);
2449                 INIT_LIST_HEAD(&rdata->list);
2450                 init_completion(&rdata->done);
2451                 INIT_WORK(&rdata->work, complete);
2452                 INIT_LIST_HEAD(&rdata->pages);
2453         }
2454         return rdata;
2455 }
2456
2457 void
2458 cifs_readdata_release(struct kref *refcount)
2459 {
2460         struct cifs_readdata *rdata = container_of(refcount,
2461                                         struct cifs_readdata, refcount);
2462
2463         if (rdata->cfile)
2464                 cifsFileInfo_put(rdata->cfile);
2465
2466         kfree(rdata);
2467 }
2468
2469 static int
2470 cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
2471 {
2472         int rc = 0;
2473         struct page *page, *tpage;
2474         unsigned int i;
2475
2476         for (i = 0; i < npages; i++) {
2477                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2478                 if (!page) {
2479                         rc = -ENOMEM;
2480                         break;
2481                 }
2482                 list_add(&page->lru, list);
2483         }
2484
2485         if (rc) {
2486                 list_for_each_entry_safe(page, tpage, list, lru) {
2487                         list_del(&page->lru);
2488                         put_page(page);
2489                 }
2490         }
2491         return rc;
2492 }
2493
2494 static void
2495 cifs_uncached_readdata_release(struct kref *refcount)
2496 {
2497         struct page *page, *tpage;
2498         struct cifs_readdata *rdata = container_of(refcount,
2499                                         struct cifs_readdata, refcount);
2500
2501         list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2502                 list_del(&page->lru);
2503                 put_page(page);
2504         }
2505         cifs_readdata_release(refcount);
2506 }
2507
2508 static int
2509 cifs_retry_async_readv(struct cifs_readdata *rdata)
2510 {
2511         int rc;
2512         struct TCP_Server_Info *server;
2513
2514         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2515
2516         do {
2517                 if (rdata->cfile->invalidHandle) {
2518                         rc = cifs_reopen_file(rdata->cfile, true);
2519                         if (rc != 0)
2520                                 continue;
2521                 }
2522                 rc = server->ops->async_readv(rdata);
2523         } while (rc == -EAGAIN);
2524
2525         return rc;
2526 }
2527
2528 /**
2529  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2530  * @rdata:      the readdata response with list of pages holding data
2531  * @iov:        vector in which we should copy the data
2532  * @nr_segs:    number of segments in vector
2533  * @offset:     offset into file of the first iovec
2534  * @copied:     used to return the amount of data copied to the iov
2535  *
2536  * This function copies data from a list of pages in a readdata response into
2537  * an array of iovecs. It will first calculate where the data should go
2538  * based on the info in the readdata and then copy the data into that spot.
2539  */
2540 static ssize_t
2541 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2542                         unsigned long nr_segs, loff_t offset, ssize_t *copied)
2543 {
2544         int rc = 0;
2545         struct iov_iter ii;
2546         size_t pos = rdata->offset - offset;
2547         struct page *page, *tpage;
2548         ssize_t remaining = rdata->bytes;
2549         unsigned char *pdata;
2550
2551         /* set up iov_iter and advance to the correct offset */
2552         iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2553         iov_iter_advance(&ii, pos);
2554
2555         *copied = 0;
2556         list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2557                 ssize_t copy;
2558
2559                 /* copy a whole page or whatever's left */
2560                 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2561
2562                 /* ...but limit it to whatever space is left in the iov */
2563                 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2564
2565                 /* go while there's data to be copied and no errors */
2566                 if (copy && !rc) {
2567                         pdata = kmap(page);
2568                         rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2569                                                 (int)copy);
2570                         kunmap(page);
2571                         if (!rc) {
2572                                 *copied += copy;
2573                                 remaining -= copy;
2574                                 iov_iter_advance(&ii, copy);
2575                         }
2576                 }
2577
2578                 list_del(&page->lru);
2579                 put_page(page);
2580         }
2581
2582         return rc;
2583 }
2584
2585 static void
2586 cifs_uncached_readv_complete(struct work_struct *work)
2587 {
2588         struct cifs_readdata *rdata = container_of(work,
2589                                                 struct cifs_readdata, work);
2590
2591         /* if the result is non-zero then the pages weren't kmapped */
2592         if (rdata->result == 0) {
2593                 struct page *page;
2594
2595                 list_for_each_entry(page, &rdata->pages, lru)
2596                         kunmap(page);
2597         }
2598
2599         complete(&rdata->done);
2600         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2601 }
2602
2603 static int
2604 cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
2605                                 unsigned int remaining)
2606 {
2607         int len = 0;
2608         struct page *page, *tpage;
2609
2610         rdata->nr_iov = 1;
2611         list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2612                 if (remaining >= PAGE_SIZE) {
2613                         /* enough data to fill the page */
2614                         rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2615                         rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE;
2616                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2617                                 rdata->nr_iov, page->index,
2618                                 rdata->iov[rdata->nr_iov].iov_base,
2619                                 rdata->iov[rdata->nr_iov].iov_len);
2620                         ++rdata->nr_iov;
2621                         len += PAGE_SIZE;
2622                         remaining -= PAGE_SIZE;
2623                 } else if (remaining > 0) {
2624                         /* enough for partial page, fill and zero the rest */
2625                         rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2626                         rdata->iov[rdata->nr_iov].iov_len = remaining;
2627                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2628                                 rdata->nr_iov, page->index,
2629                                 rdata->iov[rdata->nr_iov].iov_base,
2630                                 rdata->iov[rdata->nr_iov].iov_len);
2631                         memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2632                                 '\0', PAGE_SIZE - remaining);
2633                         ++rdata->nr_iov;
2634                         len += remaining;
2635                         remaining = 0;
2636                 } else {
2637                         /* no need to hold page hostage */
2638                         list_del(&page->lru);
2639                         put_page(page);
2640                 }
2641         }
2642
2643         return len;
2644 }
2645
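/*
 * Uncached read path, the mirror image of cifs_iovec_write(): issue
 * rsize-sized async reads covering the whole range, then wait for each
 * one and copy the returned pages into the user iovec in offset order.
 */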
2646 static ssize_t
2647 cifs_iovec_read(struct file *file, const struct iovec *iov,
2648                  unsigned long nr_segs, loff_t *poffset)
2649 {
2650         ssize_t rc;
2651         size_t len, cur_len;
2652         ssize_t total_read = 0;
2653         loff_t offset = *poffset;
2654         unsigned int npages;
2655         struct cifs_sb_info *cifs_sb;
2656         struct cifs_tcon *tcon;
2657         struct cifsFileInfo *open_file;
2658         struct cifs_readdata *rdata, *tmp;
2659         struct list_head rdata_list;
2660         pid_t pid;
2661
2662         if (!nr_segs)
2663                 return 0;
2664
2665         len = iov_length(iov, nr_segs);
2666         if (!len)
2667                 return 0;
2668
2669         INIT_LIST_HEAD(&rdata_list);
2670         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2671         open_file = file->private_data;
2672         tcon = tlink_tcon(open_file->tlink);
2673
2674         if (!tcon->ses->server->ops->async_readv)
2675                 return -ENOSYS;
2676
2677         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2678                 pid = open_file->pid;
2679         else
2680                 pid = current->tgid;
2681
2682         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2683                 cFYI(1, "attempting read on write only file instance");
2684
2685         do {
2686                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2687                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2688
2689                 /* allocate a readdata struct */
2690                 rdata = cifs_readdata_alloc(npages,
2691                                             cifs_uncached_readv_complete);
2692                 if (!rdata) {
2693                         rc = -ENOMEM;
2694                         goto error;
2695                 }
2696
2697                 rc = cifs_read_allocate_pages(&rdata->pages, npages);
2698                 if (rc)
2699                         goto error;
2700
2701                 rdata->cfile = cifsFileInfo_get(open_file);
2702                 rdata->offset = offset;
2703                 rdata->bytes = cur_len;
2704                 rdata->pid = pid;
2705                 rdata->marshal_iov = cifs_uncached_read_marshal_iov;
2706
2707                 rc = cifs_retry_async_readv(rdata);
2708 error:
2709                 if (rc) {
2710                         kref_put(&rdata->refcount,
2711                                  cifs_uncached_readdata_release);
2712                         break;
2713                 }
2714
2715                 list_add_tail(&rdata->list, &rdata_list);
2716                 offset += cur_len;
2717                 len -= cur_len;
2718         } while (len > 0);
2719
2720         /* if at least one read request was sent successfully, then reset rc */
2721         if (!list_empty(&rdata_list))
2722                 rc = 0;
2723
2724         /* the loop below should proceed in the order of increasing offsets */
2725 restart_loop:
2726         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2727                 if (!rc) {
2728                         ssize_t copied;
2729
2730                         /* FIXME: freezable sleep too? */
2731                         rc = wait_for_completion_killable(&rdata->done);
2732                         if (rc)
2733                                 rc = -EINTR;
2734                         else if (rdata->result)
2735                                 rc = rdata->result;
2736                         else {
2737                                 rc = cifs_readdata_to_iov(rdata, iov,
2738                                                         nr_segs, *poffset,
2739                                                         &copied);
2740                                 total_read += copied;
2741                         }
2742
2743                         /* resend call if it's a retryable error */
2744                         if (rc == -EAGAIN) {
2745                                 rc = cifs_retry_async_readv(rdata);
2746                                 goto restart_loop;
2747                         }
2748                 }
2749                 list_del_init(&rdata->list);
2750                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2751         }
2752
2753         cifs_stats_bytes_read(tcon, total_read);
2754         *poffset += total_read;
2755
2756         /* mask nodata case */
2757         if (rc == -ENODATA)
2758                 rc = 0;
2759
2760         return total_read ? total_read : rc;
2761 }
2762
2763 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2764                                unsigned long nr_segs, loff_t pos)
2765 {
2766         ssize_t read;
2767
2768         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2769         if (read > 0)
2770                 iocb->ki_pos = pos;
2771
2772         return read;
2773 }
2774
2775 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2776                           unsigned long nr_segs, loff_t pos)
2777 {
2778         struct inode *inode;
2779
2780         inode = iocb->ki_filp->f_path.dentry->d_inode;
2781
2782         if (CIFS_I(inode)->clientCanCacheRead)
2783                 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2784
2785         /*
2786          * In strict cache mode we need to read from the server all the time
2787          * if we don't have level II oplock because the server can delay mtime
2788          * change - so we can't make a decision about invalidating the inode.
2789          * And we can also fail with page reading if there are mandatory locks
2790          * on pages affected by this read but not on the region from pos to
2791          * pos+len-1.
2792          */
2793
2794         return cifs_user_readv(iocb, iov, nr_segs, pos);
2795 }
2796
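/*
 * Synchronous read helper: pull up to read_size bytes in rsize-sized
 * chunks via ->sync_read(), reopening the handle and retrying on
 * -EAGAIN, the read-side counterpart of cifs_write() above.
 */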
2797 static ssize_t
2798 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2799 {
2800         int rc = -EACCES;
2801         unsigned int bytes_read = 0;
2802         unsigned int total_read;
2803         unsigned int current_read_size;
2804         unsigned int rsize;
2805         struct cifs_sb_info *cifs_sb;
2806         struct cifs_tcon *tcon;
2807         struct TCP_Server_Info *server;
2808         unsigned int xid;
2809         char *cur_offset;
2810         struct cifsFileInfo *open_file;
2811         struct cifs_io_parms io_parms;
2812         int buf_type = CIFS_NO_BUFFER;
2813         __u32 pid;
2814
2815         xid = get_xid();
2816         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2817
2818         /* FIXME: set up handlers for larger reads and/or convert to async */
2819         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2820
2821         if (file->private_data == NULL) {
2822                 rc = -EBADF;
2823                 free_xid(xid);
2824                 return rc;
2825         }
2826         open_file = file->private_data;
2827         tcon = tlink_tcon(open_file->tlink);
2828         server = tcon->ses->server;
2829
2830         if (!server->ops->sync_read) {
2831                 free_xid(xid);
2832                 return -ENOSYS;
2833         }
2834
2835         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2836                 pid = open_file->pid;
2837         else
2838                 pid = current->tgid;
2839
2840         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2841                 cFYI(1, "attempting read on write only file instance");
2842
2843         for (total_read = 0, cur_offset = read_data; read_size > total_read;
2844              total_read += bytes_read, cur_offset += bytes_read) {
2845                 current_read_size = min_t(uint, read_size - total_read, rsize);
                /*
                 * For Windows ME and 9x we do not want to request more than
                 * the server negotiated, since it will refuse the read
                 * otherwise.
                 */
2850                 if ((tcon->ses) && !(tcon->ses->capabilities &
2851                                 tcon->ses->server->vals->cap_large_files)) {
2852                         current_read_size = min_t(uint, current_read_size,
2853                                         CIFSMaxBufSize);
2854                 }
2855                 rc = -EAGAIN;
2856                 while (rc == -EAGAIN) {
2857                         if (open_file->invalidHandle) {
2858                                 rc = cifs_reopen_file(open_file, true);
2859                                 if (rc != 0)
2860                                         break;
2861                         }
2862                         io_parms.pid = pid;
2863                         io_parms.tcon = tcon;
2864                         io_parms.offset = *offset;
2865                         io_parms.length = current_read_size;
2866                         rc = server->ops->sync_read(xid, open_file, &io_parms,
2867                                                     &bytes_read, &cur_offset,
2868                                                     &buf_type);
2869                 }
2870                 if (rc || (bytes_read == 0)) {
2871                         if (total_read) {
2872                                 break;
2873                         } else {
2874                                 free_xid(xid);
2875                                 return rc;
2876                         }
2877                 } else {
2878                         cifs_stats_bytes_read(tcon, total_read);
2879                         *offset += bytes_read;
2880                 }
2881         }
2882         free_xid(xid);
2883         return total_read;
2884 }
2885
2886 /*
2887  * If the page is mmap'ed into a process' page tables, then we need to make
2888  * sure that it doesn't change while being written back.
2889  */
2890 static int
2891 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2892 {
2893         struct page *page = vmf->page;
2894
2895         lock_page(page);
2896         return VM_FAULT_LOCKED;
2897 }
2898
static const struct vm_operations_struct cifs_file_vm_ops = {
2900         .fault = filemap_fault,
2901         .page_mkwrite = cifs_page_mkwrite,
2902 };
2903
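/*
 * mmap for strict cache mode: without a read oplock the page cache may be
 * stale, so invalidate the mapping before setting up the vma.
 */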
2904 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
2905 {
2906         int rc, xid;
2907         struct inode *inode = file->f_path.dentry->d_inode;
2908
2909         xid = get_xid();
2910
        if (!CIFS_I(inode)->clientCanCacheRead) {
                rc = cifs_invalidate_mapping(inode);
                if (rc) {
                        /* don't leak the acquired xid on this error path */
                        free_xid(xid);
                        return rc;
                }
        }
2916
2917         rc = generic_file_mmap(file, vma);
2918         if (rc == 0)
2919                 vma->vm_ops = &cifs_file_vm_ops;
2920         free_xid(xid);
2921         return rc;
2922 }
2923
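/*
 * Default mmap path: revalidate the file first so the page cache matches
 * the server before it is mapped.
 */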
2924 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2925 {
2926         int rc, xid;
2927
2928         xid = get_xid();
2929         rc = cifs_revalidate_file(file);
2930         if (rc) {
2931                 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
2932                 free_xid(xid);
2933                 return rc;
2934         }
2935         rc = generic_file_mmap(file, vma);
2936         if (rc == 0)
2937                 vma->vm_ops = &cifs_file_vm_ops;
2938         free_xid(xid);
2939         return rc;
2940 }
2941
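/*
 * Completion work for an async readpages request: move each page onto the
 * LRU, mark it uptodate and copy it into fscache on success, then unlock
 * and release the pages and drop the readdata reference.
 */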
2942 static void
2943 cifs_readv_complete(struct work_struct *work)
2944 {
2945         struct cifs_readdata *rdata = container_of(work,
2946                                                 struct cifs_readdata, work);
2947         struct page *page, *tpage;
2948
2949         list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2950                 list_del(&page->lru);
2951                 lru_cache_add_file(page);
2952
2953                 if (rdata->result == 0) {
2954                         kunmap(page);
2955                         flush_dcache_page(page);
2956                         SetPageUptodate(page);
2957                 }
2958
2959                 unlock_page(page);
2960
2961                 if (rdata->result == 0)
2962                         cifs_readpage_to_fscache(rdata->mapping->host, page);
2963
2964                 page_cache_release(page);
2965         }
2966         kref_put(&rdata->refcount, cifs_readdata_release);
2967 }
2968
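/*
 * Marshal the pages of an async readpages request into the rdata iovec
 * array: map whole pages while response data remains, zero-fill the tail
 * of a partial page, treat pages past the server's EOF as zero-filled and
 * uptodate, and release any other pages left over.
 */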
2969 static int
2970 cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
2971 {
2972         int len = 0;
2973         struct page *page, *tpage;
2974         u64 eof;
2975         pgoff_t eof_index;
2976
2977         /* determine the eof that the server (probably) has */
2978         eof = CIFS_I(rdata->mapping->host)->server_eof;
2979         eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
2980         cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
2981
2982         rdata->nr_iov = 1;
2983         list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2984                 if (remaining >= PAGE_CACHE_SIZE) {
2985                         /* enough data to fill the page */
2986                         rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2987                         rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE;
2988                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2989                                 rdata->nr_iov, page->index,
2990                                 rdata->iov[rdata->nr_iov].iov_base,
2991                                 rdata->iov[rdata->nr_iov].iov_len);
2992                         ++rdata->nr_iov;
2993                         len += PAGE_CACHE_SIZE;
2994                         remaining -= PAGE_CACHE_SIZE;
2995                 } else if (remaining > 0) {
2996                         /* enough for partial page, fill and zero the rest */
2997                         rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2998                         rdata->iov[rdata->nr_iov].iov_len = remaining;
2999                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3000                                 rdata->nr_iov, page->index,
3001                                 rdata->iov[rdata->nr_iov].iov_base,
3002                                 rdata->iov[rdata->nr_iov].iov_len);
3003                         memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
3004                                 '\0', PAGE_CACHE_SIZE - remaining);
3005                         ++rdata->nr_iov;
3006                         len += remaining;
3007                         remaining = 0;
3008                 } else if (page->index > eof_index) {
3009                         /*
3010                          * The VFS will not try to do readahead past the
3011                          * i_size, but it's possible that we have outstanding
3012                          * writes with gaps in the middle and the i_size hasn't
3013                          * caught up yet. Populate those with zeroed out pages
3014                          * to prevent the VFS from repeatedly attempting to
3015                          * fill them until the writes are flushed.
3016                          */
3017                         zero_user(page, 0, PAGE_CACHE_SIZE);
3018                         list_del(&page->lru);
3019                         lru_cache_add_file(page);
3020                         flush_dcache_page(page);
3021                         SetPageUptodate(page);
3022                         unlock_page(page);
3023                         page_cache_release(page);
3024                 } else {
3025                         /* no need to hold page hostage */
3026                         list_del(&page->lru);
3027                         lru_cache_add_file(page);
3028                         unlock_page(page);
3029                         page_cache_release(page);
3030                 }
3031         }
3032
3033         return len;
3034 }
3035
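/*
 * ->readpages: try to satisfy the read from fscache first, then batch
 * index-contiguous pages from the VFS read-ahead list into async read
 * requests of at most rsize bytes each.
 */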
3036 static int cifs_readpages(struct file *file, struct address_space *mapping,
3037         struct list_head *page_list, unsigned num_pages)
3038 {
3039         int rc;
3040         struct list_head tmplist;
3041         struct cifsFileInfo *open_file = file->private_data;
3042         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3043         unsigned int rsize = cifs_sb->rsize;
3044         pid_t pid;
3045
3046         /*
3047          * Give up immediately if rsize is too small to read an entire page.
3048          * The VFS will fall back to readpage. We should never reach this
3049          * point however since we set ra_pages to 0 when the rsize is smaller
3050          * than a cache page.
3051          */
3052         if (unlikely(rsize < PAGE_CACHE_SIZE))
3053                 return 0;
3054
        /*
         * Read as many pages as possible from fscache. This returns -ENOBUFS
         * immediately if the cookie is negative.
         */
3059         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3060                                          &num_pages);
3061         if (rc == 0)
3062                 return rc;
3063
3064         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3065                 pid = open_file->pid;
3066         else
3067                 pid = current->tgid;
3068
3069         rc = 0;
3070         INIT_LIST_HEAD(&tmplist);
3071
3072         cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3073                 mapping, num_pages);
3074
3075         /*
3076          * Start with the page at end of list and move it to private
3077          * list. Do the same with any following pages until we hit
3078          * the rsize limit, hit an index discontinuity, or run out of
3079          * pages. Issue the async read and then start the loop again
3080          * until the list is empty.
3081          *
         * Note that list order is important. The page_list is in the order
         * of declining indexes. When we put the pages in rdata->pages, we
         * want them in increasing order.
3085          */
3086         while (!list_empty(page_list)) {
3087                 unsigned int bytes = PAGE_CACHE_SIZE;
3088                 unsigned int expected_index;
3089                 unsigned int nr_pages = 1;
3090                 loff_t offset;
3091                 struct page *page, *tpage;
3092                 struct cifs_readdata *rdata;
3093
3094                 page = list_entry(page_list->prev, struct page, lru);
3095
3096                 /*
3097                  * Lock the page and put it in the cache. Since no one else
3098                  * should have access to this page, we're safe to simply set
3099                  * PG_locked without checking it first.
3100                  */
3101                 __set_page_locked(page);
3102                 rc = add_to_page_cache_locked(page, mapping,
3103                                               page->index, GFP_KERNEL);
3104
3105                 /* give up if we can't stick it in the cache */
3106                 if (rc) {
3107                         __clear_page_locked(page);
3108                         break;
3109                 }
3110
3111                 /* move first page to the tmplist */
3112                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3113                 list_move_tail(&page->lru, &tmplist);
3114
3115                 /* now try and add more pages onto the request */
3116                 expected_index = page->index + 1;
3117                 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3118                         /* discontinuity ? */
3119                         if (page->index != expected_index)
3120                                 break;
3121
3122                         /* would this page push the read over the rsize? */
3123                         if (bytes + PAGE_CACHE_SIZE > rsize)
3124                                 break;
3125
3126                         __set_page_locked(page);
3127                         if (add_to_page_cache_locked(page, mapping,
3128                                                 page->index, GFP_KERNEL)) {
3129                                 __clear_page_locked(page);
3130                                 break;
3131                         }
3132                         list_move_tail(&page->lru, &tmplist);
3133                         bytes += PAGE_CACHE_SIZE;
3134                         expected_index++;
3135                         nr_pages++;
3136                 }
3137
3138                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3139                 if (!rdata) {
3140                         /* best to give up if we're out of mem */
3141                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3142                                 list_del(&page->lru);
3143                                 lru_cache_add_file(page);
3144                                 unlock_page(page);
3145                                 page_cache_release(page);
3146                         }
3147                         rc = -ENOMEM;
3148                         break;
3149                 }
3150
3151                 rdata->cfile = cifsFileInfo_get(open_file);
3152                 rdata->mapping = mapping;
3153                 rdata->offset = offset;
3154                 rdata->bytes = bytes;
3155                 rdata->pid = pid;
3156                 rdata->marshal_iov = cifs_readpages_marshal_iov;
3157                 list_splice_init(&tmplist, &rdata->pages);
3158
3159                 rc = cifs_retry_async_readv(rdata);
3160                 if (rc != 0) {
3161                         list_for_each_entry_safe(page, tpage, &rdata->pages,
3162                                                  lru) {
3163                                 list_del(&page->lru);
3164                                 lru_cache_add_file(page);
3165                                 unlock_page(page);
3166                                 page_cache_release(page);
3167                         }
3168                         kref_put(&rdata->refcount, cifs_readdata_release);
3169                         break;
3170                 }
3171
3172                 kref_put(&rdata->refcount, cifs_readdata_release);
3173         }
3174
3175         return rc;
3176 }
3177
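/*
 * Read one page: serve it from fscache when possible, otherwise do a
 * synchronous read from the server, zero-fill any short tail and mark the
 * page uptodate.
 */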
3178 static int cifs_readpage_worker(struct file *file, struct page *page,
3179         loff_t *poffset)
3180 {
3181         char *read_data;
3182         int rc;
3183
3184         /* Is the page cached? */
3185         rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
3186         if (rc == 0)
3187                 goto read_complete;
3188
3189         page_cache_get(page);
3190         read_data = kmap(page);
3191         /* for reads over a certain size could initiate async read ahead */
3192
3193         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3194
3195         if (rc < 0)
3196                 goto io_error;
3197         else
3198                 cFYI(1, "Bytes read %d", rc);
3199
3200         file->f_path.dentry->d_inode->i_atime =
3201                 current_fs_time(file->f_path.dentry->d_inode->i_sb);
3202
3203         if (PAGE_CACHE_SIZE > rc)
3204                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3205
3206         flush_dcache_page(page);
3207         SetPageUptodate(page);
3208
3209         /* send this page to the cache */
3210         cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3211
3212         rc = 0;
3213
3214 io_error:
3215         kunmap(page);
3216         page_cache_release(page);
3217
3218 read_complete:
3219         return rc;
3220 }
3221
3222 static int cifs_readpage(struct file *file, struct page *page)
3223 {
3224         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3225         int rc = -EACCES;
3226         unsigned int xid;
3227
3228         xid = get_xid();
3229
3230         if (file->private_data == NULL) {
3231                 rc = -EBADF;
3232                 free_xid(xid);
3233                 return rc;
3234         }
3235
        cFYI(1, "readpage %p at offset %lld", page, (long long)offset);
3238
3239         rc = cifs_readpage_worker(file, page, &offset);
3240
3241         unlock_page(page);
3242
3243         free_xid(xid);
3244         return rc;
3245 }
3246
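/* Return 1 if any open instance of this inode has write access. */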
3247 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3248 {
3249         struct cifsFileInfo *open_file;
3250
3251         spin_lock(&cifs_file_list_lock);
3252         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3253                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3254                         spin_unlock(&cifs_file_list_lock);
3255                         return 1;
3256                 }
3257         }
3258         spin_unlock(&cifs_file_list_lock);
3259         return 0;
3260 }
3261
/*
 * We do not want to update the file size from the server for inodes open
 * for write, to avoid races with writepage extending the file. In the
 * future we could consider refreshing the inode only on increases in the
 * file size, but this is tricky to do without racing with writebehind page
 * caching in the current Linux kernel design.
 */
3268 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3269 {
3270         if (!cifsInode)
3271                 return true;
3272
3273         if (is_inode_writable(cifsInode)) {
3274                 /* This inode is open for write at least once */
3275                 struct cifs_sb_info *cifs_sb;
3276
3277                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3278                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
                        /*
                         * Since there is no page cache to corrupt with direct
                         * I/O, we can change the size safely.
                         */
3281                         return true;
3282                 }
3283
3284                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3285                         return true;
3286
3287                 return false;
3288         } else
3289                 return true;
3290 }
3291
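/*
 * ->write_begin: grab the page cache page for the write and decide whether
 * it must be read in first: a full-page write, an already-uptodate page,
 * or (with a read oplock) a page at or straddling EOF that the write will
 * cover can all skip the read.
 */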
3292 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3293                         loff_t pos, unsigned len, unsigned flags,
3294                         struct page **pagep, void **fsdata)
3295 {
3296         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3297         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3298         loff_t page_start = pos & PAGE_MASK;
3299         loff_t i_size;
3300         struct page *page;
3301         int rc = 0;
3302
3303         cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3304
3305         page = grab_cache_page_write_begin(mapping, index, flags);
3306         if (!page) {
3307                 rc = -ENOMEM;
3308                 goto out;
3309         }
3310
3311         if (PageUptodate(page))
3312                 goto out;
3313
3314         /*
3315          * If we write a full page it will be up to date, no need to read from
3316          * the server. If the write is short, we'll end up doing a sync write
3317          * instead.
3318          */
3319         if (len == PAGE_CACHE_SIZE)
3320                 goto out;
3321
3322         /*
3323          * optimize away the read when we have an oplock, and we're not
3324          * expecting to use any of the data we'd be reading in. That
3325          * is, when the page lies beyond the EOF, or straddles the EOF
3326          * and the write will cover all of the existing data.
3327          */
3328         if (CIFS_I(mapping->host)->clientCanCacheRead) {
3329                 i_size = i_size_read(mapping->host);
3330                 if (page_start >= i_size ||
3331                     (offset == 0 && (pos + len) >= i_size)) {
3332                         zero_user_segments(page, 0, offset,
3333                                            offset + len,
3334                                            PAGE_CACHE_SIZE);
3335                         /*
3336                          * PageChecked means that the parts of the page
3337                          * to which we're not writing are considered up
3338                          * to date. Once the data is copied to the
3339                          * page, it can be set uptodate.
3340                          */
3341                         SetPageChecked(page);
3342                         goto out;
3343                 }
3344         }
3345
3346         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3347                 /*
3348                  * might as well read a page, it is fast enough. If we get
3349                  * an error, we don't need to return it. cifs_write_end will
3350                  * do a sync write instead since PG_uptodate isn't set.
3351                  */
3352                 cifs_readpage_worker(file, page, &page_start);
3353         } else {
                /*
                 * We could try using another file handle if there is one, but
                 * how would we lock it to prevent a close of that handle
                 * racing with this read? In any case, this will be written
                 * out by write_end, so it is fine.
                 */
3358         }
3359 out:
3360         *pagep = page;
3361         return rc;
3362 }
3363
3364 static int cifs_release_page(struct page *page, gfp_t gfp)
3365 {
3366         if (PagePrivate(page))
3367                 return 0;
3368
3369         return cifs_fscache_release_page(page, gfp);
3370 }
3371
3372 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3373 {
3374         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3375
3376         if (offset == 0)
3377                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3378 }
3379
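/*
 * ->launder_page: write a dirty page back synchronously (and drop it from
 * fscache) before it is invalidated.
 */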
3380 static int cifs_launder_page(struct page *page)
3381 {
3382         int rc = 0;
3383         loff_t range_start = page_offset(page);
3384         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3385         struct writeback_control wbc = {
3386                 .sync_mode = WB_SYNC_ALL,
3387                 .nr_to_write = 0,
3388                 .range_start = range_start,
3389                 .range_end = range_end,
3390         };
3391
3392         cFYI(1, "Launder page: %p", page);
3393
3394         if (clear_page_dirty_for_io(page))
3395                 rc = cifs_writepage_locked(page, &wbc);
3396
3397         cifs_fscache_invalidate_page(page, page->mapping->host);
3398         return rc;
3399 }
3400
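/*
 * Work handler for an oplock break from the server: break any matching
 * lease, write dirty data back (invalidating the mapping if read caching
 * was lost), push cached byte-range locks to the server, and acknowledge
 * the break via the server's oplock_response operation.
 */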
3401 void cifs_oplock_break(struct work_struct *work)
3402 {
3403         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3404                                                   oplock_break);
3405         struct inode *inode = cfile->dentry->d_inode;
3406         struct cifsInodeInfo *cinode = CIFS_I(inode);
3407         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3408         int rc = 0;
3409
3410         if (inode && S_ISREG(inode->i_mode)) {
3411                 if (cinode->clientCanCacheRead)
3412                         break_lease(inode, O_RDONLY);
3413                 else
3414                         break_lease(inode, O_WRONLY);
3415                 rc = filemap_fdatawrite(inode->i_mapping);
3416                 if (cinode->clientCanCacheRead == 0) {
3417                         rc = filemap_fdatawait(inode->i_mapping);
3418                         mapping_set_error(inode->i_mapping, rc);
3419                         invalidate_remote_inode(inode);
3420                 }
3421                 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3422         }
3423
3424         rc = cifs_push_locks(cfile);
3425         if (rc)
3426                 cERROR(1, "Push locks rc = %d", rc);
3427
        /*
         * Releasing a stale oplock after a recent reconnect of the SMB
         * session (using a now-incorrect file handle) is not a data
         * integrity issue, but do not bother sending an oplock release if
         * the session to the server is still disconnected, since the server
         * has already released the oplock.
         */
3434         if (!cfile->oplock_break_cancelled) {
3435                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3436                                                              cinode);
3437                 cFYI(1, "Oplock release rc = %d", rc);
3438         }
3439 }
3440
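/*
 * Address space operations used when the negotiated buffer size is large
 * enough for cifs_readpages (see the smallbuf variant below otherwise).
 */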
3441 const struct address_space_operations cifs_addr_ops = {
3442         .readpage = cifs_readpage,
3443         .readpages = cifs_readpages,
3444         .writepage = cifs_writepage,
3445         .writepages = cifs_writepages,
3446         .write_begin = cifs_write_begin,
3447         .write_end = cifs_write_end,
3448         .set_page_dirty = __set_page_dirty_nobuffers,
3449         .releasepage = cifs_release_page,
3450         .invalidatepage = cifs_invalidate_page,
3451         .launder_page = cifs_launder_page,
3452 };
3453
3454 /*
3455  * cifs_readpages requires the server to support a buffer large enough to
3456  * contain the header plus one complete page of data.  Otherwise, we need
3457  * to leave cifs_readpages out of the address space operations.
3458  */
3459 const struct address_space_operations cifs_addr_ops_smallbuf = {
3460         .readpage = cifs_readpage,
3461         .writepage = cifs_writepage,
3462         .writepages = cifs_writepages,
3463         .write_begin = cifs_write_begin,
3464         .write_end = cifs_write_end,
3465         .set_page_dirty = __set_page_dirty_nobuffers,
3466         .releasepage = cifs_release_page,
3467         .invalidatepage = cifs_invalidate_page,
3468         .launder_page = cifs_launder_page,
3469 };