/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"


static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it
                   can cause an unnecessary access denied on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}
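
/*
 * Illustrative sketch (not part of the driver): only the O_ACCMODE bits
 * matter to cifs_convert_flags(); the creation bits are handled separately
 * by cifs_get_disposition() below. For example:
 *
 *      cifs_convert_flags(O_RDWR | O_CREAT)
 *              returns (GENERIC_READ | GENERIC_WRITE)
 */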

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}
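
/*
 * Illustrative sketch (not part of the driver): the two helpers above split
 * one open(2) flag word into the SMB access mask and the create
 * disposition. For a typical "create or truncate" open:
 *
 *      unsigned int flags = O_WRONLY | O_CREAT | O_TRUNC;
 *
 *      cifs_convert_flags(flags)       returns GENERIC_WRITE
 *      cifs_get_disposition(flags)     returns FILE_OVERWRITE_IF
 */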

int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
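
/*
 * Illustrative sketch (not part of the driver): this is the UNIX-extensions
 * fast path used by cifs_open() and cifs_reopen_file(). Passing a NULL
 * pinode (as the reopen path does) skips the inode post-processing:
 *
 *      rc = cifs_posix_open(full_path, NULL, inode->i_sb,
 *                           cifs_sb->mnt_file_mode, oflags,
 *                           &oplock, &netfid, xid);
 */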

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is not a direct match for disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists);
 *      O_CREAT | O_TRUNC is similar but truncates the existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

out:
        kfree(buf);
        return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&cifs_file_list_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        /* if readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}
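
/*
 * Illustrative sketch (not part of the driver): every cifsFileInfo_get()
 * must be balanced by a cifsFileInfo_put(); dropping the final reference is
 * what closes the handle out on the server. A hypothetical caller:
 *
 *      struct cifsFileInfo *cfile = cifsFileInfo_get(file->private_data);
 *      ... use cfile without it being freed underneath us ...
 *      cifsFileInfo_put(cfile);
 */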

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file_dentry(file));
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on network
                 * i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read(&cinode->lock_sem);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already have the rename sem, can end up causing
         * writepage to get called, and if the server was down that means we
         * end up here, and we can never tell if the caller already has the
         * rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data, and since we do not know if we
         * have data that would invalidate the current end of file on the
         * server we can not go to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cifs_file_list_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}
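
/*
 * Illustrative sketch (not part of the driver) of how the return value is
 * consumed by the F_GETLK path in cifs_getlk() below:
 *
 *      rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
 *      if (!rc)
 *              return rc;      - answered from the local lock cache
 *      - otherwise rc == 1: no cached answer, so ask the server
 */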

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if we need to send a request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}
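
/*
 * Illustrative sketch (not part of the driver): the loop above batches lock
 * ranges so that no single request exceeds the negotiated buffer size,
 *
 *      max_num = (max_buf - sizeof(struct smb_hdr)) /
 *                                      sizeof(LOCKING_ANDX_RANGE);
 *
 * flushing with cifs_lockv() each time num reaches max_num, then sending
 * any partial batch (num < max_num) in the trailing cifs_lockv() call.
 */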

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem, which
         * protects the locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}
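
/*
 * Illustrative sketch (not part of the driver): for a blocking write lock
 * (fl_type == F_WRLCK with FL_SLEEP set), the helper above yields
 *
 *      *type = large_lock_type | exclusive_lock_type;
 *      *lock = 1;  *wait_flag = true;  (*unlock is left unset)
 *
 * which cifs_setlk() below then forwards to the server.
 */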

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}
1372
1373 int
1374 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1375                   unsigned int xid)
1376 {
1377         int rc = 0, stored_rc;
1378         int types[] = {LOCKING_ANDX_LARGE_FILES,
1379                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1380         unsigned int i;
1381         unsigned int max_num, num, max_buf;
1382         LOCKING_ANDX_RANGE *buf, *cur;
1383         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1384         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1385         struct cifsLockInfo *li, *tmp;
1386         __u64 length = 1 + flock->fl_end - flock->fl_start;
1387         struct list_head tmp_llist;
1388
1389         INIT_LIST_HEAD(&tmp_llist);
1390
1391         /*
1392          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1393          * and check it for zero before using.
1394          */
1395         max_buf = tcon->ses->server->maxBuf;
1396         if (!max_buf)
1397                 return -EINVAL;
1398
1399         max_num = (max_buf - sizeof(struct smb_hdr)) /
1400                                                 sizeof(LOCKING_ANDX_RANGE);
1401         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1402         if (!buf)
1403                 return -ENOMEM;
1404
1405         down_write(&cinode->lock_sem);
1406         for (i = 0; i < 2; i++) {
1407                 cur = buf;
1408                 num = 0;
1409                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1410                         if (flock->fl_start > li->offset ||
1411                             (flock->fl_start + length) <
1412                             (li->offset + li->length))
1413                                 continue;
1414                         if (current->tgid != li->pid)
1415                                 continue;
1416                         if (types[i] != li->type)
1417                                 continue;
1418                         if (cinode->can_cache_brlcks) {
1419                                 /*
1420                                  * We can cache brlock requests - simply remove
1421                                  * a lock from the file's list.
1422                                  */
1423                                 list_del(&li->llist);
1424                                 cifs_del_lock_waiters(li);
1425                                 kfree(li);
1426                                 continue;
1427                         }
1428                         cur->Pid = cpu_to_le16(li->pid);
1429                         cur->LengthLow = cpu_to_le32((u32)li->length);
1430                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1431                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1432                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1433                         /*
1434                          * Save the lock here so we can add it back to the
1435                          * file's list if the unlock range request fails on
1436                          * the server.
1437                          */
1438                         list_move(&li->llist, &tmp_llist);
1439                         if (++num == max_num) {
1440                                 stored_rc = cifs_lockv(xid, tcon,
1441                                                        cfile->fid.netfid,
1442                                                        li->type, num, 0, buf);
1443                                 if (stored_rc) {
1444                                         /*
1445                                          * We failed on the unlock range
1446                                          * request - add all locks from the tmp
1447                                          * list to the head of the file's list.
1448                                          */
1449                                         cifs_move_llist(&tmp_llist,
1450                                                         &cfile->llist->locks);
1451                                         rc = stored_rc;
1452                                 } else
1453                                         /*
1454                          * The unlock range request succeeded -
1455                                          * free the tmp list.
1456                                          */
1457                                         cifs_free_llist(&tmp_llist);
1458                                 cur = buf;
1459                                 num = 0;
1460                         } else
1461                                 cur++;
1462                 }
1463                 if (num) {
1464                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1465                                                types[i], num, 0, buf);
1466                         if (stored_rc) {
1467                                 cifs_move_llist(&tmp_llist,
1468                                                 &cfile->llist->locks);
1469                                 rc = stored_rc;
1470                         } else
1471                                 cifs_free_llist(&tmp_llist);
1472                 }
1473         }
1474
1475         up_write(&cinode->lock_sem);
1476         kfree(buf);
1477         return rc;
1478 }
1479
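     /*
      * Set (or clear) a single byte-range lock.  POSIX-capable servers
      * are sent a CIFSSMBPosixLock request; otherwise the lock is
      * cached locally when possible or sent to the server as a
      * mandatory lock.
      */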
1480 static int
1481 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1482            bool wait_flag, bool posix_lck, int lock, int unlock,
1483            unsigned int xid)
1484 {
1485         int rc = 0;
1486         __u64 length = 1 + flock->fl_end - flock->fl_start;
1487         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1488         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1489         struct TCP_Server_Info *server = tcon->ses->server;
1490         struct inode *inode = d_inode(cfile->dentry);
1491
1492         if (posix_lck) {
1493                 int posix_lock_type;
1494
1495                 rc = cifs_posix_lock_set(file, flock);
1496                 if (rc <= 0)
1497                         return rc;
1498
1499                 if (type & server->vals->shared_lock_type)
1500                         posix_lock_type = CIFS_RDLCK;
1501                 else
1502                         posix_lock_type = CIFS_WRLCK;
1503
1504                 if (unlock == 1)
1505                         posix_lock_type = CIFS_UNLCK;
1506
1507                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1508                                       current->tgid, flock->fl_start, length,
1509                                       NULL, posix_lock_type, wait_flag);
1510                 goto out;
1511         }
1512
1513         if (lock) {
1514                 struct cifsLockInfo *lock;
1515
1516                 lock = cifs_lock_init(flock->fl_start, length, type);
1517                 if (!lock)
1518                         return -ENOMEM;
1519
1520                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1521                 if (rc < 0) {
1522                         kfree(lock);
1523                         return rc;
1524                 }
1525                 if (!rc)
1526                         goto out;
1527
1528                 /*
1529                  * Windows 7 server can delay breaking lease from read to None
1530                  * if we set a byte-range lock on a file - break it explicitly
1531                  * before sending the lock to the server to be sure the next
1532                  * read won't conflict with non-overlapping locks due to
1533                  * page reading.
1534                  */
1535                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1536                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1537                         cifs_zap_mapping(inode);
1538                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1539                                  inode);
1540                         CIFS_I(inode)->oplock = 0;
1541                 }
1542
1543                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1544                                             type, 1, 0, wait_flag);
1545                 if (rc) {
1546                         kfree(lock);
1547                         return rc;
1548                 }
1549
1550                 cifs_lock_add(cfile, lock);
1551         } else if (unlock)
1552                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1553
1554 out:
1555         if (flock->fl_flags & FL_POSIX && !rc)
1556                 rc = locks_lock_file_wait(file, flock);
1557         return rc;
1558 }
1559
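     /*
      * Entry point for fcntl byte-range locking: decode the request,
      * decide between the POSIX and mandatory lock paths, and dispatch
      * to cifs_getlk() or cifs_setlk().
      */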
1560 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1561 {
1562         int rc, xid;
1563         int lock = 0, unlock = 0;
1564         bool wait_flag = false;
1565         bool posix_lck = false;
1566         struct cifs_sb_info *cifs_sb;
1567         struct cifs_tcon *tcon;
1568         struct cifsInodeInfo *cinode;
1569         struct cifsFileInfo *cfile;
1570         __u16 netfid;
1571         __u32 type;
1572
1573         rc = -EACCES;
1574         xid = get_xid();
1575
1576         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1577                  cmd, flock->fl_flags, flock->fl_type,
1578                  flock->fl_start, flock->fl_end);
1579
1580         cfile = (struct cifsFileInfo *)file->private_data;
1581         tcon = tlink_tcon(cfile->tlink);
1582
1583         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1584                         tcon->ses->server);
1585
1586         cifs_sb = CIFS_FILE_SB(file);
1587         netfid = cfile->fid.netfid;
1588         cinode = CIFS_I(file_inode(file));
1589
1590         if (cap_unix(tcon->ses) &&
1591             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1592             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1593                 posix_lck = true;
1594         /*
1595          * BB add code here to normalize offset and length to account for
1596          * negative length, which we cannot accept over the wire.
1597          */
1598         if (IS_GETLK(cmd)) {
1599                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1600                 free_xid(xid);
1601                 return rc;
1602         }
1603
1604         if (!lock && !unlock) {
1605                 /*
1606                  * if this is neither a lock nor an unlock request, there is
1607                  * nothing to do since we do not know what it is
1608                  */
1609                 free_xid(xid);
1610                 return -EOPNOTSUPP;
1611         }
1612
1613         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1614                         xid);
1615         free_xid(xid);
1616         return rc;
1617 }
1618
1619 /*
1620  * update the file size (if needed) after a write. Should be called with
1621  * the inode->i_lock held
1622  */
1623 void
1624 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1625                       unsigned int bytes_written)
1626 {
1627         loff_t end_of_write = offset + bytes_written;
1628
1629         if (end_of_write > cifsi->server_eof)
1630                 cifsi->server_eof = end_of_write;
1631 }
1632
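     /*
      * Synchronously write @write_size bytes from @write_data at
      * *@offset, retrying on -EAGAIN and reopening the handle if it was
      * invalidated by a reconnect.  Advances *@offset and the cached
      * EOF as the server accepts data.
      */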
1633 static ssize_t
1634 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1635            size_t write_size, loff_t *offset)
1636 {
1637         int rc = 0;
1638         unsigned int bytes_written = 0;
1639         unsigned int total_written;
1640         struct cifs_sb_info *cifs_sb;
1641         struct cifs_tcon *tcon;
1642         struct TCP_Server_Info *server;
1643         unsigned int xid;
1644         struct dentry *dentry = open_file->dentry;
1645         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1646         struct cifs_io_parms io_parms;
1647
1648         cifs_sb = CIFS_SB(dentry->d_sb);
1649
1650         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1651                  write_size, *offset, dentry);
1652
1653         tcon = tlink_tcon(open_file->tlink);
1654         server = tcon->ses->server;
1655
1656         if (!server->ops->sync_write)
1657                 return -ENOSYS;
1658
1659         xid = get_xid();
1660
1661         for (total_written = 0; write_size > total_written;
1662              total_written += bytes_written) {
1663                 rc = -EAGAIN;
1664                 while (rc == -EAGAIN) {
1665                         struct kvec iov[2];
1666                         unsigned int len;
1667
1668                         if (open_file->invalidHandle) {
1669                                 /* we could deadlock if we called
1670                                    filemap_fdatawait from here so tell
1671                                    reopen_file not to flush data to
1672                                    server now */
1673                                 rc = cifs_reopen_file(open_file, false);
1674                                 if (rc != 0)
1675                                         break;
1676                         }
1677
1678                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1679                                   (unsigned int)write_size - total_written);
1680                         /* iov[0] is reserved for smb header */
1681                         iov[1].iov_base = (char *)write_data + total_written;
1682                         iov[1].iov_len = len;
1683                         io_parms.pid = pid;
1684                         io_parms.tcon = tcon;
1685                         io_parms.offset = *offset;
1686                         io_parms.length = len;
1687                         rc = server->ops->sync_write(xid, &open_file->fid,
1688                                         &io_parms, &bytes_written, iov, 1);
1689                 }
1690                 if (rc || (bytes_written == 0)) {
1691                         if (total_written)
1692                                 break;
1693                         else {
1694                                 free_xid(xid);
1695                                 return rc;
1696                         }
1697                 } else {
1698                         spin_lock(&d_inode(dentry)->i_lock);
1699                         cifs_update_eof(cifsi, *offset, bytes_written);
1700                         spin_unlock(&d_inode(dentry)->i_lock);
1701                         *offset += bytes_written;
1702                 }
1703         }
1704
1705         cifs_stats_bytes_written(tcon, total_written);
1706
1707         if (total_written > 0) {
1708                 spin_lock(&d_inode(dentry)->i_lock);
1709                 if (*offset > d_inode(dentry)->i_size)
1710                         i_size_write(d_inode(dentry), *offset);
1711                 spin_unlock(&d_inode(dentry)->i_lock);
1712         }
1713         mark_inode_dirty_sync(d_inode(dentry));
1714         free_xid(xid);
1715         return total_written;
1716 }
1717
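     /*
      * Return a referenced open handle with read access for this
      * inode, or NULL if none is usable.  On multiuser mounts only
      * handles owned by the current fsuid are considered when
      * @fsuid_only is set.
      */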
1718 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1719                                         bool fsuid_only)
1720 {
1721         struct cifsFileInfo *open_file = NULL;
1722         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1723
1724         /* only filter by fsuid on multiuser mounts */
1725         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1726                 fsuid_only = false;
1727
1728         spin_lock(&cifs_file_list_lock);
1729         /* we could simply take the first list entry since write-only entries
1730            are always at the end of the list, but the first entry might have
1731            a close pending, so we go through the whole list */
1732         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1733                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1734                         continue;
1735                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1736                         if (!open_file->invalidHandle) {
1737                                 /* found a good file */
1738                                 /* lock it so it will not be closed on us */
1739                                 cifsFileInfo_get_locked(open_file);
1740                                 spin_unlock(&cifs_file_list_lock);
1741                                 return open_file;
1742                         } /* else might as well continue, and look for
1743                              another, or simply have the caller reopen it
1744                              again rather than trying to fix this handle */
1745                 } else /* write only file */
1746                         break; /* write only files are last so must be done */
1747         }
1748         spin_unlock(&cifs_file_list_lock);
1749         return NULL;
1750 }
1751
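     /*
      * As above, but for write access: prefer a valid handle opened by
      * the current process, then any valid handle, and finally try to
      * reopen an invalidated one (up to MAX_REOPEN_ATT attempts).
      */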
1752 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1753                                         bool fsuid_only)
1754 {
1755         struct cifsFileInfo *open_file, *inv_file = NULL;
1756         struct cifs_sb_info *cifs_sb;
1757         bool any_available = false;
1758         int rc;
1759         unsigned int refind = 0;
1760
1761         /* Having a null inode here (because mapping->host was set to zero by
1762         the VFS or MM) should not happen, but we had reports of an oops (due to
1763         it being zero) during stress test cases, so we need to check for it */
1764
1765         if (cifs_inode == NULL) {
1766                 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1767                 dump_stack();
1768                 return NULL;
1769         }
1770
1771         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1772
1773         /* only filter by fsuid on multiuser mounts */
1774         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1775                 fsuid_only = false;
1776
1777         spin_lock(&cifs_file_list_lock);
1778 refind_writable:
1779         if (refind > MAX_REOPEN_ATT) {
1780                 spin_unlock(&cifs_file_list_lock);
1781                 return NULL;
1782         }
1783         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1784                 if (!any_available && open_file->pid != current->tgid)
1785                         continue;
1786                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1787                         continue;
1788                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1789                         if (!open_file->invalidHandle) {
1790                                 /* found a good writable file */
1791                                 cifsFileInfo_get_locked(open_file);
1792                                 spin_unlock(&cifs_file_list_lock);
1793                                 return open_file;
1794                         } else {
1795                                 if (!inv_file)
1796                                         inv_file = open_file;
1797                         }
1798                 }
1799         }
1800         /* couldn't find a usable FH with the same pid, try any available */
1801         if (!any_available) {
1802                 any_available = true;
1803                 goto refind_writable;
1804         }
1805
1806         if (inv_file) {
1807                 any_available = false;
1808                 cifsFileInfo_get_locked(inv_file);
1809         }
1810
1811         spin_unlock(&cifs_file_list_lock);
1812
1813         if (inv_file) {
1814                 rc = cifs_reopen_file(inv_file, false);
1815                 if (!rc)
1816                         return inv_file;
1817                 else {
1818                         spin_lock(&cifs_file_list_lock);
1819                         list_move_tail(&inv_file->flist,
1820                                         &cifs_inode->openFileList);
1821                         spin_unlock(&cifs_file_list_lock);
1822                         cifsFileInfo_put(inv_file);
1823                         spin_lock(&cifs_file_list_lock);
1824                         ++refind;
1825                         inv_file = NULL;
1826                         goto refind_writable;
1827                 }
1828         }
1829
1830         return NULL;
1831 }
1832
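     /*
      * Write the byte range [@from, @to) of @page back to the server
      * through any writable handle for the inode.  The range is clamped
      * so that a partial-page write can never extend the file.
      */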
1833 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1834 {
1835         struct address_space *mapping = page->mapping;
1836         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1837         char *write_data;
1838         int rc = -EFAULT;
1839         int bytes_written = 0;
1840         struct inode *inode;
1841         struct cifsFileInfo *open_file;
1842
1843         if (!mapping || !mapping->host)
1844                 return -EFAULT;
1845
1846         inode = page->mapping->host;
1847
1848         offset += (loff_t)from;
1849         write_data = kmap(page);
1850         write_data += from;
1851
1852         if ((to > PAGE_SIZE) || (from > to)) {
1853                 kunmap(page);
1854                 return -EIO;
1855         }
1856
1857         /* racing with truncate? */
1858         if (offset > mapping->host->i_size) {
1859                 kunmap(page);
1860                 return 0; /* don't care */
1861         }
1862
1863         /* check to make sure that we are not extending the file */
1864         if (mapping->host->i_size - offset < (loff_t)to)
1865                 to = (unsigned)(mapping->host->i_size - offset);
1866
1867         open_file = find_writable_file(CIFS_I(mapping->host), false);
1868         if (open_file) {
1869                 bytes_written = cifs_write(open_file, open_file->pid,
1870                                            write_data, to - from, &offset);
1871                 cifsFileInfo_put(open_file);
1872                 /* Does mm or vfs already set times? */
1873                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1874                 if ((bytes_written > 0) && (offset))
1875                         rc = 0;
1876                 else if (bytes_written < 0)
1877                         rc = bytes_written;
1878         } else {
1879                 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1880                 rc = -EIO;
1881         }
1882
1883         kunmap(page);
1884         return rc;
1885 }
1886
1887 static struct cifs_writedata *
1888 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1889                           pgoff_t end, pgoff_t *index,
1890                           unsigned int *found_pages)
1891 {
1892         unsigned int nr_pages;
1893         struct page **pages;
1894         struct cifs_writedata *wdata;
1895
1896         wdata = cifs_writedata_alloc((unsigned int)tofind,
1897                                      cifs_writev_complete);
1898         if (!wdata)
1899                 return NULL;
1900
1901         /*
1902          * find_get_pages_tag seems to return a max of 256 on each
1903          * iteration, so we must call it several times in order to
1904          * fill the array, or else the wsize is effectively limited to
1905          * 256 * PAGE_SIZE.
1906          */
1907         *found_pages = 0;
1908         pages = wdata->pages;
1909         do {
1910                 nr_pages = find_get_pages_tag(mapping, index,
1911                                               PAGECACHE_TAG_DIRTY, tofind,
1912                                               pages);
1913                 *found_pages += nr_pages;
1914                 tofind -= nr_pages;
1915                 pages += nr_pages;
1916         } while (nr_pages && tofind && *index <= end);
1917
1918         return wdata;
1919 }
1920
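     /*
      * Lock and tag for writeback a run of consecutive dirty pages from
      * the @found_pages candidates, stopping at the first page that is
      * non-consecutive, already under writeback, or past EOF.  Unused
      * candidates are released.
      */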
1921 static unsigned int
1922 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1923                     struct address_space *mapping,
1924                     struct writeback_control *wbc,
1925                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1926 {
1927         unsigned int nr_pages = 0, i;
1928         struct page *page;
1929
1930         for (i = 0; i < found_pages; i++) {
1931                 page = wdata->pages[i];
1932                 /*
1933                  * At this point we hold neither mapping->tree_lock nor
1934                  * lock on the page itself: the page may be truncated or
1935                  * invalidated (changing page->mapping to NULL), or even
1936                  * swizzled back from swapper_space to tmpfs file
1937                  * mapping
1938                  */
1939
1940                 if (nr_pages == 0)
1941                         lock_page(page);
1942                 else if (!trylock_page(page))
1943                         break;
1944
1945                 if (unlikely(page->mapping != mapping)) {
1946                         unlock_page(page);
1947                         break;
1948                 }
1949
1950                 if (!wbc->range_cyclic && page->index > end) {
1951                         *done = true;
1952                         unlock_page(page);
1953                         break;
1954                 }
1955
1956                 if (*next && (page->index != *next)) {
1957                         /* Not the next consecutive page */
1958                         unlock_page(page);
1959                         break;
1960                 }
1961
1962                 if (wbc->sync_mode != WB_SYNC_NONE)
1963                         wait_on_page_writeback(page);
1964
1965                 if (PageWriteback(page) ||
1966                                 !clear_page_dirty_for_io(page)) {
1967                         unlock_page(page);
1968                         break;
1969                 }
1970
1971                 /*
1972                  * This actually clears the dirty bit in the radix tree.
1973                  * See cifs_writepage() for more commentary.
1974                  */
1975                 set_page_writeback(page);
1976                 if (page_offset(page) >= i_size_read(mapping->host)) {
1977                         *done = true;
1978                         unlock_page(page);
1979                         end_page_writeback(page);
1980                         break;
1981                 }
1982
1983                 wdata->pages[i] = page;
1984                 *next = page->index + 1;
1985                 ++nr_pages;
1986         }
1987
1988         /* reset index to refind any pages skipped */
1989         if (nr_pages == 0)
1990                 *index = wdata->pages[0]->index + 1;
1991
1992         /* put any pages we aren't going to use */
1993         for (i = nr_pages; i < found_pages; i++) {
1994                 put_page(wdata->pages[i]);
1995                 wdata->pages[i] = NULL;
1996         }
1997
1998         return nr_pages;
1999 }
2000
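     /*
      * Fill in the I/O parameters for @wdata and hand it to the
      * transport via ->async_writev().  All pages are unlocked before
      * returning, whether or not the send succeeded.
      */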
2001 static int
2002 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2003                  struct address_space *mapping, struct writeback_control *wbc)
2004 {
2005         int rc = 0;
2006         struct TCP_Server_Info *server;
2007         unsigned int i;
2008
2009         wdata->sync_mode = wbc->sync_mode;
2010         wdata->nr_pages = nr_pages;
2011         wdata->offset = page_offset(wdata->pages[0]);
2012         wdata->pagesz = PAGE_SIZE;
2013         wdata->tailsz = min(i_size_read(mapping->host) -
2014                         page_offset(wdata->pages[nr_pages - 1]),
2015                         (loff_t)PAGE_SIZE);
2016         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2017
2018         if (wdata->cfile != NULL)
2019                 cifsFileInfo_put(wdata->cfile);
2020         wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2021         if (!wdata->cfile) {
2022                 cifs_dbg(VFS, "No writable handles for inode\n");
2023                 rc = -EBADF;
2024         } else {
2025                 wdata->pid = wdata->cfile->pid;
2026                 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2027                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2028         }
2029
2030         for (i = 0; i < nr_pages; ++i)
2031                 unlock_page(wdata->pages[i]);
2032
2033         return rc;
2034 }
2035
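     /*
      * Write back dirty pages in wsize-sized batches, throttled by the
      * server's credit mechanism.  Falls back to generic_writepages()
      * when wsize is smaller than a page.
      */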
2036 static int cifs_writepages(struct address_space *mapping,
2037                            struct writeback_control *wbc)
2038 {
2039         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2040         struct TCP_Server_Info *server;
2041         bool done = false, scanned = false, range_whole = false;
2042         pgoff_t end, index;
2043         struct cifs_writedata *wdata;
2044         int rc = 0;
2045
2046         /*
2047          * If wsize is smaller than the page cache size, default to writing
2048          * one page at a time via cifs_writepage
2049          */
2050         if (cifs_sb->wsize < PAGE_SIZE)
2051                 return generic_writepages(mapping, wbc);
2052
2053         if (wbc->range_cyclic) {
2054                 index = mapping->writeback_index; /* Start from prev offset */
2055                 end = -1;
2056         } else {
2057                 index = wbc->range_start >> PAGE_SHIFT;
2058                 end = wbc->range_end >> PAGE_SHIFT;
2059                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2060                         range_whole = true;
2061                 scanned = true;
2062         }
2063         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2064 retry:
2065         while (!done && index <= end) {
2066                 unsigned int i, nr_pages, found_pages, wsize, credits;
2067                 pgoff_t next = 0, tofind, saved_index = index;
2068
2069                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2070                                                    &wsize, &credits);
2071                 if (rc)
2072                         break;
2073
2074                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2075
2076                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2077                                                   &found_pages);
2078                 if (!wdata) {
2079                         rc = -ENOMEM;
2080                         add_credits_and_wake_if(server, credits, 0);
2081                         break;
2082                 }
2083
2084                 if (found_pages == 0) {
2085                         kref_put(&wdata->refcount, cifs_writedata_release);
2086                         add_credits_and_wake_if(server, credits, 0);
2087                         break;
2088                 }
2089
2090                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2091                                                end, &index, &next, &done);
2092
2093                 /* nothing to write? */
2094                 if (nr_pages == 0) {
2095                         kref_put(&wdata->refcount, cifs_writedata_release);
2096                         add_credits_and_wake_if(server, credits, 0);
2097                         continue;
2098                 }
2099
2100                 wdata->credits = credits;
2101
2102                 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2103
2104                 /* send failure -- clean up the mess */
2105                 if (rc != 0) {
2106                         add_credits_and_wake_if(server, wdata->credits, 0);
2107                         for (i = 0; i < nr_pages; ++i) {
2108                                 if (rc == -EAGAIN)
2109                                         redirty_page_for_writepage(wbc,
2110                                                            wdata->pages[i]);
2111                                 else
2112                                         SetPageError(wdata->pages[i]);
2113                                 end_page_writeback(wdata->pages[i]);
2114                                 put_page(wdata->pages[i]);
2115                         }
2116                         if (rc != -EAGAIN)
2117                                 mapping_set_error(mapping, rc);
2118                 }
2119                 kref_put(&wdata->refcount, cifs_writedata_release);
2120
2121                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2122                         index = saved_index;
2123                         continue;
2124                 }
2125
2126                 wbc->nr_to_write -= nr_pages;
2127                 if (wbc->nr_to_write <= 0)
2128                         done = true;
2129
2130                 index = next;
2131         }
2132
2133         if (!scanned && !done) {
2134                 /*
2135                  * We hit the last page and there is more work to be done: wrap
2136                  * back to the start of the file
2137                  */
2138                 scanned = true;
2139                 index = 0;
2140                 goto retry;
2141         }
2142
2143         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2144                 mapping->writeback_index = index;
2145
2146         return rc;
2147 }
2148
2149 static int
2150 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2151 {
2152         int rc;
2153         unsigned int xid;
2154
2155         xid = get_xid();
2156 /* BB add check for wbc flags */
2157         get_page(page);
2158         if (!PageUptodate(page))
2159                 cifs_dbg(FYI, "ppw - page not up to date\n");
2160
2161         /*
2162          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2163          *
2164          * A writepage() implementation always needs to do either this,
2165          * or re-dirty the page with "redirty_page_for_writepage()" in
2166          * the case of a failure.
2167          *
2168          * Just unlocking the page will cause the radix tree tag-bits
2169          * to fail to update with the state of the page correctly.
2170          */
2171         set_page_writeback(page);
2172 retry_write:
2173         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2174         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2175                 goto retry_write;
2176         else if (rc == -EAGAIN)
2177                 redirty_page_for_writepage(wbc, page);
2178         else if (rc != 0)
2179                 SetPageError(page);
2180         else
2181                 SetPageUptodate(page);
2182         end_page_writeback(page);
2183         put_page(page);
2184         free_xid(xid);
2185         return rc;
2186 }
2187
2188 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2189 {
2190         int rc = cifs_writepage_locked(page, wbc);
2191         unlock_page(page);
2192         return rc;
2193 }
2194
2195 static int cifs_write_end(struct file *file, struct address_space *mapping,
2196                         loff_t pos, unsigned len, unsigned copied,
2197                         struct page *page, void *fsdata)
2198 {
2199         int rc;
2200         struct inode *inode = mapping->host;
2201         struct cifsFileInfo *cfile = file->private_data;
2202         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2203         __u32 pid;
2204
2205         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2206                 pid = cfile->pid;
2207         else
2208                 pid = current->tgid;
2209
2210         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2211                  page, pos, copied);
2212
2213         if (PageChecked(page)) {
2214                 if (copied == len)
2215                         SetPageUptodate(page);
2216                 ClearPageChecked(page);
2217         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2218                 SetPageUptodate(page);
2219
2220         if (!PageUptodate(page)) {
2221                 char *page_data;
2222                 unsigned offset = pos & (PAGE_SIZE - 1);
2223                 unsigned int xid;
2224
2225                 xid = get_xid();
2226                 /* this is probably better than directly calling
2227                    partialpage_write since in this function the file handle
2228                    is known, which we might as well leverage */
2229                 /* BB check if anything else missing out of ppw
2230                    such as updating last write time */
2231                 page_data = kmap(page);
2232                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2233                 /* if (rc < 0) should we set writebehind rc? */
2234                 kunmap(page);
2235
2236                 free_xid(xid);
2237         } else {
2238                 rc = copied;
2239                 pos += copied;
2240                 set_page_dirty(page);
2241         }
2242
2243         if (rc > 0) {
2244                 spin_lock(&inode->i_lock);
2245                 if (pos > inode->i_size)
2246                         i_size_write(inode, pos);
2247                 spin_unlock(&inode->i_lock);
2248         }
2249
2250         unlock_page(page);
2251         put_page(page);
2252
2253         return rc;
2254 }
2255
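     /*
      * Strict-cache fsync: flush dirty pages, invalidate the cache if
      * we no longer hold read caching for the inode, then ask the
      * server to flush the file.
      */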
2256 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2257                       int datasync)
2258 {
2259         unsigned int xid;
2260         int rc = 0;
2261         struct cifs_tcon *tcon;
2262         struct TCP_Server_Info *server;
2263         struct cifsFileInfo *smbfile = file->private_data;
2264         struct inode *inode = file_inode(file);
2265         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2266
2267         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2268         if (rc)
2269                 return rc;
2270         inode_lock(inode);
2271
2272         xid = get_xid();
2273
2274         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2275                  file, datasync);
2276
2277         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2278                 rc = cifs_zap_mapping(inode);
2279                 if (rc) {
2280                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2281                         rc = 0; /* don't care about it in fsync */
2282                 }
2283         }
2284
2285         tcon = tlink_tcon(smbfile->tlink);
2286         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2287                 server = tcon->ses->server;
2288                 if (server->ops->flush)
2289                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2290                 else
2291                         rc = -ENOSYS;
2292         }
2293
2294         free_xid(xid);
2295         inode_unlock(inode);
2296         return rc;
2297 }
2298
2299 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2300 {
2301         unsigned int xid;
2302         int rc = 0;
2303         struct cifs_tcon *tcon;
2304         struct TCP_Server_Info *server;
2305         struct cifsFileInfo *smbfile = file->private_data;
2306         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2307         struct inode *inode = file->f_mapping->host;
2308
2309         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2310         if (rc)
2311                 return rc;
2312         inode_lock(inode);
2313
2314         xid = get_xid();
2315
2316         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2317                  file, datasync);
2318
2319         tcon = tlink_tcon(smbfile->tlink);
2320         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2321                 server = tcon->ses->server;
2322                 if (server->ops->flush)
2323                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2324                 else
2325                         rc = -ENOSYS;
2326         }
2327
2328         free_xid(xid);
2329         inode_unlock(inode);
2330         return rc;
2331 }
2332
2333 /*
2334  * As the file closes, flush all cached write data for this inode, checking
2335  * for write-behind errors.
2336  */
2337 int cifs_flush(struct file *file, fl_owner_t id)
2338 {
2339         struct inode *inode = file_inode(file);
2340         int rc = 0;
2341
2342         if (file->f_mode & FMODE_WRITE)
2343                 rc = filemap_write_and_wait(inode->i_mapping);
2344
2345         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2346
2347         return rc;
2348 }
2349
2350 static int
2351 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2352 {
2353         int rc = 0;
2354         unsigned long i;
2355
2356         for (i = 0; i < num_pages; i++) {
2357                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2358                 if (!pages[i]) {
2359                         /*
2360                          * save number of pages we have already allocated and
2361                          * return with ENOMEM error
2362                          */
2363                         num_pages = i;
2364                         rc = -ENOMEM;
2365                         break;
2366                 }
2367         }
2368
2369         if (rc) {
2370                 for (i = 0; i < num_pages; i++)
2371                         put_page(pages[i]);
2372         }
2373         return rc;
2374 }
2375
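     /*
      * Number of pages needed for the next uncached write chunk.  The
      * chunk is capped at @wsize: for example, a 100KB request against
      * a 64KB wsize gives cur_len = 64KB and, assuming 4KB pages,
      * num_pages = 16.
      */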
2376 static inline
2377 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2378 {
2379         size_t num_pages;
2380         size_t clen;
2381
2382         clen = min_t(const size_t, len, wsize);
2383         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2384
2385         if (cur_len)
2386                 *cur_len = clen;
2387
2388         return num_pages;
2389 }
2390
2391 static void
2392 cifs_uncached_writedata_release(struct kref *refcount)
2393 {
2394         int i;
2395         struct cifs_writedata *wdata = container_of(refcount,
2396                                         struct cifs_writedata, refcount);
2397
2398         for (i = 0; i < wdata->nr_pages; i++)
2399                 put_page(wdata->pages[i]);
2400         cifs_writedata_release(refcount);
2401 }
2402
2403 static void
2404 cifs_uncached_writev_complete(struct work_struct *work)
2405 {
2406         struct cifs_writedata *wdata = container_of(work,
2407                                         struct cifs_writedata, work);
2408         struct inode *inode = d_inode(wdata->cfile->dentry);
2409         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2410
2411         spin_lock(&inode->i_lock);
2412         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2413         if (cifsi->server_eof > inode->i_size)
2414                 i_size_write(inode, cifsi->server_eof);
2415         spin_unlock(&inode->i_lock);
2416
2417         complete(&wdata->done);
2418
2419         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2420 }
2421
2422 static int
2423 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2424                       size_t *len, unsigned long *num_pages)
2425 {
2426         size_t save_len, copied, bytes, cur_len = *len;
2427         unsigned long i, nr_pages = *num_pages;
2428
2429         save_len = cur_len;
2430         for (i = 0; i < nr_pages; i++) {
2431                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2432                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2433                 cur_len -= copied;
2434                 /*
2435                  * If we didn't copy as much as we expected, then that
2436                  * may mean we trod into an unmapped area. Stop copying
2437                  * at that point. On the next pass through the big
2438                  * loop, we'll likely end up getting a zero-length
2439                  * write and bailing out of it.
2440                  */
2441                 if (copied < bytes)
2442                         break;
2443         }
2444         cur_len = save_len - cur_len;
2445         *len = cur_len;
2446
2447         /*
2448          * If we have no data to send, then that probably means that
2449          * the copy above failed altogether. That's most likely because
2450          * the address in the iovec was bogus. Return -EFAULT and let
2451          * the caller free anything we allocated and bail out.
2452          */
2453         if (!cur_len)
2454                 return -EFAULT;
2455
2456         /*
2457          * i + 1 now represents the number of pages we actually used in
2458          * the copy phase above.
2459          */
2460         *num_pages = i + 1;
2461         return 0;
2462 }
2463
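     /*
      * Slice @len bytes from @from into page-backed cifs_writedata
      * units of at most wsize bytes each and send them asynchronously,
      * queueing every dispatched unit on @wdata_list for the caller to
      * reap.  On -EAGAIN the iterator is rewound and the chunk retried.
      */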
2464 static int
2465 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2466                      struct cifsFileInfo *open_file,
2467                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2468 {
2469         int rc = 0;
2470         size_t cur_len;
2471         unsigned long nr_pages, num_pages, i;
2472         struct cifs_writedata *wdata;
2473         struct iov_iter saved_from;
2474         loff_t saved_offset = offset;
2475         pid_t pid;
2476         struct TCP_Server_Info *server;
2477
2478         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2479                 pid = open_file->pid;
2480         else
2481                 pid = current->tgid;
2482
2483         server = tlink_tcon(open_file->tlink)->ses->server;
2484         memcpy(&saved_from, from, sizeof(struct iov_iter));
2485
2486         do {
2487                 unsigned int wsize, credits;
2488
2489                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2490                                                    &wsize, &credits);
2491                 if (rc)
2492                         break;
2493
2494                 nr_pages = get_numpages(wsize, len, &cur_len);
2495                 wdata = cifs_writedata_alloc(nr_pages,
2496                                              cifs_uncached_writev_complete);
2497                 if (!wdata) {
2498                         rc = -ENOMEM;
2499                         add_credits_and_wake_if(server, credits, 0);
2500                         break;
2501                 }
2502
2503                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2504                 if (rc) {
2505                         kfree(wdata);
2506                         add_credits_and_wake_if(server, credits, 0);
2507                         break;
2508                 }
2509
2510                 num_pages = nr_pages;
2511                 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2512                 if (rc) {
2513                         for (i = 0; i < nr_pages; i++)
2514                                 put_page(wdata->pages[i]);
2515                         kfree(wdata);
2516                         add_credits_and_wake_if(server, credits, 0);
2517                         break;
2518                 }
2519
2520                 /*
2521                  * Bring nr_pages down to the number of pages we actually used,
2522                  * and free any pages that we didn't use.
2523                  */
2524                 for ( ; nr_pages > num_pages; nr_pages--)
2525                         put_page(wdata->pages[nr_pages - 1]);
2526
2527                 wdata->sync_mode = WB_SYNC_ALL;
2528                 wdata->nr_pages = nr_pages;
2529                 wdata->offset = (__u64)offset;
2530                 wdata->cfile = cifsFileInfo_get(open_file);
2531                 wdata->pid = pid;
2532                 wdata->bytes = cur_len;
2533                 wdata->pagesz = PAGE_SIZE;
2534                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2535                 wdata->credits = credits;
2536
2537                 if (!wdata->cfile->invalidHandle ||
2538                     !cifs_reopen_file(wdata->cfile, false))
2539                         rc = server->ops->async_writev(wdata,
2540                                         cifs_uncached_writedata_release);
2541                 if (rc) {
2542                         add_credits_and_wake_if(server, wdata->credits, 0);
2543                         kref_put(&wdata->refcount,
2544                                  cifs_uncached_writedata_release);
2545                         if (rc == -EAGAIN) {
2546                                 memcpy(from, &saved_from,
2547                                        sizeof(struct iov_iter));
2548                                 iov_iter_advance(from, offset - saved_offset);
2549                                 continue;
2550                         }
2551                         break;
2552                 }
2553
2554                 list_add_tail(&wdata->list, wdata_list);
2555                 offset += cur_len;
2556                 len -= cur_len;
2557         } while (len > 0);
2558
2559         return rc;
2560 }
2561
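     /*
      * Uncached write path: copy the user data into freshly allocated
      * pages, send the chunks asynchronously, then collect results in
      * offset order, resending any chunk that failed with -EAGAIN.
      */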
2562 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2563 {
2564         struct file *file = iocb->ki_filp;
2565         ssize_t total_written = 0;
2566         struct cifsFileInfo *open_file;
2567         struct cifs_tcon *tcon;
2568         struct cifs_sb_info *cifs_sb;
2569         struct cifs_writedata *wdata, *tmp;
2570         struct list_head wdata_list;
2571         struct iov_iter saved_from;
2572         int rc;
2573
2574         /*
2575          * BB - optimize the case when signing is disabled. We can drop this
2576          * extra memory-to-memory copying and use iovec buffers for constructing
2577          * the write request.
2578          */
2579
2580         rc = generic_write_checks(iocb, from);
2581         if (rc <= 0)
2582                 return rc;
2583
2584         INIT_LIST_HEAD(&wdata_list);
2585         cifs_sb = CIFS_FILE_SB(file);
2586         open_file = file->private_data;
2587         tcon = tlink_tcon(open_file->tlink);
2588
2589         if (!tcon->ses->server->ops->async_writev)
2590                 return -ENOSYS;
2591
2592         memcpy(&saved_from, from, sizeof(struct iov_iter));
2593
2594         rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2595                                   open_file, cifs_sb, &wdata_list);
2596
2597         /*
2598          * If at least one write was successfully sent, then discard any rc
2599          * value from the later writes. If another write succeeds, then
2600          * we'll end up returning whatever was written. If it fails, then
2601          * we'll get a new rc value from that.
2602          */
2603         if (!list_empty(&wdata_list))
2604                 rc = 0;
2605
2606         /*
2607          * Wait for and collect replies for any successful sends in order of
2608          * increasing offset. Once an error is hit or we get a fatal signal
2609          * while waiting, then return without waiting for any more replies.
2610          */
2611 restart_loop:
2612         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2613                 if (!rc) {
2614                         /* FIXME: freezable too? */
2615                         rc = wait_for_completion_killable(&wdata->done);
2616                         if (rc)
2617                                 rc = -EINTR;
2618                         else if (wdata->result)
2619                                 rc = wdata->result;
2620                         else
2621                                 total_written += wdata->bytes;
2622
2623                         /* resend call if it's a retryable error */
2624                         if (rc == -EAGAIN) {
2625                                 struct list_head tmp_list;
2626                                 struct iov_iter tmp_from;
2627
2628                                 INIT_LIST_HEAD(&tmp_list);
2629                                 list_del_init(&wdata->list);
2630
2631                                 memcpy(&tmp_from, &saved_from,
2632                                        sizeof(struct iov_iter));
2633                                 iov_iter_advance(&tmp_from,
2634                                                  wdata->offset - iocb->ki_pos);
2635
2636                                 rc = cifs_write_from_iter(wdata->offset,
2637                                                 wdata->bytes, &tmp_from,
2638                                                 open_file, cifs_sb, &tmp_list);
2639
2640                                 list_splice(&tmp_list, &wdata_list);
2641
2642                                 kref_put(&wdata->refcount,
2643                                          cifs_uncached_writedata_release);
2644                                 goto restart_loop;
2645                         }
2646                 }
2647                 list_del_init(&wdata->list);
2648                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2649         }
2650
2651         if (unlikely(!total_written))
2652                 return rc;
2653
2654         iocb->ki_pos += total_written;
2655         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2656         cifs_stats_bytes_written(tcon, total_written);
2657         return total_written;
2658 }
2659
2660 static ssize_t
2661 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2662 {
2663         struct file *file = iocb->ki_filp;
2664         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2665         struct inode *inode = file->f_mapping->host;
2666         struct cifsInodeInfo *cinode = CIFS_I(inode);
2667         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2668         ssize_t rc;
2669
2670         /*
2671          * We need to hold the sem to be sure nobody modifies the lock list
2672          * with a brlock that prevents writing.
2673          */
2674         down_read(&cinode->lock_sem);
2675         inode_lock(inode);
2676
2677         rc = generic_write_checks(iocb, from);
2678         if (rc <= 0)
2679                 goto out;
2680
2681         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2682                                      server->vals->exclusive_lock_type, NULL,
2683                                      CIFS_WRITE_OP))
2684                 rc = __generic_file_write_iter(iocb, from);
2685         else
2686                 rc = -EACCES;
2687 out:
2688         inode_unlock(inode);
2689
2690         if (rc > 0)
2691                 rc = generic_write_sync(iocb, rc);
2692         up_read(&cinode->lock_sem);
2693         return rc;
2694 }
2695
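     /*
      * Strict cache mode write entry point: write through the page
      * cache only while we hold write caching; otherwise send the data
      * straight to the server and zap the mapping afterwards if read
      * caching is still held.
      */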
2696 ssize_t
2697 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2698 {
2699         struct inode *inode = file_inode(iocb->ki_filp);
2700         struct cifsInodeInfo *cinode = CIFS_I(inode);
2701         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2702         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2703                                                 iocb->ki_filp->private_data;
2704         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2705         ssize_t written;
2706
2707         written = cifs_get_writer(cinode);
2708         if (written)
2709                 return written;
2710
2711         if (CIFS_CACHE_WRITE(cinode)) {
2712                 if (cap_unix(tcon->ses) &&
2713                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2714                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2715                         written = generic_file_write_iter(iocb, from);
2716                         goto out;
2717                 }
2718                 written = cifs_writev(iocb, from);
2719                 goto out;
2720         }
2721         /*
2722          * For non-oplocked files in strict cache mode we need to write the data
2723          * to the server exactly from pos to pos+len-1 rather than flush all
2724          * affected pages because that may cause an error with mandatory locks
2725          * on these pages but not on the region from pos to pos+len-1.
2726          */
2727         written = cifs_user_writev(iocb, from);
2728         if (written > 0 && CIFS_CACHE_READ(cinode)) {
2729                 /*
2730                  * Windows 7 server can delay breaking level2 oplock if a write
2731                  * request comes - break it on the client to prevent reading
2732                  * stale data.
2733                  */
2734                 cifs_zap_mapping(inode);
2735                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2736                          inode);
2737                 cinode->oplock = 0;
2738         }
2739 out:
2740         cifs_put_writer(cinode);
2741         return written;
2742 }
2743
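     /*
      * Allocate a readdata structure with room for @nr_pages page
      * pointers; @complete runs from a workqueue when the read is done.
      */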
2744 static struct cifs_readdata *
2745 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2746 {
2747         struct cifs_readdata *rdata;
2748
2749         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2750                         GFP_KERNEL);
2751         if (rdata != NULL) {
2752                 kref_init(&rdata->refcount);
2753                 INIT_LIST_HEAD(&rdata->list);
2754                 init_completion(&rdata->done);
2755                 INIT_WORK(&rdata->work, complete);
2756         }
2757
2758         return rdata;
2759 }
2760
2761 void
2762 cifs_readdata_release(struct kref *refcount)
2763 {
2764         struct cifs_readdata *rdata = container_of(refcount,
2765                                         struct cifs_readdata, refcount);
2766
2767         if (rdata->cfile)
2768                 cifsFileInfo_put(rdata->cfile);
2769
2770         kfree(rdata);
2771 }
2772
2773 static int
2774 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2775 {
2776         int rc = 0;
2777         struct page *page;
2778         unsigned int i;
2779
2780         for (i = 0; i < nr_pages; i++) {
2781                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2782                 if (!page) {
2783                         rc = -ENOMEM;
2784                         break;
2785                 }
2786                 rdata->pages[i] = page;
2787         }
2788
2789         if (rc) {
2790                 for (i = 0; i < nr_pages; i++) {
2791                         put_page(rdata->pages[i]);
2792                         rdata->pages[i] = NULL;
2793                 }
2794         }
2795         return rc;
2796 }
2797
2798 static void
2799 cifs_uncached_readdata_release(struct kref *refcount)
2800 {
2801         struct cifs_readdata *rdata = container_of(refcount,
2802                                         struct cifs_readdata, refcount);
2803         unsigned int i;
2804
2805         for (i = 0; i < rdata->nr_pages; i++) {
2806                 put_page(rdata->pages[i]);
2807                 rdata->pages[i] = NULL;
2808         }
2809         cifs_readdata_release(refcount);
2810 }
2811
2812 /**
2813  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2814  * @rdata:      the readdata response with list of pages holding data
2815  * @iter:       destination for our data
2816  *
2817  * This function copies data from a list of pages in a readdata response into
2818  * an array of iovecs. It will first calculate where the data should go
2819  * based on the info in the readdata and then copy the data into that spot.
2820  */
2821 static int
2822 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2823 {
2824         size_t remaining = rdata->got_bytes;
2825         unsigned int i;
2826
2827         for (i = 0; i < rdata->nr_pages; i++) {
2828                 struct page *page = rdata->pages[i];
2829                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2830                 size_t written = copy_page_to_iter(page, 0, copy, iter);
2831                 remaining -= written;
2832                 if (written < copy && iov_iter_count(iter) > 0)
2833                         break;
2834         }
2835         return remaining ? -EFAULT : 0;
2836 }
2837
2838 static void
2839 cifs_uncached_readv_complete(struct work_struct *work)
2840 {
2841         struct cifs_readdata *rdata = container_of(work,
2842                                                 struct cifs_readdata, work);
2843
2844         complete(&rdata->done);
2845         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2846 }
2847
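/*
 * Receive up to @len bytes from the server socket into rdata->pages,
 * zeroing the tail of a partially-filled last page and releasing any
 * pages past the data. Returns the number of bytes received when any
 * arrived and the transport was not aborted; otherwise the result
 * code, so the caller can distinguish a usable partial read from a
 * dead connection.
 */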
2848 static int
2849 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2850                         struct cifs_readdata *rdata, unsigned int len)
2851 {
2852         int result = 0;
2853         unsigned int i;
2854         unsigned int nr_pages = rdata->nr_pages;
2855
2856         rdata->got_bytes = 0;
2857         rdata->tailsz = PAGE_SIZE;
2858         for (i = 0; i < nr_pages; i++) {
2859                 struct page *page = rdata->pages[i];
2860                 size_t n;
2861
                if (len == 0) {
2863                         /* no need to hold page hostage */
2864                         rdata->pages[i] = NULL;
2865                         rdata->nr_pages--;
2866                         put_page(page);
2867                         continue;
2868                 }
2869                 n = len;
2870                 if (len >= PAGE_SIZE) {
2871                         /* enough data to fill the page */
2872                         n = PAGE_SIZE;
2873                         len -= n;
2874                 } else {
2875                         zero_user(page, len, PAGE_SIZE - len);
2876                         rdata->tailsz = len;
2877                         len = 0;
2878                 }
2879                 result = cifs_read_page_from_socket(server, page, n);
2880                 if (result < 0)
2881                         break;
2882
2883                 rdata->got_bytes += result;
2884         }
2885
2886         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2887                                                 rdata->got_bytes : result;
2888 }
2889
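/*
 * Split [offset, offset + len) into rsize-sized chunks and issue one
 * async read per chunk, queueing each request on @rdata_list. For
 * example, with a 64KB rsize a 150KB read becomes three requests of
 * 64KB, 64KB and 22KB; wait_mtu_credits() may further shrink the
 * per-request size depending on the credits the server grants.
 */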
2890 static int
2891 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2892                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2893 {
2894         struct cifs_readdata *rdata;
2895         unsigned int npages, rsize, credits;
2896         size_t cur_len;
2897         int rc;
2898         pid_t pid;
2899         struct TCP_Server_Info *server;
2900
2901         server = tlink_tcon(open_file->tlink)->ses->server;
2902
2903         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2904                 pid = open_file->pid;
2905         else
2906                 pid = current->tgid;
2907
2908         do {
2909                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2910                                                    &rsize, &credits);
2911                 if (rc)
2912                         break;
2913
2914                 cur_len = min_t(const size_t, len, rsize);
2915                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2916
2917                 /* allocate a readdata struct */
2918                 rdata = cifs_readdata_alloc(npages,
2919                                             cifs_uncached_readv_complete);
2920                 if (!rdata) {
2921                         add_credits_and_wake_if(server, credits, 0);
2922                         rc = -ENOMEM;
2923                         break;
2924                 }
2925
2926                 rc = cifs_read_allocate_pages(rdata, npages);
2927                 if (rc)
2928                         goto error;
2929
2930                 rdata->cfile = cifsFileInfo_get(open_file);
2931                 rdata->nr_pages = npages;
2932                 rdata->offset = offset;
2933                 rdata->bytes = cur_len;
2934                 rdata->pid = pid;
2935                 rdata->pagesz = PAGE_SIZE;
2936                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2937                 rdata->credits = credits;
2938
2939                 if (!rdata->cfile->invalidHandle ||
2940                     !cifs_reopen_file(rdata->cfile, true))
2941                         rc = server->ops->async_readv(rdata);
2942 error:
2943                 if (rc) {
2944                         add_credits_and_wake_if(server, rdata->credits, 0);
2945                         kref_put(&rdata->refcount,
2946                                  cifs_uncached_readdata_release);
2947                         if (rc == -EAGAIN)
2948                                 continue;
2949                         break;
2950                 }
2951
2952                 list_add_tail(&rdata->list, rdata_list);
2953                 offset += cur_len;
2954                 len -= cur_len;
2955         } while (len > 0);
2956
2957         return rc;
2958 }
2959
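/*
 * Uncached read entry point: fire off async reads covering the whole
 * iovec, then walk the requests in increasing-offset order, waiting
 * for each and copying its pages into @to. A request that came back
 * -EAGAIN (reconnect) is resent for the bytes it did not deliver;
 * any bytes it did deliver are copied first so the destination stays
 * contiguous.
 */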
2960 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2961 {
2962         struct file *file = iocb->ki_filp;
2963         ssize_t rc;
2964         size_t len;
2965         ssize_t total_read = 0;
2966         loff_t offset = iocb->ki_pos;
2967         struct cifs_sb_info *cifs_sb;
2968         struct cifs_tcon *tcon;
2969         struct cifsFileInfo *open_file;
2970         struct cifs_readdata *rdata, *tmp;
2971         struct list_head rdata_list;
2972
2973         len = iov_iter_count(to);
2974         if (!len)
2975                 return 0;
2976
2977         INIT_LIST_HEAD(&rdata_list);
2978         cifs_sb = CIFS_FILE_SB(file);
2979         open_file = file->private_data;
2980         tcon = tlink_tcon(open_file->tlink);
2981
2982         if (!tcon->ses->server->ops->async_readv)
2983                 return -ENOSYS;
2984
2985         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2986                 cifs_dbg(FYI, "attempting read on write only file instance\n");
2987
2988         rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
2989
        /* if sending at least one read request succeeded, then reset rc */
2991         if (!list_empty(&rdata_list))
2992                 rc = 0;
2993
2994         len = iov_iter_count(to);
2995         /* the loop below should proceed in the order of increasing offsets */
2996 again:
2997         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2998                 if (!rc) {
2999                         /* FIXME: freezable sleep too? */
3000                         rc = wait_for_completion_killable(&rdata->done);
3001                         if (rc)
3002                                 rc = -EINTR;
3003                         else if (rdata->result == -EAGAIN) {
3004                                 /* resend call if it's a retryable error */
3005                                 struct list_head tmp_list;
3006                                 unsigned int got_bytes = rdata->got_bytes;
3007
3008                                 list_del_init(&rdata->list);
3009                                 INIT_LIST_HEAD(&tmp_list);
3010
3011                                 /*
3012                                  * Got a part of data and then reconnect has
3013                                  * happened -- fill the buffer and continue
3014                                  * reading.
3015                                  */
3016                                 if (got_bytes && got_bytes < rdata->bytes) {
3017                                         rc = cifs_readdata_to_iov(rdata, to);
3018                                         if (rc) {
3019                                                 kref_put(&rdata->refcount,
3020                                                 cifs_uncached_readdata_release);
3021                                                 continue;
3022                                         }
3023                                 }
3024
3025                                 rc = cifs_send_async_read(
3026                                                 rdata->offset + got_bytes,
3027                                                 rdata->bytes - got_bytes,
3028                                                 rdata->cfile, cifs_sb,
3029                                                 &tmp_list);
3030
3031                                 list_splice(&tmp_list, &rdata_list);
3032
3033                                 kref_put(&rdata->refcount,
3034                                          cifs_uncached_readdata_release);
3035                                 goto again;
3036                         } else if (rdata->result)
3037                                 rc = rdata->result;
3038                         else
3039                                 rc = cifs_readdata_to_iov(rdata, to);
3040
3041                         /* if there was a short read -- discard anything left */
3042                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3043                                 rc = -ENODATA;
3044                 }
3045                 list_del_init(&rdata->list);
3046                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3047         }
3048
3049         total_read = len - iov_iter_count(to);
3050
3051         cifs_stats_bytes_read(tcon, total_read);
3052
3053         /* mask nodata case */
3054         if (rc == -ENODATA)
3055                 rc = 0;
3056
3057         if (total_read) {
3058                 iocb->ki_pos += total_read;
3059                 return total_read;
3060         }
3061         return rc;
3062 }
3063
3064 ssize_t
3065 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3066 {
3067         struct inode *inode = file_inode(iocb->ki_filp);
3068         struct cifsInodeInfo *cinode = CIFS_I(inode);
3069         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3070         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3071                                                 iocb->ki_filp->private_data;
3072         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3073         int rc = -EACCES;
3074
        /*
         * In strict cache mode we need to read from the server all the time
         * if we don't have a level II oplock because the server can delay the
         * mtime change - so we can't decide whether to invalidate the inode.
         * Reading through the page cache can also fail if there are mandatory
         * locks on pages affected by this read but not on the region from
         * pos to pos+len-1.
         */
3083         if (!CIFS_CACHE_READ(cinode))
3084                 return cifs_user_readv(iocb, to);
3085
3086         if (cap_unix(tcon->ses) &&
3087             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3088             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3089                 return generic_file_read_iter(iocb, to);
3090
3091         /*
3092          * We need to hold the sem to be sure nobody modifies lock list
3093          * with a brlock that prevents reading.
3094          */
3095         down_read(&cinode->lock_sem);
3096         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3097                                      tcon->ses->server->vals->shared_lock_type,
3098                                      NULL, CIFS_READ_OP))
3099                 rc = generic_file_read_iter(iocb, to);
3100         up_read(&cinode->lock_sem);
3101         return rc;
3102 }
3103
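/*
 * Synchronous read helper (used by the readpage path): issue
 * sync_read calls of at most rsize bytes in a loop, transparently
 * retrying on -EAGAIN (e.g. after reopening an invalidated handle)
 * and advancing *offset as data arrives.
 */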
3104 static ssize_t
3105 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3106 {
3107         int rc = -EACCES;
3108         unsigned int bytes_read = 0;
3109         unsigned int total_read;
3110         unsigned int current_read_size;
3111         unsigned int rsize;
3112         struct cifs_sb_info *cifs_sb;
3113         struct cifs_tcon *tcon;
3114         struct TCP_Server_Info *server;
3115         unsigned int xid;
3116         char *cur_offset;
3117         struct cifsFileInfo *open_file;
3118         struct cifs_io_parms io_parms;
3119         int buf_type = CIFS_NO_BUFFER;
3120         __u32 pid;
3121
3122         xid = get_xid();
3123         cifs_sb = CIFS_FILE_SB(file);
3124
3125         /* FIXME: set up handlers for larger reads and/or convert to async */
3126         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3127
3128         if (file->private_data == NULL) {
3129                 rc = -EBADF;
3130                 free_xid(xid);
3131                 return rc;
3132         }
3133         open_file = file->private_data;
3134         tcon = tlink_tcon(open_file->tlink);
3135         server = tcon->ses->server;
3136
3137         if (!server->ops->sync_read) {
3138                 free_xid(xid);
3139                 return -ENOSYS;
3140         }
3141
3142         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3143                 pid = open_file->pid;
3144         else
3145                 pid = current->tgid;
3146
3147         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3148                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3149
3150         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3151              total_read += bytes_read, cur_offset += bytes_read) {
3152                 do {
3153                         current_read_size = min_t(uint, read_size - total_read,
3154                                                   rsize);
                        /*
                         * For Windows ME and 9x we do not want to request
                         * more than the server negotiated, since it will
                         * refuse the read otherwise.
                         */
3160                         if ((tcon->ses) && !(tcon->ses->capabilities &
3161                                 tcon->ses->server->vals->cap_large_files)) {
3162                                 current_read_size = min_t(uint,
3163                                         current_read_size, CIFSMaxBufSize);
3164                         }
3165                         if (open_file->invalidHandle) {
3166                                 rc = cifs_reopen_file(open_file, true);
3167                                 if (rc != 0)
3168                                         break;
3169                         }
3170                         io_parms.pid = pid;
3171                         io_parms.tcon = tcon;
3172                         io_parms.offset = *offset;
3173                         io_parms.length = current_read_size;
3174                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3175                                                     &bytes_read, &cur_offset,
3176                                                     &buf_type);
3177                 } while (rc == -EAGAIN);
3178
3179                 if (rc || (bytes_read == 0)) {
3180                         if (total_read) {
3181                                 break;
3182                         } else {
3183                                 free_xid(xid);
3184                                 return rc;
3185                         }
3186                 } else {
3187                         cifs_stats_bytes_read(tcon, total_read);
3188                         *offset += bytes_read;
3189                 }
3190         }
3191         free_xid(xid);
3192         return total_read;
3193 }
3194
3195 /*
3196  * If the page is mmap'ed into a process' page tables, then we need to make
3197  * sure that it doesn't change while being written back.
3198  */
3199 static int
3200 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3201 {
3202         struct page *page = vmf->page;
3203
3204         lock_page(page);
3205         return VM_FAULT_LOCKED;
3206 }
3207
3208 static const struct vm_operations_struct cifs_file_vm_ops = {
3209         .fault = filemap_fault,
3210         .map_pages = filemap_map_pages,
3211         .page_mkwrite = cifs_page_mkwrite,
3212 };
3213
3214 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3215 {
3216         int rc, xid;
3217         struct inode *inode = file_inode(file);
3218
3219         xid = get_xid();
3220
        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
                rc = cifs_zap_mapping(inode);
                if (rc) {
                        free_xid(xid);
                        return rc;
                }
        }
3226
3227         rc = generic_file_mmap(file, vma);
3228         if (rc == 0)
3229                 vma->vm_ops = &cifs_file_vm_ops;
3230         free_xid(xid);
3231         return rc;
3232 }
3233
3234 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3235 {
3236         int rc, xid;
3237
3238         xid = get_xid();
3239         rc = cifs_revalidate_file(file);
3240         if (rc) {
3241                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3242                          rc);
3243                 free_xid(xid);
3244                 return rc;
3245         }
3246         rc = generic_file_mmap(file, vma);
3247         if (rc == 0)
3248                 vma->vm_ops = &cifs_file_vm_ops;
3249         free_xid(xid);
3250         return rc;
3251 }
3252
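/*
 * Completion work for a readpages request: pages that were actually
 * filled (including a partial result that ended in -EAGAIN) are
 * flushed, marked uptodate and offered to fscache; every page is put
 * on the file LRU, unlocked and released along with the request's
 * readdata reference.
 */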
3253 static void
3254 cifs_readv_complete(struct work_struct *work)
3255 {
3256         unsigned int i, got_bytes;
3257         struct cifs_readdata *rdata = container_of(work,
3258                                                 struct cifs_readdata, work);
3259
3260         got_bytes = rdata->got_bytes;
3261         for (i = 0; i < rdata->nr_pages; i++) {
3262                 struct page *page = rdata->pages[i];
3263
3264                 lru_cache_add_file(page);
3265
3266                 if (rdata->result == 0 ||
3267                     (rdata->result == -EAGAIN && got_bytes)) {
3268                         flush_dcache_page(page);
3269                         SetPageUptodate(page);
3270                 }
3271
3272                 unlock_page(page);
3273
3274                 if (rdata->result == 0 ||
3275                     (rdata->result == -EAGAIN && got_bytes))
3276                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3277
3278                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3279
3280                 put_page(page);
3281                 rdata->pages[i] = NULL;
3282         }
3283         kref_put(&rdata->refcount, cifs_readdata_release);
3284 }
3285
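/*
 * Page cache variant of read_into_pages. Once the requested data is
 * exhausted, pages beyond the server's (probable) EOF are zero-filled
 * rather than dropped: e.g. with server_eof = 10000 and 4KB pages,
 * eof_index = 2, so trailing pages with index 3 and up are populated
 * with zeroes to keep the VFS from re-requesting them while writeback
 * catches the size up.
 */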
3286 static int
3287 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3288                         struct cifs_readdata *rdata, unsigned int len)
3289 {
3290         int result = 0;
3291         unsigned int i;
3292         u64 eof;
3293         pgoff_t eof_index;
3294         unsigned int nr_pages = rdata->nr_pages;
3295
3296         /* determine the eof that the server (probably) has */
3297         eof = CIFS_I(rdata->mapping->host)->server_eof;
3298         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3299         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3300
3301         rdata->got_bytes = 0;
3302         rdata->tailsz = PAGE_SIZE;
3303         for (i = 0; i < nr_pages; i++) {
3304                 struct page *page = rdata->pages[i];
3305                 size_t n = PAGE_SIZE;
3306
3307                 if (len >= PAGE_SIZE) {
3308                         len -= PAGE_SIZE;
3309                 } else if (len > 0) {
3310                         /* enough for partial page, fill and zero the rest */
3311                         zero_user(page, len, PAGE_SIZE - len);
3312                         n = rdata->tailsz = len;
3313                         len = 0;
3314                 } else if (page->index > eof_index) {
3315                         /*
3316                          * The VFS will not try to do readahead past the
3317                          * i_size, but it's possible that we have outstanding
3318                          * writes with gaps in the middle and the i_size hasn't
3319                          * caught up yet. Populate those with zeroed out pages
3320                          * to prevent the VFS from repeatedly attempting to
3321                          * fill them until the writes are flushed.
3322                          */
3323                         zero_user(page, 0, PAGE_SIZE);
3324                         lru_cache_add_file(page);
3325                         flush_dcache_page(page);
3326                         SetPageUptodate(page);
3327                         unlock_page(page);
3328                         put_page(page);
3329                         rdata->pages[i] = NULL;
3330                         rdata->nr_pages--;
3331                         continue;
3332                 } else {
3333                         /* no need to hold page hostage */
3334                         lru_cache_add_file(page);
3335                         unlock_page(page);
3336                         put_page(page);
3337                         rdata->pages[i] = NULL;
3338                         rdata->nr_pages--;
3339                         continue;
3340                 }
3341
3342                 result = cifs_read_page_from_socket(server, page, n);
3343                 if (result < 0)
3344                         break;
3345
3346                 rdata->got_bytes += result;
3347         }
3348
3349         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3350                                                 rdata->got_bytes : result;
3351 }
3352
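/*
 * Peel a run of contiguous-index pages off the tail of @page_list
 * (the list arrives in descending index order, so the tail holds the
 * lowest index), lock each into the page cache and move it to
 * @tmplist, stopping at an index gap or once the batch would exceed
 * @rsize bytes.
 */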
3353 static int
3354 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3355                     unsigned int rsize, struct list_head *tmplist,
3356                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3357 {
3358         struct page *page, *tpage;
3359         unsigned int expected_index;
3360         int rc;
3361         gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
3362
3363         INIT_LIST_HEAD(tmplist);
3364
3365         page = list_entry(page_list->prev, struct page, lru);
3366
3367         /*
3368          * Lock the page and put it in the cache. Since no one else
3369          * should have access to this page, we're safe to simply set
3370          * PG_locked without checking it first.
3371          */
3372         __SetPageLocked(page);
3373         rc = add_to_page_cache_locked(page, mapping,
3374                                       page->index, gfp);
3375
3376         /* give up if we can't stick it in the cache */
3377         if (rc) {
3378                 __ClearPageLocked(page);
3379                 return rc;
3380         }
3381
3382         /* move first page to the tmplist */
3383         *offset = (loff_t)page->index << PAGE_SHIFT;
3384         *bytes = PAGE_SIZE;
3385         *nr_pages = 1;
3386         list_move_tail(&page->lru, tmplist);
3387
3388         /* now try and add more pages onto the request */
3389         expected_index = page->index + 1;
3390         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
                /* discontinuity? */
3392                 if (page->index != expected_index)
3393                         break;
3394
3395                 /* would this page push the read over the rsize? */
3396                 if (*bytes + PAGE_SIZE > rsize)
3397                         break;
3398
3399                 __SetPageLocked(page);
3400                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3401                         __ClearPageLocked(page);
3402                         break;
3403                 }
3404                 list_move_tail(&page->lru, tmplist);
3405                 (*bytes) += PAGE_SIZE;
3406                 expected_index++;
3407                 (*nr_pages)++;
3408         }
3409         return rc;
3410 }
3411
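/*
 * ->readpages: batch contiguous pages from the VFS readahead list
 * into rsize-sized async read requests. A 256KB readahead with a
 * 64KB rsize would, for instance, go out as four requests, each
 * filling 16 pages (assuming 4KB pages).
 */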
3412 static int cifs_readpages(struct file *file, struct address_space *mapping,
3413         struct list_head *page_list, unsigned num_pages)
3414 {
3415         int rc;
3416         struct list_head tmplist;
3417         struct cifsFileInfo *open_file = file->private_data;
3418         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3419         struct TCP_Server_Info *server;
3420         pid_t pid;
3421
        /*
         * Read as many pages as possible from fscache. Returns -ENOBUFS
         * immediately if the cookie is negative (i.e. fscache is disabled
         * for this inode).
         *
         * After this point, every page in the list might have PG_fscache
         * set, so we will need to clean that up off of every page we don't
         * use.
         */
3429         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3430                                          &num_pages);
3431         if (rc == 0)
3432                 return rc;
3433
3434         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3435                 pid = open_file->pid;
3436         else
3437                 pid = current->tgid;
3438
3439         rc = 0;
3440         server = tlink_tcon(open_file->tlink)->ses->server;
3441
3442         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3443                  __func__, file, mapping, num_pages);
3444
3445         /*
3446          * Start with the page at end of list and move it to private
3447          * list. Do the same with any following pages until we hit
3448          * the rsize limit, hit an index discontinuity, or run out of
3449          * pages. Issue the async read and then start the loop again
3450          * until the list is empty.
3451          *
3452          * Note that list order is important. The page_list is in
3453          * the order of declining indexes. When we put the pages in
3454          * the rdata->pages, then we want them in increasing order.
3455          */
3456         while (!list_empty(page_list)) {
3457                 unsigned int i, nr_pages, bytes, rsize;
3458                 loff_t offset;
3459                 struct page *page, *tpage;
3460                 struct cifs_readdata *rdata;
3461                 unsigned credits;
3462
3463                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3464                                                    &rsize, &credits);
3465                 if (rc)
3466                         break;
3467
3468                 /*
3469                  * Give up immediately if rsize is too small to read an entire
3470                  * page. The VFS will fall back to readpage. We should never
3471                  * reach this point however since we set ra_pages to 0 when the
3472                  * rsize is smaller than a cache page.
3473                  */
3474                 if (unlikely(rsize < PAGE_SIZE)) {
3475                         add_credits_and_wake_if(server, credits, 0);
3476                         return 0;
3477                 }
3478
3479                 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3480                                          &nr_pages, &offset, &bytes);
3481                 if (rc) {
3482                         add_credits_and_wake_if(server, credits, 0);
3483                         break;
3484                 }
3485
3486                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3487                 if (!rdata) {
3488                         /* best to give up if we're out of mem */
3489                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3490                                 list_del(&page->lru);
3491                                 lru_cache_add_file(page);
3492                                 unlock_page(page);
3493                                 put_page(page);
3494                         }
3495                         rc = -ENOMEM;
3496                         add_credits_and_wake_if(server, credits, 0);
3497                         break;
3498                 }
3499
3500                 rdata->cfile = cifsFileInfo_get(open_file);
3501                 rdata->mapping = mapping;
3502                 rdata->offset = offset;
3503                 rdata->bytes = bytes;
3504                 rdata->pid = pid;
3505                 rdata->pagesz = PAGE_SIZE;
3506                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3507                 rdata->credits = credits;
3508
3509                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3510                         list_del(&page->lru);
3511                         rdata->pages[rdata->nr_pages++] = page;
3512                 }
3513
3514                 if (!rdata->cfile->invalidHandle ||
3515                     !cifs_reopen_file(rdata->cfile, true))
3516                         rc = server->ops->async_readv(rdata);
3517                 if (rc) {
3518                         add_credits_and_wake_if(server, rdata->credits, 0);
3519                         for (i = 0; i < rdata->nr_pages; i++) {
3520                                 page = rdata->pages[i];
3521                                 lru_cache_add_file(page);
3522                                 unlock_page(page);
3523                                 put_page(page);
3524                         }
3525                         /* Fallback to the readpage in error/reconnect cases */
3526                         kref_put(&rdata->refcount, cifs_readdata_release);
3527                         break;
3528                 }
3529
3530                 kref_put(&rdata->refcount, cifs_readdata_release);
3531         }
3532
3533         /* Any pages that have been shown to fscache but didn't get added to
3534          * the pagecache must be uncached before they get returned to the
3535          * allocator.
3536          */
3537         cifs_fscache_readpages_cancel(mapping->host, page_list);
3538         return rc;
3539 }
3540
3541 /*
3542  * cifs_readpage_worker must be called with the page pinned
3543  */
3544 static int cifs_readpage_worker(struct file *file, struct page *page,
3545         loff_t *poffset)
3546 {
3547         char *read_data;
3548         int rc;
3549
3550         /* Is the page cached? */
3551         rc = cifs_readpage_from_fscache(file_inode(file), page);
3552         if (rc == 0)
3553                 goto read_complete;
3554
3555         read_data = kmap(page);
        /* for reads over a certain size we could initiate async read-ahead */
3557
3558         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3559
3560         if (rc < 0)
3561                 goto io_error;
3562         else
3563                 cifs_dbg(FYI, "Bytes read %d\n", rc);
3564
3565         file_inode(file)->i_atime =
3566                 current_fs_time(file_inode(file)->i_sb);
3567
3568         if (PAGE_SIZE > rc)
3569                 memset(read_data + rc, 0, PAGE_SIZE - rc);
3570
3571         flush_dcache_page(page);
3572         SetPageUptodate(page);
3573
3574         /* send this page to the cache */
3575         cifs_readpage_to_fscache(file_inode(file), page);
3576
3577         rc = 0;
3578
3579 io_error:
3580         kunmap(page);
3581         unlock_page(page);
3582
3583 read_complete:
3584         return rc;
3585 }
3586
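/*
 * ->readpage: fill a single locked page, trying fscache first and
 * falling back to a synchronous read from the server.
 */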
3587 static int cifs_readpage(struct file *file, struct page *page)
3588 {
3589         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3590         int rc = -EACCES;
3591         unsigned int xid;
3592
3593         xid = get_xid();
3594
3595         if (file->private_data == NULL) {
3596                 rc = -EBADF;
3597                 free_xid(xid);
3598                 return rc;
3599         }
3600
3601         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3602                  page, (int)offset, (int)offset);
3603
3604         rc = cifs_readpage_worker(file, page, &offset);
3605
3606         free_xid(xid);
3607         return rc;
3608 }
3609
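/*
 * Return 1 if the inode has at least one open handle with write
 * access, 0 otherwise.
 */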
3610 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3611 {
3612         struct cifsFileInfo *open_file;
3613
3614         spin_lock(&cifs_file_list_lock);
3615         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3616                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3617                         spin_unlock(&cifs_file_list_lock);
3618                         return 1;
3619                 }
3620         }
3621         spin_unlock(&cifs_file_list_lock);
3622         return 0;
3623 }
3624
/* We do not want to update the file size from the server for inodes
   open for write - to avoid races with writepage extending the file.
   In the future we could consider allowing a refresh of the inode
   only on increases in the file size, but this is tricky to do
   without racing with writebehind page caching in the current
   Linux kernel design */
3631 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3632 {
3633         if (!cifsInode)
3634                 return true;
3635
3636         if (is_inode_writable(cifsInode)) {
3637                 /* This inode is open for write at least once */
3638                 struct cifs_sb_info *cifs_sb;
3639
3640                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3641                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
                        /* since there is no page cache to corrupt on
                           directio we can change the size safely */
3644                         return true;
3645                 }
3646
3647                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3648                         return true;
3649
3650                 return false;
3651         } else
3652                 return true;
3653 }
3654
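/*
 * ->write_begin: return a locked page covering pos. The
 * read-before-write is skipped when the page is already uptodate,
 * when the whole page will be overwritten, when a read oplock lets us
 * simply zero the parts we are not writing (PG_checked marks them as
 * valid), or when the handle is write-only.
 */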
3655 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3656                         loff_t pos, unsigned len, unsigned flags,
3657                         struct page **pagep, void **fsdata)
3658 {
3659         int oncethru = 0;
3660         pgoff_t index = pos >> PAGE_SHIFT;
3661         loff_t offset = pos & (PAGE_SIZE - 1);
3662         loff_t page_start = pos & PAGE_MASK;
3663         loff_t i_size;
3664         struct page *page;
3665         int rc = 0;
3666
3667         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3668
3669 start:
3670         page = grab_cache_page_write_begin(mapping, index, flags);
3671         if (!page) {
3672                 rc = -ENOMEM;
3673                 goto out;
3674         }
3675
3676         if (PageUptodate(page))
3677                 goto out;
3678
3679         /*
3680          * If we write a full page it will be up to date, no need to read from
3681          * the server. If the write is short, we'll end up doing a sync write
3682          * instead.
3683          */
3684         if (len == PAGE_SIZE)
3685                 goto out;
3686
3687         /*
3688          * optimize away the read when we have an oplock, and we're not
3689          * expecting to use any of the data we'd be reading in. That
3690          * is, when the page lies beyond the EOF, or straddles the EOF
3691          * and the write will cover all of the existing data.
3692          */
3693         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3694                 i_size = i_size_read(mapping->host);
3695                 if (page_start >= i_size ||
3696                     (offset == 0 && (pos + len) >= i_size)) {
3697                         zero_user_segments(page, 0, offset,
3698                                            offset + len,
3699                                            PAGE_SIZE);
3700                         /*
3701                          * PageChecked means that the parts of the page
3702                          * to which we're not writing are considered up
3703                          * to date. Once the data is copied to the
3704                          * page, it can be set uptodate.
3705                          */
3706                         SetPageChecked(page);
3707                         goto out;
3708                 }
3709         }
3710
3711         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3712                 /*
3713                  * might as well read a page, it is fast enough. If we get
3714                  * an error, we don't need to return it. cifs_write_end will
3715                  * do a sync write instead since PG_uptodate isn't set.
3716                  */
3717                 cifs_readpage_worker(file, page, &page_start);
3718                 put_page(page);
3719                 oncethru = 1;
3720                 goto start;
3721         } else {
                /* We could try using another file handle if there is one -
                   but how would we lock it to prevent a close of that
                   handle racing with this read? In any case this page will
                   be written out by write_end, so it is fine. */
3726         }
3727 out:
3728         *pagep = page;
3729         return rc;
3730 }
3731
3732 static int cifs_release_page(struct page *page, gfp_t gfp)
3733 {
3734         if (PagePrivate(page))
3735                 return 0;
3736
3737         return cifs_fscache_release_page(page, gfp);
3738 }
3739
3740 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3741                                  unsigned int length)
3742 {
3743         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3744
3745         if (offset == 0 && length == PAGE_SIZE)
3746                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3747 }
3748
3749 static int cifs_launder_page(struct page *page)
3750 {
3751         int rc = 0;
3752         loff_t range_start = page_offset(page);
3753         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
3754         struct writeback_control wbc = {
3755                 .sync_mode = WB_SYNC_ALL,
3756                 .nr_to_write = 0,
3757                 .range_start = range_start,
3758                 .range_end = range_end,
3759         };
3760
3761         cifs_dbg(FYI, "Launder page: %p\n", page);
3762
3763         if (clear_page_dirty_for_io(page))
3764                 rc = cifs_writepage_locked(page, &wbc);
3765
3766         cifs_fscache_invalidate_page(page, page->mapping->host);
3767         return rc;
3768 }
3769
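/*
 * Work handler for a server-initiated oplock break: wait for pending
 * writers, downgrade the cached oplock state, flush the page cache
 * (and invalidate it if read caching was lost), push cached
 * byte-range locks to the server, then acknowledge the break unless
 * it was cancelled in the meantime.
 */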
3770 void cifs_oplock_break(struct work_struct *work)
3771 {
3772         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3773                                                   oplock_break);
3774         struct inode *inode = d_inode(cfile->dentry);
3775         struct cifsInodeInfo *cinode = CIFS_I(inode);
3776         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3777         struct TCP_Server_Info *server = tcon->ses->server;
3778         int rc = 0;
3779
3780         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3781                         TASK_UNINTERRUPTIBLE);
3782
3783         server->ops->downgrade_oplock(server, cinode,
3784                 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3785
3786         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3787                                                 cifs_has_mand_locks(cinode)) {
3788                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3789                          inode);
3790                 cinode->oplock = 0;
3791         }
3792
3793         if (inode && S_ISREG(inode->i_mode)) {
3794                 if (CIFS_CACHE_READ(cinode))
3795                         break_lease(inode, O_RDONLY);
3796                 else
3797                         break_lease(inode, O_WRONLY);
3798                 rc = filemap_fdatawrite(inode->i_mapping);
3799                 if (!CIFS_CACHE_READ(cinode)) {
3800                         rc = filemap_fdatawait(inode->i_mapping);
3801                         mapping_set_error(inode->i_mapping, rc);
3802                         cifs_zap_mapping(inode);
3803                 }
3804                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3805         }
3806
3807         rc = cifs_push_locks(cfile);
3808         if (rc)
3809                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3810
        /*
         * Releasing a stale oplock after a recent reconnect of the SMB
         * session (using a now-incorrect file handle) is not a data
         * integrity issue, but do not bother sending an oplock release
         * if the session to the server is still down, since the server
         * has already released the oplock in that case.
         */
3817         if (!cfile->oplock_break_cancelled) {
3818                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3819                                                              cinode);
3820                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3821         }
3822         cifs_done_oplock_break(cinode);
3823 }
3824
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
3834 static ssize_t
3835 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
3836 {
3837         /*
3838          * FIXME
3839          * Eventually need to support direct IO for non forcedirectio mounts
3840          */
3841         return -EINVAL;
3842 }
3843
3844
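/*
 * Default address space operations, used when the negotiated buffer
 * size is large enough for cifs_readpages; see cifs_addr_ops_smallbuf
 * below for the fallback without ->readpages.
 */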
3845 const struct address_space_operations cifs_addr_ops = {
3846         .readpage = cifs_readpage,
3847         .readpages = cifs_readpages,
3848         .writepage = cifs_writepage,
3849         .writepages = cifs_writepages,
3850         .write_begin = cifs_write_begin,
3851         .write_end = cifs_write_end,
3852         .set_page_dirty = __set_page_dirty_nobuffers,
3853         .releasepage = cifs_release_page,
3854         .direct_IO = cifs_direct_io,
3855         .invalidatepage = cifs_invalidate_page,
3856         .launder_page = cifs_launder_page,
3857 };
3858
3859 /*
3860  * cifs_readpages requires the server to support a buffer large enough to
3861  * contain the header plus one complete page of data.  Otherwise, we need
3862  * to leave cifs_readpages out of the address space operations.
3863  */
3864 const struct address_space_operations cifs_addr_ops_smallbuf = {
3865         .readpage = cifs_readpage,
3866         .writepage = cifs_writepage,
3867         .writepages = cifs_writepages,
3868         .write_begin = cifs_write_begin,
3869         .write_end = cifs_write_end,
3870         .set_page_dirty = __set_page_dirty_nobuffers,
3871         .releasepage = cifs_release_page,
3872         .invalidatepage = cifs_invalidate_page,
3873         .launder_page = cifs_launder_page,
3874 };