4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
/*
 * Map POSIX open(2) access-mode flags to CIFS/NT desired-access bits for an
 * SMB NT-style open request.
 * NOTE(review): extraction artifact — the opening brace and the return
 * statements of the O_RDONLY and O_WRONLY branches are missing from this
 * view (presumably GENERIC_READ and GENERIC_WRITE); confirm against the
 * upstream source before editing.
 */
46 static inline int cifs_convert_flags(unsigned int flags)
48 if ((flags & O_ACCMODE) == O_RDONLY)
50 else if ((flags & O_ACCMODE) == O_WRONLY)
52 else if ((flags & O_ACCMODE) == O_RDWR) {
53 /* GENERIC_ALL is too much permission to request
54 can cause unnecessary access denied on create */
55 /* return GENERIC_ALL; */
56 return (GENERIC_READ | GENERIC_WRITE);
/* Fallback: request the specific rights needed instead of a generic mask. */
59 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Translate POSIX open(2) flags into the SMB_O_* flag set used by the CIFS
 * POSIX-extension open call (CIFSPOSIXCreate).
 * NOTE(review): extraction artifact — several guard lines are missing here
 * (the `if (flags & O_CREAT)` / O_EXCL / O_TRUNC / O_DSYNC / O_DIRECT
 * conditions that precede the corresponding `posix_flags |= ...` lines);
 * only the assignment lines survived. Confirm against upstream.
 */
64 static u32 cifs_posix_convert_flags(unsigned int flags)
68 if ((flags & O_ACCMODE) == O_RDONLY)
69 posix_flags = SMB_O_RDONLY;
70 else if ((flags & O_ACCMODE) == O_WRONLY)
71 posix_flags = SMB_O_WRONLY;
72 else if ((flags & O_ACCMODE) == O_RDWR)
73 posix_flags = SMB_O_RDWR;
76 posix_flags |= SMB_O_CREAT;
78 posix_flags |= SMB_O_EXCL;
80 posix_flags |= SMB_O_TRUNC;
81 /* be safe and imply O_SYNC for O_DSYNC */
83 posix_flags |= SMB_O_SYNC;
84 if (flags & O_DIRECTORY)
85 posix_flags |= SMB_O_DIRECTORY;
86 if (flags & O_NOFOLLOW)
87 posix_flags |= SMB_O_NOFOLLOW;
89 posix_flags |= SMB_O_DIRECT;
/*
 * Map POSIX create/truncate flags to an SMB create disposition
 * (see the mapping table documented in cifs_nt_open below).
 * NOTE(review): extraction artifact — the `return FILE_CREATE;` for the
 * O_CREAT|O_EXCL branch, the `return FILE_OPEN_IF;` for plain O_CREAT, and
 * the trailing default `return FILE_OPEN;` are missing from this view.
 */
94 static inline int cifs_get_disposition(unsigned int flags)
96 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
98 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99 return FILE_OVERWRITE_IF;
100 else if ((flags & O_CREAT) == O_CREAT)
102 else if ((flags & O_TRUNC) == O_TRUNC)
103 return FILE_OVERWRITE;
/*
 * Open a file using the CIFS POSIX protocol extensions (CIFSPOSIXCreate).
 * On success fills *pnetfid/*poplock and, when the server returned usable
 * metadata, instantiates or refreshes *pinode from the returned
 * FILE_UNIX_BASIC_INFO. The caller's umask is applied to @mode locally
 * since the server cannot see it.
 * NOTE(review): extraction artifact — error-path lines (rc declaration,
 * -ENOMEM return, tlink IS_ERR check, the posix_open_ret label/cleanup and
 * final return) are missing from this view.
 */
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109 struct super_block *sb, int mode, unsigned int f_flags,
110 __u32 *poplock, __u16 *pnetfid, int xid)
113 FILE_UNIX_BASIC_INFO *presp_data;
114 __u32 posix_flags = 0;
115 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116 struct cifs_fattr fattr;
117 struct tcon_link *tlink;
118 struct cifs_tcon *tcon;
120 cFYI(1, "posix open %s", full_path);
122 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123 if (presp_data == NULL)
126 tlink = cifs_sb_tlink(cifs_sb);
132 tcon = tlink_tcon(tlink);
/* apply the process umask locally; the server has no notion of it */
133 mode &= ~current_umask();
135 posix_flags = cifs_posix_convert_flags(f_flags);
136 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137 poplock, full_path, cifs_sb->local_nls,
138 cifs_sb->mnt_cifs_flags &
139 CIFS_MOUNT_MAP_SPECIAL_CHR);
140 cifs_put_tlink(tlink);
/* Type == -1 means the server sent no usable metadata with the open */
145 if (presp_data->Type == cpu_to_le32(-1))
146 goto posix_open_ret; /* open ok, caller does qpathinfo */
149 goto posix_open_ret; /* caller does not need info */
151 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
153 /* get new inode and set it up */
154 if (*pinode == NULL) {
155 cifs_fill_uniqueid(sb, &fattr);
156 *pinode = cifs_iget(sb, &fattr);
/* existing inode: refresh its attributes from the server's response */
162 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * Open a file the NT/CIFS way (non-POSIX path): convert the POSIX flags to
 * an NT desired-access mask and create disposition, issue CIFSSMBOpen (or
 * SMBLegacyOpen for pre-NT servers), then refresh the inode's metadata.
 * NOTE(review): extraction artifact — the return-type line, local
 * declarations (rc, desiredAccess, disposition, buf), the kmalloc failure
 * check, the open-failure check, and the cleanup/return tail are missing
 * from this view.
 */
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock,
173 __u16 *pnetfid, int xid)
178 int create_options = CREATE_NOT_DIR;
181 desiredAccess = cifs_convert_flags(f_flags);
183 /*********************************************************************
184 * open flag mapping table:
186 * POSIX Flag CIFS Disposition
187 * ---------- ----------------
188 * O_CREAT FILE_OPEN_IF
189 * O_CREAT | O_EXCL FILE_CREATE
190 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
191 * O_TRUNC FILE_OVERWRITE
192 * none of the above FILE_OPEN
194 * Note that there is not a direct match between disposition
195 * FILE_SUPERSEDE (ie create whether or not file exists although
196 * O_CREAT | O_TRUNC is similar but truncates the existing
197 * file rather than creating a new file as FILE_SUPERSEDE does
198 * (which uses the attributes / metadata passed in on open call)
200 *? O_SYNC is a reasonable match to CIFS writethrough flag
201 *? and the read write flags match reasonably. O_LARGEFILE
202 *? is irrelevant because largefile support is always used
203 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
204 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
205 *********************************************************************/
207 disposition = cifs_get_disposition(f_flags);
209 /* BB pass O_SYNC flag through on file attributes .. BB */
211 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
215 if (backup_cred(cifs_sb))
216 create_options |= CREATE_OPEN_BACKUP_INTENT;
/* NT-capable servers take the full open; older servers get the legacy call */
218 if (tcon->ses->capabilities & CAP_NT_SMBS)
219 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
220 desiredAccess, create_options, pnetfid, poplock, buf,
221 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
222 & CIFS_MOUNT_MAP_SPECIAL_CHR);
224 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
225 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
226 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
227 & CIFS_MOUNT_MAP_SPECIAL_CHR);
/* refresh inode info; unix-extension path vs. the buf returned by open */
233 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
236 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/*
 * Allocate and initialize a cifsFileInfo for a freshly opened handle, link
 * it onto the tcon's openFileList and the inode's openFileList, record the
 * oplock level, and stash it in file->private_data. Starts with a refcount
 * of 1 (dropped by cifsFileInfo_put).
 * NOTE(review): extraction artifact — the NULL-alloc return, the `else` of
 * the FMODE_READ list placement, and the final `return pCifsFile;` are
 * missing from this view.
 */
244 struct cifsFileInfo *
245 cifs_new_fileinfo(__u16 fileHandle, struct file *file,
246 struct tcon_link *tlink, __u32 oplock)
248 struct dentry *dentry = file->f_path.dentry;
249 struct inode *inode = dentry->d_inode;
250 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
251 struct cifsFileInfo *pCifsFile;
253 pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
254 if (pCifsFile == NULL)
257 pCifsFile->count = 1;
258 pCifsFile->netfid = fileHandle;
259 pCifsFile->pid = current->tgid;
260 pCifsFile->uid = current_fsuid();
261 pCifsFile->dentry = dget(dentry);
262 pCifsFile->f_flags = file->f_flags;
263 pCifsFile->invalidHandle = false;
264 pCifsFile->tlink = cifs_get_tlink(tlink);
265 mutex_init(&pCifsFile->fh_mutex);
266 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
267 INIT_LIST_HEAD(&pCifsFile->llist);
269 spin_lock(&cifs_file_list_lock);
270 list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList));
271 /* if readable file instance put first in list*/
272 if (file->f_mode & FMODE_READ)
273 list_add(&pCifsFile->flist, &pCifsInode->openFileList);
275 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
276 spin_unlock(&cifs_file_list_lock);
278 cifs_set_oplock_level(pCifsInode, oplock);
/* brlock caching is only allowed while we hold an exclusive oplock */
279 pCifsInode->can_cache_brlcks = pCifsInode->clientCanCacheAll;
281 file->private_data = pCifsFile;
285 static void cifs_del_lock_waiters(struct cifsLockInfo *lock);
/*
288 * Release a reference on the file private data. This may involve closing
289 * the filehandle out on the server. Must be called without holding
290 * cifs_file_list_lock.
 *
 * On the last put: unlinks the fileinfo from both open-file lists, drops the
 * inode's oplock state, closes the server handle (if still valid), frees all
 * byte-range lock records for this handle, and releases the tlink/dentry.
 * NOTE(review): extraction artifact — the early `return` after the refcount
 * decrement, the xid bracketing around CIFSSMBClose, and the final kfree of
 * cifs_file are missing from this view.
 */
292 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
294 struct inode *inode = cifs_file->dentry->d_inode;
295 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
296 struct cifsInodeInfo *cifsi = CIFS_I(inode);
297 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
298 struct cifsLockInfo *li, *tmp;
300 spin_lock(&cifs_file_list_lock);
301 if (--cifs_file->count > 0) {
302 spin_unlock(&cifs_file_list_lock);
306 /* remove it from the lists */
307 list_del(&cifs_file->flist);
308 list_del(&cifs_file->tlist);
310 if (list_empty(&cifsi->openFileList)) {
311 cFYI(1, "closing last open instance for inode %p",
312 cifs_file->dentry->d_inode);
314 /* in strict cache mode we need invalidate mapping on the last
315 close because it may cause a error when we open this file
316 again and get at least level II oplock */
317 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
318 CIFS_I(inode)->invalid_mapping = true;
320 cifs_set_oplock_level(cifsi, 0);
322 spin_unlock(&cifs_file_list_lock);
/* must not run concurrently with a pending oplock-break work item */
324 cancel_work_sync(&cifs_file->oplock_break);
326 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
330 rc = CIFSSMBClose(xid, tcon, cifs_file->netfid);
334 /* Delete any outstanding lock records. We'll lose them when the file
337 mutex_lock(&cifsi->lock_mutex);
338 list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
339 list_del(&li->llist);
340 cifs_del_lock_waiters(li);
343 mutex_unlock(&cifsi->lock_mutex);
345 cifs_put_tlink(cifs_file->tlink);
346 dput(cifs_file->dentry);
/*
 * VFS ->open for regular files. Tries a POSIX-extension open first when the
 * tcon advertises CAP_UNIX + POSIX path ops and it has not been marked
 * broken; otherwise (or on certain errors) falls back to the NT-style open
 * via cifs_nt_open. On success creates the cifsFileInfo and sets the
 * fscache cookie; for a newly created file on a unix-extension mount it
 * also pushes the mode the create call could not set.
 * NOTE(review): extraction artifact — xid bracketing, oplock/netfid local
 * declarations, several error-path gotos and the out/free tail are missing
 * from this view.
 */
350 int cifs_open(struct inode *inode, struct file *file)
355 struct cifs_sb_info *cifs_sb;
356 struct cifs_tcon *tcon;
357 struct tcon_link *tlink;
358 struct cifsFileInfo *pCifsFile = NULL;
359 char *full_path = NULL;
360 bool posix_open_ok = false;
365 cifs_sb = CIFS_SB(inode->i_sb);
366 tlink = cifs_sb_tlink(cifs_sb);
369 return PTR_ERR(tlink);
371 tcon = tlink_tcon(tlink);
373 full_path = build_path_from_dentry(file->f_path.dentry);
374 if (full_path == NULL) {
379 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
380 inode, file->f_flags, full_path);
382 if (tcon->ses->server->oplocks)
387 if (!tcon->broken_posix_open && tcon->unix_ext &&
388 (tcon->ses->capabilities & CAP_UNIX) &&
389 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
390 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
391 /* can not refresh inode info since size could be stale */
392 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
393 cifs_sb->mnt_file_mode /* ignored */,
394 file->f_flags, &oplock, &netfid, xid);
396 cFYI(1, "posix open succeeded");
397 posix_open_ok = true;
398 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
/* server claims POSIX support but rejects the open: disable it for
   this tcon and fall through to the NT-style open */
399 if (tcon->ses->serverNOS)
400 cERROR(1, "server %s of type %s returned"
401 " unexpected error on SMB posix open"
402 ", disabling posix open support."
403 " Check if server update available.",
404 tcon->ses->serverName,
405 tcon->ses->serverNOS);
406 tcon->broken_posix_open = true;
407 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
408 (rc != -EOPNOTSUPP)) /* path not found or net err */
410 /* else fallthrough to retry open the old way on network i/o
414 if (!posix_open_ok) {
415 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
416 file->f_flags, &oplock, &netfid, xid);
421 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
422 if (pCifsFile == NULL) {
/* could not track the handle locally; close it on the server */
423 CIFSSMBClose(xid, tcon, netfid);
428 cifs_fscache_set_inode_cookie(inode, file);
430 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
431 /* time to set mode which we can not set earlier due to
432 problems creating new read-only files */
433 struct cifs_unix_set_info_args args = {
434 .mode = inode->i_mode,
437 .ctime = NO_CHANGE_64,
438 .atime = NO_CHANGE_64,
439 .mtime = NO_CHANGE_64,
442 CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
449 cifs_put_tlink(tlink);
453 /* Try to reacquire byte range locks that were released when session */
454 /* to server was lost */
/* Currently a stub: lock re-acquisition after reconnect is not implemented
 * (see the BB comment below). NOTE(review): the rc declaration/return are
 * missing from this extracted view. */
455 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
459 /* BB list all locks open on this file and relock */
/*
 * Re-open a file whose server handle was invalidated (e.g. by reconnect).
 * Serialized per-handle via fh_mutex. Tries the POSIX-extension open when
 * available, else the NT open with FILE_OPEN disposition. If @can_flush,
 * writes back dirty pages and refreshes inode metadata before resuming;
 * otherwise skips the flush to avoid deadlocking the writeback path.
 * NOTE(review): extraction artifact — local declarations (rc, oplock,
 * netfid, desiredAccess), xid bracketing, the posix-reopen goto on success,
 * and the reopen_error_exit/final-return tail are missing from this view.
 */
464 static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
469 struct cifs_sb_info *cifs_sb;
470 struct cifs_tcon *tcon;
471 struct cifsInodeInfo *pCifsInode;
473 char *full_path = NULL;
475 int disposition = FILE_OPEN;
476 int create_options = CREATE_NOT_DIR;
480 mutex_lock(&pCifsFile->fh_mutex);
481 if (!pCifsFile->invalidHandle) {
/* someone else already reopened it while we waited for the mutex */
482 mutex_unlock(&pCifsFile->fh_mutex);
488 inode = pCifsFile->dentry->d_inode;
489 cifs_sb = CIFS_SB(inode->i_sb);
490 tcon = tlink_tcon(pCifsFile->tlink);
492 /* can not grab rename sem here because various ops, including
493 those that already have the rename sem can end up causing writepage
494 to get called and if the server was down that means we end up here,
495 and we can never tell if the caller already has the rename_sem */
496 full_path = build_path_from_dentry(pCifsFile->dentry);
497 if (full_path == NULL) {
499 mutex_unlock(&pCifsFile->fh_mutex);
504 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
505 inode, pCifsFile->f_flags, full_path);
507 if (tcon->ses->server->oplocks)
512 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
513 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
514 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
517 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
518 * original open. Must mask them off for a reopen.
520 unsigned int oflags = pCifsFile->f_flags &
521 ~(O_CREAT | O_EXCL | O_TRUNC);
523 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
524 cifs_sb->mnt_file_mode /* ignored */,
525 oflags, &oplock, &netfid, xid);
527 cFYI(1, "posix reopen succeeded");
530 /* fallthrough to retry open the old way on errors, especially
531 in the reconnect path it is important to retry hard */
534 desiredAccess = cifs_convert_flags(pCifsFile->f_flags);
536 if (backup_cred(cifs_sb))
537 create_options |= CREATE_OPEN_BACKUP_INTENT;
539 /* Can not refresh inode by passing in file_info buf to be returned
540 by SMBOpen and then calling get_inode_info with returned buf
541 since file might have write behind data that needs to be flushed
542 and server version of file size can be stale. If we knew for sure
543 that inode was not dirty locally we could do this */
545 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
546 create_options, &netfid, &oplock, NULL,
547 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
548 CIFS_MOUNT_MAP_SPECIAL_CHR);
550 mutex_unlock(&pCifsFile->fh_mutex);
551 cFYI(1, "cifs_open returned 0x%x", rc);
552 cFYI(1, "oplock: %d", oplock);
553 goto reopen_error_exit;
/* reopen succeeded: publish the new handle before dropping fh_mutex */
557 pCifsFile->netfid = netfid;
558 pCifsFile->invalidHandle = false;
559 mutex_unlock(&pCifsFile->fh_mutex);
560 pCifsInode = CIFS_I(inode);
563 rc = filemap_write_and_wait(inode->i_mapping);
564 mapping_set_error(inode->i_mapping, rc);
567 rc = cifs_get_inode_info_unix(&inode,
568 full_path, inode->i_sb, xid);
570 rc = cifs_get_inode_info(&inode,
571 full_path, NULL, inode->i_sb,
573 } /* else we are writing out data to server already
574 and could deadlock if we tried to flush data, and
575 since we do not know if we have data that would
576 invalidate the current end of file on the server
577 we can not go to the server to get the new inod
580 cifs_set_oplock_level(pCifsInode, oplock);
582 cifs_relock_file(pCifsFile);
/*
 * VFS ->release for regular files: drop this open's reference on the
 * cifsFileInfo (which may close the server handle on the last put).
 */
590 int cifs_close(struct inode *inode, struct file *file)
592 if (file->private_data != NULL) {
593 cifsFileInfo_put(file->private_data);
594 file->private_data = NULL;
597 /* return code from the ->release op is always ignored */
/*
 * VFS ->release for directories: close an in-progress server-side search
 * (CIFSFindClose) if the readdir never reached end-of-search, free the
 * network buffer held by the search state, and release the private data.
 * NOTE(review): extraction artifact — rc/xid declarations, the NULL check
 * on private_data, xid bracketing, and the final return are missing from
 * this view.
 */
601 int cifs_closedir(struct inode *inode, struct file *file)
605 struct cifsFileInfo *pCFileStruct = file->private_data;
608 cFYI(1, "Closedir inode = 0x%p", inode);
613 struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink);
615 cFYI(1, "Freeing private data in close dir");
616 spin_lock(&cifs_file_list_lock);
617 if (!pCFileStruct->srch_inf.endOfSearch &&
618 !pCFileStruct->invalidHandle) {
/* mark invalid first so no one else reuses the handle we are closing */
619 pCFileStruct->invalidHandle = true;
620 spin_unlock(&cifs_file_list_lock);
621 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
622 cFYI(1, "Closing uncompleted readdir with rc %d",
624 /* not much we can do if it fails anyway, ignore rc */
627 spin_unlock(&cifs_file_list_lock);
628 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
630 cFYI(1, "closedir free smb buf in srch struct");
631 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
/* release via the matching allocator for small vs. regular SMB buffers */
632 if (pCFileStruct->srch_inf.smallBuf)
633 cifs_small_buf_release(ptmp);
635 cifs_buf_release(ptmp);
637 cifs_put_tlink(pCFileStruct->tlink);
638 kfree(file->private_data);
639 file->private_data = NULL;
641 /* BB can we lock the filestruct while this is going on? */
/*
 * Allocate and initialize a byte-range lock record for the current task.
 * NOTE(review): extraction artifact — the NULL-alloc check, the assignment
 * of @type into the struct, and the final `return lock;` are missing from
 * this view.
 */
646 static struct cifsLockInfo *
647 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
649 struct cifsLockInfo *lock =
650 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
653 lock->offset = offset;
654 lock->length = length;
656 lock->pid = current->tgid;
657 INIT_LIST_HEAD(&lock->blist);
658 init_waitqueue_head(&lock->block_q);
/*
 * Wake every lock request blocked on @lock: unlink each waiter from the
 * blocked list and signal its wait queue.
 */
663 cifs_del_lock_waiters(struct cifsLockInfo *lock)
665 struct cifsLockInfo *li, *tmp;
666 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
667 list_del_init(&li->blist);
668 wake_up(&li->block_q);
/*
 * Scan one open handle's lock list for a range conflicting with
 * [offset, offset+length). Overlapping shared locks conflict unless the
 * request is also shared, or it comes from the same fid and owner (tgid).
 * NOTE(review): extraction artifact — the return-type line, the `continue`
 * for non-overlapping ranges, the conflict store into *conf_lock, and the
 * returns are missing from this view.
 */
673 cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
674 __u64 length, __u8 type, __u16 netfid,
675 struct cifsLockInfo **conf_lock)
677 struct cifsLockInfo *li;
679 list_for_each_entry(li, &cfile->llist, llist) {
680 if (offset + length <= li->offset ||
681 offset >= li->offset + li->length)
683 else if ((type & LOCKING_ANDX_SHARED_LOCK) &&
684 ((netfid == cfile->netfid && current->tgid == li->pid)
685 || type == li->type))
/*
 * Check every open handle on the inode for a conflicting byte-range lock
 * by delegating to cifs_find_fid_lock_conflict. Walks openFileList under
 * cifs_file_list_lock.
 * NOTE(review): extraction artifact — the return-type line, the early
 * break on a found conflict, and the final return of rc are missing from
 * this view.
 */
696 cifs_find_lock_conflict(struct cifsInodeInfo *cinode, __u64 offset,
697 __u64 length, __u8 type, __u16 netfid,
698 struct cifsLockInfo **conf_lock)
701 struct cifsFileInfo *fid, *tmp;
703 spin_lock(&cifs_file_list_lock);
704 list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) {
705 rc = cifs_find_fid_lock_conflict(fid, offset, length, type,
710 spin_unlock(&cifs_file_list_lock);
/*
716 * Check if there is another lock that prevents us to set the lock (mandatory
717 * style). If such a lock exists, update the flock structure with its
718 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
719 * or leave it the same if we can't. Returns 0 if we don't need to request to
720 * the server or 1 otherwise.
 *
 * NOTE(review): extraction artifact — the return-type line, the rc/exist
 * declarations, the `if (exist)` guard before the flock fill-in, and the
 * final return are missing from this view.
 */
723 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
724 __u8 type, struct file_lock *flock)
727 struct cifsLockInfo *conf_lock;
728 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
731 mutex_lock(&cinode->lock_mutex);
733 exist = cifs_find_lock_conflict(cinode, offset, length, type,
734 cfile->netfid, &conf_lock);
/* a conflicting lock exists: report its range/owner/type to the caller */
736 flock->fl_start = conf_lock->offset;
737 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
738 flock->fl_pid = conf_lock->pid;
739 if (conf_lock->type & LOCKING_ANDX_SHARED_LOCK)
740 flock->fl_type = F_RDLCK;
742 flock->fl_type = F_WRLCK;
743 } else if (!cinode->can_cache_brlcks)
746 flock->fl_type = F_UNLCK;
748 mutex_unlock(&cinode->lock_mutex);
/*
 * Append a lock record to the handle's lock list under the inode's
 * lock_mutex (no conflict checking — see cifs_lock_add_if for that).
 */
753 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
755 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
756 mutex_lock(&cinode->lock_mutex);
757 list_add_tail(&lock->llist, &cfile->llist);
758 mutex_unlock(&cinode->lock_mutex);
/*
762 * Set the byte-range lock (mandatory style). Returns:
763 * 1) 0, if we set the lock and don't need to request to the server;
764 * 2) 1, if no locks prevent us but we need to request to the server;
765 * 3) -EACCESS, if there is a lock that prevents us and wait is false.
 *
 * If a conflict exists and waiting is allowed, the request blocks on the
 * conflicting lock's blist/block_q until woken, then retries.
 * NOTE(review): extraction artifact — the return-type line, the wait
 * parameter, rc/exist declarations, the retry loop structure, the
 * -EACCES / return-1 paths, and the final return are missing from this
 * view; the wait_event condition checks that blist is empty (self-linked).
 */
768 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
771 struct cifsLockInfo *conf_lock;
772 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
778 mutex_lock(&cinode->lock_mutex);
780 exist = cifs_find_lock_conflict(cinode, lock->offset, lock->length,
781 lock->type, cfile->netfid, &conf_lock);
782 if (!exist && cinode->can_cache_brlcks) {
783 list_add_tail(&lock->llist, &cfile->llist);
784 mutex_unlock(&cinode->lock_mutex);
/* conflict found and wait allowed: queue behind the conflicting lock */
793 list_add_tail(&lock->blist, &conf_lock->blist);
794 mutex_unlock(&cinode->lock_mutex);
795 rc = wait_event_interruptible(lock->block_q,
796 (lock->blist.prev == &lock->blist) &&
797 (lock->blist.next == &lock->blist));
800 mutex_lock(&cinode->lock_mutex);
801 list_del_init(&lock->blist);
804 mutex_unlock(&cinode->lock_mutex);
/*
809 * Check if there is another lock that prevents us to set the lock (posix
810 * style). If such a lock exists, update the flock structure with its
811 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
812 * or leave it the same if we can't. Returns 0 if we don't need to request to
813 * the server or 1 otherwise.
 *
 * NOTE(review): extraction artifact — the return-type line, the rc
 * declaration, the early return for non-FL_POSIX locks, and the final
 * return are missing from this view.
 */
816 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
819 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
/* posix_test_lock overwrites fl_type; remember it to restore below */
820 unsigned char saved_type = flock->fl_type;
822 if ((flock->fl_flags & FL_POSIX) == 0)
825 mutex_lock(&cinode->lock_mutex);
826 posix_test_lock(file, flock);
828 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
829 flock->fl_type = saved_type;
833 mutex_unlock(&cinode->lock_mutex);
/*
838 * Set the byte-range lock (posix style). Returns:
839 * 1) 0, if we set the lock and don't need to request to the server;
840 * 2) 1, if we need to request to the server;
841 * 3) <0, if the error occurs while setting the lock.
 *
 * NOTE(review): extraction artifact — the return-type line, the rc
 * declaration, the early return for non-FL_POSIX locks, parts of the
 * FILE_LOCK_DEFERRED retry loop, and the final return are missing from
 * this view.
 */
844 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
846 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
849 if ((flock->fl_flags & FL_POSIX) == 0)
853 mutex_lock(&cinode->lock_mutex);
854 if (!cinode->can_cache_brlcks) {
/* cannot cache locally: caller must send the lock to the server */
855 mutex_unlock(&cinode->lock_mutex);
859 rc = posix_lock_file(file, flock, NULL);
860 mutex_unlock(&cinode->lock_mutex);
861 if (rc == FILE_LOCK_DEFERRED) {
862 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
865 locks_delete_block(flock);
/*
 * Flush all locally cached mandatory byte-range locks for @cfile to the
 * server, batching up to max_num LOCKING_ANDX_RANGE entries per cifs_lockv
 * call (sized to fit the server's max buffer). Two passes: exclusive
 * locks first, then shared (per the types[] table). Clears
 * can_cache_brlcks when done so future locks go straight to the server.
 * NOTE(review): extraction artifact — xid bracketing, the loop index `i`
 * and `num`/`cur` resets, the -ENOMEM return for the buf allocation,
 * rc accumulation of stored_rc, kfree(buf), and the final return are
 * missing from this view.
 */
871 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
873 int xid, rc = 0, stored_rc;
874 struct cifsLockInfo *li, *tmp;
875 struct cifs_tcon *tcon;
876 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
877 unsigned int num, max_num;
878 LOCKING_ANDX_RANGE *buf, *cur;
879 int types[] = {LOCKING_ANDX_LARGE_FILES,
880 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
884 tcon = tlink_tcon(cfile->tlink);
886 mutex_lock(&cinode->lock_mutex);
887 if (!cinode->can_cache_brlcks) {
/* nothing cached locally; nothing to push */
888 mutex_unlock(&cinode->lock_mutex);
893 max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) /
894 sizeof(LOCKING_ANDX_RANGE);
895 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
897 mutex_unlock(&cinode->lock_mutex);
902 for (i = 0; i < 2; i++) {
905 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
906 if (li->type != types[i])
908 cur->Pid = cpu_to_le16(li->pid);
909 cur->LengthLow = cpu_to_le32((u32)li->length);
910 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
911 cur->OffsetLow = cpu_to_le32((u32)li->offset);
912 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
/* batch is full: flush it to the server and restart the buffer */
913 if (++num == max_num) {
914 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
915 li->type, 0, num, buf);
/* flush any partially filled final batch for this lock type */
925 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
926 types[i], 0, num, buf);
932 cinode->can_cache_brlcks = false;
933 mutex_unlock(&cinode->lock_mutex);
940 /* copied from fs/locks.c with a name change */
/* Iterate over the inode's file_lock chain via pointer-to-pointer, so the
 * iterator can also be used for in-place removal. */
941 #define cifs_for_each_lock(inode, lockp) \
942 for (lockp = &inode->i_flock; *lockp != NULL; \
943 lockp = &(*lockp)->fl_next)
/* One POSIX lock queued for transmission to the server; fields beyond
 * llist (pid, netfid, offset, length, type — per the usage in
 * cifs_push_posix_locks) are outside this extracted view. */
945 struct lock_to_push {
946 struct list_head llist;
/*
 * Push all locally cached POSIX byte-range locks on the file's inode to
 * the server. Two walks over the inode's lock chain: the first only
 * counts FL_POSIX locks so the lock_to_push structures can be allocated
 * up front (allocation must not happen under the i_lock-protected walk),
 * the second fills them in. Each queued lock is then sent with
 * CIFSSMBPosixLock and freed. Clears can_cache_brlcks when done.
 * NOTE(review): extraction artifact — the return-type line, xid
 * bracketing, lock_lock/lock_unlock around the walks, the kmalloc failure
 * path, the `continue`/`break` bodies, type selection (CIFS_RDLCK vs
 * CIFS_WRLCK) and the error-cleanup label are missing from this view.
 */
955 cifs_push_posix_locks(struct cifsFileInfo *cfile)
957 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
958 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
959 struct file_lock *flock, **before;
960 unsigned int count = 0, i = 0;
961 int rc = 0, xid, type;
962 struct list_head locks_to_send, *el;
963 struct lock_to_push *lck, *tmp;
968 mutex_lock(&cinode->lock_mutex);
969 if (!cinode->can_cache_brlcks) {
970 mutex_unlock(&cinode->lock_mutex);
/* first pass: count the POSIX locks we will have to send */
976 cifs_for_each_lock(cfile->dentry->d_inode, before) {
977 if ((*before)->fl_flags & FL_POSIX)
982 INIT_LIST_HEAD(&locks_to_send);
985 * Allocating count locks is enough because no FL_POSIX locks can be
986 * added to the list while we are holding cinode->lock_mutex that
987 * protects locking operations of this inode.
989 for (; i < count; i++) {
990 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
995 list_add_tail(&lck->llist, &locks_to_send);
998 el = locks_to_send.next;
/* second pass: copy each POSIX lock into a preallocated entry */
1000 cifs_for_each_lock(cfile->dentry->d_inode, before) {
1002 if ((flock->fl_flags & FL_POSIX) == 0)
1004 if (el == &locks_to_send) {
1006 * The list ended. We don't have enough allocated
1007 * structures - something is really wrong.
1009 cERROR(1, "Can't push all brlocks!");
1012 length = 1 + flock->fl_end - flock->fl_start;
1013 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1017 lck = list_entry(el, struct lock_to_push, llist);
1018 lck->pid = flock->fl_pid;
1019 lck->netfid = cfile->netfid;
1020 lck->length = length;
1022 lck->offset = flock->fl_start;
/* transmit each queued lock and release its bookkeeping entry */
1027 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1028 struct file_lock tmp_lock;
1031 tmp_lock.fl_start = lck->offset;
1032 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1033 0, lck->length, &tmp_lock,
1037 list_del(&lck->llist);
1042 cinode->can_cache_brlcks = false;
1043 mutex_unlock(&cinode->lock_mutex);
/* error path: free any entries that were never sent */
1048 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1049 list_del(&lck->llist);
/*
 * Dispatch cached-lock pushing to the POSIX or mandatory implementation,
 * depending on whether the tcon supports the CIFS unix fcntl extension and
 * posix brlocks were not disabled at mount time (CIFS_MOUNT_NOPOSIXBRL).
 * NOTE(review): the return-type line is missing from this extracted view.
 */
1056 cifs_push_locks(struct cifsFileInfo *cfile)
1058 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1059 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1061 if ((tcon->ses->capabilities & CAP_UNIX) &&
1062 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1063 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1064 return cifs_push_posix_locks(cfile);
1066 return cifs_push_mandatory_locks(cfile);
/*
 * Decode a struct file_lock into the out-parameters used by cifs_lock:
 * the SMB lock *type bits, and *lock / *unlock / wait_flag booleans.
 * Unsupported flag classes (FL_ACCESS, FL_LEASE) are logged only.
 * NOTE(review): extraction artifact — the return-type line, the wait_flag
 * parameter name, and most of the *lock/*unlock assignments inside the
 * fl_type branches are missing from this view.
 */
1070 cifs_read_flock(struct file_lock *flock, __u8 *type, int *lock, int *unlock,
1073 if (flock->fl_flags & FL_POSIX)
1075 if (flock->fl_flags & FL_FLOCK)
1077 if (flock->fl_flags & FL_SLEEP) {
1078 cFYI(1, "Blocking lock");
1081 if (flock->fl_flags & FL_ACCESS)
1082 cFYI(1, "Process suspended by mandatory locking - "
1083 "not implemented yet")
1084 if (flock->fl_flags & FL_LEASE)
1085 cFYI(1, "Lease on file - not implemented yet");
1086 if (flock->fl_flags &
1087 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
1088 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
/* all CIFS brange locks from this client use 64-bit (large file) ranges */
1090 *type = LOCKING_ANDX_LARGE_FILES;
1091 if (flock->fl_type == F_WRLCK) {
1092 cFYI(1, "F_WRLCK ");
1094 } else if (flock->fl_type == F_UNLCK) {
1097 /* Check if unlock includes more than one lock range */
1098 } else if (flock->fl_type == F_RDLCK) {
1100 *type |= LOCKING_ANDX_SHARED_LOCK;
1102 } else if (flock->fl_type == F_EXLCK) {
1105 } else if (flock->fl_type == F_SHLCK) {
1107 *type |= LOCKING_ANDX_SHARED_LOCK;
1110 cFYI(1, "Unknown type of lock");
/*
 * Handle F_GETLK: test whether the given range could be locked. For POSIX
 * locks, first checks the local cache then asks the server via
 * CIFSSMBPosixLock(get). For mandatory locks, first checks the cached
 * lists (cifs_lock_test), then probes the server with a trial
 * lock+unlock — first exclusive, then shared — and sets flock->fl_type to
 * F_UNLCK / F_RDLCK / F_WRLCK accordingly.
 * NOTE(review): extraction artifact — the return-type line, rc
 * declaration, the rc checks between the probe lock and its compensating
 * unlock, and the final return are missing from this view.
 */
1114 cifs_getlk(struct file *file, struct file_lock *flock, __u8 type,
1115 bool wait_flag, bool posix_lck, int xid)
1118 __u64 length = 1 + flock->fl_end - flock->fl_start;
1119 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1120 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1121 __u16 netfid = cfile->netfid;
1124 int posix_lock_type;
1126 rc = cifs_posix_lock_test(file, flock);
1130 if (type & LOCKING_ANDX_SHARED_LOCK)
1131 posix_lock_type = CIFS_RDLCK;
1133 posix_lock_type = CIFS_WRLCK;
1134 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1135 1 /* get */, length, flock,
1136 posix_lock_type, wait_flag);
1140 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1144 /* BB we could chain these into one lock request BB */
/* trial exclusive lock: success means the range is currently unlocked */
1145 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
1146 flock->fl_start, 0, 1, type, 0, 0);
1148 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid,
1149 length, flock->fl_start, 1, 0,
1151 flock->fl_type = F_UNLCK;
1153 cERROR(1, "Error unlocking previously locked "
1154 "range %d during test of lock", rc);
/* exclusive probe failed on a shared request: report a write lock */
1158 if (type & LOCKING_ANDX_SHARED_LOCK) {
1159 flock->fl_type = F_WRLCK;
/* retry the probe as a shared lock to distinguish read vs write holders */
1163 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
1164 flock->fl_start, 0, 1,
1165 type | LOCKING_ANDX_SHARED_LOCK, 0, 0);
1167 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid,
1168 length, flock->fl_start, 1, 0,
1169 type | LOCKING_ANDX_SHARED_LOCK,
1171 flock->fl_type = F_RDLCK;
1173 cERROR(1, "Error unlocking previously locked "
1174 "range %d during test of lock", rc);
1176 flock->fl_type = F_WRLCK;
/* Move every entry from @source onto @dest (used to restore lock records
 * after a failed unlock-range server request). */
1182 cifs_move_llist(struct list_head *source, struct list_head *dest)
1184 struct list_head *li, *tmp;
1185 list_for_each_safe(li, tmp, source)
1186 list_move(li, dest);
/* Free every cifsLockInfo on @llist, waking any requests blocked on each
 * lock first. NOTE(review): the kfree(li) inside the loop is missing from
 * this extracted view. */
1190 cifs_free_llist(struct list_head *llist)
1192 struct cifsLockInfo *li, *tmp;
1193 list_for_each_entry_safe(li, tmp, llist, llist) {
1194 cifs_del_lock_waiters(li);
1195 list_del(&li->llist);
/*
 * Remove every cached lock of the current owner (tgid) fully contained in
 * the flock range. If locks are no longer cacheable the matching ranges
 * are also batched into LOCKING_ANDX_RANGE arrays and sent to the server
 * as unlock requests (two passes: exclusive then shared types). Entries
 * are parked on tmp_llist during a batch so they can be restored to the
 * file's list if the server rejects the unlock.
 * NOTE(review): extraction artifact — the return-type line, the loop
 * index `i`, `cur`/`num` resets, `continue` statements, the cifs_lockv
 * argument list inside the batch-full branch, the -ENOMEM return for buf,
 * kfree(li)/kfree(buf), and the final return are missing from this view.
 */
1201 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1203 int rc = 0, stored_rc;
1204 int types[] = {LOCKING_ANDX_LARGE_FILES,
1205 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1207 unsigned int max_num, num;
1208 LOCKING_ANDX_RANGE *buf, *cur;
1209 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1210 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1211 struct cifsLockInfo *li, *tmp;
1212 __u64 length = 1 + flock->fl_end - flock->fl_start;
1213 struct list_head tmp_llist;
1215 INIT_LIST_HEAD(&tmp_llist);
/* size batches so one request still fits in the server's max buffer */
1217 max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) /
1218 sizeof(LOCKING_ANDX_RANGE);
1219 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1223 mutex_lock(&cinode->lock_mutex);
1224 for (i = 0; i < 2; i++) {
1227 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
/* skip locks not fully contained in the unlock range */
1228 if (flock->fl_start > li->offset ||
1229 (flock->fl_start + length) <
1230 (li->offset + li->length))
1232 if (current->tgid != li->pid)
1234 if (types[i] != li->type)
1236 if (!cinode->can_cache_brlcks) {
1237 cur->Pid = cpu_to_le16(li->pid);
1238 cur->LengthLow = cpu_to_le32((u32)li->length);
1240 cpu_to_le32((u32)(li->length>>32));
1241 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1243 cpu_to_le32((u32)(li->offset>>32));
1245 * We need to save a lock here to let us add
1246 * it again to the file's list if the unlock
1247 * range request fails on the server.
1249 list_move(&li->llist, &tmp_llist);
1250 if (++num == max_num) {
1251 stored_rc = cifs_lockv(xid, tcon,
1257 * We failed on the unlock range
1258 * request - add all locks from
1259 * the tmp list to the head of
1262 cifs_move_llist(&tmp_llist,
1267 * The unlock range request
1268 * succeed - free the tmp list.
1270 cifs_free_llist(&tmp_llist);
1277 * We can cache brlock requests - simply remove
1278 * a lock from the file's list.
1280 list_del(&li->llist);
1281 cifs_del_lock_waiters(li);
/* flush any partially filled final batch for this lock type */
1286 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
1287 types[i], num, 0, buf);
1289 cifs_move_llist(&tmp_llist, &cfile->llist);
1292 cifs_free_llist(&tmp_llist);
1296 mutex_unlock(&cinode->lock_mutex);
/*
 * Handle F_SETLK/F_SETLKW: set or clear a byte-range lock. POSIX path
 * tries the local cache (cifs_posix_lock_set) then sends
 * CIFSSMBPosixLock(set). Mandatory path: for @lock, allocate a record,
 * try to cache or queue it (cifs_lock_add_if), send CIFSSMBLock to the
 * server if required, then record it locally; for @unlock, delegate to
 * cifs_unlock_range. On success mirrors the result into the local VFS
 * lock table for FL_POSIX requests.
 * NOTE(review): extraction artifact — the return-type line, rc
 * declaration, several rc checks/gotos between steps, the kfree on
 * failure, and the final return are missing from this view.
 */
1302 cifs_setlk(struct file *file, struct file_lock *flock, __u8 type,
1303 bool wait_flag, bool posix_lck, int lock, int unlock, int xid)
1306 __u64 length = 1 + flock->fl_end - flock->fl_start;
1307 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1308 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1309 __u16 netfid = cfile->netfid;
1312 int posix_lock_type;
1314 rc = cifs_posix_lock_set(file, flock);
1318 if (type & LOCKING_ANDX_SHARED_LOCK)
1319 posix_lock_type = CIFS_RDLCK;
1321 posix_lock_type = CIFS_WRLCK;
1324 posix_lock_type = CIFS_UNLCK;
1326 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1327 0 /* set */, length, flock,
1328 posix_lock_type, wait_flag);
1333 struct cifsLockInfo *lock;
1335 lock = cifs_lock_init(flock->fl_start, length, type);
1339 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1345 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
1346 flock->fl_start, 0, 1, type, wait_flag, 0);
/* server accepted the lock; record it in the handle's cached list */
1352 cifs_lock_add(cfile, lock);
1354 rc = cifs_unlock_range(cfile, flock, xid);
/* keep the VFS's view of POSIX locks in sync with the server's */
1357 if (flock->fl_flags & FL_POSIX)
1358 posix_lock_file_wait(file, flock);
/*
 * VFS ->lock entry point.  Decodes the fcntl request with
 * cifs_read_flock, decides whether POSIX (fcntl) lock semantics are
 * usable (CAP_UNIX + FCNTL capability and not mounted nobrl/posix-brl
 * disabled), then dispatches to cifs_getlk or cifs_setlk.
 * NOTE(review): interior lines are elided in this excerpt.
 */
1362 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1365 int lock = 0, unlock = 0;
1366 bool wait_flag = false;
1367 bool posix_lck = false;
1368 struct cifs_sb_info *cifs_sb;
1369 struct cifs_tcon *tcon;
1370 struct cifsInodeInfo *cinode;
1371 struct cifsFileInfo *cfile;
1378 cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1379 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1380 flock->fl_start, flock->fl_end);
/* translate the VFS flock into type/lock/unlock/wait flags */
1382 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag);
1384 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1385 cfile = (struct cifsFileInfo *)file->private_data;
1386 tcon = tlink_tcon(cfile->tlink);
1387 netfid = cfile->netfid;
1388 cinode = CIFS_I(file->f_path.dentry->d_inode);
/* POSIX byte-range locks only if the server advertises UNIX fcntl
   capability and the mount has not disabled them */
1390 if ((tcon->ses->capabilities & CAP_UNIX) &&
1391 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1392 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1395 * BB add code here to normalize offset and length to account for
1396 * negative length which we can not accept over the wire.
1398 if (IS_GETLK(cmd)) {
1399 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1404 if (!lock && !unlock) {
1406 * if no lock or unlock then nothing to do since we do not
1413 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
/* Advance the cached server end-of-file if this write extended it. */
1420 * update the file size (if needed) after a write. Should be called with
1421 * the inode->i_lock held
1424 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1425 unsigned int bytes_written)
1427 loff_t end_of_write = offset + bytes_written;
/* only ever grows server_eof; shrinking is handled elsewhere */
1429 if (end_of_write > cifsi->server_eof)
1430 cifsi->server_eof = end_of_write;
/*
 * Synchronous write helper: loops sending at most wsize bytes per
 * CIFSSMBWrite2 call until write_size is consumed, reopening an
 * invalidated handle and retrying on -EAGAIN.  On success advances
 * *poffset, updates cached server EOF and (if extended) i_size, and
 * returns the number of bytes written.  NOTE(review): interior lines
 * are elided in this excerpt.
 */
1433 static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
1434 const char *write_data, size_t write_size,
1438 unsigned int bytes_written = 0;
1439 unsigned int total_written;
1440 struct cifs_sb_info *cifs_sb;
1441 struct cifs_tcon *pTcon;
1443 struct dentry *dentry = open_file->dentry;
1444 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1445 struct cifs_io_parms io_parms;
1447 cifs_sb = CIFS_SB(dentry->d_sb);
1449 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1450 *poffset, dentry->d_name.name);
1452 pTcon = tlink_tcon(open_file->tlink);
1456 for (total_written = 0; write_size > total_written;
1457 total_written += bytes_written) {
/* retry loop for transient -EAGAIN (e.g. reconnect) */
1459 while (rc == -EAGAIN) {
1463 if (open_file->invalidHandle) {
1464 /* we could deadlock if we called
1465 filemap_fdatawait from here so tell
1466 reopen_file not to flush data to
1468 rc = cifs_reopen_file(open_file, false);
/* clamp each wire write to the negotiated wsize */
1473 len = min((size_t)cifs_sb->wsize,
1474 write_size - total_written);
1475 /* iov[0] is reserved for smb header */
1476 iov[1].iov_base = (char *)write_data + total_written;
1477 iov[1].iov_len = len;
1478 io_parms.netfid = open_file->netfid;
1480 io_parms.tcon = pTcon;
1481 io_parms.offset = *poffset;
1482 io_parms.length = len;
1483 rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
1486 if (rc || (bytes_written == 0)) {
/* i_lock protects both server_eof and the offset update below */
1494 spin_lock(&dentry->d_inode->i_lock);
1495 cifs_update_eof(cifsi, *poffset, bytes_written);
1496 spin_unlock(&dentry->d_inode->i_lock);
1497 *poffset += bytes_written;
1501 cifs_stats_bytes_written(pTcon, total_written);
1503 if (total_written > 0) {
1504 spin_lock(&dentry->d_inode->i_lock);
1505 if (*poffset > dentry->d_inode->i_size)
1506 i_size_write(dentry->d_inode, *poffset);
1507 spin_unlock(&dentry->d_inode->i_lock);
1509 mark_inode_dirty_sync(dentry->d_inode);
1511 return total_written;
/*
 * Find an open file handle on this inode that is usable for reading.
 * Takes a reference (cifsFileInfo_get) on the returned handle so it
 * cannot be closed under the caller; caller must cifsFileInfo_put it.
 * Filters by fsuid only on multiuser mounts.
 */
1514 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1517 struct cifsFileInfo *open_file = NULL;
1518 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1520 /* only filter by fsuid on multiuser mounts */
1521 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1524 spin_lock(&cifs_file_list_lock);
1525 /* we could simply get the first_list_entry since write-only entries
1526 are always at the end of the list but since the first entry might
1527 have a close pending, we go through the whole list */
1528 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1529 if (fsuid_only && open_file->uid != current_fsuid())
1531 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1532 if (!open_file->invalidHandle) {
1533 /* found a good file */
1534 /* lock it so it will not be closed on us */
1535 cifsFileInfo_get(open_file);
1536 spin_unlock(&cifs_file_list_lock);
1538 } /* else might as well continue, and look for
1539 another, or simply have the caller reopen it
1540 again rather than trying to fix this handle */
1541 } else /* write only file */
1542 break; /* write only files are last so must be done */
1544 spin_unlock(&cifs_file_list_lock);
/*
 * Find (and reference) an open handle on this inode usable for writing.
 * First pass prefers a handle owned by the current tgid; if none is
 * found, retries accepting any pid (any_available).  Will attempt to
 * reopen an invalidated handle — dropping the list lock to do so — and
 * moves on to the next handle if the reopen fails.  Caller must
 * cifsFileInfo_put the result.  NOTE(review): interior lines are
 * elided in this excerpt.
 */
1548 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1551 struct cifsFileInfo *open_file;
1552 struct cifs_sb_info *cifs_sb;
1553 bool any_available = false;
1556 /* Having a null inode here (because mapping->host was set to zero by
1557 the VFS or MM) should not happen but we had reports of on oops (due to
1558 it being zero) during stress testcases so we need to check for it */
1560 if (cifs_inode == NULL) {
1561 cERROR(1, "Null inode passed to cifs_writeable_file");
1566 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1568 /* only filter by fsuid on multiuser mounts */
1569 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1572 spin_lock(&cifs_file_list_lock);
1574 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
/* first pass: only consider handles opened by this thread group */
1575 if (!any_available && open_file->pid != current->tgid)
1577 if (fsuid_only && open_file->uid != current_fsuid())
1579 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1580 cifsFileInfo_get(open_file);
1582 if (!open_file->invalidHandle) {
1583 /* found a good writable file */
1584 spin_unlock(&cifs_file_list_lock);
1588 spin_unlock(&cifs_file_list_lock);
1590 /* Had to unlock since following call can block */
1591 rc = cifs_reopen_file(open_file, false);
1595 /* if it fails, try another handle if possible */
1596 cFYI(1, "wp failed on reopen file");
1597 cifsFileInfo_put(open_file);
1599 spin_lock(&cifs_file_list_lock);
1601 /* else we simply continue to the next entry. Thus
1602 we do not loop on reopen errors. If we
1603 can not reopen the file, for example if we
1604 reconnected to a server with another client
1605 racing to delete or lock the file we would not
1606 make progress if we restarted before the beginning
1607 of the loop here. */
1610 /* couldn't find useable FH with same pid, try any available */
1611 if (!any_available) {
1612 any_available = true;
1613 goto refind_writable;
1615 spin_unlock(&cifs_file_list_lock);
/*
 * Write back the byte range [from, to) of a single page.  Validates the
 * range against truncate races and the current i_size (never extends
 * the file here), finds a writable handle, and hands the kmapped page
 * data to cifs_write.  NOTE(review): interior lines (kunmap/return
 * paths) are elided in this excerpt.
 */
1619 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1621 struct address_space *mapping = page->mapping;
1622 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1625 int bytes_written = 0;
1626 struct inode *inode;
1627 struct cifsFileInfo *open_file;
1629 if (!mapping || !mapping->host)
1632 inode = page->mapping->host;
1634 offset += (loff_t)from;
1635 write_data = kmap(page);
/* sanity-check the requested sub-page range */
1638 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1643 /* racing with truncate? */
1644 if (offset > mapping->host->i_size) {
1646 return 0; /* don't care */
1649 /* check to make sure that we are not extending the file */
1650 if (mapping->host->i_size - offset < (loff_t)to)
1651 to = (unsigned)(mapping->host->i_size - offset);
1653 open_file = find_writable_file(CIFS_I(mapping->host), false);
1655 bytes_written = cifs_write(open_file, open_file->pid,
1656 write_data, to - from, &offset);
1657 cifsFileInfo_put(open_file);
1658 /* Does mm or vfs already set times? */
1659 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1660 if ((bytes_written > 0) && (offset))
1662 else if (bytes_written < 0)
1665 cFYI(1, "No writeable filehandles for inode");
/*
 * Build the kvec array for an async writeback request: one kmapped
 * entry per dirty page, with the last entry clipped to i_size so we do
 * not send bytes past EOF.  iov[0] stays reserved for the SMB header.
 */
1674 * Marshal up the iov array, reserving the first one for the header. Also,
1678 cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
1681 struct inode *inode = wdata->cfile->dentry->d_inode;
1682 loff_t size = i_size_read(inode);
1684 /* marshal up the pages into iov array */
1686 for (i = 0; i < wdata->nr_pages; i++) {
/* clip to i_size so a partial final page sends only valid bytes */
1687 iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]),
1688 (loff_t)PAGE_CACHE_SIZE);
1689 iov[i + 1].iov_base = kmap(wdata->pages[i]);
1690 wdata->bytes += iov[i + 1].iov_len;
/*
 * address_space ->writepages implementation.  Gathers runs of
 * consecutive dirty pages (up to wsize worth) into a cifs_writedata and
 * submits them with cifs_async_writev; falls back to per-page
 * generic_writepages when wsize < page size.  NOTE(review): this is an
 * intricate loop and the excerpt is missing interior lines — the
 * visible statements do not form the complete control flow.
 */
1694 static int cifs_writepages(struct address_space *mapping,
1695 struct writeback_control *wbc)
1697 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1698 bool done = false, scanned = false, range_whole = false;
1700 struct cifs_writedata *wdata;
1705 * If wsize is smaller than the page cache size, default to writing
1706 * one page at a time via cifs_writepage
1708 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1709 return generic_writepages(mapping, wbc);
1711 if (wbc->range_cyclic) {
1712 index = mapping->writeback_index; /* Start from prev offset */
1715 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1716 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1717 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1722 while (!done && index <= end) {
1723 unsigned int i, nr_pages, found_pages;
1724 pgoff_t next = 0, tofind;
1725 struct page **pages;
/* -1: reserve one kvec slot for the SMB header */
1727 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1730 wdata = cifs_writedata_alloc((unsigned int)tofind,
1731 cifs_writev_complete);
1738 * find_get_pages_tag seems to return a max of 256 on each
1739 * iteration, so we must call it several times in order to
1740 * fill the array or the wsize is effectively limited to
1741 * 256 * PAGE_CACHE_SIZE.
1744 pages = wdata->pages;
1746 nr_pages = find_get_pages_tag(mapping, &index,
1747 PAGECACHE_TAG_DIRTY,
1749 found_pages += nr_pages;
1752 } while (nr_pages && tofind && index <= end);
1754 if (found_pages == 0) {
1755 kref_put(&wdata->refcount, cifs_writedata_release);
/* winnow the found pages down to a locked, consecutive, dirty run */
1760 for (i = 0; i < found_pages; i++) {
1761 page = wdata->pages[i];
1763 * At this point we hold neither mapping->tree_lock nor
1764 * lock on the page itself: the page may be truncated or
1765 * invalidated (changing page->mapping to NULL), or even
1766 * swizzled back from swapper_space to tmpfs file
1772 else if (!trylock_page(page))
1775 if (unlikely(page->mapping != mapping)) {
1780 if (!wbc->range_cyclic && page->index > end) {
1786 if (next && (page->index != next)) {
1787 /* Not next consecutive page */
1792 if (wbc->sync_mode != WB_SYNC_NONE)
1793 wait_on_page_writeback(page);
1795 if (PageWriteback(page) ||
1796 !clear_page_dirty_for_io(page)) {
1802 * This actually clears the dirty bit in the radix tree.
1803 * See cifs_writepage() for more commentary.
1805 set_page_writeback(page);
1807 if (page_offset(page) >= mapping->host->i_size) {
1810 end_page_writeback(page);
1814 wdata->pages[i] = page;
1815 next = page->index + 1;
1819 /* reset index to refind any pages skipped */
1821 index = wdata->pages[0]->index + 1;
1823 /* put any pages we aren't going to use */
1824 for (i = nr_pages; i < found_pages; i++) {
1825 page_cache_release(wdata->pages[i]);
1826 wdata->pages[i] = NULL;
1829 /* nothing to write? */
1830 if (nr_pages == 0) {
1831 kref_put(&wdata->refcount, cifs_writedata_release);
1835 wdata->sync_mode = wbc->sync_mode;
1836 wdata->nr_pages = nr_pages;
1837 wdata->offset = page_offset(wdata->pages[0]);
1838 wdata->marshal_iov = cifs_writepages_marshal_iov;
/* (re)acquire a writable handle for each submission attempt */
1841 if (wdata->cfile != NULL)
1842 cifsFileInfo_put(wdata->cfile);
1843 wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1845 if (!wdata->cfile) {
1846 cERROR(1, "No writable handles for inode");
1850 wdata->pid = wdata->cfile->pid;
1851 rc = cifs_async_writev(wdata);
1852 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1854 for (i = 0; i < nr_pages; ++i)
1855 unlock_page(wdata->pages[i]);
1857 /* send failure -- clean up the mess */
1859 for (i = 0; i < nr_pages; ++i) {
/* -EAGAIN: page stays dirty for a later pass; else mark error */
1861 redirty_page_for_writepage(wbc,
1864 SetPageError(wdata->pages[i]);
1865 end_page_writeback(wdata->pages[i]);
1866 page_cache_release(wdata->pages[i]);
1869 mapping_set_error(mapping, rc);
1871 kref_put(&wdata->refcount, cifs_writedata_release);
1873 wbc->nr_to_write -= nr_pages;
1874 if (wbc->nr_to_write <= 0)
1880 if (!scanned && !done) {
1882 * We hit the last page and there is more work to be done: wrap
1883 * back to the start of the file
1890 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1891 mapping->writeback_index = index;
/*
 * Write back one locked page via cifs_partialpagewrite.  Marks the page
 * under writeback first, redirties it on -EAGAIN (retrying inline only
 * for WB_SYNC_ALL), and drops the page reference taken at entry.
 * NOTE(review): interior lines are elided in this excerpt.
 */
1897 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1903 /* BB add check for wbc flags */
1904 page_cache_get(page);
1905 if (!PageUptodate(page))
1906 cFYI(1, "ppw - page not up to date");
1909 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1911 * A writepage() implementation always needs to do either this,
1912 * or re-dirty the page with "redirty_page_for_writepage()" in
1913 * the case of a failure.
1915 * Just unlocking the page will cause the radix tree tag-bits
1916 * to fail to update with the state of the page correctly.
1918 set_page_writeback(page);
1920 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1921 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1923 else if (rc == -EAGAIN)
1924 redirty_page_for_writepage(wbc, page);
1928 SetPageUptodate(page);
1929 end_page_writeback(page);
1930 page_cache_release(page);
/* ->writepage wrapper: delegate to cifs_writepage_locked (which
 * expects the page already locked) and unlock afterwards. */
1935 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1937 int rc = cifs_writepage_locked(page, wbc);
/*
 * ->write_end: finish a buffered write begun by write_begin.  A fully
 * written page is marked uptodate; a partial write to a non-uptodate
 * page is pushed straight to the server via cifs_write (we cannot mark
 * it uptodate with stale bytes in it).  Extends i_size if needed and
 * releases the page.  NOTE(review): interior lines are elided.
 */
1942 static int cifs_write_end(struct file *file, struct address_space *mapping,
1943 loff_t pos, unsigned len, unsigned copied,
1944 struct page *page, void *fsdata)
1947 struct inode *inode = mapping->host;
1948 struct cifsFileInfo *cfile = file->private_data;
1949 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
/* rwpidforward mount option: reuse the opener's pid on the wire */
1952 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1955 pid = current->tgid;
1957 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
/* PageChecked is set by write_begin to mean "whole page will be
   (re)written" — safe to declare uptodate now */
1960 if (PageChecked(page)) {
1962 SetPageUptodate(page);
1963 ClearPageChecked(page);
1964 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1965 SetPageUptodate(page);
1967 if (!PageUptodate(page)) {
1969 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1973 /* this is probably better than directly calling
1974 partialpage_write since in this function the file handle is
1975 known which we might as well leverage */
1976 /* BB check if anything else missing out of ppw
1977 such as updating last write time */
1978 page_data = kmap(page);
1979 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
1980 /* if (rc < 0) should we set writebehind rc? */
1987 set_page_dirty(page);
1991 spin_lock(&inode->i_lock);
1992 if (pos > inode->i_size)
1993 i_size_write(inode, pos);
1994 spin_unlock(&inode->i_lock);
1998 page_cache_release(page);
/*
 * fsync for strict-cache mounts: flush the dirty range, and if we do
 * not hold a read oplock (clientCanCacheRead) also invalidate the page
 * cache so subsequent reads refetch from the server.  Finally issues an
 * SMB Flush unless the nostrictsync mount flag is set.
 * NOTE(review): interior lines are elided in this excerpt.
 */
2003 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2008 struct cifs_tcon *tcon;
2009 struct cifsFileInfo *smbfile = file->private_data;
2010 struct inode *inode = file->f_path.dentry->d_inode;
2011 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2013 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2016 mutex_lock(&inode->i_mutex);
2020 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2021 file->f_path.dentry->d_name.name, datasync);
2023 if (!CIFS_I(inode)->clientCanCacheRead) {
2024 rc = cifs_invalidate_mapping(inode);
2026 cFYI(1, "rc: %d during invalidate phase", rc);
2027 rc = 0; /* don't care about it in fsync */
2031 tcon = tlink_tcon(smbfile->tlink);
2032 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
2033 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
2036 mutex_unlock(&inode->i_mutex);
/*
 * Default fsync: flush the dirty range and send an SMB Flush (unless
 * the nostrictsync mount flag suppresses it).  Unlike
 * cifs_strict_fsync, never invalidates the page cache.
 */
2040 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2044 struct cifs_tcon *tcon;
2045 struct cifsFileInfo *smbfile = file->private_data;
2046 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2047 struct inode *inode = file->f_mapping->host;
2049 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2052 mutex_lock(&inode->i_mutex);
2056 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2057 file->f_path.dentry->d_name.name, datasync);
2059 tcon = tlink_tcon(smbfile->tlink);
2060 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
2061 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
2064 mutex_unlock(&inode->i_mutex);
2069 * As file closes, flush all cached write data for this inode checking
2070 * for write behind errors.
2072 int cifs_flush(struct file *file, fl_owner_t id)
2074 struct inode *inode = file->f_path.dentry->d_inode;
/* only writable opens can have write-behind data to flush */
2077 if (file->f_mode & FMODE_WRITE)
2078 rc = filemap_write_and_wait(inode->i_mapping);
2080 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
/*
 * Allocate num_pages highmem-capable pages into pages[].  On allocation
 * failure the already-allocated pages are released (cleanup loop below)
 * and an error is returned.  NOTE(review): interior lines elided.
 */
2086 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2091 for (i = 0; i < num_pages; i++) {
2092 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2095 * save number of pages we have already allocated and
2096 * return with ENOMEM error
2105 for (i = 0; i < num_pages; i++)
/* Compute how many pages are needed for the next uncached-write chunk:
 * clamp the remaining length to wsize (returned via *cur_len) and
 * round up to whole pages. */
2112 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2117 clen = min_t(const size_t, len, wsize);
2118 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/*
 * Marshal an uncached (O_DIRECT-style) write: one kmapped kvec per
 * page, with the final entry sized to the remaining byte count.
 * iov[0] is left for the SMB header (consistent with the writepages
 * marshaller above).
 */
2127 cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
2130 size_t bytes = wdata->bytes;
2132 /* marshal up the pages into iov array */
2133 for (i = 0; i < wdata->nr_pages; i++) {
2134 iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE);
2135 iov[i + 1].iov_base = kmap(wdata->pages[i]);
2136 bytes -= iov[i + 1].iov_len;
/*
 * Work-queue completion for an uncached async write: update cached
 * server EOF / i_size under i_lock, signal the waiter, and release the
 * pages unless the request will be retried (-EAGAIN keeps them alive
 * for the resend).  Drops the wdata reference held by the request.
 */
2141 cifs_uncached_writev_complete(struct work_struct *work)
2144 struct cifs_writedata *wdata = container_of(work,
2145 struct cifs_writedata, work);
2146 struct inode *inode = wdata->cfile->dentry->d_inode;
2147 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2149 spin_lock(&inode->i_lock);
2150 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2151 if (cifsi->server_eof > inode->i_size)
2152 i_size_write(inode, cifsi->server_eof);
2153 spin_unlock(&inode->i_lock);
2155 complete(&wdata->done);
/* on -EAGAIN the pages are kept so the caller can resend */
2157 if (wdata->result != -EAGAIN) {
2158 for (i = 0; i < wdata->nr_pages; i++)
2159 put_page(wdata->pages[i]);
2162 kref_put(&wdata->refcount, cifs_writedata_release);
2165 /* attempt to send write to server, retry on any -EAGAIN errors */
2167 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
/* reopen a stale handle before each (re)send attempt */
2172 if (wdata->cfile->invalidHandle) {
2173 rc = cifs_reopen_file(wdata->cfile, false);
2177 rc = cifs_async_writev(wdata);
2178 } while (rc == -EAGAIN);
/*
 * Uncached vectored write.  Copies the user iovec into freshly
 * allocated pages in wsize-sized chunks, fires each chunk off as an
 * async write (cifs_uncached_retry_writev), then waits for all
 * completions in offset order, resending individual chunks that come
 * back -EAGAIN.  Returns total bytes written (advancing *poffset) or
 * an error if nothing was written.  NOTE(review): interior lines are
 * elided in this excerpt.
 */
2184 cifs_iovec_write(struct file *file, const struct iovec *iov,
2185 unsigned long nr_segs, loff_t *poffset)
2187 unsigned long nr_pages, i;
2188 size_t copied, len, cur_len;
2189 ssize_t total_written = 0;
2192 struct cifsFileInfo *open_file;
2193 struct cifs_tcon *tcon;
2194 struct cifs_sb_info *cifs_sb;
2195 struct cifs_writedata *wdata, *tmp;
2196 struct list_head wdata_list;
2200 len = iov_length(iov, nr_segs);
2204 rc = generic_write_checks(file, poffset, &len, 0);
2208 INIT_LIST_HEAD(&wdata_list);
2209 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2210 open_file = file->private_data;
2211 tcon = tlink_tcon(open_file->tlink);
/* rwpidforward: stamp requests with the opener's pid */
2214 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2215 pid = open_file->pid;
2217 pid = current->tgid;
2219 iov_iter_init(&it, iov, nr_segs, len, 0);
/* chunking loop: one wdata per wsize-worth of data */
2223 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2224 wdata = cifs_writedata_alloc(nr_pages,
2225 cifs_uncached_writev_complete);
2231 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2238 for (i = 0; i < nr_pages; i++) {
2239 copied = min_t(const size_t, cur_len, PAGE_SIZE);
2240 copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2243 iov_iter_advance(&it, copied);
2245 cur_len = save_len - cur_len;
2247 wdata->sync_mode = WB_SYNC_ALL;
2248 wdata->nr_pages = nr_pages;
2249 wdata->offset = (__u64)offset;
2250 wdata->cfile = cifsFileInfo_get(open_file);
2252 wdata->bytes = cur_len;
2253 wdata->marshal_iov = cifs_uncached_marshal_iov;
2254 rc = cifs_uncached_retry_writev(wdata);
2256 kref_put(&wdata->refcount, cifs_writedata_release);
2260 list_add_tail(&wdata->list, &wdata_list);
2266 * If at least one write was successfully sent, then discard any rc
2267 * value from the later writes. If the other write succeeds, then
2268 * we'll end up returning whatever was written. If it fails, then
2269 * we'll get a new rc value from that.
2271 if (!list_empty(&wdata_list))
2275 * Wait for and collect replies for any successful sends in order of
2276 * increasing offset. Once an error is hit or we get a fatal signal
2277 * while waiting, then return without waiting for any more replies.
2280 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2282 /* FIXME: freezable too? */
2283 rc = wait_for_completion_killable(&wdata->done);
2286 else if (wdata->result)
2289 total_written += wdata->bytes;
2291 /* resend call if it's a retryable error */
2292 if (rc == -EAGAIN) {
2293 rc = cifs_uncached_retry_writev(wdata);
2297 list_del_init(&wdata->list);
2298 kref_put(&wdata->refcount, cifs_writedata_release);
2301 if (total_written > 0)
2302 *poffset += total_written;
2304 cifs_stats_bytes_written(tcon, total_written);
2305 return total_written ? total_written : (ssize_t)rc;
/*
 * aio-write entry point for uncached I/O: delegate to cifs_iovec_write
 * and, if data was written, mark the page-cache mapping invalid so
 * cached readers refetch from the server.
 */
2308 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2309 unsigned long nr_segs, loff_t pos)
2312 struct inode *inode;
2314 inode = iocb->ki_filp->f_path.dentry->d_inode;
2317 * BB - optimize the way when signing is disabled. We can drop this
2318 * extra memory-to-memory copying and use iovec buffers for constructing
2322 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2324 CIFS_I(inode)->invalid_mapping = true;
/*
 * Strict-cache aio write: if we hold an exclusive oplock
 * (clientCanCacheAll) the generic cached path is safe; otherwise fall
 * through to the uncached cifs_user_writev.
 */
2331 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2332 unsigned long nr_segs, loff_t pos)
2334 struct inode *inode;
2336 inode = iocb->ki_filp->f_path.dentry->d_inode;
2338 if (CIFS_I(inode)->clientCanCacheAll)
2339 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2342 * In strict cache mode we need to write the data to the server exactly
2343 * from the pos to pos+len-1 rather than flush all affected pages
2344 * because it may cause a error with mandatory locks on these pages but
2345 * not on the region from pos to ppos+len-1.
2348 return cifs_user_writev(iocb, iov, nr_segs, pos);
/*
 * Allocate a cifs_readdata with room for nr_vecs kvecs appended, and
 * initialize its refcount, list linkage, completion, work item (bound
 * to the given completion handler) and page list.  Returns NULL on
 * allocation failure.
 */
2351 static struct cifs_readdata *
2352 cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete)
2354 struct cifs_readdata *rdata;
/* single allocation: struct followed by the kvec array */
2356 rdata = kzalloc(sizeof(*rdata) +
2357 sizeof(struct kvec) * nr_vecs, GFP_KERNEL);
2358 if (rdata != NULL) {
2359 kref_init(&rdata->refcount);
2360 INIT_LIST_HEAD(&rdata->list);
2361 init_completion(&rdata->done);
2362 INIT_WORK(&rdata->work, complete);
2363 INIT_LIST_HEAD(&rdata->pages);
/* kref release for a cifs_readdata: drop the file-handle reference
 * (and free the structure — freeing lines elided in this excerpt). */
2369 cifs_readdata_release(struct kref *refcount)
2371 struct cifs_readdata *rdata = container_of(refcount,
2372 struct cifs_readdata, refcount);
2375 cifsFileInfo_put(rdata->cfile);
/*
 * Allocate npages pages onto @list (linked through page->lru).  On
 * failure, the cleanup loop below unlinks and frees whatever was
 * already allocated.
 */
2381 cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
2384 struct page *page, *tpage;
2387 for (i = 0; i < npages; i++) {
2388 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2393 list_add(&page->lru, list);
/* error path: release every page allocated so far */
2397 list_for_each_entry_safe(page, tpage, list, lru) {
2398 list_del(&page->lru);
/* kref release for uncached reads: free the attached pages, then do
 * the common cifs_readdata_release teardown. */
2406 cifs_uncached_readdata_release(struct kref *refcount)
2408 struct page *page, *tpage;
2409 struct cifs_readdata *rdata = container_of(refcount,
2410 struct cifs_readdata, refcount);
2412 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2413 list_del(&page->lru);
2416 cifs_readdata_release(refcount);
/* Submit an async read, reopening a stale handle and retrying while
 * the send returns -EAGAIN (mirrors cifs_uncached_retry_writev). */
2420 cifs_retry_async_readv(struct cifs_readdata *rdata)
2425 if (rdata->cfile->invalidHandle) {
2426 rc = cifs_reopen_file(rdata->cfile, true);
2430 rc = cifs_async_readv(rdata);
2431 } while (rc == -EAGAIN);
2437 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2438 * @rdata: the readdata response with list of pages holding data
2439 * @iov: vector in which we should copy the data
2440 * @nr_segs: number of segments in vector
2441 * @offset: offset into file of the first iovec
2442 * @copied: used to return the amount of data copied to the iov
2444 * This function copies data from a list of pages in a readdata response into
2445 * an array of iovecs. It will first calculate where the data should go
2446 * based on the info in the readdata and then copy the data into that spot.
2449 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2450 unsigned long nr_segs, loff_t offset, ssize_t *copied)
/* pos = where this response's data lands relative to the iovec start */
2454 size_t pos = rdata->offset - offset;
2455 struct page *page, *tpage;
2456 ssize_t remaining = rdata->bytes;
2457 unsigned char *pdata;
2459 /* set up iov_iter and advance to the correct offset */
2460 iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2461 iov_iter_advance(&ii, pos);
2464 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2467 /* copy a whole page or whatever's left */
2468 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2470 /* ...but limit it to whatever space is left in the iov */
2471 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2473 /* go while there's data to be copied and no errors */
2476 rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2482 iov_iter_advance(&ii, copy);
/* page fully consumed — unlink it from the response */
2486 list_del(&page->lru);
/*
 * Work-queue completion for an uncached async read: kunmap the pages
 * that the marshaller mapped (only on success — on error they were
 * never kmapped), wake the waiter, and drop the request reference.
 */
2494 cifs_uncached_readv_complete(struct work_struct *work)
2496 struct cifs_readdata *rdata = container_of(work,
2497 struct cifs_readdata, work);
2499 /* if the result is non-zero then the pages weren't kmapped */
2500 if (rdata->result == 0) {
2503 list_for_each_entry(page, &rdata->pages, lru)
2507 complete(&rdata->done);
2508 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * Marshal the receive iov for an uncached read of @remaining bytes:
 * full pages get a PAGE_SIZE kvec; the final partial page gets a
 * short kvec with its tail zeroed; pages beyond the data are unlinked
 * and released.  NOTE(review): interior lines are elided.
 */
2512 cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
2513 unsigned int remaining)
2516 struct page *page, *tpage;
2519 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2520 if (remaining >= PAGE_SIZE) {
2521 /* enough data to fill the page */
2522 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2523 rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE;
2524 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2525 rdata->nr_iov, page->index,
2526 rdata->iov[rdata->nr_iov].iov_base,
2527 rdata->iov[rdata->nr_iov].iov_len);
2530 remaining -= PAGE_SIZE;
2531 } else if (remaining > 0) {
2532 /* enough for partial page, fill and zero the rest */
2533 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2534 rdata->iov[rdata->nr_iov].iov_len = remaining;
2535 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2536 rdata->nr_iov, page->index,
2537 rdata->iov[rdata->nr_iov].iov_base,
2538 rdata->iov[rdata->nr_iov].iov_len);
/* zero the unread tail so no stale data leaks to the caller */
2539 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2540 '\0', PAGE_SIZE - remaining);
2545 /* no need to hold page hostage */
2546 list_del(&page->lru);
/*
 * Uncached vectored read: mirror of cifs_iovec_write.  Splits the
 * request into rsize-sized chunks, allocates pages per chunk, submits
 * each as an async read, then collects completions in offset order —
 * copying received data into the user iovec via cifs_readdata_to_iov
 * and resending chunks that return -EAGAIN.  Returns total bytes read
 * (advancing *poffset) or an error.  NOTE(review): interior lines are
 * elided in this excerpt.
 */
2555 cifs_iovec_read(struct file *file, const struct iovec *iov,
2556 unsigned long nr_segs, loff_t *poffset)
2559 size_t len, cur_len;
2560 ssize_t total_read = 0;
2561 loff_t offset = *poffset;
2562 unsigned int npages;
2563 struct cifs_sb_info *cifs_sb;
2564 struct cifs_tcon *tcon;
2565 struct cifsFileInfo *open_file;
2566 struct cifs_readdata *rdata, *tmp;
2567 struct list_head rdata_list;
2573 len = iov_length(iov, nr_segs);
2577 INIT_LIST_HEAD(&rdata_list);
2578 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2579 open_file = file->private_data;
2580 tcon = tlink_tcon(open_file->tlink);
2582 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2583 pid = open_file->pid;
2585 pid = current->tgid;
2587 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2588 cFYI(1, "attempting read on write only file instance")
2591 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2592 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2594 /* allocate a readdata struct */
2595 rdata = cifs_readdata_alloc(npages,
2596 cifs_uncached_readv_complete);
2602 rc = cifs_read_allocate_pages(&rdata->pages, npages);
2606 rdata->cfile = cifsFileInfo_get(open_file);
2607 rdata->offset = offset;
2608 rdata->bytes = cur_len;
2610 rdata->marshal_iov = cifs_uncached_read_marshal_iov;
2612 rc = cifs_retry_async_readv(rdata);
2615 kref_put(&rdata->refcount,
2616 cifs_uncached_readdata_release);
2620 list_add_tail(&rdata->list, &rdata_list);
2625 /* if at least one read request send succeeded, then reset rc */
2626 if (!list_empty(&rdata_list))
2629 /* the loop below should proceed in the order of increasing offsets */
2631 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2635 /* FIXME: freezable sleep too? */
2636 rc = wait_for_completion_killable(&rdata->done);
2639 else if (rdata->result)
2642 rc = cifs_readdata_to_iov(rdata, iov,
2645 total_read += copied;
2648 /* resend call if it's a retryable error */
2649 if (rc == -EAGAIN) {
2650 rc = cifs_retry_async_readv(rdata);
2654 list_del_init(&rdata->list);
2655 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2658 cifs_stats_bytes_read(tcon, total_read);
2659 *poffset += total_read;
2661 return total_read ? total_read : rc;
/* aio-read entry point for uncached I/O: thin wrapper around
 * cifs_iovec_read. */
2664 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2665 unsigned long nr_segs, loff_t pos)
2669 read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
/*
 * Strict-cache aio read: with a read oplock (clientCanCacheRead) the
 * generic cached path is safe; otherwise read uncached from the server
 * via cifs_user_readv.
 */
2676 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2677 unsigned long nr_segs, loff_t pos)
2679 struct inode *inode;
2681 inode = iocb->ki_filp->f_path.dentry->d_inode;
2683 if (CIFS_I(inode)->clientCanCacheRead)
2684 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2687 * In strict cache mode we need to read from the server all the time
2688 * if we don't have level II oplock because the server can delay mtime
2689 * change - so we can't make a decision about inode invalidating.
2690 * And we can also fail with pagereading if there are mandatory locks
2691 * on pages affected by this read but not on the region from pos to
2695 return cifs_user_readv(iocb, iov, nr_segs, pos);
/*
 * Synchronous read helper: loops issuing CIFSSMBRead calls of at most
 * rsize bytes (further clamped for servers without CAP_LARGE_FILES),
 * reopening an invalidated handle and retrying on -EAGAIN, until
 * read_size bytes are consumed or the server returns short/error.
 * NOTE(review): interior lines are elided in this excerpt, and the
 * "¤t_offset" token at the CIFSSMBRead call looks like mojibake —
 * "&curren" folded to the ¤ entity; presumably it was
 * "&current_offset" — TODO confirm against the upstream source.
 */
2698 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
2702 unsigned int bytes_read = 0;
2703 unsigned int total_read;
2704 unsigned int current_read_size;
2706 struct cifs_sb_info *cifs_sb;
2707 struct cifs_tcon *pTcon;
2709 char *current_offset;
2710 struct cifsFileInfo *open_file;
2711 struct cifs_io_parms io_parms;
2712 int buf_type = CIFS_NO_BUFFER;
2716 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2718 /* FIXME: set up handlers for larger reads and/or convert to async */
2719 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2721 if (file->private_data == NULL) {
2726 open_file = file->private_data;
2727 pTcon = tlink_tcon(open_file->tlink);
2729 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2730 pid = open_file->pid;
2732 pid = current->tgid;
2734 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2735 cFYI(1, "attempting read on write only file instance");
2737 for (total_read = 0, current_offset = read_data;
2738 read_size > total_read;
2739 total_read += bytes_read, current_offset += bytes_read) {
2740 current_read_size = min_t(uint, read_size - total_read, rsize);
2742 /* For windows me and 9x we do not want to request more
2743 than it negotiated since it will refuse the read then */
2745 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
2746 current_read_size = min_t(uint, current_read_size,
2750 while (rc == -EAGAIN) {
2751 if (open_file->invalidHandle) {
2752 rc = cifs_reopen_file(open_file, true);
2756 io_parms.netfid = open_file->netfid;
2758 io_parms.tcon = pTcon;
2759 io_parms.offset = *poffset;
2760 io_parms.length = current_read_size;
2761 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
2762 ¤t_offset, &buf_type);
2764 if (rc || (bytes_read == 0)) {
2772 cifs_stats_bytes_read(pTcon, total_read);
2773 *poffset += bytes_read;
2781 * If the page is mmap'ed into a process' page tables, then we need to make
2782 * sure that it doesn't change while being written back.
2785 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2787 struct page *page = vmf->page;
/* page is returned locked to the fault handler */
2790 return VM_FAULT_LOCKED;
/*
 * vm_operations for mmap'ed CIFS files: generic pagecache fault handling
 * plus a page_mkwrite hook to keep pages stable during writeback.
 * (Closing "};" of the initializer is not visible in this extract.)
 */
2793 static struct vm_operations_struct cifs_file_vm_ops = {
2794 .fault = filemap_fault,
2795 .page_mkwrite = cifs_page_mkwrite,
/*
 * cifs_file_strict_mmap - mmap entry point for strict cache semantics.
 *
 * Without a read oplock (clientCanCacheRead clear) the local page cache
 * may be stale, so invalidate the mapping before allowing the mmap.
 * Then perform the generic mmap and install the CIFS vm_ops so that
 * page_mkwrite is honored.  (Error-return lines are missing here.)
 */
2798 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
2801 struct inode *inode = file->f_path.dentry->d_inode;
2805 if (!CIFS_I(inode)->clientCanCacheRead) {
2806 rc = cifs_invalidate_mapping(inode);
2811 rc = generic_file_mmap(file, vma);
2813 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_file_mmap - default mmap entry point.
 *
 * Revalidates the file against the server first (failure is only logged
 * per the visible cFYI; the surrounding control flow is not shown), then
 * falls through to generic_file_mmap() and installs the CIFS vm_ops.
 */
2818 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2823 rc = cifs_revalidate_file(file);
2825 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
2829 rc = generic_file_mmap(file, vma);
2831 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_readv_complete - workqueue completion for an async read (rdata).
 *
 * For each page attached to the readdata: put it on the file LRU, and on
 * success mark it up to date (flushing the dcache first) and push it to
 * fscache; in all cases drop the page reference.  Finally drop this
 * work's reference on the readdata.  (kunmap/unlock lines are missing
 * from this extract.)
 */
2837 cifs_readv_complete(struct work_struct *work)
2839 struct cifs_readdata *rdata = container_of(work,
2840 struct cifs_readdata, work);
2841 struct page *page, *tpage;
2843 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2844 list_del(&page->lru);
2845 lru_cache_add_file(page);
2847 if (rdata->result == 0) {
2849 flush_dcache_page(page);
2850 SetPageUptodate(page);
2855 if (rdata->result == 0)
2856 cifs_readpage_to_fscache(rdata->mapping->host, page);
/* drop the reference taken when the page was added to the request */
2858 page_cache_release(page);
2860 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * cifs_readpages_marshal_iov - map the rdata's pages into an iovec for
 * receiving @remaining bytes of read reply data.
 *
 * Walks rdata->pages in list order:
 *  - full pages: kmap and add a PAGE_CACHE_SIZE iovec entry;
 *  - the final partial page: kmap, add a short entry, and zero the tail;
 *  - pages past the server's (probable) EOF: zero-fill, mark uptodate and
 *    release — see the in-code comment about outstanding writes;
 *  - any other leftover page: release without data.
 * NOTE(review): nr_iov increments, unlock/list handling and the return
 * value are among the lines missing from this extract.
 */
2864 cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
2867 struct page *page, *tpage;
2871 /* determine the eof that the server (probably) has */
2872 eof = CIFS_I(rdata->mapping->host)->server_eof;
2873 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
2874 cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
2877 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2878 if (remaining >= PAGE_CACHE_SIZE) {
2879 /* enough data to fill the page */
2880 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2881 rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE;
2882 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2883 rdata->nr_iov, page->index,
2884 rdata->iov[rdata->nr_iov].iov_base,
2885 rdata->iov[rdata->nr_iov].iov_len);
2887 len += PAGE_CACHE_SIZE;
2888 remaining -= PAGE_CACHE_SIZE;
2889 } else if (remaining > 0) {
2890 /* enough for partial page, fill and zero the rest */
2891 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2892 rdata->iov[rdata->nr_iov].iov_len = remaining;
2893 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2894 rdata->nr_iov, page->index,
2895 rdata->iov[rdata->nr_iov].iov_base,
2896 rdata->iov[rdata->nr_iov].iov_len);
/* zero the portion of the page beyond the received data */
2897 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2898 '\0', PAGE_CACHE_SIZE - remaining);
2902 } else if (page->index > eof_index) {
2904 * The VFS will not try to do readahead past the
2905 * i_size, but it's possible that we have outstanding
2906 * writes with gaps in the middle and the i_size hasn't
2907 * caught up yet. Populate those with zeroed out pages
2908 * to prevent the VFS from repeatedly attempting to
2909 * fill them until the writes are flushed.
2911 zero_user(page, 0, PAGE_CACHE_SIZE);
2912 list_del(&page->lru);
2913 lru_cache_add_file(page);
2914 flush_dcache_page(page);
2915 SetPageUptodate(page);
2917 page_cache_release(page);
2919 /* no need to hold page hostage */
2920 list_del(&page->lru);
2921 lru_cache_add_file(page);
2923 page_cache_release(page);
/*
 * cifs_readpages - address_space .readpages: batched async readahead.
 *
 * Tries fscache first; otherwise groups contiguous pages from @page_list
 * (up to rsize bytes per batch) into a cifs_readdata and issues an async
 * read via cifs_retry_async_readv(), with cifs_readv_complete() and
 * cifs_readpages_marshal_iov() doing completion and reply marshalling.
 * NOTE(review): many lines (rc/pid/offset declarations, break statements,
 * loop closers) are missing from this extract.
 */
2930 static int cifs_readpages(struct file *file, struct address_space *mapping,
2931 struct list_head *page_list, unsigned num_pages)
2934 struct list_head tmplist;
2935 struct cifsFileInfo *open_file = file->private_data;
2936 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2937 unsigned int rsize = cifs_sb->rsize;
2941 * Give up immediately if rsize is too small to read an entire page.
2942 * The VFS will fall back to readpage. We should never reach this
2943 * point however since we set ra_pages to 0 when the rsize is smaller
2944 * than a cache page.
2946 if (unlikely(rsize < PAGE_CACHE_SIZE))
2950 * Reads as many pages as possible from fscache. Returns -ENOBUFS
2951 * immediately if the cookie is negative
2953 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
/* pid selection mirrors cifs_read(): forwarded open-time pid or tgid */
2958 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2959 pid = open_file->pid;
2961 pid = current->tgid;
2964 INIT_LIST_HEAD(&tmplist);
2966 cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
2967 mapping, num_pages);
2970 * Start with the page at end of list and move it to private
2971 * list. Do the same with any following pages until we hit
2972 * the rsize limit, hit an index discontinuity, or run out of
2973 * pages. Issue the async read and then start the loop again
2974 * until the list is empty.
2976 * Note that list order is important. The page_list is in
2977 * the order of declining indexes. When we put the pages in
2978 * the rdata->pages, then we want them in increasing order.
2980 while (!list_empty(page_list)) {
2981 unsigned int bytes = PAGE_CACHE_SIZE;
2982 unsigned int expected_index;
2983 unsigned int nr_pages = 1;
2985 struct page *page, *tpage;
2986 struct cifs_readdata *rdata;
/* last list entry == lowest index, the start of this batch */
2988 page = list_entry(page_list->prev, struct page, lru);
2991 * Lock the page and put it in the cache. Since no one else
2992 * should have access to this page, we're safe to simply set
2993 * PG_locked without checking it first.
2995 __set_page_locked(page);
2996 rc = add_to_page_cache_locked(page, mapping,
2997 page->index, GFP_KERNEL);
2999 /* give up if we can't stick it in the cache */
3001 __clear_page_locked(page);
3005 /* move first page to the tmplist */
3006 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3007 list_move_tail(&page->lru, &tmplist);
3009 /* now try and add more pages onto the request */
3010 expected_index = page->index + 1;
3011 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3012 /* discontinuity ? */
3013 if (page->index != expected_index)
3016 /* would this page push the read over the rsize? */
3017 if (bytes + PAGE_CACHE_SIZE > rsize)
3020 __set_page_locked(page);
3021 if (add_to_page_cache_locked(page, mapping,
3022 page->index, GFP_KERNEL)) {
3023 __clear_page_locked(page);
3026 list_move_tail(&page->lru, &tmplist);
3027 bytes += PAGE_CACHE_SIZE;
3032 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3034 /* best to give up if we're out of mem */
3035 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3036 list_del(&page->lru);
3037 lru_cache_add_file(page);
3039 page_cache_release(page);
/* NOTE(review): this lock/unlock pair looks truncated — the original
 * likely read shared state (e.g. the pid/file list) under the lock */
3045 spin_lock(&cifs_file_list_lock);
3046 spin_unlock(&cifs_file_list_lock);
3047 rdata->cfile = cifsFileInfo_get(open_file);
3048 rdata->mapping = mapping;
3049 rdata->offset = offset;
3050 rdata->bytes = bytes;
3052 rdata->marshal_iov = cifs_readpages_marshal_iov;
3053 list_splice_init(&tmplist, &rdata->pages);
3055 rc = cifs_retry_async_readv(rdata);
/* on submission failure, unwind: release every page in the request */
3057 list_for_each_entry_safe(page, tpage, &rdata->pages,
3059 list_del(&page->lru);
3060 lru_cache_add_file(page);
3062 page_cache_release(page);
3064 kref_put(&rdata->refcount, cifs_readdata_release);
3068 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * cifs_readpage_worker - fill one locked page with file data.
 *
 * Tries fscache first; on a miss, kmaps the page and does a synchronous
 * cifs_read() of PAGE_CACHE_SIZE bytes, zero-fills any tail short of a
 * full page, marks the page uptodate and pushes it to fscache.  Also
 * refreshes the inode's atime.  (kunmap and the return path are among
 * the lines missing from this extract.)
 */
3074 static int cifs_readpage_worker(struct file *file, struct page *page,
3080 /* Is the page cached? */
3081 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
3085 page_cache_get(page);
3086 read_data = kmap(page);
3087 /* for reads over a certain size could initiate async read ahead */
3089 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3094 cFYI(1, "Bytes read %d", rc);
3096 file->f_path.dentry->d_inode->i_atime =
3097 current_fs_time(file->f_path.dentry->d_inode->i_sb);
/* short read: zero the rest of the page past the bytes received */
3099 if (PAGE_CACHE_SIZE > rc)
3100 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3102 flush_dcache_page(page);
3103 SetPageUptodate(page);
3105 /* send this page to the cache */
3106 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3112 page_cache_release(page);
/*
 * cifs_readpage - address_space .readpage: read a single page.
 *
 * Computes the byte offset from the page index and delegates to
 * cifs_readpage_worker().  Bails out if the file has no private_data
 * (no open handle); that error path's body is not visible here.
 */
3118 static int cifs_readpage(struct file *file, struct page *page)
3120 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3126 if (file->private_data == NULL) {
3132 cFYI(1, "readpage %p at offset %d 0x%x\n",
3133 page, (int)offset, (int)offset);
3135 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - is any open file on this inode writable?
 *
 * Scans the inode's open-file list under cifs_file_list_lock for a
 * handle opened with FMODE_WRITE.  The return statements are not visible
 * in this extract, but the early-unlock on a match indicates a found/not
 * found boolean-style result.
 */
3143 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3145 struct cifsFileInfo *open_file;
3147 spin_lock(&cifs_file_list_lock);
3148 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3149 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3150 spin_unlock(&cifs_file_list_lock);
3154 spin_unlock(&cifs_file_list_lock);
/*
 * is_size_safe_to_change - may we update i_size from a server response?
 *
 * If the inode is open for write, a server-reported size may race with
 * local writepage extension, so refuse — except under direct I/O (no
 * page cache to corrupt) or when the local size is already below the
 * server's end_of_file.  Return statements are not visible in this
 * extract.
 */
3158 /* We do not want to update the file size from server for inodes
3159 open for write - to avoid races with writepage extending
3160 the file - in the future we could consider allowing
3161 refreshing the inode only on increases in the file size
3162 but this is tricky to do without racing with writebehind
3163 page caching in the current Linux kernel design */
3164 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3169 if (is_inode_writable(cifsInode)) {
3170 /* This inode is open for write at least once */
3171 struct cifs_sb_info *cifs_sb;
3173 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3174 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3175 /* since no page cache to corrupt on directio
3176 we can change size safely */
3180 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - address_space .write_begin: prepare a page for a
 * buffered write of @len bytes at @pos.
 *
 * Grabs (and locks) the target page, then decides whether it must be
 * read from the server first.  The read is skipped when the page is
 * already uptodate, the write covers the whole page, or (with a read
 * oplock) the page lies at/past EOF — in which case the untouched parts
 * are zeroed and the page is marked PageChecked.  Otherwise, for
 * readable opens, the page is synchronously filled via
 * cifs_readpage_worker().  (Labels, *pagep assignment and the return
 * are among the missing lines in this extract.)
 */
3188 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3189 loff_t pos, unsigned len, unsigned flags,
3190 struct page **pagep, void **fsdata)
3192 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3193 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3194 loff_t page_start = pos & PAGE_MASK;
3199 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3201 page = grab_cache_page_write_begin(mapping, index, flags);
3207 if (PageUptodate(page))
3211 * If we write a full page it will be up to date, no need to read from
3212 * the server. If the write is short, we'll end up doing a sync write
3215 if (len == PAGE_CACHE_SIZE)
3219 * optimize away the read when we have an oplock, and we're not
3220 * expecting to use any of the data we'd be reading in. That
3221 * is, when the page lies beyond the EOF, or straddles the EOF
3222 * and the write will cover all of the existing data.
3224 if (CIFS_I(mapping->host)->clientCanCacheRead) {
3225 i_size = i_size_read(mapping->host);
3226 if (page_start >= i_size ||
3227 (offset == 0 && (pos + len) >= i_size)) {
3228 zero_user_segments(page, 0, offset,
3232 * PageChecked means that the parts of the page
3233 * to which we're not writing are considered up
3234 * to date. Once the data is copied to the
3235 * page, it can be set uptodate.
3237 SetPageChecked(page);
3242 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3244 * might as well read a page, it is fast enough. If we get
3245 * an error, we don't need to return it. cifs_write_end will
3246 * do a sync write instead since PG_uptodate isn't set.
3248 cifs_readpage_worker(file, page, &page_start);
3250 /* we could try using another file handle if there is one -
3251 but how would we lock it to prevent close of that handle
3252 racing with this read? In any case
3253 this will be written out by write_end so is fine */
/*
 * cifs_release_page - address_space .releasepage hook.
 *
 * Refuses to release pages with private data attached (the refusal
 * branch's body is not visible here); otherwise defers to fscache,
 * which decides whether the page may be freed.
 */
3260 static int cifs_release_page(struct page *page, gfp_t gfp)
3262 if (PagePrivate(page))
3265 return cifs_fscache_release_page(page, gfp);
/*
 * cifs_invalidate_page - address_space .invalidatepage hook.
 *
 * Tells fscache to drop its copy of the page for this inode.  The
 * visible call is unconditional here, though an offset check may exist
 * among the missing lines — confirm against upstream.
 */
3268 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3270 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3273 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * cifs_launder_page - address_space .launder_page: write one dirty page
 * synchronously before it is invalidated.
 *
 * Builds a WB_SYNC_ALL writeback_control spanning exactly this page and,
 * if the page was dirty, writes it via cifs_writepage_locked(); fscache
 * is then told to invalidate its copy.  (rc declaration and return line
 * are not visible in this extract.)
 */
3276 static int cifs_launder_page(struct page *page)
3279 loff_t range_start = page_offset(page);
3280 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3281 struct writeback_control wbc = {
3282 .sync_mode = WB_SYNC_ALL,
3284 .range_start = range_start,
3285 .range_end = range_end,
3288 cFYI(1, "Launder page: %p", page);
3290 if (clear_page_dirty_for_io(page))
3291 rc = cifs_writepage_locked(page, &wbc);
3293 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * cifs_oplock_break - workqueue handler run when the server breaks an
 * oplock on an open file.
 *
 * For regular files: break any lease held by local lessees, flush dirty
 * pages, and — when the read cache is being lost (clientCanCacheRead
 * cleared) — wait for writeback and invalidate the remote inode's cached
 * data.  Locks cached locally are pushed back to the server, and unless
 * the break was cancelled by a reconnect, an oplock-release lock request
 * acknowledges the break to the server.
 */
3297 void cifs_oplock_break(struct work_struct *work)
3299 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3301 struct inode *inode = cfile->dentry->d_inode;
3302 struct cifsInodeInfo *cinode = CIFS_I(inode);
3305 if (inode && S_ISREG(inode->i_mode)) {
/* break read leases when demoting to level II, write leases otherwise */
3306 if (cinode->clientCanCacheRead)
3307 break_lease(inode, O_RDONLY);
3309 break_lease(inode, O_WRONLY);
3310 rc = filemap_fdatawrite(inode->i_mapping);
3311 if (cinode->clientCanCacheRead == 0) {
3312 rc = filemap_fdatawait(inode->i_mapping);
3313 mapping_set_error(inode->i_mapping, rc);
3314 invalidate_remote_inode(inode);
3316 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3319 rc = cifs_push_locks(cfile);
3321 cERROR(1, "Push locks rc = %d", rc);
3324 * releasing stale oplock after recent reconnect of smb session using
3325 * a now incorrect file handle is not a data integrity issue but do
3326 * not bother sending an oplock release if session to server still is
3327 * disconnected since oplock already released by the server
3329 if (!cfile->oplock_break_cancelled) {
3330 rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid,
3331 current->tgid, 0, 0, 0, 0,
3332 LOCKING_ANDX_OPLOCK_RELEASE, false,
3333 cinode->clientCanCacheRead ? 1 : 0);
3334 cFYI(1, "Oplock release rc = %d", rc);
/*
 * Default address_space operations, used when the server's negotiated
 * buffer can hold at least one full page of read data (enables the
 * batched .readpages path).  Closing "};" is not visible in this
 * extract.
 */
3338 const struct address_space_operations cifs_addr_ops = {
3339 .readpage = cifs_readpage,
3340 .readpages = cifs_readpages,
3341 .writepage = cifs_writepage,
3342 .writepages = cifs_writepages,
3343 .write_begin = cifs_write_begin,
3344 .write_end = cifs_write_end,
3345 .set_page_dirty = __set_page_dirty_nobuffers,
3346 .releasepage = cifs_release_page,
3347 .invalidatepage = cifs_invalidate_page,
3348 .launder_page = cifs_launder_page,
/*
 * Small-buffer variant: identical to cifs_addr_ops except .readpages is
 * omitted, falling back to per-page .readpage (see the comment below).
 * Closing "};" is not visible in this extract.
 */
3352  * cifs_readpages requires the server to support a buffer large enough to
3353  * contain the header plus one complete page of data. Otherwise, we need
3354  * to leave cifs_readpages out of the address space operations.
3356 const struct address_space_operations cifs_addr_ops_smallbuf = {
3357 .readpage = cifs_readpage,
3358 .writepage = cifs_writepage,
3359 .writepages = cifs_writepages,
3360 .write_begin = cifs_write_begin,
3361 .write_end = cifs_write_end,
3362 .set_page_dirty = __set_page_dirty_nobuffers,
3363 .releasepage = cifs_release_page,
3364 .invalidatepage = cifs_invalidate_page,
3365 .launder_page = cifs_launder_page,