4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
/*
 * cifs_convert_flags - derive an access mask from the O_ACCMODE bits of
 * open-style flags. cifs_nt_open() and cifs_reopen_file() store the result
 * in desired_access.
 *
 * NOTE(review): this listing omits several original lines. The return
 * statements of the O_RDONLY and O_WRONLY branches and the tail of the
 * fallback mask are not visible here.
 */
46 static inline int cifs_convert_flags(unsigned int flags)
48 if ((flags & O_ACCMODE) == O_RDONLY)
50 else if ((flags & O_ACCMODE) == O_WRONLY)
52 else if ((flags & O_ACCMODE) == O_RDWR) {
53 /* GENERIC_ALL is too much permission to request
54 can cause unnecessary access denied on create */
55 /* return GENERIC_ALL; */
56 return (GENERIC_READ | GENERIC_WRITE);
/* Fallback when none of the three access-mode comparisons above matched. */
59 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * cifs_posix_convert_flags - build the SMB_O_* flag word (posix_flags) that
 * cifs_posix_open() passes to CIFSPOSIXCreate().
 *
 * The access mode selects one of SMB_O_RDONLY / SMB_O_WRONLY / SMB_O_RDWR;
 * the remaining SMB_O_* bits are OR-ed in one by one.
 *
 * NOTE(review): the conditions guarding SMB_O_CREAT, SMB_O_EXCL, SMB_O_TRUNC,
 * SMB_O_SYNC and SMB_O_DIRECT, the declaration of posix_flags and the return
 * statement are on lines missing from this listing.
 */
64 static u32 cifs_posix_convert_flags(unsigned int flags)
68 if ((flags & O_ACCMODE) == O_RDONLY)
69 posix_flags = SMB_O_RDONLY;
70 else if ((flags & O_ACCMODE) == O_WRONLY)
71 posix_flags = SMB_O_WRONLY;
72 else if ((flags & O_ACCMODE) == O_RDWR)
73 posix_flags = SMB_O_RDWR;
76 posix_flags |= SMB_O_CREAT;
78 posix_flags |= SMB_O_EXCL;
80 posix_flags |= SMB_O_TRUNC;
81 /* be safe and imply O_SYNC for O_DSYNC */
83 posix_flags |= SMB_O_SYNC;
84 if (flags & O_DIRECTORY)
85 posix_flags |= SMB_O_DIRECTORY;
86 if (flags & O_NOFOLLOW)
87 posix_flags |= SMB_O_NOFOLLOW;
89 posix_flags |= SMB_O_DIRECT;
/*
 * cifs_get_disposition - choose a create disposition from the O_CREAT,
 * O_EXCL and O_TRUNC bits of the open flags.
 *
 * The tests run from most specific to least specific: O_CREAT|O_EXCL first,
 * then O_CREAT|O_TRUNC (FILE_OVERWRITE_IF), then O_CREAT alone, then O_TRUNC
 * alone (FILE_OVERWRITE).
 *
 * NOTE(review): the return statements for the O_CREAT|O_EXCL, O_CREAT and
 * default cases are on lines missing from this listing; the mapping table in
 * the comment inside cifs_nt_open() lists the intended values.
 */
94 static inline int cifs_get_disposition(unsigned int flags)
96 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
98 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99 return FILE_OVERWRITE_IF;
100 else if ((flags & O_CREAT) == O_CREAT)
102 else if ((flags & O_TRUNC) == O_TRUNC)
103 return FILE_OVERWRITE;
/*
 * cifs_posix_open - open (or create) a file through CIFSPOSIXCreate().
 *
 * @full_path: path passed to CIFSPOSIXCreate()
 * @pinode:    in/out inode pointer; when *pinode is NULL a new inode is
 *             obtained with cifs_iget(), and cifs_fattr_to_inode() is applied
 *             to *pinode from the returned attributes
 * @sb:        superblock, used to find the cifs_sb_info and for cifs_iget()
 * @mode:      creation mode; masked with the current umask before use
 * @f_flags:   open flags, translated by cifs_posix_convert_flags()
 * @poplock:   passed through to CIFSPOSIXCreate()
 * @pnetfid:   passed through to CIFSPOSIXCreate()
 * @xid:       passed through to CIFSPOSIXCreate()
 *
 * NOTE(review): error checks, the posix_open_ret label, the release of
 * presp_data and the return statement are on lines missing from this listing.
 */
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109 struct super_block *sb, int mode, unsigned int f_flags,
110 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
113 FILE_UNIX_BASIC_INFO *presp_data;
114 __u32 posix_flags = 0;
115 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116 struct cifs_fattr fattr;
117 struct tcon_link *tlink;
118 struct cifs_tcon *tcon;
120 cFYI(1, "posix open %s", full_path);
/* Zeroed response buffer that CIFSPOSIXCreate() fills in. */
122 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123 if (presp_data == NULL)
126 tlink = cifs_sb_tlink(cifs_sb);
132 tcon = tlink_tcon(tlink);
133 mode &= ~current_umask();
135 posix_flags = cifs_posix_convert_flags(f_flags);
136 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137 poplock, full_path, cifs_sb->local_nls,
138 cifs_sb->mnt_cifs_flags &
139 CIFS_MOUNT_MAP_SPECIAL_CHR);
/* The tlink reference is dropped as soon as the create call has returned. */
140 cifs_put_tlink(tlink);
145 if (presp_data->Type == cpu_to_le32(-1))
146 goto posix_open_ret; /* open ok, caller does qpathinfo */
149 goto posix_open_ret; /* caller does not need info */
151 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
153 /* get new inode and set it up */
154 if (*pinode == NULL) {
155 cifs_fill_uniqueid(sb, &fattr);
156 *pinode = cifs_iget(sb, &fattr);
162 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * cifs_nt_open - open a file through the server's ->open operation.
 *
 * Computes desired_access with cifs_convert_flags() and the disposition with
 * cifs_get_disposition(), allocates a FILE_ALL_INFO buffer for the open
 * reply, adds CREATE_OPEN_BACKUP_INTENT when backup_cred() is true, calls
 * server->ops->open(), and afterwards refreshes the inode via
 * cifs_get_inode_info_unix() or cifs_get_inode_info().
 *
 * NOTE(review): the return type, local declarations, error handling, the
 * condition selecting between the two inode-info calls, the release of buf
 * and the return statement are on lines missing from this listing.
 */
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
173 struct cifs_fid *fid, unsigned int xid)
178 int create_options = CREATE_NOT_DIR;
/* Checked before use because the server ops table may not provide ->open. */
181 if (!tcon->ses->server->ops->open)
184 desired_access = cifs_convert_flags(f_flags);
186 /*********************************************************************
187 * open flag mapping table:
189 * POSIX Flag CIFS Disposition
190 * ---------- ----------------
191 * O_CREAT FILE_OPEN_IF
192 * O_CREAT | O_EXCL FILE_CREATE
193 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
194 * O_TRUNC FILE_OVERWRITE
195 * none of the above FILE_OPEN
197 * Note that there is not a direct match between disposition
198 * FILE_SUPERSEDE (ie create whether or not file exists although
199 * O_CREAT | O_TRUNC is similar but truncates the existing
200 * file rather than creating a new file as FILE_SUPERSEDE does
201 * (which uses the attributes / metadata passed in on open call)
203 *? O_SYNC is a reasonable match to CIFS writethrough flag
204 *? and the read write flags match reasonably. O_LARGEFILE
205 *? is irrelevant because largefile support is always used
206 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
207 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
208 *********************************************************************/
210 disposition = cifs_get_disposition(f_flags);
212 /* BB pass O_SYNC flag through on file attributes .. BB */
214 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
218 if (backup_cred(cifs_sb))
219 create_options |= CREATE_OPEN_BACKUP_INTENT;
221 rc = tcon->ses->server->ops->open(xid, tcon, full_path, disposition,
222 desired_access, create_options, fid,
223 oplock, buf, cifs_sb);
229 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
232 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/*
 * cifs_new_fileinfo - allocate and register the per-open private data
 * (struct cifsFileInfo) for @file.
 *
 * Allocates the cifsFileInfo and its cifs_fid_locks record, links the lock
 * record into cinode->llist under cinode->lock_mutex, records the opener's
 * tgid/fsuid, takes references on the dentry (dget) and the tlink
 * (cifs_get_tlink), lets the server ops store @fid/@oplock via ->set_fid,
 * then adds the new entry to the tcon and inode open-file lists under
 * cifs_file_list_lock and stores it in file->private_data.
 *
 * NOTE(review): the allocation-failure handling, the `else` between the two
 * flist insertions, any initialisation of the reference count and the return
 * statement are on lines missing from this listing.
 */
240 struct cifsFileInfo *
241 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
242 struct tcon_link *tlink, __u32 oplock)
244 struct dentry *dentry = file->f_path.dentry;
245 struct inode *inode = dentry->d_inode;
246 struct cifsInodeInfo *cinode = CIFS_I(inode);
247 struct cifsFileInfo *cfile;
248 struct cifs_fid_locks *fdlocks;
250 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
254 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
260 INIT_LIST_HEAD(&fdlocks->locks);
261 fdlocks->cfile = cfile;
262 cfile->llist = fdlocks;
263 mutex_lock(&cinode->lock_mutex);
264 list_add(&fdlocks->llist, &cinode->llist);
265 mutex_unlock(&cinode->lock_mutex);
268 cfile->pid = current->tgid;
269 cfile->uid = current_fsuid();
270 cfile->dentry = dget(dentry);
271 cfile->f_flags = file->f_flags;
272 cfile->invalidHandle = false;
273 cfile->tlink = cifs_get_tlink(tlink);
274 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
275 mutex_init(&cfile->fh_mutex);
276 tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);
278 spin_lock(&cifs_file_list_lock);
279 list_add(&cfile->tlist, &(tlink_tcon(tlink)->openFileList));
280 /* if readable file instance put first in list*/
281 if (file->f_mode & FMODE_READ)
282 list_add(&cfile->flist, &cinode->openFileList);
284 list_add_tail(&cfile->flist, &cinode->openFileList);
285 spin_unlock(&cifs_file_list_lock);
287 file->private_data = cfile;
/*
 * cifsFileInfo_get - locked wrapper around cifsFileInfo_get_locked().
 *
 * Takes cifs_file_list_lock, calls cifsFileInfo_get_locked() on @cifs_file
 * and drops the lock again.
 *
 * NOTE(review): the return statement is on a line missing from this listing.
 */
291 struct cifsFileInfo *
292 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
294 spin_lock(&cifs_file_list_lock);
295 cifsFileInfo_get_locked(cifs_file);
296 spin_unlock(&cifs_file_list_lock);
301 * Release a reference on the file private data. This may involve closing
302 * the filehandle out on the server. Must be called without holding
303 * cifs_file_list_lock.
/*
 * cifsFileInfo_put - drop one reference on @cifs_file and tear it down when
 * the count reaches zero.
 *
 * Visible sequence once the count is no longer positive:
 *   1. unlink the entry from the inode (flist) and tcon (tlist) lists;
 *   2. if the inode has no other open instances, mark the mapping invalid on
 *      CIFS_MOUNT_STRICT_IO mounts and reset the oplock level to 0;
 *   3. after dropping the spinlock, cancel_work_sync() the oplock-break work;
 *   4. call server->ops->close() when the tcon does not need a reconnect and
 *      the handle is still valid;
 *   5. under cifsi->lock_mutex, delete every lock record on this handle and
 *      wake its waiters, then unlink and free the cifs_fid_locks record;
 *   6. release the tlink and dentry references.
 *
 * NOTE(review): the early `return` after the first unlock, the xid
 * handling around the close call, the kfree of each lock record and of
 * cifs_file itself are on lines missing from this listing.
 */
305 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
307 struct inode *inode = cifs_file->dentry->d_inode;
308 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
309 struct cifsInodeInfo *cifsi = CIFS_I(inode);
310 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
311 struct cifsLockInfo *li, *tmp;
313 spin_lock(&cifs_file_list_lock);
314 if (--cifs_file->count > 0) {
315 spin_unlock(&cifs_file_list_lock);
319 /* remove it from the lists */
320 list_del(&cifs_file->flist);
321 list_del(&cifs_file->tlist);
323 if (list_empty(&cifsi->openFileList)) {
324 cFYI(1, "closing last open instance for inode %p",
325 cifs_file->dentry->d_inode);
327 * In strict cache mode we need invalidate mapping on the last
328 * close because it may cause a error when we open this file
329 * again and get at least level II oplock.
331 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
332 CIFS_I(inode)->invalid_mapping = true;
333 cifs_set_oplock_level(cifsi, 0);
335 spin_unlock(&cifs_file_list_lock);
337 cancel_work_sync(&cifs_file->oplock_break);
339 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
340 struct TCP_Server_Info *server = tcon->ses->server;
345 if (server->ops->close)
346 rc = server->ops->close(xid, tcon, &cifs_file->fid);
351 * Delete any outstanding lock records. We'll lose them when the file
354 mutex_lock(&cifsi->lock_mutex);
355 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
356 list_del(&li->llist);
357 cifs_del_lock_waiters(li);
360 list_del(&cifs_file->llist->llist);
361 kfree(cifs_file->llist);
362 mutex_unlock(&cifsi->lock_mutex);
364 cifs_put_tlink(cifs_file->tlink);
365 dput(cifs_file->dentry);
/*
 * cifs_open - open handler taking the VFS (inode, file) pair.
 *
 * Visible flow:
 *   - obtain the tlink/tcon for the superblock and build the full path;
 *   - if the tcon advertises CIFS_UNIX_POSIX_PATH_OPS_CAP (and posix open is
 *     not already marked broken), try cifs_posix_open(); on -EINVAL or
 *     -EOPNOTSUPP mark tcon->broken_posix_open so it is not tried again;
 *   - if the posix open did not succeed, fall back to cifs_nt_open();
 *   - wrap the resulting fid in a cifsFileInfo via cifs_new_fileinfo(); the
 *     server ->close call that follows is presumably the failure path of that
 *     allocation (its guarding condition is not visible) — TODO confirm;
 *   - set the fscache cookie, and for a file just created through the
 *     non-posix path on a unix_ext tcon, push the mode with
 *     CIFSSMBUnixSetFileInfo();
 *   - drop the tlink reference.
 *
 * NOTE(review): declarations of rc/xid/oplock/fid, several condition lines,
 * gotos/labels, the freeing of full_path and the return statement are on
 * lines missing from this listing.
 */
369 int cifs_open(struct inode *inode, struct file *file)
374 struct cifs_sb_info *cifs_sb;
375 struct cifs_tcon *tcon;
376 struct tcon_link *tlink;
377 struct cifsFileInfo *cfile = NULL;
378 char *full_path = NULL;
379 bool posix_open_ok = false;
384 cifs_sb = CIFS_SB(inode->i_sb);
385 tlink = cifs_sb_tlink(cifs_sb);
388 return PTR_ERR(tlink);
390 tcon = tlink_tcon(tlink);
392 full_path = build_path_from_dentry(file->f_path.dentry);
393 if (full_path == NULL) {
398 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
399 inode, file->f_flags, full_path);
401 if (tcon->ses->server->oplocks)
406 if (!tcon->broken_posix_open && tcon->unix_ext &&
407 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
408 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
409 /* can not refresh inode info since size could be stale */
410 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
411 cifs_sb->mnt_file_mode /* ignored */,
412 file->f_flags, &oplock, &fid.netfid, xid);
414 cFYI(1, "posix open succeeded");
415 posix_open_ok = true;
416 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
417 if (tcon->ses->serverNOS)
418 cERROR(1, "server %s of type %s returned"
419 " unexpected error on SMB posix open"
420 ", disabling posix open support."
421 " Check if server update available.",
422 tcon->ses->serverName,
423 tcon->ses->serverNOS);
424 tcon->broken_posix_open = true;
425 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
426 (rc != -EOPNOTSUPP)) /* path not found or net err */
429 * Else fallthrough to retry open the old way on network i/o
434 if (!posix_open_ok) {
435 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
436 file->f_flags, &oplock, &fid, xid);
441 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
443 if (tcon->ses->server->ops->close)
444 tcon->ses->server->ops->close(xid, tcon, &fid);
449 cifs_fscache_set_inode_cookie(inode, file);
451 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
453 * Time to set mode which we can not set earlier due to
454 * problems creating new read-only files.
456 struct cifs_unix_set_info_args args = {
457 .mode = inode->i_mode,
460 .ctime = NO_CHANGE_64,
461 .atime = NO_CHANGE_64,
462 .mtime = NO_CHANGE_64,
465 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
472 cifs_put_tlink(tlink);
477 * Try to reacquire byte range locks that were released when session
/*
 * cifs_relock_file - called by cifs_reopen_file() after the file handle has
 * been re-established.
 *
 * NOTE(review): only the signature and a "BB" to-do remark are visible in
 * this listing; the body (and therefore the actual behavior and return
 * value) cannot be confirmed from here.
 */
480 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
484 /* BB list all locks open on this file and relock */
/*
 * cifs_reopen_file - re-establish the server handle of @cfile after it has
 * been marked invalid.
 *
 * Runs under cfile->fh_mutex. If the handle is not marked invalid the mutex
 * is released straight away (presumably followed by an early return — the
 * line is not visible). Otherwise the path is rebuilt from the dentry and the
 * file is reopened: first through cifs_posix_open() when the tcon supports
 * CIFS_UNIX_POSIX_PATH_OPS_CAP (with O_CREAT/O_EXCL/O_TRUNC masked off), then
 * through server->ops->open() with disposition FILE_OPEN.
 *
 * On success invalidHandle is cleared and the mutex released. The visible
 * flush (filemap_write_and_wait) and inode refresh are presumably guarded by
 * @can_flush — the condition line is not visible, TODO confirm. Finally the
 * new fid/oplock are installed with ->set_fid and cifs_relock_file() is
 * called.
 *
 * NOTE(review): the return type, several declarations, condition lines,
 * labels (reopen_error_exit), freeing of full_path and the return statement
 * are on lines missing from this listing.
 */
490 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
495 struct cifs_sb_info *cifs_sb;
496 struct cifs_tcon *tcon;
497 struct TCP_Server_Info *server;
498 struct cifsInodeInfo *cinode;
500 char *full_path = NULL;
502 int disposition = FILE_OPEN;
503 int create_options = CREATE_NOT_DIR;
507 mutex_lock(&cfile->fh_mutex);
508 if (!cfile->invalidHandle) {
509 mutex_unlock(&cfile->fh_mutex);
515 inode = cfile->dentry->d_inode;
516 cifs_sb = CIFS_SB(inode->i_sb);
517 tcon = tlink_tcon(cfile->tlink);
518 server = tcon->ses->server;
521 * Can not grab rename sem here because various ops, including those
522 * that already have the rename sem can end up causing writepage to get
523 * called and if the server was down that means we end up here, and we
524 * can never tell if the caller already has the rename_sem.
526 full_path = build_path_from_dentry(cfile->dentry);
527 if (full_path == NULL) {
529 mutex_unlock(&cfile->fh_mutex);
534 cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
537 if (tcon->ses->server->oplocks)
542 if (tcon->unix_ext && cap_unix(tcon->ses) &&
543 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
544 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
546 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
547 * original open. Must mask them off for a reopen.
549 unsigned int oflags = cfile->f_flags &
550 ~(O_CREAT | O_EXCL | O_TRUNC);
552 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
553 cifs_sb->mnt_file_mode /* ignored */,
554 oflags, &oplock, &fid.netfid, xid);
556 cFYI(1, "posix reopen succeeded");
560 * fallthrough to retry open the old way on errors, especially
561 * in the reconnect path it is important to retry hard
565 desired_access = cifs_convert_flags(cfile->f_flags);
567 if (backup_cred(cifs_sb))
568 create_options |= CREATE_OPEN_BACKUP_INTENT;
571 * Can not refresh inode by passing in file_info buf to be returned by
572 * CIFSSMBOpen and then calling get_inode_info with returned buf since
573 * file might have write behind data that needs to be flushed and server
574 * version of file size can be stale. If we knew for sure that inode was
575 * not dirty locally we could do this.
577 rc = server->ops->open(xid, tcon, full_path, disposition,
578 desired_access, create_options, &fid, &oplock,
581 mutex_unlock(&cfile->fh_mutex);
582 cFYI(1, "cifs_reopen returned 0x%x", rc);
583 cFYI(1, "oplock: %d", oplock);
584 goto reopen_error_exit;
588 cfile->invalidHandle = false;
589 mutex_unlock(&cfile->fh_mutex);
590 cinode = CIFS_I(inode);
593 rc = filemap_write_and_wait(inode->i_mapping);
594 mapping_set_error(inode->i_mapping, rc);
597 rc = cifs_get_inode_info_unix(&inode, full_path,
600 rc = cifs_get_inode_info(&inode, full_path, NULL,
601 inode->i_sb, xid, NULL);
604 * Else we are writing out data to server already and could deadlock if
605 * we tried to flush data, and since we do not know if we have data that
606 * would invalidate the current end of file on the server we can not go
607 * to the server to get the new inode info.
610 server->ops->set_fid(cfile, &fid, oplock);
611 cifs_relock_file(cfile);
/*
 * cifs_close - drop the reference held through file->private_data.
 *
 * When private data is present it is released with cifsFileInfo_put() and
 * the pointer is cleared so it cannot be used again.
 *
 * NOTE(review): the return statement is on a line missing from this listing.
 */
619 int cifs_close(struct inode *inode, struct file *file)
621 if (file->private_data != NULL) {
622 cifsFileInfo_put(file->private_data);
623 file->private_data = NULL;
626 /* return code from the ->release op is always ignored */
/*
 * cifs_closedir - release the search state kept in file->private_data.
 *
 * If the search has not reached its end and the handle is still valid, the
 * handle is marked invalid under cifs_file_list_lock and, after dropping the
 * lock, closed on the server via server->ops->close_dir() (when provided).
 * Any network buffer still attached to the search (srch_inf.ntwrk_buf_start)
 * is released with the small- or regular-buffer release helper, the tlink
 * reference is dropped and the private data is freed.
 *
 * NOTE(review): declarations of rc/xid/buf, the NULL check on cfile, the
 * `else` branches and the return statement are on lines missing from this
 * listing.
 */
630 int cifs_closedir(struct inode *inode, struct file *file)
634 struct cifsFileInfo *cfile = file->private_data;
635 struct cifs_tcon *tcon;
636 struct TCP_Server_Info *server;
639 cFYI(1, "Closedir inode = 0x%p", inode);
645 tcon = tlink_tcon(cfile->tlink);
646 server = tcon->ses->server;
648 cFYI(1, "Freeing private data in close dir");
649 spin_lock(&cifs_file_list_lock);
650 if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
651 cfile->invalidHandle = true;
652 spin_unlock(&cifs_file_list_lock);
653 if (server->ops->close_dir)
654 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
657 cFYI(1, "Closing uncompleted readdir with rc %d", rc);
658 /* not much we can do if it fails anyway, ignore rc */
661 spin_unlock(&cifs_file_list_lock);
663 buf = cfile->srch_inf.ntwrk_buf_start;
665 cFYI(1, "closedir free smb buf in srch struct");
666 cfile->srch_inf.ntwrk_buf_start = NULL;
667 if (cfile->srch_inf.smallBuf)
668 cifs_small_buf_release(buf);
670 cifs_buf_release(buf);
673 cifs_put_tlink(cfile->tlink);
674 kfree(file->private_data);
675 file->private_data = NULL;
676 /* BB can we lock the filestruct while this is going on? */
/*
 * cifs_lock_init - allocate a cifsLockInfo record for a byte range.
 *
 * Fills in offset and length from the arguments, records the caller's tgid
 * as the owning pid, and initialises the list of blocked waiters (blist) and
 * the wait queue (block_q).
 *
 * NOTE(review): the allocation-failure check, the assignment of @type and
 * the return statement are on lines missing from this listing.
 */
681 static struct cifsLockInfo *
682 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
684 struct cifsLockInfo *lock =
685 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
688 lock->offset = offset;
689 lock->length = length;
691 lock->pid = current->tgid;
692 INIT_LIST_HEAD(&lock->blist);
693 init_waitqueue_head(&lock->block_q);
/*
 * cifs_del_lock_waiters - detach and wake every lock request queued on
 * @lock's blist.
 *
 * Each waiter is removed with list_del_init(), which leaves its own blist
 * entry empty; cifs_lock_add_if() waits for exactly that condition.
 */
698 cifs_del_lock_waiters(struct cifsLockInfo *lock)
700 struct cifsLockInfo *li, *tmp;
701 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
702 list_del_init(&li->blist);
703 wake_up(&li->block_q);
/*
 * cifs_find_fid_lock_conflict - scan the lock records of one open handle
 * (@fdlocks) for a lock that overlaps [offset, offset + length).
 *
 * Entries that end at or before @offset, or start at or after
 * offset + length, do not overlap. For overlapping entries a shared-type
 * request is further tested against the owner (same fid per
 * server->ops->compare_fids() and same tgid) or an identical lock type.
 *
 * NOTE(review): the statements executed for each test (presumably `continue`
 * for non-conflicting entries), the store through @conf_lock and the return
 * statements are on lines missing from this listing.
 */
708 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
709 __u64 length, __u8 type, struct cifsFileInfo *cfile,
710 struct cifsLockInfo **conf_lock)
712 struct cifsLockInfo *li;
713 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
714 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
716 list_for_each_entry(li, &fdlocks->locks, llist) {
717 if (offset + length <= li->offset ||
718 offset >= li->offset + li->length)
720 if ((type & server->vals->shared_lock_type) &&
721 ((server->ops->compare_fids(cfile, cur_cfile) &&
722 current->tgid == li->pid) || type == li->type))
/*
 * cifs_find_lock_conflict - run cifs_find_fid_lock_conflict() over every
 * cifs_fid_locks record attached to the inode of @cfile (cinode->llist).
 *
 * NOTE(review): the remaining arguments of the inner call, the handling of
 * its result and the return statement are on lines missing from this listing.
 */
731 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
732 __u8 type, struct cifsLockInfo **conf_lock)
735 struct cifs_fid_locks *cur;
736 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
738 list_for_each_entry(cur, &cinode->llist, llist) {
739 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
749 * Check if there is another lock that prevents us from setting the lock
750 * (mandatory style). If such a lock exists, update the flock structure with
751 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
752 * brlocks or leave it the same if we can't. Returns 0 if we don't need to send
753 * a request to the server or 1 otherwise.
/*
 * cifs_lock_test - test a byte range against locally recorded locks, under
 * cinode->lock_mutex.
 *
 * When cifs_find_lock_conflict() reports a conflicting lock, @flock is
 * overwritten with that lock's range and pid, and its type becomes F_RDLCK
 * if the conflicting lock has the server's shared_lock_type bit set and
 * F_WRLCK in the other visible branch. Otherwise, if byte-range locks cannot
 * be cached, a branch whose body is not visible is taken; the remaining case
 * sets the type to F_UNLCK.
 *
 * NOTE(review): the `if (exist)` line, the `else` keywords, the assignment in
 * the !can_cache_brlcks branch and the return statement are on lines missing
 * from this listing.
 */
756 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
757 __u8 type, struct file_lock *flock)
760 struct cifsLockInfo *conf_lock;
761 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
762 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
765 mutex_lock(&cinode->lock_mutex);
767 exist = cifs_find_lock_conflict(cfile, offset, length, type,
770 flock->fl_start = conf_lock->offset;
/* fl_end is inclusive, hence the -1. */
771 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
772 flock->fl_pid = conf_lock->pid;
773 if (conf_lock->type & server->vals->shared_lock_type)
774 flock->fl_type = F_RDLCK;
776 flock->fl_type = F_WRLCK;
777 } else if (!cinode->can_cache_brlcks)
780 flock->fl_type = F_UNLCK;
782 mutex_unlock(&cinode->lock_mutex);
/*
 * cifs_lock_add - append @lock to the lock list of @cfile
 * (cfile->llist->locks) while holding the inode's lock_mutex.
 */
787 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
789 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
790 mutex_lock(&cinode->lock_mutex);
791 list_add_tail(&lock->llist, &cfile->llist->locks);
792 mutex_unlock(&cinode->lock_mutex);
796 * Set the byte-range lock (mandatory style). Returns:
797 * 1) 0, if we set the lock and don't need to send a request to the server;
798 * 2) 1, if no locks prevent us but we need to send a request to the server;
799 * 3) -EACCES, if there is a lock that prevents us and wait is false.
/*
 * cifs_lock_add_if - record @lock locally if nothing conflicts with it.
 *
 * Under cinode->lock_mutex the requested range is checked with
 * cifs_find_lock_conflict(). With no conflict and brlock caching enabled the
 * lock is appended to cfile->llist->locks. In the blocking path visible
 * below, @lock is queued on the conflicting lock's blist, the mutex is
 * dropped, and the caller sleeps (interruptibly) until its own blist entry is
 * empty again — which is what cifs_del_lock_waiters() does via
 * list_del_init(). After the wait the mutex is retaken and the entry is
 * removed from the blist.
 *
 * NOTE(review): the third parameter, the conditions selecting each path, the
 * retry logic after a wake-up and the return statements are on lines missing
 * from this listing.
 */
802 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
805 struct cifsLockInfo *conf_lock;
806 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
812 mutex_lock(&cinode->lock_mutex);
814 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
815 lock->type, &conf_lock);
816 if (!exist && cinode->can_cache_brlcks) {
817 list_add_tail(&lock->llist, &cfile->llist->locks);
818 mutex_unlock(&cinode->lock_mutex);
827 list_add_tail(&lock->blist, &conf_lock->blist);
828 mutex_unlock(&cinode->lock_mutex);
829 rc = wait_event_interruptible(lock->block_q,
830 (lock->blist.prev == &lock->blist) &&
831 (lock->blist.next == &lock->blist));
834 mutex_lock(&cinode->lock_mutex);
835 list_del_init(&lock->blist);
838 mutex_unlock(&cinode->lock_mutex);
843 * Check if there is another lock that prevents us from setting the lock
844 * (posix style). If such a lock exists, update the flock structure with its
845 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
846 * or leave it the same if we can't. Returns 0 if we don't need to send a
847 * request to the server or 1 otherwise.
/*
 * cifs_posix_lock_test - test @flock against local POSIX locks with
 * posix_test_lock(), under cinode->lock_mutex.
 *
 * The original fl_type is saved first; if posix_test_lock() reports no
 * conflict (F_UNLCK) but byte-range locks cannot be cached, the saved type
 * is restored.
 *
 * NOTE(review): the early return for non-FL_POSIX requests, the rc
 * assignments and the final return statement are on lines missing from this
 * listing.
 */
850 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
853 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
854 unsigned char saved_type = flock->fl_type;
856 if ((flock->fl_flags & FL_POSIX) == 0)
859 mutex_lock(&cinode->lock_mutex);
860 posix_test_lock(file, flock);
862 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
863 flock->fl_type = saved_type;
867 mutex_unlock(&cinode->lock_mutex);
872 * Set the byte-range lock (posix style). Returns:
873 * 1) 0, if we set the lock and don't need to send a request to the server;
874 * 2) 1, if we need to send a request to the server;
875 * 3) <0, if an error occurs while setting the lock.
/*
 * cifs_posix_lock_set - try to record a POSIX lock locally with
 * posix_lock_file().
 *
 * Under cinode->lock_mutex: if byte-range locks cannot be cached the mutex
 * is released without calling posix_lock_file(). Otherwise the lock is set
 * locally; when posix_lock_file() returns FILE_LOCK_DEFERRED the caller
 * sleeps interruptibly until flock->fl_next is cleared.
 * locks_delete_block() is visible afterwards; its guarding condition is not.
 *
 * NOTE(review): the rc declaration/initial value, the early returns, the
 * retry after a deferred lock and the final return statement are on lines
 * missing from this listing.
 */
878 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
880 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
883 if ((flock->fl_flags & FL_POSIX) == 0)
887 mutex_lock(&cinode->lock_mutex);
888 if (!cinode->can_cache_brlcks) {
889 mutex_unlock(&cinode->lock_mutex);
893 rc = posix_lock_file(file, flock, NULL);
894 mutex_unlock(&cinode->lock_mutex);
895 if (rc == FILE_LOCK_DEFERRED) {
896 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
899 locks_delete_block(flock);
/*
 * cifs_push_mandatory_locks - send the locally cached mandatory byte-range
 * locks of @cfile to the server.
 *
 * Under cinode->lock_mutex: nothing is sent when can_cache_brlcks is already
 * false. Otherwise a LOCKING_ANDX_RANGE array sized from the server's maxBuf
 * is allocated, and for each of the two lock types in types[] the matching
 * entries of cfile->llist->locks are packed into the array and submitted with
 * cifs_lockv() whenever max_num entries have accumulated, plus once more for
 * the remainder. Finally can_cache_brlcks is cleared.
 *
 * NOTE(review): the early returns, the zero check on max_buf, the resets of
 * cur/num, error accumulation from stored_rc, the freeing of buf and the
 * return statement are on lines missing from this listing.
 */
905 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
908 int rc = 0, stored_rc;
909 struct cifsLockInfo *li, *tmp;
910 struct cifs_tcon *tcon;
911 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
912 unsigned int num, max_num, max_buf;
913 LOCKING_ANDX_RANGE *buf, *cur;
914 int types[] = {LOCKING_ANDX_LARGE_FILES,
915 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
919 tcon = tlink_tcon(cfile->tlink);
921 mutex_lock(&cinode->lock_mutex);
922 if (!cinode->can_cache_brlcks) {
923 mutex_unlock(&cinode->lock_mutex);
929 * Accessing maxBuf is racy with cifs_reconnect - need to store value
930 * and check it for zero before using.
932 max_buf = tcon->ses->server->maxBuf;
934 mutex_unlock(&cinode->lock_mutex);
/* Number of lock ranges that fit in one request after the SMB header. */
939 max_num = (max_buf - sizeof(struct smb_hdr)) /
940 sizeof(LOCKING_ANDX_RANGE);
941 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
943 mutex_unlock(&cinode->lock_mutex);
948 for (i = 0; i < 2; i++) {
951 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
952 if (li->type != types[i])
954 cur->Pid = cpu_to_le16(li->pid);
955 cur->LengthLow = cpu_to_le32((u32)li->length);
956 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
957 cur->OffsetLow = cpu_to_le32((u32)li->offset);
958 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
959 if (++num == max_num) {
960 stored_rc = cifs_lockv(xid, tcon,
962 (__u8)li->type, 0, num,
973 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
974 (__u8)types[i], 0, num, buf);
980 cinode->can_cache_brlcks = false;
981 mutex_unlock(&cinode->lock_mutex);
/*
 * cifs_for_each_lock - walk the lock list hanging off inode->i_flock.
 * @lockp is a pointer-to-pointer cursor: it starts at &inode->i_flock and
 * advances through each entry's fl_next until it points at a NULL link.
 */
988 /* copied from fs/locks.c with a name change */
989 #define cifs_for_each_lock(inode, lockp) \
990 for (lockp = &inode->i_flock; *lockp != NULL; \
991 lockp = &(*lockp)->fl_next)
/*
 * struct lock_to_push - element of the temporary locks_to_send list built by
 * cifs_push_posix_locks(), linked through llist.
 *
 * NOTE(review): the remaining members are on lines missing from this
 * listing; cifs_push_posix_locks() accesses pid, netfid, length and offset.
 */
993 struct lock_to_push {
994 struct list_head llist;
/*
 * cifs_push_posix_locks - send the locally held FL_POSIX locks of the inode
 * behind @cfile to the server.
 *
 * Under cinode->lock_mutex, and only while can_cache_brlcks is set:
 *   1. a first pass over the inode's lock list tests each entry for FL_POSIX
 *      (the statement it guards is not visible — presumably it increments
 *      count, TODO confirm);
 *   2. `count` lock_to_push records are pre-allocated onto locks_to_send;
 *   3. a second pass copies pid, netfid, length and offset of each FL_POSIX
 *      lock into the next pre-allocated record;
 *   4. each record is sent with CIFSSMBPosixLock() and removed from the list;
 *   5. can_cache_brlcks is cleared and the mutex released.
 * The trailing loop unlinks whatever is still on locks_to_send.
 *
 * NOTE(review): several statements (count++, early returns, the lock type
 * selection, advancing `el`, kfree calls, labels and the return statement)
 * are on lines missing from this listing.
 */
1003 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1005 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1006 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1007 struct file_lock *flock, **before;
1008 unsigned int count = 0, i = 0;
1009 int rc = 0, xid, type;
1010 struct list_head locks_to_send, *el;
1011 struct lock_to_push *lck, *tmp;
1016 mutex_lock(&cinode->lock_mutex);
1017 if (!cinode->can_cache_brlcks) {
1018 mutex_unlock(&cinode->lock_mutex);
1024 cifs_for_each_lock(cfile->dentry->d_inode, before) {
1025 if ((*before)->fl_flags & FL_POSIX)
1030 INIT_LIST_HEAD(&locks_to_send);
1033 * Allocating count locks is enough because no FL_POSIX locks can be
1034 * added to the list while we are holding cinode->lock_mutex that
1035 * protects locking operations of this inode.
1037 for (; i < count; i++) {
1038 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1043 list_add_tail(&lck->llist, &locks_to_send);
1046 el = locks_to_send.next;
1048 cifs_for_each_lock(cfile->dentry->d_inode, before) {
1050 if ((flock->fl_flags & FL_POSIX) == 0)
1052 if (el == &locks_to_send) {
1054 * The list ended. We don't have enough allocated
1055 * structures - something is really wrong.
1057 cERROR(1, "Can't push all brlocks!");
/* fl_start and fl_end are both inclusive, hence the +1. */
1060 length = 1 + flock->fl_end - flock->fl_start;
1061 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1065 lck = list_entry(el, struct lock_to_push, llist);
1066 lck->pid = flock->fl_pid;
1067 lck->netfid = cfile->fid.netfid;
1068 lck->length = length;
1070 lck->offset = flock->fl_start;
1075 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1078 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1079 lck->offset, lck->length, NULL,
1083 list_del(&lck->llist);
1088 cinode->can_cache_brlcks = false;
1089 mutex_unlock(&cinode->lock_mutex);
1094 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1095 list_del(&lck->llist);
/*
 * cifs_push_locks - push cached byte-range locks of @cfile to the server.
 *
 * Uses cifs_push_posix_locks() when the session has unix capabilities, the
 * tcon advertises CIFS_UNIX_FCNTL_CAP and the mount does not set
 * CIFS_MOUNT_NOPOSIXBRL; otherwise delegates to the server's
 * ->push_mand_locks operation.
 */
1102 cifs_push_locks(struct cifsFileInfo *cfile)
1104 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1105 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1107 if (cap_unix(tcon->ses) &&
1108 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1109 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1110 return cifs_push_posix_locks(cfile);
1112 return tcon->ses->server->ops->push_mand_locks(cfile);
/*
 * cifs_read_flock - decode a struct file_lock into the server lock type and
 * the lock/unlock/wait indicators returned through the out parameters.
 *
 * The fl_flags bits are inspected first (mostly for debug logging), then
 * *type starts from the server's large_lock_type and the bit matching
 * fl_type is OR-ed in: exclusive for F_WRLCK/F_EXLCK, shared for
 * F_RDLCK/F_SHLCK, unlock for F_UNLCK.
 *
 * NOTE(review): the debug messages for some branches and every assignment to
 * *lock, *unlock and *wait_flag are on lines missing from this listing.
 */
1116 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1117 bool *wait_flag, struct TCP_Server_Info *server)
1119 if (flock->fl_flags & FL_POSIX)
1121 if (flock->fl_flags & FL_FLOCK)
1123 if (flock->fl_flags & FL_SLEEP) {
1124 cFYI(1, "Blocking lock");
1127 if (flock->fl_flags & FL_ACCESS)
1128 cFYI(1, "Process suspended by mandatory locking - "
1129 "not implemented yet");
1130 if (flock->fl_flags & FL_LEASE)
1131 cFYI(1, "Lease on file - not implemented yet");
1132 if (flock->fl_flags &
1133 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
1134 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1136 *type = server->vals->large_lock_type;
1137 if (flock->fl_type == F_WRLCK) {
1138 cFYI(1, "F_WRLCK ");
1139 *type |= server->vals->exclusive_lock_type;
1141 } else if (flock->fl_type == F_UNLCK) {
1143 *type |= server->vals->unlock_lock_type;
1145 /* Check if unlock includes more than one lock range */
1146 } else if (flock->fl_type == F_RDLCK) {
1148 *type |= server->vals->shared_lock_type;
1150 } else if (flock->fl_type == F_EXLCK) {
1152 *type |= server->vals->exclusive_lock_type;
1154 } else if (flock->fl_type == F_SHLCK) {
1156 *type |= server->vals->shared_lock_type;
1159 cFYI(1, "Unknown type of lock");
/*
 * cifs_getlk - lock-test path: find out whether the range described by
 * @flock could be locked, and report the result in flock->fl_type.
 *
 * Two strategies are visible:
 *   - POSIX: cifs_posix_lock_test() locally, then CIFSSMBPosixLock() with
 *     CIFS_RDLCK or CIFS_WRLCK depending on the shared bit in @type
 *     (presumably selected by @posix_lck — the condition is not visible);
 *   - mandatory: cifs_lock_test() locally, then probe the server by taking
 *     the lock with ->mand_lock() and releasing it again; a successful probe
 *     yields F_UNLCK. The later section retries with the shared lock type
 *     and reports F_RDLCK or F_WRLCK.
 *
 * NOTE(review): most conditions, `else` keywords, the arguments that
 * distinguish lock from unlock calls, and all return statements are on lines
 * missing from this listing, so the exact branch structure cannot be
 * confirmed from here.
 */
1163 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1164 bool wait_flag, bool posix_lck, unsigned int xid)
/* fl_start and fl_end are both inclusive, hence the +1. */
1167 __u64 length = 1 + flock->fl_end - flock->fl_start;
1168 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1169 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1170 struct TCP_Server_Info *server = tcon->ses->server;
1171 __u16 netfid = cfile->fid.netfid;
1174 int posix_lock_type;
1176 rc = cifs_posix_lock_test(file, flock);
1180 if (type & server->vals->shared_lock_type)
1181 posix_lock_type = CIFS_RDLCK;
1183 posix_lock_type = CIFS_WRLCK;
1184 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1185 flock->fl_start, length, flock,
1186 posix_lock_type, wait_flag);
1190 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1194 /* BB we could chain these into one lock request BB */
1195 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1198 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1200 flock->fl_type = F_UNLCK;
1202 cERROR(1, "Error unlocking previously locked "
1203 "range %d during test of lock", rc);
1207 if (type & server->vals->shared_lock_type) {
1208 flock->fl_type = F_WRLCK;
1212 type &= ~server->vals->exclusive_lock_type;
1214 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1215 type | server->vals->shared_lock_type,
1218 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1219 type | server->vals->shared_lock_type, 0, 1, false);
1220 flock->fl_type = F_RDLCK;
1222 cERROR(1, "Error unlocking previously locked "
1223 "range %d during test of lock", rc);
1225 flock->fl_type = F_WRLCK;
/*
 * cifs_move_llist - move every entry of @source to @dest.
 *
 * list_move() inserts at the head of @dest, so entries end up in reverse
 * order relative to @source.
 */
1231 cifs_move_llist(struct list_head *source, struct list_head *dest)
1233 struct list_head *li, *tmp;
1234 list_for_each_safe(li, tmp, source)
1235 list_move(li, dest);
/*
 * cifs_free_llist - dispose of every cifsLockInfo on @llist: wake anything
 * blocked on the lock, then unlink it.
 *
 * NOTE(review): the statement that frees each entry is on a line missing
 * from this listing.
 */
1239 cifs_free_llist(struct list_head *llist)
1241 struct cifsLockInfo *li, *tmp;
1242 list_for_each_entry_safe(li, tmp, llist, llist) {
1243 cifs_del_lock_waiters(li);
1244 list_del(&li->llist);
/*
 * cifs_unlock_range - release the locks of @cfile covered by the range in
 * @flock.
 *
 * A LOCKING_ANDX_RANGE array sized from the server's maxBuf is allocated,
 * then, under cinode->lock_mutex and once per lock type in types[], the
 * handle's lock list is scanned. Each entry is tested against the unlock
 * range (start and end), the caller's tgid and the current type; the
 * statements those tests guard are not visible (presumably `continue`).
 * For entries that pass:
 *   - when brlocks are cached locally, the entry is simply removed and its
 *     waiters woken;
 *   - otherwise it is packed into the array and parked on tmp_llist so it
 *     can be restored if the server request fails. Batches are sent with
 *     cifs_lockv() when max_num entries have accumulated and once more for
 *     the remainder. A failed batch is moved back to the handle's list with
 *     cifs_move_llist(); a successful one is released with cifs_free_llist().
 *
 * NOTE(review): the third parameter, the zero check on max_buf, `continue`
 * statements, resets of cur/num, the success/failure conditions, the freeing
 * of buf and list entries and the return statement are on lines missing from
 * this listing.
 */
1250 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1253 int rc = 0, stored_rc;
1254 int types[] = {LOCKING_ANDX_LARGE_FILES,
1255 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1257 unsigned int max_num, num, max_buf;
1258 LOCKING_ANDX_RANGE *buf, *cur;
1259 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1260 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1261 struct cifsLockInfo *li, *tmp;
1262 __u64 length = 1 + flock->fl_end - flock->fl_start;
1263 struct list_head tmp_llist;
1265 INIT_LIST_HEAD(&tmp_llist);
1268 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1269 * and check it for zero before using.
1271 max_buf = tcon->ses->server->maxBuf;
1275 max_num = (max_buf - sizeof(struct smb_hdr)) /
1276 sizeof(LOCKING_ANDX_RANGE);
1277 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1281 mutex_lock(&cinode->lock_mutex);
1282 for (i = 0; i < 2; i++) {
1285 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1286 if (flock->fl_start > li->offset ||
1287 (flock->fl_start + length) <
1288 (li->offset + li->length))
1290 if (current->tgid != li->pid)
1292 if (types[i] != li->type)
1294 if (cinode->can_cache_brlcks) {
1296 * We can cache brlock requests - simply remove
1297 * a lock from the file's list.
1299 list_del(&li->llist);
1300 cifs_del_lock_waiters(li);
1304 cur->Pid = cpu_to_le16(li->pid);
1305 cur->LengthLow = cpu_to_le32((u32)li->length);
1306 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1307 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1308 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1310 * We need to save a lock here to let us add it again to
1311 * the file's list if the unlock range request fails on
1314 list_move(&li->llist, &tmp_llist);
1315 if (++num == max_num) {
1316 stored_rc = cifs_lockv(xid, tcon,
1318 li->type, num, 0, buf);
1321 * We failed on the unlock range
1322 * request - add all locks from the tmp
1323 * list to the head of the file's list.
1325 cifs_move_llist(&tmp_llist,
1326 &cfile->llist->locks);
1330 * The unlock range request succeed -
1331 * free the tmp list.
1333 cifs_free_llist(&tmp_llist);
1340 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1341 types[i], num, 0, buf);
1343 cifs_move_llist(&tmp_llist,
1344 &cfile->llist->locks);
1347 cifs_free_llist(&tmp_llist);
1351 mutex_unlock(&cinode->lock_mutex);
/*
 * cifs_setlk - set or release a byte-range lock described by @flock.
 *
 * Two strategies are visible:
 *   - POSIX: cifs_posix_lock_set() locally, then CIFSSMBPosixLock() with
 *     CIFS_RDLCK, CIFS_WRLCK or CIFS_UNLCK (presumably selected by
 *     @posix_lck and @unlock — the conditions are not visible);
 *   - mandatory: for a lock request a cifsLockInfo is created with
 *     cifs_lock_init() and offered to cifs_lock_add_if(); ->mand_lock() and
 *     cifs_lock_add() follow on the visible lines. For an unlock request the
 *     server's ->mand_unlock_range() is used.
 * For FL_POSIX requests posix_lock_file_wait() is finally called so the VFS
 * lock state is updated as well.
 *
 * NOTE(review): the last parameter, most conditions and `else` keywords,
 * error handling (including freeing `lock` on failure) and the return
 * statement are on lines missing from this listing.
 */
1357 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1358 bool wait_flag, bool posix_lck, int lock, int unlock,
/* fl_start and fl_end are both inclusive, hence the +1. */
1362 __u64 length = 1 + flock->fl_end - flock->fl_start;
1363 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1364 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1365 struct TCP_Server_Info *server = tcon->ses->server;
1368 int posix_lock_type;
1370 rc = cifs_posix_lock_set(file, flock);
1374 if (type & server->vals->shared_lock_type)
1375 posix_lock_type = CIFS_RDLCK;
1377 posix_lock_type = CIFS_WRLCK;
1380 posix_lock_type = CIFS_UNLCK;
1382 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1383 current->tgid, flock->fl_start, length,
1384 NULL, posix_lock_type, wait_flag);
1389 struct cifsLockInfo *lock;
1391 lock = cifs_lock_init(flock->fl_start, length, type);
1395 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1401 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1402 type, 1, 0, wait_flag);
1408 cifs_lock_add(cfile, lock);
1410 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1413 if (flock->fl_flags & FL_POSIX)
1414 posix_lock_file_wait(file, flock);
/*
 * Byte-range lock entry point for @file.
 *
 * Logs the request, decodes @flock with cifs_read_flock() into a type plus
 * lock/unlock/wait flags, and then dispatches: IS_GETLK(cmd) requests go to
 * cifs_getlk(), everything else that asks for a lock or an unlock goes to
 * cifs_setlk(). A request that is neither a lock nor an unlock is treated
 * as having nothing to do (the statements of that branch are not visible
 * in this view).
 */
1418 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1421 int lock = 0, unlock = 0;
1422 bool wait_flag = false;
1423 bool posix_lck = false;
1424 struct cifs_sb_info *cifs_sb;
1425 struct cifs_tcon *tcon;
1426 struct cifsInodeInfo *cinode;
1427 struct cifsFileInfo *cfile;
1434 cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1435 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1436 flock->fl_start, flock->fl_end);
1438 cfile = (struct cifsFileInfo *)file->private_data;
1439 tcon = tlink_tcon(cfile->tlink);
1441 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1444 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1445 netfid = cfile->fid.netfid;
1446 cinode = CIFS_I(file->f_path.dentry->d_inode);
/*
 * All three must hold: unix capability on the session, CIFS_UNIX_FCNTL_CAP
 * set in tcon->fsUnixInfo.Capability, and CIFS_MOUNT_NOPOSIXBRL not set.
 * NOTE(review): the guarded statement is not visible here - presumably it
 * sets posix_lck.
 */
1448 if (cap_unix(tcon->ses) &&
1449 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1450 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1453 * BB add code here to normalize offset and length to account for
1454 * negative length which we can not accept over the wire.
1456 if (IS_GETLK(cmd)) {
1457 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1462 if (!lock && !unlock) {
1464 * if no lock or unlock then nothing to do since we do not
1471 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1478 * update the file size (if needed) after a write. Should be called with
1479 * the inode->i_lock held
/*
 * @cifsi: inode info whose server_eof is tracked
 * @offset: file position at which the write started
 * @bytes_written: number of bytes the write covered
 *
 * server_eof only ever grows here: it is raised to offset + bytes_written
 * when that position lies beyond the current value and is otherwise left
 * untouched.
 */
1482 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1483 unsigned int bytes_written)
1485 loff_t end_of_write = offset + bytes_written;
1487 if (end_of_write > cifsi->server_eof)
1488 cifsi->server_eof = end_of_write;
/*
 * Synchronously write @write_size bytes from @write_data through
 * @open_file, starting at *@offset.
 *
 * The data is sent in chunks of at most cifs_sb->wsize bytes using
 * ops->sync_write(). Each chunk is retried while the result is -EAGAIN,
 * reopening the handle with cifs_reopen_file(open_file, false) when it is
 * marked invalid. After every chunk, server_eof is updated under i_lock and
 * *@offset advances by the number of bytes written. Once the loop ends the
 * byte statistics are updated, i_size is raised to *@offset if that is
 * larger, and the inode is marked dirty.
 *
 * Returns total_written on the path visible at the end of the function.
 * NOTE(review): the handling inside the "rc || bytes_written == 0" branch
 * and the use of @pid are not visible in this view.
 */
1492 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1493 size_t write_size, loff_t *offset)
1496 unsigned int bytes_written = 0;
1497 unsigned int total_written;
1498 struct cifs_sb_info *cifs_sb;
1499 struct cifs_tcon *tcon;
1500 struct TCP_Server_Info *server;
1502 struct dentry *dentry = open_file->dentry;
1503 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1504 struct cifs_io_parms io_parms;
1506 cifs_sb = CIFS_SB(dentry->d_sb);
1508 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1509 *offset, dentry->d_name.name);
1511 tcon = tlink_tcon(open_file->tlink);
1512 server = tcon->ses->server;
1514 if (!server->ops->sync_write)
/* One iteration per chunk; total_written advances by what was sent. */
1519 for (total_written = 0; write_size > total_written;
1520 total_written += bytes_written) {
1522 while (rc == -EAGAIN) {
1526 if (open_file->invalidHandle) {
1527 /* we could deadlock if we called
1528 filemap_fdatawait from here so tell
1529 reopen_file not to flush data to
1531 rc = cifs_reopen_file(open_file, false);
1536 len = min((size_t)cifs_sb->wsize,
1537 write_size - total_written);
1538 /* iov[0] is reserved for smb header */
1539 iov[1].iov_base = (char *)write_data + total_written;
1540 iov[1].iov_len = len;
1542 io_parms.tcon = tcon;
1543 io_parms.offset = *offset;
1544 io_parms.length = len;
1545 rc = server->ops->sync_write(xid, open_file, &io_parms,
1546 &bytes_written, iov, 1);
1548 if (rc || (bytes_written == 0)) {
/* Track the new server-side end of file, then move the file position. */
1556 spin_lock(&dentry->d_inode->i_lock);
1557 cifs_update_eof(cifsi, *offset, bytes_written);
1558 spin_unlock(&dentry->d_inode->i_lock);
1559 *offset += bytes_written;
1563 cifs_stats_bytes_written(tcon, total_written);
1565 if (total_written > 0) {
1566 spin_lock(&dentry->d_inode->i_lock);
1567 if (*offset > dentry->d_inode->i_size)
1568 i_size_write(dentry->d_inode, *offset);
1569 spin_unlock(&dentry->d_inode->i_lock);
1571 mark_inode_dirty_sync(dentry->d_inode);
1573 return total_written;
/*
 * Look through @cifs_inode->openFileList for a handle that was opened with
 * read access (FMODE_READ) and is not marked invalidHandle.
 *
 * The scan runs under cifs_file_list_lock. When a usable handle is found a
 * reference is taken with cifsFileInfo_get_locked() and the list lock is
 * released before the result is handed back. Entries whose uid differs from
 * current_fsuid() are skipped while fsuid_only is set. The scan stops at
 * the first handle without FMODE_READ.
 * NOTE(review): the statement guarded by the CIFS_MOUNT_MULTIUSER test and
 * the return statements are not visible in this view - presumably
 * fsuid_only is cleared on non-multiuser mounts and NULL is returned when
 * nothing matches.
 */
1576 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1579 struct cifsFileInfo *open_file = NULL;
1580 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1582 /* only filter by fsuid on multiuser mounts */
1583 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1586 spin_lock(&cifs_file_list_lock);
1587 /* we could simply get the first_list_entry since write-only entries
1588 are always at the end of the list but since the first entry might
1589 have a close pending, we go through the whole list */
1590 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1591 if (fsuid_only && open_file->uid != current_fsuid())
1593 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1594 if (!open_file->invalidHandle) {
1595 /* found a good file */
1596 /* lock it so it will not be closed on us */
1597 cifsFileInfo_get_locked(open_file);
1598 spin_unlock(&cifs_file_list_lock);
1600 } /* else might as well continue, and look for
1601 another, or simply have the caller reopen it
1602 again rather than trying to fix this handle */
1603 } else /* write only file */
1604 break; /* write only files are last so must be done */
1606 spin_unlock(&cifs_file_list_lock);
/*
 * Look through @cifs_inode->openFileList for a handle opened with write
 * access (FMODE_WRITE).
 *
 * The first pass only considers handles whose pid equals current->tgid; if
 * that finds nothing the scan is repeated with any_available set so that
 * handles of any pid qualify. Entries whose uid differs from
 * current_fsuid() are skipped while fsuid_only is set. A valid handle is
 * returned with a reference taken via cifsFileInfo_get_locked() and the
 * list lock dropped. A writable handle that is marked invalidHandle is
 * remembered in inv_file; with a reference held and the lock dropped it is
 * handed to cifs_reopen_file(). On the path visible at the end, the handle
 * is moved to the tail of the list, its reference is dropped, and the
 * search starts over. The number of restarts is bounded by MAX_REOPEN_ATT.
 * NOTE(review): the increment of refind, the success branch after the
 * reopen and the return statements are not visible in this view.
 */
1610 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1613 struct cifsFileInfo *open_file, *inv_file = NULL;
1614 struct cifs_sb_info *cifs_sb;
1615 bool any_available = false;
1617 unsigned int refind = 0;
1619 /* Having a null inode here (because mapping->host was set to zero by
1620 the VFS or MM) should not happen but we had reports of on oops (due to
1621 it being zero) during stress testcases so we need to check for it */
1623 if (cifs_inode == NULL) {
1624 cERROR(1, "Null inode passed to cifs_writeable_file");
1629 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1631 /* only filter by fsuid on multiuser mounts */
1632 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1635 spin_lock(&cifs_file_list_lock);
1637 if (refind > MAX_REOPEN_ATT) {
1638 spin_unlock(&cifs_file_list_lock);
1641 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1642 if (!any_available && open_file->pid != current->tgid)
1644 if (fsuid_only && open_file->uid != current_fsuid())
1646 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1647 if (!open_file->invalidHandle) {
1648 /* found a good writable file */
1649 cifsFileInfo_get_locked(open_file);
1650 spin_unlock(&cifs_file_list_lock);
1654 inv_file = open_file;
1658 /* couldn't find useable FH with same pid, try any available */
1659 if (!any_available) {
1660 any_available = true;
1661 goto refind_writable;
1665 any_available = false;
1666 cifsFileInfo_get_locked(inv_file);
1669 spin_unlock(&cifs_file_list_lock);
1672 rc = cifs_reopen_file(inv_file, false);
/* Push the handle to the back of the list before searching again. */
1676 spin_lock(&cifs_file_list_lock);
1677 list_move_tail(&inv_file->flist,
1678 &cifs_inode->openFileList);
1679 spin_unlock(&cifs_file_list_lock);
1680 cifsFileInfo_put(inv_file);
1681 spin_lock(&cifs_file_list_lock);
1683 goto refind_writable;
/*
 * Write the byte range [@from, @to) of @page to the server.
 *
 * The file offset is the page index shifted by PAGE_CACHE_SHIFT plus @from.
 * A range with @to beyond PAGE_CACHE_SIZE or @from beyond @to is rejected.
 * If the offset already lies past i_size the write is dropped and 0 is
 * returned (see the truncate remark below). @to is reduced when
 * i_size - offset is smaller than it, so the write never extends the file.
 * The data is sent with cifs_write() through a handle obtained from
 * find_writable_file(), after which atime and mtime are refreshed; when no
 * writable handle exists a debug message is logged.
 * NOTE(review): the result codes chosen in each branch and the kunmap()
 * calls are not visible in this view.
 */
1690 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1692 struct address_space *mapping = page->mapping;
1693 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1696 int bytes_written = 0;
1697 struct inode *inode;
1698 struct cifsFileInfo *open_file;
1700 if (!mapping || !mapping->host)
1703 inode = page->mapping->host;
1705 offset += (loff_t)from;
1706 write_data = kmap(page);
1709 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1714 /* racing with truncate? */
1715 if (offset > mapping->host->i_size) {
1717 return 0; /* don't care */
1720 /* check to make sure that we are not extending the file */
1721 if (mapping->host->i_size - offset < (loff_t)to)
1722 to = (unsigned)(mapping->host->i_size - offset);
1724 open_file = find_writable_file(CIFS_I(mapping->host), false);
1726 bytes_written = cifs_write(open_file, open_file->pid,
1727 write_data, to - from, &offset);
1728 cifsFileInfo_put(open_file);
1729 /* Does mm or vfs already set times? */
1730 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1731 if ((bytes_written > 0) && (offset))
1733 else if (bytes_written < 0)
1736 cFYI(1, "No writeable filehandles for inode");
/*
 * Write back dirty pages of @mapping in multi-page requests.
 *
 * Outline of the visible steps:
 *  - mounts whose wsize is below PAGE_CACHE_SIZE fall back to
 *    generic_writepages();
 *  - the start index comes from mapping->writeback_index for range_cyclic
 *    writeback, otherwise from wbc->range_start/range_end;
 *  - each loop iteration allocates a cifs_writedata, gathers dirty pages
 *    with find_get_pages_tag(), then locks and validates them one by one,
 *    keeping only a run of consecutive pages and marking them for
 *    writeback;
 *  - pages that will not be sent are released, the request fields are
 *    filled in, a writable handle is looked up and ops->async_writev() is
 *    called, repeating while the mode is WB_SYNC_ALL and the result is
 *    -EAGAIN;
 *  - all pages are unlocked; on a send failure each page is either
 *    redirtied or flagged with SetPageError, writeback is ended, the page
 *    reference is dropped and mapping_set_error() records the result;
 *  - wbc->nr_to_write is decreased by the number of pages sent.
 * NOTE(review): many control statements (break/continue, the wrap-around
 * retry, the final return) are not visible in this view.
 */
1744 static int cifs_writepages(struct address_space *mapping,
1745 struct writeback_control *wbc)
1747 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1748 bool done = false, scanned = false, range_whole = false;
1750 struct cifs_writedata *wdata;
1751 struct TCP_Server_Info *server;
1754 loff_t isize = i_size_read(mapping->host);
1757 * If wsize is smaller than the page cache size, default to writing
1758 * one page at a time via cifs_writepage
1760 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1761 return generic_writepages(mapping, wbc);
1763 if (wbc->range_cyclic) {
1764 index = mapping->writeback_index; /* Start from prev offset */
1767 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1768 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1769 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1774 while (!done && index <= end) {
1775 unsigned int i, nr_pages, found_pages;
1776 pgoff_t next = 0, tofind;
1777 struct page **pages;
1779 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1782 wdata = cifs_writedata_alloc((unsigned int)tofind,
1783 cifs_writev_complete);
1790 * find_get_pages_tag seems to return a max of 256 on each
1791 * iteration, so we must call it several times in order to
1792 * fill the array or the wsize is effectively limited to
1793 * 256 * PAGE_CACHE_SIZE.
1796 pages = wdata->pages;
1798 nr_pages = find_get_pages_tag(mapping, &index,
1799 PAGECACHE_TAG_DIRTY,
1801 found_pages += nr_pages;
1804 } while (nr_pages && tofind && index <= end);
/* Nothing dirty in range: drop the request that was just allocated. */
1806 if (found_pages == 0) {
1807 kref_put(&wdata->refcount, cifs_writedata_release);
1812 for (i = 0; i < found_pages; i++) {
1813 page = wdata->pages[i];
1815 * At this point we hold neither mapping->tree_lock nor
1816 * lock on the page itself: the page may be truncated or
1817 * invalidated (changing page->mapping to NULL), or even
1818 * swizzled back from swapper_space to tmpfs file
1824 else if (!trylock_page(page))
1827 if (unlikely(page->mapping != mapping)) {
1832 if (!wbc->range_cyclic && page->index > end) {
1838 if (next && (page->index != next)) {
1839 /* Not next consecutive page */
1844 if (wbc->sync_mode != WB_SYNC_NONE)
1845 wait_on_page_writeback(page);
1847 if (PageWriteback(page) ||
1848 !clear_page_dirty_for_io(page)) {
1854 * This actually clears the dirty bit in the radix tree.
1855 * See cifs_writepage() for more commentary.
1857 set_page_writeback(page);
1859 if (page_offset(page) >= isize) {
1862 end_page_writeback(page);
1866 wdata->pages[i] = page;
1867 next = page->index + 1;
1871 /* reset index to refind any pages skipped */
1873 index = wdata->pages[0]->index + 1;
1875 /* put any pages we aren't going to use */
1876 for (i = nr_pages; i < found_pages; i++) {
1877 page_cache_release(wdata->pages[i]);
1878 wdata->pages[i] = NULL;
1881 /* nothing to write? */
1882 if (nr_pages == 0) {
1883 kref_put(&wdata->refcount, cifs_writedata_release);
1887 wdata->sync_mode = wbc->sync_mode;
1888 wdata->nr_pages = nr_pages;
1889 wdata->offset = page_offset(wdata->pages[0]);
1890 wdata->pagesz = PAGE_CACHE_SIZE;
1892 min(isize - page_offset(wdata->pages[nr_pages - 1]),
1893 (loff_t)PAGE_CACHE_SIZE);
1894 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
/* Each attempt drops any handle from a prior attempt and looks up a new one. */
1898 if (wdata->cfile != NULL)
1899 cifsFileInfo_put(wdata->cfile);
1900 wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1902 if (!wdata->cfile) {
1903 cERROR(1, "No writable handles for inode");
1907 wdata->pid = wdata->cfile->pid;
1908 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
1909 rc = server->ops->async_writev(wdata);
1910 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1912 for (i = 0; i < nr_pages; ++i)
1913 unlock_page(wdata->pages[i]);
1915 /* send failure -- clean up the mess */
1917 for (i = 0; i < nr_pages; ++i) {
1919 redirty_page_for_writepage(wbc,
1922 SetPageError(wdata->pages[i]);
1923 end_page_writeback(wdata->pages[i]);
1924 page_cache_release(wdata->pages[i]);
1927 mapping_set_error(mapping, rc);
1929 kref_put(&wdata->refcount, cifs_writedata_release);
1931 wbc->nr_to_write -= nr_pages;
1932 if (wbc->nr_to_write <= 0)
1938 if (!scanned && !done) {
1940 * We hit the last page and there is more work to be done: wrap
1941 * back to the start of the file
/* Remember where to resume on the next cyclic or whole-range pass. */
1948 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1949 mapping->writeback_index = index;
/*
 * Write one complete page to the server.
 *
 * An extra page reference is held for the duration of the call. The page is
 * put under writeback and the full range 0..PAGE_CACHE_SIZE is handed to
 * cifs_partialpagewrite(). An -EAGAIN result is treated specially: outside
 * WB_SYNC_ALL mode the page is redirtied with redirty_page_for_writepage().
 * Writeback is ended and the extra reference dropped before returning.
 * NOTE(review): the action taken for -EAGAIN under WB_SYNC_ALL (presumably
 * a retry), the condition guarding SetPageUptodate() and the return value
 * are not visible in this view.
 */
1955 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1961 /* BB add check for wbc flags */
1962 page_cache_get(page);
1963 if (!PageUptodate(page))
1964 cFYI(1, "ppw - page not up to date");
1967 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1969 * A writepage() implementation always needs to do either this,
1970 * or re-dirty the page with "redirty_page_for_writepage()" in
1971 * the case of a failure.
1973 * Just unlocking the page will cause the radix tree tag-bits
1974 * to fail to update with the state of the page correctly.
1976 set_page_writeback(page);
1978 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1979 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1981 else if (rc == -EAGAIN)
1982 redirty_page_for_writepage(wbc, page);
1986 SetPageUptodate(page);
1987 end_page_writeback(page);
1988 page_cache_release(page);
/*
 * Single-page writeback entry point: delegates the work to
 * cifs_writepage_locked().
 * NOTE(review): the statements following the call are not visible in this
 * view - presumably the page is unlocked and rc returned.
 */
1993 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1995 int rc = cifs_writepage_locked(page, wbc);
/*
 * Finish a buffered write of @copied bytes at @pos into @page.
 *
 * A page flagged PageChecked is marked up to date and the flag is cleared;
 * a page that received a full PAGE_CACHE_SIZE copy is marked up to date as
 * well. If the page is still not up to date, the copied bytes are written
 * straight to the server with cifs_write() using the handle stored in
 * @file. Otherwise the page is marked dirty. i_size is raised under i_lock
 * when @pos lies beyond it, and the page reference is released at the end.
 * The pid used for the write is chosen according to
 * CIFS_MOUNT_RWPIDFORWARD; the forwarded value itself is not visible here,
 * the alternative is current->tgid.
 * NOTE(review): the branches that adjust rc and @pos and the final return
 * are not visible in this view.
 */
2000 static int cifs_write_end(struct file *file, struct address_space *mapping,
2001 loff_t pos, unsigned len, unsigned copied,
2002 struct page *page, void *fsdata)
2005 struct inode *inode = mapping->host;
2006 struct cifsFileInfo *cfile = file->private_data;
2007 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2010 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2013 pid = current->tgid;
2015 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2018 if (PageChecked(page)) {
2020 SetPageUptodate(page);
2021 ClearPageChecked(page);
2022 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2023 SetPageUptodate(page);
2025 if (!PageUptodate(page)) {
/* Offset of the write within the page. */
2027 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2031 /* this is probably better than directly calling
2032 partialpage_write since in this function the file handle is
2033 known which we might as well leverage */
2034 /* BB check if anything else missing out of ppw
2035 such as updating last write time */
2036 page_data = kmap(page);
2037 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2038 /* if (rc < 0) should we set writebehind rc? */
2045 set_page_dirty(page);
2049 spin_lock(&inode->i_lock);
2050 if (pos > inode->i_size)
2051 i_size_write(inode, pos);
2052 spin_unlock(&inode->i_lock);
2056 page_cache_release(page);
/*
 * fsync variant that also invalidates cached pages when needed.
 *
 * Dirty pages in [@start, @end] are written and waited for first. Then,
 * under i_mutex: if the inode does not have clientCanCacheRead set, the
 * mapping is invalidated with cifs_invalidate_mapping() and a failure of
 * that step is logged but ignored (rc reset to 0). Unless the mount has
 * CIFS_MOUNT_NOSSYNC set, ops->flush() is called for the file handle when
 * the server implements it.
 * NOTE(review): the error checks after filemap_write_and_wait_range(), the
 * xid handling and the return are not visible in this view.
 */
2061 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2066 struct cifs_tcon *tcon;
2067 struct TCP_Server_Info *server;
2068 struct cifsFileInfo *smbfile = file->private_data;
2069 struct inode *inode = file->f_path.dentry->d_inode;
2070 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2072 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2075 mutex_lock(&inode->i_mutex);
2079 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2080 file->f_path.dentry->d_name.name, datasync);
2082 if (!CIFS_I(inode)->clientCanCacheRead) {
2083 rc = cifs_invalidate_mapping(inode);
2085 cFYI(1, "rc: %d during invalidate phase", rc);
2086 rc = 0; /* don't care about it in fsync */
2090 tcon = tlink_tcon(smbfile->tlink);
2091 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2092 server = tcon->ses->server;
2093 if (server->ops->flush)
2094 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2100 mutex_unlock(&inode->i_mutex);
/*
 * Flush @file to the server.
 *
 * Dirty pages in [@start, @end] are written and waited for, then, under
 * i_mutex, ops->flush() is called for the file handle unless the mount has
 * CIFS_MOUNT_NOSSYNC set or the server does not implement flush. Unlike
 * cifs_strict_fsync() no mapping invalidation is visible here.
 * NOTE(review): the error check after filemap_write_and_wait_range(), the
 * xid handling and the return are not visible in this view.
 */
2104 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2108 struct cifs_tcon *tcon;
2109 struct TCP_Server_Info *server;
2110 struct cifsFileInfo *smbfile = file->private_data;
2111 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2112 struct inode *inode = file->f_mapping->host;
2114 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2117 mutex_lock(&inode->i_mutex);
2121 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2122 file->f_path.dentry->d_name.name, datasync);
2124 tcon = tlink_tcon(smbfile->tlink);
2125 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2126 server = tcon->ses->server;
2127 if (server->ops->flush)
2128 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2134 mutex_unlock(&inode->i_mutex);
2139 * As file closes, flush all cached write data for this inode checking
2140 * for write behind errors.
/*
 * Only files opened with FMODE_WRITE are flushed: their dirty pages are
 * written and waited for with filemap_write_and_wait(). The outcome is
 * logged together with the inode and file pointers.
 * NOTE(review): @id is not referenced in the visible lines; the
 * initialisation of rc and the return are not visible in this view.
 */
2142 int cifs_flush(struct file *file, fl_owner_t id)
2144 struct inode *inode = file->f_path.dentry->d_inode;
2147 if (file->f_mode & FMODE_WRITE)
2148 rc = filemap_write_and_wait(inode->i_mapping);
2150 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
/*
 * Fill @pages[0..@num_pages) with freshly allocated pages
 * (GFP_KERNEL | __GFP_HIGHMEM).
 *
 * Per the remark inside the first loop, an allocation failure records how
 * many pages were obtained and results in an ENOMEM error. A second loop
 * over the array follows.
 * NOTE(review): the body of the second loop is not visible in this view -
 * presumably it releases the pages obtained before the failure.
 */
2156 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2161 for (i = 0; i < num_pages; i++) {
2162 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2165 * save number of pages we have already allocated and
2166 * return with ENOMEM error
2175 for (i = 0; i < num_pages; i++)
/*
 * Work out how many pages one write request needs.
 *
 * The request length is the smaller of @len and @wsize; the page count is
 * that length divided by PAGE_SIZE, rounded up.
 * NOTE(review): the store through @cur_len and the return are not visible
 * in this view - presumably *cur_len receives the clamped length and the
 * page count is returned.
 */
2182 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2187 clen = min_t(const size_t, len, wsize);
2188 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/*
 * Work handler run when an uncached write request has completed.
 *
 * Under i_lock the tracked server_eof is updated for the written range and
 * i_size is raised to it when it is larger. The waiter is then woken via
 * wdata->done. The data pages are released unless the result is -EAGAIN;
 * in that case they are kept (presumably so the request can be resent).
 * Finally one reference on the request is dropped.
 */
2197 cifs_uncached_writev_complete(struct work_struct *work)
2200 struct cifs_writedata *wdata = container_of(work,
2201 struct cifs_writedata, work);
2202 struct inode *inode = wdata->cfile->dentry->d_inode;
2203 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2205 spin_lock(&inode->i_lock);
2206 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2207 if (cifsi->server_eof > inode->i_size)
2208 i_size_write(inode, cifsi->server_eof);
2209 spin_unlock(&inode->i_lock);
2211 complete(&wdata->done);
2213 if (wdata->result != -EAGAIN) {
2214 for (i = 0; i < wdata->nr_pages; i++)
2215 put_page(wdata->pages[i]);
2218 kref_put(&wdata->refcount, cifs_writedata_release);
2221 /* attempt to send write to server, retry on any -EAGAIN errors */
/*
 * Each attempt first reopens the file handle with
 * cifs_reopen_file(cfile, false) when it is marked invalid, then submits
 * the request through ops->async_writev(). The loop ends as soon as the
 * result is anything other than -EAGAIN.
 * NOTE(review): the check on the reopen result and the return are not
 * visible in this view.
 */
2223 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2226 struct TCP_Server_Info *server;
2228 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2231 if (wdata->cfile->invalidHandle) {
2232 rc = cifs_reopen_file(wdata->cfile, false);
2236 rc = server->ops->async_writev(wdata);
2237 } while (rc == -EAGAIN);
/*
 * Uncached write of the user buffers in @iov to @file at *@poffset.
 *
 * Phase 1 (send): after generic_write_checks() the data is split into
 * requests sized by get_numpages() against cifs_sb->wsize. For each request
 * a cifs_writedata is allocated, pages are allocated, the user data is
 * copied in page by page, the request fields are filled in and it is
 * submitted with cifs_uncached_retry_writev(). A request that fails to
 * send has its reference dropped; one that was sent is queued on
 * wdata_list.
 *
 * Phase 2 (collect): the queued requests are waited for in list order with
 * wait_for_completion_killable(); the bytes of successful requests are
 * added to total_written and a request that ended with -EAGAIN is resent.
 * Each request is unlinked and released afterwards.
 *
 * *@poffset advances by total_written, the byte statistics are updated and
 * the function returns total_written, or rc when nothing was written. The
 * pid is open_file->pid when CIFS_MOUNT_RWPIDFORWARD is set, otherwise
 * current->tgid.
 * NOTE(review): loop headers, error branches and several assignments are
 * not visible in this view.
 */
2243 cifs_iovec_write(struct file *file, const struct iovec *iov,
2244 unsigned long nr_segs, loff_t *poffset)
2246 unsigned long nr_pages, i;
2247 size_t copied, len, cur_len;
2248 ssize_t total_written = 0;
2251 struct cifsFileInfo *open_file;
2252 struct cifs_tcon *tcon;
2253 struct cifs_sb_info *cifs_sb;
2254 struct cifs_writedata *wdata, *tmp;
2255 struct list_head wdata_list;
2259 len = iov_length(iov, nr_segs);
2263 rc = generic_write_checks(file, poffset, &len, 0);
2267 INIT_LIST_HEAD(&wdata_list);
2268 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2269 open_file = file->private_data;
2270 tcon = tlink_tcon(open_file->tlink);
2272 if (!tcon->ses->server->ops->async_writev)
2277 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2278 pid = open_file->pid;
2280 pid = current->tgid;
2282 iov_iter_init(&it, iov, nr_segs, len, 0);
2286 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2287 wdata = cifs_writedata_alloc(nr_pages,
2288 cifs_uncached_writev_complete);
2294 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
/* Copy the user data into the request pages, one page at a time. */
2301 for (i = 0; i < nr_pages; i++) {
2302 copied = min_t(const size_t, cur_len, PAGE_SIZE);
2303 copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2306 iov_iter_advance(&it, copied);
2308 cur_len = save_len - cur_len;
2310 wdata->sync_mode = WB_SYNC_ALL;
2311 wdata->nr_pages = nr_pages;
2312 wdata->offset = (__u64)offset;
2313 wdata->cfile = cifsFileInfo_get(open_file);
2315 wdata->bytes = cur_len;
2316 wdata->pagesz = PAGE_SIZE;
2317 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2318 rc = cifs_uncached_retry_writev(wdata);
2320 kref_put(&wdata->refcount, cifs_writedata_release);
2324 list_add_tail(&wdata->list, &wdata_list);
2330 * If at least one write was successfully sent, then discard any rc
2331 * value from the later writes. If the other write succeeds, then
2332 * we'll end up returning whatever was written. If it fails, then
2333 * we'll get a new rc value from that.
2335 if (!list_empty(&wdata_list))
2339 * Wait for and collect replies for any successful sends in order of
2340 * increasing offset. Once an error is hit or we get a fatal signal
2341 * while waiting, then return without waiting for any more replies.
2344 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2346 /* FIXME: freezable too? */
2347 rc = wait_for_completion_killable(&wdata->done);
2350 else if (wdata->result)
2353 total_written += wdata->bytes;
2355 /* resend call if it's a retryable error */
2356 if (rc == -EAGAIN) {
2357 rc = cifs_uncached_retry_writev(wdata);
2361 list_del_init(&wdata->list);
2362 kref_put(&wdata->refcount, cifs_writedata_release);
2365 if (total_written > 0)
2366 *poffset += total_written;
2368 cifs_stats_bytes_written(tcon, total_written);
2369 return total_written ? total_written : (ssize_t)rc;
/*
 * Uncached write entry point: passes the request to cifs_iovec_write() and
 * flags the inode mapping as invalid (invalid_mapping = true) so cached
 * pages are not trusted afterwards.
 * NOTE(review): the condition guarding the invalid_mapping store, the
 * update of iocb->ki_pos and the return are not visible in this view.
 */
2372 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2373 unsigned long nr_segs, loff_t pos)
2376 struct inode *inode;
2378 inode = iocb->ki_filp->f_path.dentry->d_inode;
2381 * BB - optimize the way when signing is disabled. We can drop this
2382 * extra memory-to-memory copying and use iovec buffers for constructing
2386 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2388 CIFS_I(inode)->invalid_mapping = true;
/*
 * Write entry point for strict cache mode.
 *
 * When the inode has clientCanCacheAll set the write goes through the page
 * cache via generic_file_aio_write(); otherwise it is sent directly to the
 * server with cifs_user_writev().
 */
2395 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2396 unsigned long nr_segs, loff_t pos)
2398 struct inode *inode;
2400 inode = iocb->ki_filp->f_path.dentry->d_inode;
2402 if (CIFS_I(inode)->clientCanCacheAll)
2403 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2406 * In strict cache mode we need to write the data to the server exactly
2407 * from the pos to pos+len-1 rather than flush all affected pages
2408 * because it may cause a error with mandatory locks on these pages but
2409 * not on the region from pos to ppos+len-1.
2412 return cifs_user_writev(iocb, iov, nr_segs, pos);
/*
 * Allocate a zeroed cifs_readdata with trailing room for @nr_pages page
 * pointers. On success the refcount, list head and completion are
 * initialised and @complete is installed as the work function.
 * NOTE(review): the allocation flags and the return are not visible in
 * this view - presumably the pointer (NULL on failure) is returned.
 */
2415 static struct cifs_readdata *
2416 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2418 struct cifs_readdata *rdata;
2420 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2422 if (rdata != NULL) {
2423 kref_init(&rdata->refcount);
2424 INIT_LIST_HEAD(&rdata->list);
2425 init_completion(&rdata->done);
2426 INIT_WORK(&rdata->work, complete);
/*
 * kref release callback for a cifs_readdata: drops the reference held on
 * the file handle stored in rdata->cfile.
 * NOTE(review): any guard around that call and the freeing of rdata itself
 * are not visible in this view.
 */
2433 cifs_readdata_release(struct kref *refcount)
2435 struct cifs_readdata *rdata = container_of(refcount,
2436 struct cifs_readdata, refcount);
2439 cifsFileInfo_put(rdata->cfile);
/*
 * Allocate @nr_pages pages (GFP_KERNEL | __GFP_HIGHMEM) and store them in
 * rdata->pages[]. The second loop releases every entry and resets the slot
 * to NULL.
 * NOTE(review): the failure check after alloc_page() and the condition
 * under which the release loop runs are not visible in this view -
 * presumably it is the error path only.
 */
2445 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2451 for (i = 0; i < nr_pages; i++) {
2452 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2457 rdata->pages[i] = page;
2461 for (i = 0; i < nr_pages; i++) {
2462 put_page(rdata->pages[i]);
2463 rdata->pages[i] = NULL;
/*
 * kref release callback for uncached reads: releases every page referenced
 * by rdata->pages[0..nr_pages), clears the slots, and then hands over to
 * cifs_readdata_release() for the common teardown.
 */
2470 cifs_uncached_readdata_release(struct kref *refcount)
2472 struct cifs_readdata *rdata = container_of(refcount,
2473 struct cifs_readdata, refcount);
2476 for (i = 0; i < rdata->nr_pages; i++) {
2477 put_page(rdata->pages[i]);
2478 rdata->pages[i] = NULL;
2480 cifs_readdata_release(refcount);
/*
 * Submit an asynchronous read, retrying while the result is -EAGAIN.
 *
 * Each attempt first reopens the file handle with
 * cifs_reopen_file(cfile, true) when it is marked invalid, then calls
 * ops->async_readv().
 * NOTE(review): the check on the reopen result and the return are not
 * visible in this view.
 */
2484 cifs_retry_async_readv(struct cifs_readdata *rdata)
2487 struct TCP_Server_Info *server;
2489 server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2492 if (rdata->cfile->invalidHandle) {
2493 rc = cifs_reopen_file(rdata->cfile, true);
2497 rc = server->ops->async_readv(rdata);
2498 } while (rc == -EAGAIN);
2504 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2505 * @rdata: the readdata response with list of pages holding data
2506 * @iov: vector in which we should copy the data
2507 * @nr_segs: number of segments in vector
2508 * @offset: offset into file of the first iovec
2509 * @copied: used to return the amount of data copied to the iov
2511 * This function copies data from a list of pages in a readdata response into
2512 * an array of iovecs. It will first calculate where the data should go
2513 * based on the info in the readdata and then copy the data into that spot.
/*
 * Implementation notes: the starting position inside the iovec array is
 * rdata->offset - @offset, reached by advancing a local iov_iter. Each page
 * then contributes min(remaining, PAGE_SIZE) bytes, further limited to the
 * space left in the iterator, copied with memcpy_toiovecend().
 * NOTE(review): the page mapping calls, the bookkeeping of remaining and
 * *@copied, and the return are not visible in this view.
 */
2516 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2517 unsigned long nr_segs, loff_t offset, ssize_t *copied)
2521 size_t pos = rdata->offset - offset;
2522 ssize_t remaining = rdata->bytes;
2523 unsigned char *pdata;
2526 /* set up iov_iter and advance to the correct offset */
2527 iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2528 iov_iter_advance(&ii, pos);
2531 for (i = 0; i < rdata->nr_pages; i++) {
2533 struct page *page = rdata->pages[i];
2535 /* copy a whole page or whatever's left */
2536 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2538 /* ...but limit it to whatever space is left in the iov */
2539 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2541 /* go while there's data to be copied and no errors */
2544 rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2550 iov_iter_advance(&ii, copy);
/*
 * Work handler run when an uncached read request has completed: wakes the
 * waiter through rdata->done and drops one reference on the request
 * (released via cifs_uncached_readdata_release()).
 */
2559 cifs_uncached_readv_complete(struct work_struct *work)
2561 struct cifs_readdata *rdata = container_of(work,
2562 struct cifs_readdata, work);
2564 complete(&rdata->done);
2565 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * Receive @len bytes of read payload from the socket into rdata->pages[].
 *
 * Pages are filled in order. While at least PAGE_SIZE bytes remain a whole
 * page is read. A final partial page has its unused tail zeroed and its
 * length recorded in rdata->tailsz (which starts out as PAGE_SIZE). Pages
 * for which no data remains are dropped from the array (slot set to NULL).
 * The data itself is read with cifs_readv_from_socket().
 *
 * Returns the number of bytes read when that is positive, otherwise the
 * last result of the socket read.
 * NOTE(review): the updates of @len, the kunmap()/put_page() calls and the
 * error break are not visible in this view.
 */
2569 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2570 struct cifs_readdata *rdata, unsigned int len)
2572 int total_read = 0, result = 0;
2574 unsigned int nr_pages = rdata->nr_pages;
2577 rdata->tailsz = PAGE_SIZE;
2578 for (i = 0; i < nr_pages; i++) {
2579 struct page *page = rdata->pages[i];
2581 if (len >= PAGE_SIZE) {
2582 /* enough data to fill the page */
2583 iov.iov_base = kmap(page);
2584 iov.iov_len = PAGE_SIZE;
2585 cFYI(1, "%u: iov_base=%p iov_len=%zu",
2586 i, iov.iov_base, iov.iov_len);
2588 } else if (len > 0) {
2589 /* enough for partial page, fill and zero the rest */
2590 iov.iov_base = kmap(page);
2592 cFYI(1, "%u: iov_base=%p iov_len=%zu",
2593 i, iov.iov_base, iov.iov_len);
2594 memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2595 rdata->tailsz = len;
2598 /* no need to hold page hostage */
2599 rdata->pages[i] = NULL;
2605 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2610 total_read += result;
2613 return total_read > 0 ? total_read : result;
/*
 * Uncached read from @file at *@poffset into the user buffers in @iov.
 *
 * Phase 1 (send): the request is split into pieces of at most
 * cifs_sb->rsize bytes. For each piece a cifs_readdata is allocated, pages
 * are allocated, the request fields are filled in (with
 * cifs_uncached_read_into_pages() as the receive callback) and it is
 * submitted with cifs_retry_async_readv(). A request that fails to send
 * has its reference dropped; one that was sent is queued on rdata_list.
 *
 * Phase 2 (collect): the queued requests are waited for in list order with
 * wait_for_completion_killable(); data of successful requests is copied to
 * the user buffers with cifs_readdata_to_iov() and a request that ended
 * with -EAGAIN is resent. Each request is unlinked and released afterwards.
 *
 * The byte statistics are updated, *@poffset advances by total_read and the
 * function returns total_read, or rc when nothing was read. The pid is
 * open_file->pid when CIFS_MOUNT_RWPIDFORWARD is set, otherwise
 * current->tgid.
 * NOTE(review): loop headers, error branches and several assignments are
 * not visible in this view.
 */
2617 cifs_iovec_read(struct file *file, const struct iovec *iov,
2618 unsigned long nr_segs, loff_t *poffset)
2621 size_t len, cur_len;
2622 ssize_t total_read = 0;
2623 loff_t offset = *poffset;
2624 unsigned int npages;
2625 struct cifs_sb_info *cifs_sb;
2626 struct cifs_tcon *tcon;
2627 struct cifsFileInfo *open_file;
2628 struct cifs_readdata *rdata, *tmp;
2629 struct list_head rdata_list;
2635 len = iov_length(iov, nr_segs);
2639 INIT_LIST_HEAD(&rdata_list);
2640 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2641 open_file = file->private_data;
2642 tcon = tlink_tcon(open_file->tlink);
2644 if (!tcon->ses->server->ops->async_readv)
2647 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2648 pid = open_file->pid;
2650 pid = current->tgid;
2652 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2653 cFYI(1, "attempting read on write only file instance");
/* Size of this piece: what is still wanted, capped at rsize. */
2656 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2657 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2659 /* allocate a readdata struct */
2660 rdata = cifs_readdata_alloc(npages,
2661 cifs_uncached_readv_complete);
2667 rc = cifs_read_allocate_pages(rdata, npages);
2671 rdata->cfile = cifsFileInfo_get(open_file);
2672 rdata->nr_pages = npages;
2673 rdata->offset = offset;
2674 rdata->bytes = cur_len;
2676 rdata->pagesz = PAGE_SIZE;
2677 rdata->read_into_pages = cifs_uncached_read_into_pages;
2679 rc = cifs_retry_async_readv(rdata);
2682 kref_put(&rdata->refcount,
2683 cifs_uncached_readdata_release);
2687 list_add_tail(&rdata->list, &rdata_list);
2692 /* if at least one read request send succeeded, then reset rc */
2693 if (!list_empty(&rdata_list))
2696 /* the loop below should proceed in the order of increasing offsets */
2698 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2702 /* FIXME: freezable sleep too? */
2703 rc = wait_for_completion_killable(&rdata->done);
2706 else if (rdata->result)
2709 rc = cifs_readdata_to_iov(rdata, iov,
2712 total_read += copied;
2715 /* resend call if it's a retryable error */
2716 if (rc == -EAGAIN) {
2717 rc = cifs_retry_async_readv(rdata);
2721 list_del_init(&rdata->list);
2722 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2725 cifs_stats_bytes_read(tcon, total_read);
2726 *poffset += total_read;
2728 /* mask nodata case */
2732 return total_read ? total_read : rc;
/*
 * Uncached read entry point: passes the request to cifs_iovec_read() with
 * a local copy of @pos.
 * NOTE(review): the update of iocb->ki_pos and the return are not visible
 * in this view.
 */
2735 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2736 unsigned long nr_segs, loff_t pos)
2740 read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
/*
 * Read entry point for strict cache mode.
 *
 * When the inode has clientCanCacheRead set the read is served through the
 * page cache via generic_file_aio_read(); otherwise it always goes to the
 * server with cifs_user_readv().
 */
2747 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2748 unsigned long nr_segs, loff_t pos)
2750 struct inode *inode;
2752 inode = iocb->ki_filp->f_path.dentry->d_inode;
2754 if (CIFS_I(inode)->clientCanCacheRead)
2755 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2758 * In strict cache mode we need to read from the server all the time
2759 * if we don't have level II oplock because the server can delay mtime
2760 * change - so we can't make a decision about inode invalidating.
2761 * And we can also fail with pagereading if there are mandatory locks
2762 * on pages affected by this read but not on the region from pos to
2766 return cifs_user_readv(iocb, iov, nr_segs, pos);
/*
 * Synchronously read up to @read_size bytes from @file at *@offset into
 * @read_data.
 *
 * The per-request size is the smaller of cifs_sb->rsize and CIFSMaxBufSize,
 * and is reduced further for sessions without the large-files capability.
 * Each request is issued with ops->sync_read() and retried while the
 * result is -EAGAIN, reopening the handle with
 * cifs_reopen_file(open_file, true) when it is marked invalid. After a
 * request the byte statistics are updated and *@offset advances by the
 * bytes read. The pid is open_file->pid when CIFS_MOUNT_RWPIDFORWARD is
 * set, otherwise current->tgid.
 * NOTE(review): the handling of a missing private_data or sync_read, the
 * branch taken for "rc || bytes_read == 0" and the return are not visible
 * in this view.
 */
2770 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2773 unsigned int bytes_read = 0;
2774 unsigned int total_read;
2775 unsigned int current_read_size;
2777 struct cifs_sb_info *cifs_sb;
2778 struct cifs_tcon *tcon;
2779 struct TCP_Server_Info *server;
2782 struct cifsFileInfo *open_file;
2783 struct cifs_io_parms io_parms;
2784 int buf_type = CIFS_NO_BUFFER;
2788 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2790 /* FIXME: set up handlers for larger reads and/or convert to async */
2791 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2793 if (file->private_data == NULL) {
2798 open_file = file->private_data;
2799 tcon = tlink_tcon(open_file->tlink);
2800 server = tcon->ses->server;
2802 if (!server->ops->sync_read) {
2807 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2808 pid = open_file->pid;
2810 pid = current->tgid;
2812 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2813 cFYI(1, "attempting read on write only file instance");
/* cur_offset walks through read_data as total_read grows. */
2815 for (total_read = 0, cur_offset = read_data; read_size > total_read;
2816 total_read += bytes_read, cur_offset += bytes_read) {
2817 current_read_size = min_t(uint, read_size - total_read, rsize);
2819 * For windows me and 9x we do not want to request more than it
2820 * negotiated since it will refuse the read then.
2822 if ((tcon->ses) && !(tcon->ses->capabilities &
2823 tcon->ses->server->vals->cap_large_files)) {
2824 current_read_size = min_t(uint, current_read_size,
2828 while (rc == -EAGAIN) {
2829 if (open_file->invalidHandle) {
2830 rc = cifs_reopen_file(open_file, true);
2835 io_parms.tcon = tcon;
2836 io_parms.offset = *offset;
2837 io_parms.length = current_read_size;
2838 rc = server->ops->sync_read(xid, open_file, &io_parms,
2839 &bytes_read, &cur_offset,
2842 if (rc || (bytes_read == 0)) {
2850 cifs_stats_bytes_read(tcon, total_read);
2851 *offset += bytes_read;
2859 * If the page is mmap'ed into a process' page tables, then we need to make
2860 * sure that it doesn't change while being written back.
/*
 * page_mkwrite handler used for CIFS file mappings.  It operates on the
 * faulting page (vmf->page) and reports VM_FAULT_LOCKED to the caller.
 * NOTE(review): the statement between these lines is not visible in this
 * listing; presumably it locks the page.  Confirm against the full source.
 */
2863 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2865 struct page *page = vmf->page;
2868 return VM_FAULT_LOCKED;
/*
 * VM operations that the CIFS mmap entry points assign to vma->vm_ops:
 * faults are served by the generic filemap_fault, and page_mkwrite
 * notifications go to cifs_page_mkwrite.
 */
2871 static struct vm_operations_struct cifs_file_vm_ops = {
2872 .fault = filemap_fault,
2873 .page_mkwrite = cifs_page_mkwrite,
/*
 * mmap entry point that calls cifs_invalidate_mapping() on the inode when
 * clientCanCacheRead is clear, then defers to generic_file_mmap() and
 * installs cifs_file_vm_ops on the vma.
 * NOTE(review): the error checks between these steps and the return
 * statement are not visible in this listing.
 */
2876 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
2879 struct inode *inode = file->f_path.dentry->d_inode;
2883 if (!CIFS_I(inode)->clientCanCacheRead) {
2884 rc = cifs_invalidate_mapping(inode);
2889 rc = generic_file_mmap(file, vma);
2891 vma->vm_ops = &cifs_file_vm_ops;
/*
 * mmap entry point: revalidates the file with cifs_revalidate_file(), with a
 * debug message carrying the error code for the failure case, then defers to
 * generic_file_mmap() and installs cifs_file_vm_ops on the vma.
 * NOTE(review): the conditions guarding each step are not visible in this
 * listing.
 */
2896 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2901 rc = cifs_revalidate_file(file);
2903 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
2907 rc = generic_file_mmap(file, vma);
2909 vma->vm_ops = &cifs_file_vm_ops;
/*
 * Completion work handler for a read described by a cifs_readdata.
 *
 * Recovers the cifs_readdata from its embedded work item, then for every
 * page in rdata->pages: adds it to the file LRU, flushes the dcache and marks
 * it up to date when rdata->result is 0, hands it to fscache when
 * rdata->result is 0, drops the page reference and clears the array slot.
 * Finally it drops one reference on the rdata itself.
 */
2915 cifs_readv_complete(struct work_struct *work)
2918 struct cifs_readdata *rdata = container_of(work,
2919 struct cifs_readdata, work);
2921 for (i = 0; i < rdata->nr_pages; i++) {
2922 struct page *page = rdata->pages[i];
2924 lru_cache_add_file(page);
2926 if (rdata->result == 0) {
2927 flush_dcache_page(page);
2928 SetPageUptodate(page);
2933 if (rdata->result == 0)
2934 cifs_readpage_to_fscache(rdata->mapping->host, page);
2936 page_cache_release(page);
2937 rdata->pages[i] = NULL;
2939 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * Receive len bytes of read payload from the socket into rdata->pages, using
 * cifs_readv_from_socket() with a single iovec.
 *
 * Each page, in array order, falls into one of four cases:
 *  - at least a full page of data remains: map the page and set up a
 *    PAGE_CACHE_SIZE iovec;
 *  - a partial page remains: map the page, zero the bytes past len and
 *    record len in rdata->tailsz;
 *  - no data remains and the page index is beyond the index of the server
 *    EOF: zero-fill the page, mark it up to date, drop it and clear its slot;
 *  - otherwise: put the page on the LRU, drop it and clear its slot.
 *
 * Returns total_read when it is positive, otherwise the value of result.
 *
 * NOTE(review): the embedded line numbers have gaps, so unlock, continue and
 * error-exit statements are not visible in this listing.
 */
2943 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
2944 struct cifs_readdata *rdata, unsigned int len)
2946 int total_read = 0, result = 0;
2950 unsigned int nr_pages = rdata->nr_pages;
2953 /* determine the eof that the server (probably) has */
2954 eof = CIFS_I(rdata->mapping->host)->server_eof;
2955 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
2956 cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
2958 rdata->tailsz = PAGE_CACHE_SIZE;
2959 for (i = 0; i < nr_pages; i++) {
2960 struct page *page = rdata->pages[i];
2962 if (len >= PAGE_CACHE_SIZE) {
2963 /* enough data to fill the page */
2964 iov.iov_base = kmap(page);
2965 iov.iov_len = PAGE_CACHE_SIZE;
2966 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2967 i, page->index, iov.iov_base, iov.iov_len);
2968 len -= PAGE_CACHE_SIZE;
2969 } else if (len > 0) {
2970 /* enough for partial page, fill and zero the rest */
2971 iov.iov_base = kmap(page);
2973 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2974 i, page->index, iov.iov_base, iov.iov_len);
2975 memset(iov.iov_base + len,
2976 '\0', PAGE_CACHE_SIZE - len);
2977 rdata->tailsz = len;
2979 } else if (page->index > eof_index) {
2981 * The VFS will not try to do readahead past the
2982 * i_size, but it's possible that we have outstanding
2983 * writes with gaps in the middle and the i_size hasn't
2984 * caught up yet. Populate those with zeroed out pages
2985 * to prevent the VFS from repeatedly attempting to
2986 * fill them until the writes are flushed.
2988 zero_user(page, 0, PAGE_CACHE_SIZE);
2989 lru_cache_add_file(page);
2990 flush_dcache_page(page);
2991 SetPageUptodate(page);
2993 page_cache_release(page);
2994 rdata->pages[i] = NULL;
2998 /* no need to hold page hostage */
2999 lru_cache_add_file(page);
3001 page_cache_release(page);
3002 rdata->pages[i] = NULL;
3007 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3012 total_read += result;
3015 return total_read > 0 ? total_read : result;
/*
 * readpages address-space operation: group read-ahead pages into batches and
 * submit each batch with cifs_retry_async_readv().
 *
 * It checks for an rsize smaller than a page and tries fscache first.  Then,
 * while page_list is non-empty, it takes the page at the tail of the list,
 * adds it to the page cache locked, and keeps moving following pages onto a
 * private list while their indexes stay contiguous and the running byte
 * count stays within rsize.  Each batch is stored in a cifs_readdata (pages
 * in private-list order) before submission.  The visible cleanup paths put
 * the pages on the LRU and release them, both for the readdata allocation
 * failure and after the submit call.
 *
 * NOTE(review): expected_index is declared unsigned int but is compared with
 * page->index; confirm the two types match in the full source.
 * NOTE(review): the embedded line numbers have gaps, so break, unlock and
 * return statements are not visible in this listing.
 */
3018 static int cifs_readpages(struct file *file, struct address_space *mapping,
3019 struct list_head *page_list, unsigned num_pages)
3022 struct list_head tmplist;
3023 struct cifsFileInfo *open_file = file->private_data;
3024 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3025 unsigned int rsize = cifs_sb->rsize;
3029 * Give up immediately if rsize is too small to read an entire page.
3030 * The VFS will fall back to readpage. We should never reach this
3031 * point however since we set ra_pages to 0 when the rsize is smaller
3032 * than a cache page.
3034 if (unlikely(rsize < PAGE_CACHE_SIZE))
3038 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3039 * immediately if the cookie is negative
3041 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3046 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3047 pid = open_file->pid;
3049 pid = current->tgid;
3052 INIT_LIST_HEAD(&tmplist);
3054 cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3055 mapping, num_pages);
3058 * Start with the page at end of list and move it to private
3059 * list. Do the same with any following pages until we hit
3060 * the rsize limit, hit an index discontinuity, or run out of
3061 * pages. Issue the async read and then start the loop again
3062 * until the list is empty.
3064 * Note that list order is important. The page_list is in
3065 * the order of declining indexes. When we put the pages in
3066 * the rdata->pages, then we want them in increasing order.
3068 while (!list_empty(page_list)) {
3070 unsigned int bytes = PAGE_CACHE_SIZE;
3071 unsigned int expected_index;
3072 unsigned int nr_pages = 1;
3074 struct page *page, *tpage;
3075 struct cifs_readdata *rdata;
3077 page = list_entry(page_list->prev, struct page, lru);
3080 * Lock the page and put it in the cache. Since no one else
3081 * should have access to this page, we're safe to simply set
3082 * PG_locked without checking it first.
3084 __set_page_locked(page);
3085 rc = add_to_page_cache_locked(page, mapping,
3086 page->index, GFP_KERNEL);
3088 /* give up if we can't stick it in the cache */
3090 __clear_page_locked(page);
3094 /* move first page to the tmplist */
3095 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3096 list_move_tail(&page->lru, &tmplist);
3098 /* now try and add more pages onto the request */
3099 expected_index = page->index + 1;
3100 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3101 /* discontinuity ? */
3102 if (page->index != expected_index)
3105 /* would this page push the read over the rsize? */
3106 if (bytes + PAGE_CACHE_SIZE > rsize)
3109 __set_page_locked(page);
3110 if (add_to_page_cache_locked(page, mapping,
3111 page->index, GFP_KERNEL)) {
3112 __clear_page_locked(page);
3115 list_move_tail(&page->lru, &tmplist);
3116 bytes += PAGE_CACHE_SIZE;
3121 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3123 /* best to give up if we're out of mem */
3124 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3125 list_del(&page->lru);
3126 lru_cache_add_file(page);
3128 page_cache_release(page);
3134 rdata->cfile = cifsFileInfo_get(open_file);
3135 rdata->mapping = mapping;
3136 rdata->offset = offset;
3137 rdata->bytes = bytes;
3139 rdata->pagesz = PAGE_CACHE_SIZE;
3140 rdata->read_into_pages = cifs_readpages_read_into_pages;
3142 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3143 list_del(&page->lru);
3144 rdata->pages[rdata->nr_pages++] = page;
3147 rc = cifs_retry_async_readv(rdata);
3149 for (i = 0; i < rdata->nr_pages; i++) {
3150 page = rdata->pages[i];
3151 lru_cache_add_file(page);
3153 page_cache_release(page);
3155 kref_put(&rdata->refcount, cifs_readdata_release);
3159 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * Fill a single page.  fscache is consulted first through
 * cifs_readpage_from_fscache().  The remaining steps take a page reference,
 * map the page and read PAGE_CACHE_SIZE bytes at *poffset with cifs_read().
 * After the read the inode atime is set to the current fs time, any part of
 * the page past the bytes read is zeroed, the page is marked up to date and
 * sent to fscache, and the page reference is dropped.
 *
 * NOTE(review): the rest of the parameter list and the branch and exit
 * statements are not visible in this listing.
 */
3165 static int cifs_readpage_worker(struct file *file, struct page *page,
3171 /* Is the page cached? */
3172 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
3176 page_cache_get(page);
3177 read_data = kmap(page);
3178 /* for reads over a certain size could initiate async read ahead */
3180 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3185 cFYI(1, "Bytes read %d", rc);
3187 file->f_path.dentry->d_inode->i_atime =
3188 current_fs_time(file->f_path.dentry->d_inode->i_sb);
3190 if (PAGE_CACHE_SIZE > rc)
3191 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3193 flush_dcache_page(page);
3194 SetPageUptodate(page);
3196 /* send this page to the cache */
3197 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3203 page_cache_release(page);
/*
 * readpage address-space operation.  Converts page->index into a byte offset
 * and delegates the read to cifs_readpage_worker().  A file whose
 * private_data is NULL is special-cased first; that handling is not visible
 * in this listing.
 * NOTE(review): the debug message casts the loff_t offset to int, so the
 * logged value is truncated for offsets outside the int range.
 */
3209 static int cifs_readpage(struct file *file, struct page *page)
3211 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3217 if (file->private_data == NULL) {
3223 cFYI(1, "readpage %p at offset %d 0x%x",
3224 page, (int)offset, (int)offset);
3226 rc = cifs_readpage_worker(file, page, &offset);
/*
 * Walk the openFileList of cifs_inode under cifs_file_list_lock, looking for
 * an open handle whose mode includes FMODE_WRITE.  The lock is released both
 * inside the match branch and after the full scan.
 * NOTE(review): the return statements are not visible in this listing.
 */
3234 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3236 struct cifsFileInfo *open_file;
3238 spin_lock(&cifs_file_list_lock);
3239 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3240 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3241 spin_unlock(&cifs_file_list_lock);
3245 spin_unlock(&cifs_file_list_lock);
3249 /* We do not want to update the file size from server for inodes
3250 open for write - to avoid races with writepage extending
3251 the file - in the future we could consider allowing
3252 refreshing the inode only on increases in the file size
3253 but this is tricky to do without racing with writebehind
3254 page caching in the current Linux kernel design */
/*
 * Decide whether the cached file size may be replaced by end_of_file.  When
 * is_inode_writable() reports the inode open for write, the visible checks
 * are whether the mount has CIFS_MOUNT_DIRECT_IO set and whether the current
 * i_size is smaller than end_of_file.
 * NOTE(review): the value returned from each branch is not visible in this
 * listing.
 */
3255 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3260 if (is_inode_writable(cifsInode)) {
3261 /* This inode is open for write at least once */
3262 struct cifs_sb_info *cifs_sb;
3264 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3265 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3266 /* since no page cache to corrupt on directio
3267 we can change size safely */
3271 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * write_begin address-space operation: obtain the page cache page covering
 * pos and decide whether its current contents must be read before the write.
 *
 * The page comes from grab_cache_page_write_begin().  A page that is already
 * up to date, or a write of exactly PAGE_CACHE_SIZE bytes, is special-cased
 * ahead of the read.  When clientCanCacheRead is set and either the page
 * starts at or beyond i_size, or the write starts at the page start and
 * reaches i_size, the page is zeroed with zero_user_segments() and flagged
 * PageChecked.  Unless the file was opened write-only, the page is read with
 * cifs_readpage_worker() and the result of that call is not used.
 *
 * NOTE(review): the embedded line numbers have gaps, so the goto, label and
 * return statements are not visible in this listing.
 */
3279 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3280 loff_t pos, unsigned len, unsigned flags,
3281 struct page **pagep, void **fsdata)
3283 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3284 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3285 loff_t page_start = pos & PAGE_MASK;
3290 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3292 page = grab_cache_page_write_begin(mapping, index, flags);
3298 if (PageUptodate(page))
3302 * If we write a full page it will be up to date, no need to read from
3303 * the server. If the write is short, we'll end up doing a sync write
3306 if (len == PAGE_CACHE_SIZE)
3310 * optimize away the read when we have an oplock, and we're not
3311 * expecting to use any of the data we'd be reading in. That
3312 * is, when the page lies beyond the EOF, or straddles the EOF
3313 * and the write will cover all of the existing data.
3315 if (CIFS_I(mapping->host)->clientCanCacheRead) {
3316 i_size = i_size_read(mapping->host);
3317 if (page_start >= i_size ||
3318 (offset == 0 && (pos + len) >= i_size)) {
3319 zero_user_segments(page, 0, offset,
3323 * PageChecked means that the parts of the page
3324 * to which we're not writing are considered up
3325 * to date. Once the data is copied to the
3326 * page, it can be set uptodate.
3328 SetPageChecked(page);
3333 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3335 * might as well read a page, it is fast enough. If we get
3336 * an error, we don't need to return it. cifs_write_end will
3337 * do a sync write instead since PG_uptodate isn't set.
3339 cifs_readpage_worker(file, page, &page_start);
3341 /* we could try using another file handle if there is one -
3342 but how would we lock it to prevent close of that handle
3343 racing with this read? In any case
3344 this will be written out by write_end so is fine */
/*
 * releasepage address-space operation.  Pages with PagePrivate set are
 * special-cased (the result for that case is not visible in this listing);
 * otherwise the decision is delegated to cifs_fscache_release_page().
 */
3351 static int cifs_release_page(struct page *page, gfp_t gfp)
3353 if (PagePrivate(page))
3356 return cifs_fscache_release_page(page, gfp);
/*
 * invalidatepage address-space operation: passes the page and the inode that
 * owns its mapping to cifs_fscache_invalidate_page().
 * NOTE(review): any condition guarding the call is not visible in this
 * listing.
 */
3359 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3361 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3364 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * launder_page address-space operation: write back a single page.  Builds a
 * WB_SYNC_ALL writeback_control whose range covers exactly the bytes of the
 * page, writes the page with cifs_writepage_locked() when
 * clear_page_dirty_for_io() returns nonzero, then calls
 * cifs_fscache_invalidate_page() for the page.
 */
3367 static int cifs_launder_page(struct page *page)
3370 loff_t range_start = page_offset(page);
3371 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3372 struct writeback_control wbc = {
3373 .sync_mode = WB_SYNC_ALL,
3375 .range_start = range_start,
3376 .range_end = range_end,
3379 cFYI(1, "Launder page: %p", page);
3381 if (clear_page_dirty_for_io(page))
3382 rc = cifs_writepage_locked(page, &wbc);
3384 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * Work handler that processes an oplock break for an open file.
 *
 * For a regular file it calls break_lease() (O_RDONLY when
 * clientCanCacheRead is set, with an O_WRONLY call for the other case) and
 * starts writeback with filemap_fdatawrite().  When clientCanCacheRead is 0
 * it also waits for the writeback, records the result on the mapping and
 * calls invalidate_remote_inode().  It then calls cifs_push_locks() and,
 * unless cfile->oplock_break_cancelled is set, sends the oplock response
 * through the server ops.
 *
 * NOTE(review): the embedded line numbers have gaps, so some conditions and
 * closing braces are not visible in this listing.
 */
3388 void cifs_oplock_break(struct work_struct *work)
3390 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3392 struct inode *inode = cfile->dentry->d_inode;
3393 struct cifsInodeInfo *cinode = CIFS_I(inode);
3394 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3397 if (inode && S_ISREG(inode->i_mode)) {
3398 if (cinode->clientCanCacheRead)
3399 break_lease(inode, O_RDONLY);
3401 break_lease(inode, O_WRONLY);
3402 rc = filemap_fdatawrite(inode->i_mapping);
3403 if (cinode->clientCanCacheRead == 0) {
3404 rc = filemap_fdatawait(inode->i_mapping);
3405 mapping_set_error(inode->i_mapping, rc);
3406 invalidate_remote_inode(inode);
3408 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3411 rc = cifs_push_locks(cfile);
3413 cERROR(1, "Push locks rc = %d", rc);
3416 * releasing stale oplock after recent reconnect of smb session using
3417 * a now incorrect file handle is not a data integrity issue but do
3418 * not bother sending an oplock release if session to server still is
3419 * disconnected since oplock already released by the server
3421 if (!cfile->oplock_break_cancelled) {
3422 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3424 cFYI(1, "Oplock release rc = %d", rc);
/*
 * Address-space operations table for CIFS that includes the multi-page
 * read path (.readpages = cifs_readpages) alongside the single-page read,
 * writeback, write_begin/write_end and page release/invalidate/launder hooks.
 */
3428 const struct address_space_operations cifs_addr_ops = {
3429 .readpage = cifs_readpage,
3430 .readpages = cifs_readpages,
3431 .writepage = cifs_writepage,
3432 .writepages = cifs_writepages,
3433 .write_begin = cifs_write_begin,
3434 .write_end = cifs_write_end,
3435 .set_page_dirty = __set_page_dirty_nobuffers,
3436 .releasepage = cifs_release_page,
3437 .invalidatepage = cifs_invalidate_page,
3438 .launder_page = cifs_launder_page,
3442 * cifs_readpages requires the server to support a buffer large enough to
3443 * contain the header plus one complete page of data. Otherwise, we need
3444 * to leave cifs_readpages out of the address space operations.
/*
 * Variant of the CIFS address-space operations table with no .readpages
 * entry; the other visible entries match cifs_addr_ops one for one.
 */
3446 const struct address_space_operations cifs_addr_ops_smallbuf = {
3447 .readpage = cifs_readpage,
3448 .writepage = cifs_writepage,
3449 .writepages = cifs_writepages,
3450 .write_begin = cifs_write_begin,
3451 .write_end = cifs_write_end,
3452 .set_page_dirty = __set_page_dirty_nobuffers,
3453 .releasepage = cifs_release_page,
3454 .invalidatepage = cifs_invalidate_page,
3455 .launder_page = cifs_launder_page,