Merge tag 'nfs-for-4.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 15 Jan 2016 00:08:23 +0000 (16:08 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 15 Jan 2016 00:08:23 +0000 (16:08 -0800)
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable fixes:
   - Fix a regression in the SunRPC socket polling code
   - Fix the attribute cache revalidation code
   - Fix race in __update_open_stateid()
   - Fix an lo->plh_block_lgets imbalance in layoutreturn
   - Fix an Oopsable typo in ff_mirror_match_fh()

  Features:
   - pNFS layout recall performance improvements.
   - pNFS/flexfiles: Support server-supplied layoutstats sampling period

  Bugfixes + cleanups:
   - NFSv4: Don't perform cached access checks before we've OPENed the
     file
   - Fix starvation issues with background flushes
   - Reclaim writes should be flushed as unstable writes if there are
     already entries in the commit lists
   - Various bugfixes from Chuck to fix NFS/RDMA send queue ordering
     problems
   - Ensure that we propagate fatal layoutget errors back to the
     application
   - Fixes for sundry flexfiles layoutstats bugs
   - Fix files/flexfiles to not cache invalidated layouts in the DS
     commit buckets"

* tag 'nfs-for-4.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (68 commits)
  NFS: Fix a compile warning about unused variable in nfs_generic_pg_pgios()
  NFSv4: Fix a compile warning about no prototype for nfs4_ioctl()
  NFS: Use wait_on_atomic_t() for unlock after readahead
  SUNRPC: Fixup socket wait for memory
  NFSv4.1/pNFS: Cleanup constify struct pnfs_layout_range arguments
  NFSv4.1/pnfs: Cleanup copying of pnfs_layout_range structures
  NFSv4.1/pNFS: Cleanup pnfs_mark_matching_lsegs_invalid()
  NFSv4.1/pNFS: Fix a race in initiate_file_draining()
  NFSv4.1/pNFS: pnfs_error_mark_layout_for_return() must always return layout
  NFSv4.1/pNFS: pnfs_mark_matching_lsegs_return() should set the iomode
  NFSv4.1/pNFS: Use nfs4_stateid_copy for copying stateids
  NFSv4.1/pNFS: Don't pass stateids by value to pnfs_send_layoutreturn()
  NFS: Relax requirements in nfs_flush_incompatible
  NFSv4.1/pNFS: Don't queue up a new commit if the layout segment is invalid
  NFS: Allow multiple commit requests in flight per file
  NFS/pNFS: Fix up pNFS write reschedule layering violations and bugs
  SUNRPC: Fix a missing break in rpc_anyaddr()
  pNFS/flexfiles: Fix an Oopsable typo in ff_mirror_match_fh()
  NFS: Fix attribute cache revalidation
  NFS: Ensure we revalidate attributes before using execute_ok()
  ...

1  2 
fs/nfs/dir.c
fs/nfs/inode.c
fs/nfs/nfs4proc.c
include/linux/nfs4.h
include/linux/nfs_fs.h

diff --combined fs/nfs/dir.c
@@@ -1894,14 -1894,15 +1894,14 @@@ int nfs_symlink(struct inode *dir, stru
        attr.ia_mode = S_IFLNK | S_IRWXUGO;
        attr.ia_valid = ATTR_MODE;
  
 -      page = alloc_page(GFP_HIGHUSER);
 +      page = alloc_page(GFP_USER);
        if (!page)
                return -ENOMEM;
  
 -      kaddr = kmap_atomic(page);
 +      kaddr = page_address(page);
        memcpy(kaddr, symname, pathlen);
        if (pathlen < PAGE_SIZE)
                memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
 -      kunmap_atomic(kaddr);
  
        trace_nfs_symlink_enter(dir, dentry);
        error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
@@@ -2431,6 -2432,20 +2431,20 @@@ int nfs_may_open(struct inode *inode, s
  }
  EXPORT_SYMBOL_GPL(nfs_may_open);
  
+ static int nfs_execute_ok(struct inode *inode, int mask)
+ {
+       struct nfs_server *server = NFS_SERVER(inode);
+       int ret;
+       if (mask & MAY_NOT_BLOCK)
+               ret = nfs_revalidate_inode_rcu(server, inode);
+       else
+               ret = nfs_revalidate_inode(server, inode);
+       if (ret == 0 && !execute_ok(inode))
+               ret = -EACCES;
+       return ret;
+ }
  int nfs_permission(struct inode *inode, int mask)
  {
        struct rpc_cred *cred;
                case S_IFLNK:
                        goto out;
                case S_IFREG:
+                       if ((mask & MAY_OPEN) &&
+                          nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN))
+                               return 0;
                        break;
                case S_IFDIR:
                        /*
@@@ -2480,8 -2498,8 +2497,8 @@@ force_lookup
                        res = PTR_ERR(cred);
        }
  out:
-       if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
-               res = -EACCES;
+       if (!res && (mask & MAY_EXEC))
+               res = nfs_execute_ok(inode, mask);
  
        dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
                inode->i_sb->s_id, inode->i_ino, mask, res);
diff --combined fs/nfs/inode.c
@@@ -71,19 -71,25 +71,25 @@@ nfs_fattr_to_ino_t(struct nfs_fattr *fa
        return nfs_fileid_to_ino_t(fattr->fileid);
  }
  
- /**
-  * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
-  * @word: long word containing the bit lock
-  */
- int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
+ static int nfs_wait_killable(int mode)
  {
        freezable_schedule_unsafe();
        if (signal_pending_state(mode, current))
                return -ERESTARTSYS;
        return 0;
  }
+ int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
+ {
+       return nfs_wait_killable(mode);
+ }
  EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
  
+ int nfs_wait_atomic_killable(atomic_t *p)
+ {
+       return nfs_wait_killable(TASK_KILLABLE);
+ }
  /**
   * nfs_compat_user_ino64 - returns the user-visible inode number
   * @fileid: 64-bit fileid
@@@ -408,10 -414,9 +414,10 @@@ nfs_fhget(struct super_block *sb, struc
                                inode->i_fop = NULL;
                                inode->i_flags |= S_AUTOMOUNT;
                        }
 -              } else if (S_ISLNK(inode->i_mode))
 +              } else if (S_ISLNK(inode->i_mode)) {
                        inode->i_op = &nfs_symlink_inode_operations;
 -              else
 +                      inode_nohighmem(inode);
 +              } else
                        init_special_inode(inode, inode->i_mode, fattr->rdev);
  
                memset(&inode->i_atime, 0, sizeof(inode->i_atime));
@@@ -700,7 -705,7 +706,7 @@@ static void nfs_init_lock_context(struc
        l_ctx->lockowner.l_owner = current->files;
        l_ctx->lockowner.l_pid = current->tgid;
        INIT_LIST_HEAD(&l_ctx->list);
-       nfs_iocounter_init(&l_ctx->io_count);
+       atomic_set(&l_ctx->io_count, 0);
  }
  
  static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
@@@ -913,6 -918,12 +919,12 @@@ void nfs_file_clear_open_context(struc
        if (ctx) {
                struct inode *inode = d_inode(ctx->dentry);
  
+               /*
+                * We fatal error on write before. Try to writeback
+                * every page again.
+                */
+               if (ctx->error < 0)
+                       invalidate_inode_pages2(inode->i_mapping);
                filp->private_data = NULL;
                spin_lock(&inode->i_lock);
                list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
@@@ -1087,27 -1098,6 +1099,27 @@@ static bool nfs_mapping_need_revalidate
                || NFS_STALE(inode);
  }
  
 +int nfs_revalidate_mapping_rcu(struct inode *inode)
 +{
 +      struct nfs_inode *nfsi = NFS_I(inode);
 +      unsigned long *bitlock = &nfsi->flags;
 +      int ret = 0;
 +
 +      if (IS_SWAPFILE(inode))
 +              goto out;
 +      if (nfs_mapping_need_revalidate_inode(inode)) {
 +              ret = -ECHILD;
 +              goto out;
 +      }
 +      spin_lock(&inode->i_lock);
 +      if (test_bit(NFS_INO_INVALIDATING, bitlock) ||
 +          (nfsi->cache_validity & NFS_INO_INVALID_DATA))
 +              ret = -ECHILD;
 +      spin_unlock(&inode->i_lock);
 +out:
 +      return ret;
 +}
 +
  /**
   * __nfs_revalidate_mapping - Revalidate the pagecache
   * @inode - pointer to host inode
@@@ -1663,6 -1653,7 +1675,7 @@@ static int nfs_update_inode(struct inod
        unsigned long invalid = 0;
        unsigned long now = jiffies;
        unsigned long save_cache_validity;
+       bool cache_revalidated = true;
  
        dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
                        __func__, inode->i_sb->s_id, inode->i_ino,
                                nfs_force_lookup_revalidate(inode);
                        inode->i_version = fattr->change_attr;
                }
-       } else
+       } else {
                nfsi->cache_validity |= save_cache_validity;
+               cache_revalidated = false;
+       }
  
        if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
                memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
-       } else if (server->caps & NFS_CAP_MTIME)
+       } else if (server->caps & NFS_CAP_MTIME) {
                nfsi->cache_validity |= save_cache_validity &
                                (NFS_INO_INVALID_ATTR
                                | NFS_INO_REVAL_FORCED);
+               cache_revalidated = false;
+       }
  
        if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
                memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
-       } else if (server->caps & NFS_CAP_CTIME)
+       } else if (server->caps & NFS_CAP_CTIME) {
                nfsi->cache_validity |= save_cache_validity &
                                (NFS_INO_INVALID_ATTR
                                | NFS_INO_REVAL_FORCED);
+               cache_revalidated = false;
+       }
  
        /* Check if our cached file size is stale */
        if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
                                        (long long)cur_isize,
                                        (long long)new_isize);
                }
-       } else
+       } else {
                nfsi->cache_validity |= save_cache_validity &
                                (NFS_INO_INVALID_ATTR
                                | NFS_INO_REVAL_PAGECACHE
                                | NFS_INO_REVAL_FORCED);
+               cache_revalidated = false;
+       }
  
  
        if (fattr->valid & NFS_ATTR_FATTR_ATIME)
                memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
-       else if (server->caps & NFS_CAP_ATIME)
+       else if (server->caps & NFS_CAP_ATIME) {
                nfsi->cache_validity |= save_cache_validity &
                                (NFS_INO_INVALID_ATIME
                                | NFS_INO_REVAL_FORCED);
+               cache_revalidated = false;
+       }
  
        if (fattr->valid & NFS_ATTR_FATTR_MODE) {
                if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
                        inode->i_mode = newmode;
                        invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
                }
-       } else if (server->caps & NFS_CAP_MODE)
+       } else if (server->caps & NFS_CAP_MODE) {
                nfsi->cache_validity |= save_cache_validity &
                                (NFS_INO_INVALID_ATTR
                                | NFS_INO_INVALID_ACCESS
                                | NFS_INO_INVALID_ACL
                                | NFS_INO_REVAL_FORCED);
+               cache_revalidated = false;
+       }
  
        if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
                if (!uid_eq(inode->i_uid, fattr->uid)) {
                        invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
                        inode->i_uid = fattr->uid;
                }
-       } else if (server->caps & NFS_CAP_OWNER)
+       } else if (server->caps & NFS_CAP_OWNER) {
                nfsi->cache_validity |= save_cache_validity &
                                (NFS_INO_INVALID_ATTR
                                | NFS_INO_INVALID_ACCESS
                                | NFS_INO_INVALID_ACL
                                | NFS_INO_REVAL_FORCED);
+               cache_revalidated = false;
+       }
  
        if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
                if (!gid_eq(inode->i_gid, fattr->gid)) {
                        invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
                        inode->i_gid = fattr->gid;
                }
-       } else if (server->caps & NFS_CAP_OWNER_GROUP)
+       } else if (server->caps & NFS_CAP_OWNER_GROUP) {
                nfsi->cache_validity |= save_cache_validity &
                                (NFS_INO_INVALID_ATTR
                                | NFS_INO_INVALID_ACCESS
                                | NFS_INO_INVALID_ACL
                                | NFS_INO_REVAL_FORCED);
+               cache_revalidated = false;
+       }
  
        if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
                if (inode->i_nlink != fattr->nlink) {
                                invalid |= NFS_INO_INVALID_DATA;
                        set_nlink(inode, fattr->nlink);
                }
-       } else if (server->caps & NFS_CAP_NLINK)
+       } else if (server->caps & NFS_CAP_NLINK) {
                nfsi->cache_validity |= save_cache_validity &
                                (NFS_INO_INVALID_ATTR
                                | NFS_INO_REVAL_FORCED);
+               cache_revalidated = false;
+       }
  
        if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
                /*
                 * report the blocks in 512byte units
                 */
                inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
-       }
-       if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
+       } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
                inode->i_blocks = fattr->du.nfs2.blocks;
+       else
+               cache_revalidated = false;
  
        /* Update attrtimeo value if we're out of the unstable period */
        if (invalid & NFS_INO_INVALID_ATTR) {
                /* Set barrier to be more recent than all outstanding updates */
                nfsi->attr_gencount = nfs_inc_attr_generation_counter();
        } else {
-               if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
-                       if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
-                               nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
+               if (cache_revalidated) {
+                       if (!time_in_range_open(now, nfsi->attrtimeo_timestamp,
+                               nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
+                               nfsi->attrtimeo <<= 1;
+                               if (nfsi->attrtimeo > NFS_MAXATTRTIMEO(inode))
+                                       nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
+                       }
                        nfsi->attrtimeo_timestamp = now;
                }
                /* Set the barrier to be more recent than this fattr */
        }
  
        /* Don't declare attrcache up to date if there were no attrs! */
-       if (fattr->valid != 0)
+       if (cache_revalidated)
                invalid &= ~NFS_INO_INVALID_ATTR;
  
        /* Don't invalidate the data if we were to blame */
diff --combined fs/nfs/nfs4proc.c
@@@ -208,6 -208,9 +208,9 @@@ static const u32 nfs4_pnfs_open_bitmap[
        | FATTR4_WORD1_TIME_METADATA
        | FATTR4_WORD1_TIME_MODIFY,
        FATTR4_WORD2_MDSTHRESHOLD
+ #ifdef CONFIG_NFS_V4_SECURITY_LABEL
+       | FATTR4_WORD2_SECURITY_LABEL
+ #endif
  };
  
  static const u32 nfs4_open_noattr_bitmap[3] = {
@@@ -1385,6 -1388,7 +1388,7 @@@ static void __update_open_stateid(struc
         * Protect the call to nfs4_state_set_mode_locked and
         * serialise the stateid update
         */
+       spin_lock(&state->owner->so_lock);
        write_seqlock(&state->seqlock);
        if (deleg_stateid != NULL) {
                nfs4_stateid_copy(&state->stateid, deleg_stateid);
        if (open_stateid != NULL)
                nfs_set_open_stateid_locked(state, open_stateid, fmode);
        write_sequnlock(&state->seqlock);
-       spin_lock(&state->owner->so_lock);
        update_open_stateflags(state, fmode);
        spin_unlock(&state->owner->so_lock);
  }
@@@ -1598,6 -1601,7 +1601,7 @@@ _nfs4_opendata_to_nfs4_state(struct nfs
  
        if (!data->rpc_done) {
                state = nfs4_try_open_cached(data);
+               trace_nfs4_cached_open(data->state);
                goto out;
        }
  
@@@ -2015,6 -2019,7 +2019,7 @@@ static void nfs4_open_prepare(struct rp
        }
        return;
  unlock_no_action:
+       trace_nfs4_cached_open(data->state);
        rcu_read_unlock();
  out_no_action:
        task->tk_action = NULL;
@@@ -2703,6 -2708,7 +2708,7 @@@ static int _nfs4_do_setattr(struct inod
        status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
        if (status == 0 && state != NULL)
                renew_lease(server, timestamp);
+       trace_nfs4_setattr(inode, &arg.stateid, status);
        return status;
  }
  
@@@ -2719,7 -2725,6 +2725,6 @@@ static int nfs4_do_setattr(struct inod
        int err;
        do {
                err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel);
-               trace_nfs4_setattr(inode, err);
                switch (err) {
                case -NFS4ERR_OPENMODE:
                        if (!(sattr->ia_valid & ATTR_SIZE)) {
@@@ -5048,7 -5053,6 +5053,6 @@@ static void nfs4_init_boot_verifier(con
  static int
  nfs4_init_nonuniform_client_string(struct nfs_client *clp)
  {
-       int result;
        size_t len;
        char *str;
  
                return -ENOMEM;
  
        rcu_read_lock();
-       result = scnprintf(str, len, "Linux NFSv4.0 %s/%s %s",
+       scnprintf(str, len, "Linux NFSv4.0 %s/%s %s",
                        clp->cl_ipaddr,
                        rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
                        rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO));
  static int
  nfs4_init_uniquifier_client_string(struct nfs_client *clp)
  {
-       int result;
        size_t len;
        char *str;
  
        if (!str)
                return -ENOMEM;
  
-       result = scnprintf(str, len, "Linux NFSv%u.%u %s/%s",
+       scnprintf(str, len, "Linux NFSv%u.%u %s/%s",
                        clp->rpc_ops->version, clp->cl_minorversion,
                        nfs4_client_id_uniquifier,
                        clp->cl_rpcclient->cl_nodename);
  static int
  nfs4_init_uniform_client_string(struct nfs_client *clp)
  {
-       int result;
        size_t len;
        char *str;
  
        if (!str)
                return -ENOMEM;
  
-       result = scnprintf(str, len, "Linux NFSv%u.%u %s",
+       scnprintf(str, len, "Linux NFSv%u.%u %s",
                        clp->rpc_ops->version, clp->cl_minorversion,
                        clp->cl_rpcclient->cl_nodename);
        clp->cl_owner_id = str;
@@@ -5384,6 -5386,11 +5386,11 @@@ static int _nfs4_proc_delegreturn(struc
        if (data == NULL)
                return -ENOMEM;
        nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
+       nfs4_state_protect(server->nfs_client,
+                       NFS_SP4_MACH_CRED_CLEANUP,
+                       &task_setup_data.rpc_client, &msg);
        data->args.fhandle = &data->fh;
        data->args.stateid = &data->stateid;
        data->args.bitmask = server->cache_consistency_bitmask;
@@@ -5426,7 -5433,7 +5433,7 @@@ int nfs4_proc_delegreturn(struct inode 
        int err;
        do {
                err = _nfs4_proc_delegreturn(inode, cred, stateid, issync);
-               trace_nfs4_delegreturn(inode, err);
+               trace_nfs4_delegreturn(inode, stateid, err);
                switch (err) {
                        case -NFS4ERR_STALE_STATEID:
                        case -NFS4ERR_EXPIRED:
@@@ -5936,6 -5943,7 +5943,7 @@@ static int _nfs4_do_setlk(struct nfs4_s
                data->cancelled = 1;
        rpc_put_task(task);
        dprintk("%s: done, ret = %d!\n", __func__, ret);
+       trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret);
        return ret;
  }
  
@@@ -5952,7 -5960,6 +5960,6 @@@ static int nfs4_lock_reclaim(struct nfs
                if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
                        return 0;
                err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
-               trace_nfs4_lock_reclaim(request, state, F_SETLK, err);
                if (err != -NFS4ERR_DELAY)
                        break;
                nfs4_handle_exception(server, err, &exception);
@@@ -5979,7 -5986,6 +5986,6 @@@ static int nfs4_lock_expired(struct nfs
                if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
                        return 0;
                err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED);
-               trace_nfs4_lock_expired(request, state, F_SETLK, err);
                switch (err) {
                default:
                        goto out;
@@@ -6087,7 -6093,6 +6093,6 @@@ static int nfs4_proc_setlk(struct nfs4_
  
        do {
                err = _nfs4_proc_setlk(state, cmd, request);
-               trace_nfs4_set_lock(request, state, cmd, err);
                if (err == -NFS4ERR_DENIED)
                        err = -EAGAIN;
                err = nfs4_handle_exception(NFS_SERVER(state->inode),
@@@ -6253,6 -6258,9 +6258,6 @@@ static int nfs4_xattr_set_nfs4_acl(cons
                                   const void *buf, size_t buflen,
                                   int flags)
  {
 -      if (strcmp(key, "") != 0)
 -              return -EINVAL;
 -
        return nfs4_proc_set_acl(d_inode(dentry), buf, buflen);
  }
  
@@@ -6260,15 -6268,32 +6265,15 @@@ static int nfs4_xattr_get_nfs4_acl(cons
                                   struct dentry *dentry, const char *key,
                                   void *buf, size_t buflen)
  {
 -      if (strcmp(key, "") != 0)
 -              return -EINVAL;
 -
        return nfs4_proc_get_acl(d_inode(dentry), buf, buflen);
  }
  
 -static size_t nfs4_xattr_list_nfs4_acl(const struct xattr_handler *handler,
 -                                     struct dentry *dentry, char *list,
 -                                     size_t list_len, const char *name,
 -                                     size_t name_len)
 +static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry)
  {
 -      size_t len = sizeof(XATTR_NAME_NFSV4_ACL);
 -
 -      if (!nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry))))
 -              return 0;
 -
 -      if (list && len <= list_len)
 -              memcpy(list, XATTR_NAME_NFSV4_ACL, len);
 -      return len;
 +      return nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry)));
  }
  
  #ifdef CONFIG_NFS_V4_SECURITY_LABEL
 -static inline int nfs4_server_supports_labels(struct nfs_server *server)
 -{
 -      return server->caps & NFS_CAP_SECURITY_LABEL;
 -}
  
  static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler,
                                     struct dentry *dentry, const char *key,
@@@ -6290,34 -6315,29 +6295,34 @@@ static int nfs4_xattr_get_nfs4_label(co
        return -EOPNOTSUPP;
  }
  
 -static size_t nfs4_xattr_list_nfs4_label(const struct xattr_handler *handler,
 -                                       struct dentry *dentry, char *list,
 -                                       size_t list_len, const char *name,
 -                                       size_t name_len)
 +static ssize_t
 +nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
  {
 -      size_t len = 0;
 +      int len = 0;
  
 -      if (nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) {
 -              len = security_inode_listsecurity(d_inode(dentry), NULL, 0);
 -              if (list && len <= list_len)
 -                      security_inode_listsecurity(d_inode(dentry), list, len);
 +      if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) {
 +              len = security_inode_listsecurity(inode, list, list_len);
 +              if (list_len && len > list_len)
 +                      return -ERANGE;
        }
        return len;
  }
  
  static const struct xattr_handler nfs4_xattr_nfs4_label_handler = {
        .prefix = XATTR_SECURITY_PREFIX,
 -      .list   = nfs4_xattr_list_nfs4_label,
        .get    = nfs4_xattr_get_nfs4_label,
        .set    = nfs4_xattr_set_nfs4_label,
  };
 -#endif
  
 +#else
 +
 +static ssize_t
 +nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
 +{
 +      return 0;
 +}
 +
 +#endif
  
  /*
   * nfs_fhget will use either the mounted_on_fileid or the fileid
@@@ -6847,10 -6867,13 +6852,13 @@@ static const struct nfs41_state_protect
        },
        .allow.u.words = {
                [0] = 1 << (OP_CLOSE) |
+                     1 << (OP_OPEN_DOWNGRADE) |
                      1 << (OP_LOCKU) |
+                     1 << (OP_DELEGRETURN) |
                      1 << (OP_COMMIT),
                [1] = 1 << (OP_SECINFO - 32) |
                      1 << (OP_SECINFO_NO_NAME - 32) |
+                     1 << (OP_LAYOUTRETURN - 32) |
                      1 << (OP_TEST_STATEID - 32) |
                      1 << (OP_FREE_STATEID - 32) |
                      1 << (OP_WRITE - 32)
@@@ -6915,11 -6938,19 +6923,19 @@@ static int nfs4_sp4_select_mode(struct 
                }
  
                if (test_bit(OP_CLOSE, sp->allow.u.longs) &&
+                   test_bit(OP_OPEN_DOWNGRADE, sp->allow.u.longs) &&
+                   test_bit(OP_DELEGRETURN, sp->allow.u.longs) &&
                    test_bit(OP_LOCKU, sp->allow.u.longs)) {
                        dfprintk(MOUNT, "  cleanup mode enabled\n");
                        set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags);
                }
  
+               if (test_bit(OP_LAYOUTRETURN, sp->allow.u.longs)) {
+                       dfprintk(MOUNT, "  pnfs cleanup mode enabled\n");
+                       set_bit(NFS_SP4_MACH_CRED_PNFS_CLEANUP,
+                               &clp->cl_sp4_flags);
+               }
                if (test_bit(OP_SECINFO, sp->allow.u.longs) &&
                    test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) {
                        dfprintk(MOUNT, "  secinfo mode enabled\n");
@@@ -7748,6 -7779,7 +7764,7 @@@ nfs4_layoutget_prepare(struct rpc_task 
        struct nfs4_layoutget *lgp = calldata;
        struct nfs_server *server = NFS_SERVER(lgp->args.inode);
        struct nfs4_session *session = nfs4_get_session(server);
+       int ret;
  
        dprintk("--> %s\n", __func__);
        /* Note the is a race here, where a CB_LAYOUTRECALL can come in
        if (nfs41_setup_sequence(session, &lgp->args.seq_args,
                                &lgp->res.seq_res, task))
                return;
-       if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
+       ret = pnfs_choose_layoutget_stateid(&lgp->args.stateid,
                                          NFS_I(lgp->args.inode)->layout,
                                          &lgp->args.range,
-                                         lgp->args.ctx->state)) {
-               rpc_exit(task, NFS4_OK);
-       }
+                                         lgp->args.ctx->state);
+       if (ret < 0)
+               rpc_exit(task, ret);
  }
  
  static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
        switch (task->tk_status) {
        case 0:
                goto out;
+       /*
+        * NFS4ERR_LAYOUTUNAVAILABLE means we are not supposed to use pnfs
+        * on the file. set tk_status to -ENODATA to tell upper layer to
+        * retry go inband.
+        */
+       case -NFS4ERR_LAYOUTUNAVAILABLE:
+               task->tk_status = -ENODATA;
+               goto out;
        /*
         * NFS4ERR_BADLAYOUT means the MDS cannot return a layout of
         * length lgp->args.minlength != 0 (see RFC5661 section 18.43.3).
@@@ -7979,6 -8020,7 +8005,7 @@@ nfs4_proc_layoutget(struct nfs4_layoutg
        trace_nfs4_layoutget(lgp->args.ctx,
                        &lgp->args.range,
                        &lgp->res.range,
+                       &lgp->res.stateid,
                        status);
        /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
        if (status == 0 && lgp->res.layoutp->len)
@@@ -8035,11 -8077,11 +8062,11 @@@ static void nfs4_layoutreturn_release(v
  
        dprintk("--> %s\n", __func__);
        spin_lock(&lo->plh_inode->i_lock);
+       pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
+       pnfs_mark_layout_returned_if_empty(lo);
        if (lrp->res.lrs_present)
                pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
-       pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
        pnfs_clear_layoutreturn_waitbit(lo);
-       lo->plh_block_lgets--;
        spin_unlock(&lo->plh_inode->i_lock);
        pnfs_free_lseg_list(&freeme);
        pnfs_put_layout_hdr(lrp->args.layout);
@@@ -8071,6 -8113,10 +8098,10 @@@ int nfs4_proc_layoutreturn(struct nfs4_
        };
        int status = 0;
  
+       nfs4_state_protect(NFS_SERVER(lrp->args.inode)->nfs_client,
+                       NFS_SP4_MACH_CRED_PNFS_CLEANUP,
+                       &task_setup_data.rpc_client, &msg);
        dprintk("--> %s\n", __func__);
        if (!sync) {
                lrp->inode = nfs_igrab_and_active(lrp->args.inode);
                return PTR_ERR(task);
        if (sync)
                status = task->tk_status;
-       trace_nfs4_layoutreturn(lrp->args.inode, status);
+       trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid, status);
        dprintk("<-- %s status=%d\n", __func__, status);
        rpc_put_task(task);
        return status;
@@@ -8234,7 -8280,7 +8265,7 @@@ nfs4_proc_layoutcommit(struct nfs4_layo
                return PTR_ERR(task);
        if (sync)
                status = task->tk_status;
-       trace_nfs4_layoutcommit(data->args.inode, status);
+       trace_nfs4_layoutcommit(data->args.inode, &data->args.stateid, status);
        dprintk("%s: status %d\n", __func__, status);
        rpc_put_task(task);
        return status;
@@@ -8734,24 -8780,6 +8765,24 @@@ const struct nfs4_minor_version_ops *nf
  #endif
  };
  
 +ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
 +{
 +      ssize_t error, error2;
 +
 +      error = generic_listxattr(dentry, list, size);
 +      if (error < 0)
 +              return error;
 +      if (list) {
 +              list += error;
 +              size -= error;
 +      }
 +
 +      error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size);
 +      if (error2 < 0)
 +              return error2;
 +      return error + error2;
 +}
 +
  static const struct inode_operations nfs4_dir_inode_operations = {
        .create         = nfs_create,
        .lookup         = nfs_lookup,
        .setattr        = nfs_setattr,
        .getxattr       = generic_getxattr,
        .setxattr       = generic_setxattr,
 -      .listxattr      = generic_listxattr,
 +      .listxattr      = nfs4_listxattr,
        .removexattr    = generic_removexattr,
  };
  
@@@ -8778,7 -8806,7 +8809,7 @@@ static const struct inode_operations nf
        .setattr        = nfs_setattr,
        .getxattr       = generic_getxattr,
        .setxattr       = generic_setxattr,
 -      .listxattr      = generic_listxattr,
 +      .listxattr      = nfs4_listxattr,
        .removexattr    = generic_removexattr,
  };
  
@@@ -8837,7 -8865,7 +8868,7 @@@ const struct nfs_rpc_ops nfs_v4_cliento
  };
  
  static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
 -      .prefix = XATTR_NAME_NFSV4_ACL,
 +      .name   = XATTR_NAME_NFSV4_ACL,
        .list   = nfs4_xattr_list_nfs4_acl,
        .get    = nfs4_xattr_get_nfs4_acl,
        .set    = nfs4_xattr_set_nfs4_acl,
diff --combined include/linux/nfs4.h
@@@ -139,10 -139,10 +139,10 @@@ enum nfs_opnum4 
  Needs to be updated if more operations are defined in future.*/
  
  #define FIRST_NFS4_OP OP_ACCESS
 -#define LAST_NFS4_OP  OP_WRITE_SAME
  #define LAST_NFS40_OP OP_RELEASE_LOCKOWNER
  #define LAST_NFS41_OP OP_RECLAIM_COMPLETE
 -#define LAST_NFS42_OP OP_WRITE_SAME
 +#define LAST_NFS42_OP OP_CLONE
 +#define LAST_NFS4_OP  LAST_NFS42_OP
  
  enum nfsstat4 {
        NFS4_OK = 0,
@@@ -592,4 -592,18 +592,18 @@@ enum data_content4 
        NFS4_CONTENT_HOLE               = 1,
  };
  
+ enum pnfs_update_layout_reason {
+       PNFS_UPDATE_LAYOUT_UNKNOWN = 0,
+       PNFS_UPDATE_LAYOUT_NO_PNFS,
+       PNFS_UPDATE_LAYOUT_RD_ZEROLEN,
+       PNFS_UPDATE_LAYOUT_MDSTHRESH,
+       PNFS_UPDATE_LAYOUT_NOMEM,
+       PNFS_UPDATE_LAYOUT_BULK_RECALL,
+       PNFS_UPDATE_LAYOUT_IO_TEST_FAIL,
+       PNFS_UPDATE_LAYOUT_FOUND_CACHED,
+       PNFS_UPDATE_LAYOUT_RETURN,
+       PNFS_UPDATE_LAYOUT_BLOCKED,
+       PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET,
+ };
  #endif
diff --combined include/linux/nfs_fs.h
@@@ -60,18 -60,12 +60,12 @@@ struct nfs_lockowner 
        pid_t l_pid;
  };
  
- #define NFS_IO_INPROGRESS 0
- struct nfs_io_counter {
-       unsigned long flags;
-       atomic_t io_count;
- };
  struct nfs_lock_context {
        atomic_t count;
        struct list_head list;
        struct nfs_open_context *open_context;
        struct nfs_lockowner lockowner;
-       struct nfs_io_counter io_count;
+       atomic_t io_count;
  };
  
  struct nfs4_state;
@@@ -216,7 -210,6 +210,6 @@@ struct nfs_inode 
  #define NFS_INO_FLUSHING      (4)             /* inode is flushing out data */
  #define NFS_INO_FSCACHE               (5)             /* inode can be cached by FS-Cache */
  #define NFS_INO_FSCACHE_LOCK  (6)             /* FS-Cache cookie management lock */
- #define NFS_INO_COMMIT                (7)             /* inode is committing unstable writes */
  #define NFS_INO_LAYOUTCOMMIT  (9)             /* layoutcommit required */
  #define NFS_INO_LAYOUTCOMMITTING (10)         /* layoutcommit inflight */
  #define NFS_INO_LAYOUTSTATS   (11)            /* layoutstats inflight */
@@@ -359,7 -352,6 +352,7 @@@ extern int nfs_revalidate_inode(struct 
  extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode);
  extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
  extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
 +extern int nfs_revalidate_mapping_rcu(struct inode *inode);
  extern int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping);
  extern int nfs_setattr(struct dentry *, struct iattr *);
  extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *);
@@@ -518,12 -510,24 +511,24 @@@ extern int  nfs_updatepage(struct file 
   */
  extern int nfs_sync_inode(struct inode *inode);
  extern int nfs_wb_all(struct inode *inode);
- extern int nfs_wb_page(struct inode *inode, struct page* page);
+ extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder);
  extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
  extern int  nfs_commit_inode(struct inode *, int);
  extern struct nfs_commit_data *nfs_commitdata_alloc(void);
  extern void nfs_commit_free(struct nfs_commit_data *data);
  
+ static inline int
+ nfs_wb_launder_page(struct inode *inode, struct page *page)
+ {
+       return nfs_wb_single_page(inode, page, true);
+ }
+ static inline int
+ nfs_wb_page(struct inode *inode, struct page *page)
+ {
+       return nfs_wb_single_page(inode, page, false);
+ }
  static inline int
  nfs_have_writebacks(struct inode *inode)
  {