Merge tag 'nfs-for-3.16-3' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 21 Jul 2014 02:55:44 +0000 (19:55 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 21 Jul 2014 02:55:44 +0000 (19:55 -0700)
Pull NFS client fixes from Trond Myklebust:
 "Apologies for the relative lateness of this pull request, however the
  commits fix some issues with the NFS read/write code updates in
  3.16-rc1 that can cause serious Oopsing when using small r/wsize.  The
  delay was mainly due to extra testing to make sure that the fixes
  behave correctly.

  Highlights include:
   - Stable fix for an NFSv3 posix ACL regression
   - Multiple fixes for regressions to the NFS generic read/write code:
     - Fix page splitting bugs that come into play when a small
       rsize/wsize read/write needs to be sent again (due to error
       conditions or page redirty)
     - Fix nfs_wb_page_cancel, which is called by the "invalidatepage"
       method
   - Fix 2 compile warnings about unused variables
   - Fix a performance issue affecting unstable writes"

* tag 'nfs-for-3.16-3' of git://git.linux-nfs.org/projects/trondmy/linux-nfs:
  NFS: Don't reset pg_moreio in __nfs_pageio_add_request
  NFS: Remove 2 unused variables
  nfs: handle multiple reqs in nfs_wb_page_cancel
  nfs: handle multiple reqs in nfs_page_async_flush
  nfs: change find_request to find_head_request
  nfs: nfs_page should take a ref on the head req
  nfs: mark nfs_page reqs with flag for extra ref
  nfs: only show Posix ACLs in listxattr if actually present

fs/nfs/direct.c
fs/nfs/internal.h
fs/nfs/nfs3acl.c
fs/nfs/nfs3proc.c
fs/nfs/pagelist.c
fs/nfs/write.c

index 8f98138..f11b9ee 100644 (file)
@@ -756,7 +756,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
        spin_unlock(&dreq->lock);
 
        while (!list_empty(&hdr->pages)) {
-               bool do_destroy = true;
 
                req = nfs_list_entry(hdr->pages.next);
                nfs_list_remove_request(req);
@@ -765,7 +764,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
                case NFS_IOHDR_NEED_COMMIT:
                        kref_get(&req->wb_kref);
                        nfs_mark_request_commit(req, hdr->lseg, &cinfo);
-                       do_destroy = false;
                }
                nfs_unlock_and_release_request(req);
        }
index 82ddbf4..f415cbf 100644 (file)
@@ -244,6 +244,7 @@ void nfs_pgio_data_release(struct nfs_pgio_data *);
 int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
 int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
                      const struct rpc_call_ops *, int, int);
+void nfs_free_request(struct nfs_page *req);
 
 static inline void nfs_iocounter_init(struct nfs_io_counter *c)
 {
index 871d6ed..8f854dd 100644 (file)
@@ -247,3 +247,46 @@ const struct xattr_handler *nfs3_xattr_handlers[] = {
        &posix_acl_default_xattr_handler,
        NULL,
 };
+
+static int
+nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
+               size_t size, ssize_t *result)
+{
+       struct posix_acl *acl;
+       char *p = data + *result;
+
+       acl = get_acl(inode, type);
+       if (!acl)
+               return 0;
+
+       posix_acl_release(acl);
+
+       *result += strlen(name);
+       *result += 1;
+       if (!size)
+               return 0;
+       if (*result > size)
+               return -ERANGE;
+
+       strcpy(p, name);
+       return 0;
+}
+
+ssize_t
+nfs3_listxattr(struct dentry *dentry, char *data, size_t size)
+{
+       struct inode *inode = dentry->d_inode;
+       ssize_t result = 0;
+       int error;
+
+       error = nfs3_list_one_acl(inode, ACL_TYPE_ACCESS,
+                       POSIX_ACL_XATTR_ACCESS, data, size, &result);
+       if (error)
+               return error;
+
+       error = nfs3_list_one_acl(inode, ACL_TYPE_DEFAULT,
+                       POSIX_ACL_XATTR_DEFAULT, data, size, &result);
+       if (error)
+               return error;
+       return result;
+}
index e7daa42..f0afa29 100644 (file)
@@ -885,7 +885,7 @@ static const struct inode_operations nfs3_dir_inode_operations = {
        .getattr        = nfs_getattr,
        .setattr        = nfs_setattr,
 #ifdef CONFIG_NFS_V3_ACL
-       .listxattr      = generic_listxattr,
+       .listxattr      = nfs3_listxattr,
        .getxattr       = generic_getxattr,
        .setxattr       = generic_setxattr,
        .removexattr    = generic_removexattr,
@@ -899,7 +899,7 @@ static const struct inode_operations nfs3_file_inode_operations = {
        .getattr        = nfs_getattr,
        .setattr        = nfs_setattr,
 #ifdef CONFIG_NFS_V3_ACL
-       .listxattr      = generic_listxattr,
+       .listxattr      = nfs3_listxattr,
        .getxattr       = generic_getxattr,
        .setxattr       = generic_setxattr,
        .removexattr    = generic_removexattr,
index b6ee3a6..17fab89 100644 (file)
@@ -29,8 +29,6 @@
 static struct kmem_cache *nfs_page_cachep;
 static const struct rpc_call_ops nfs_pgio_common_ops;
 
-static void nfs_free_request(struct nfs_page *);
-
 static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
 {
        p->npages = pagecount;
@@ -239,20 +237,28 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
        WARN_ON_ONCE(prev == req);
 
        if (!prev) {
+               /* a head request */
                req->wb_head = req;
                req->wb_this_page = req;
        } else {
+               /* a subrequest */
                WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
                WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
                req->wb_head = prev->wb_head;
                req->wb_this_page = prev->wb_this_page;
                prev->wb_this_page = req;
 
+               /* All subrequests take a ref on the head request until
+                * nfs_page_group_destroy is called */
+               kref_get(&req->wb_head->wb_kref);
+
                /* grab extra ref if head request has extra ref from
                 * the write/commit path to handle handoff between write
                 * and commit lists */
-               if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags))
+               if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) {
+                       set_bit(PG_INODE_REF, &req->wb_flags);
                        kref_get(&req->wb_kref);
+               }
        }
 }
 
@@ -269,6 +275,10 @@ nfs_page_group_destroy(struct kref *kref)
        struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
        struct nfs_page *tmp, *next;
 
+       /* subrequests must release the ref on the head request */
+       if (req->wb_head != req)
+               nfs_release_request(req->wb_head);
+
        if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
                return;
 
@@ -394,7 +404,7 @@ static void nfs_clear_request(struct nfs_page *req)
  *
  * Note: Should never be called with the spinlock held!
  */
-static void nfs_free_request(struct nfs_page *req)
+void nfs_free_request(struct nfs_page *req)
 {
        WARN_ON_ONCE(req->wb_this_page != req);
 
@@ -925,7 +935,6 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
                        nfs_pageio_doio(desc);
                        if (desc->pg_error < 0)
                                return 0;
-                       desc->pg_moreio = 0;
                        if (desc->pg_recoalesce)
                                return 0;
                        /* retry add_request for this subreq */
@@ -972,6 +981,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
                desc->pg_count = 0;
                desc->pg_base = 0;
                desc->pg_recoalesce = 0;
+               desc->pg_moreio = 0;
 
                while (!list_empty(&head)) {
                        struct nfs_page *req;
index 98ff061..5e2f103 100644 (file)
@@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_commit_ops;
 static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
 static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
 static const struct nfs_rw_ops nfs_rw_write_ops;
+static void nfs_clear_request_commit(struct nfs_page *req);
 
 static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
@@ -91,8 +92,15 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
        set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
 }
 
+/*
+ * nfs_page_find_head_request_locked - find head request associated with @page
+ *
+ * must be called while holding the inode lock.
+ *
+ * returns matching head request with reference held, or NULL if not found.
+ */
 static struct nfs_page *
-nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
+nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
 {
        struct nfs_page *req = NULL;
 
@@ -104,25 +112,33 @@ nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
                /* Linearly search the commit list for the correct req */
                list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
                        if (freq->wb_page == page) {
-                               req = freq;
+                               req = freq->wb_head;
                                break;
                        }
                }
        }
 
-       if (req)
+       if (req) {
+               WARN_ON_ONCE(req->wb_head != req);
+
                kref_get(&req->wb_kref);
+       }
 
        return req;
 }
 
-static struct nfs_page *nfs_page_find_request(struct page *page)
+/*
+ * nfs_page_find_head_request - find head request associated with @page
+ *
+ * returns matching head request with reference held, or NULL if not found.
+ */
+static struct nfs_page *nfs_page_find_head_request(struct page *page)
 {
        struct inode *inode = page_file_mapping(page)->host;
        struct nfs_page *req = NULL;
 
        spin_lock(&inode->i_lock);
-       req = nfs_page_find_request_locked(NFS_I(inode), page);
+       req = nfs_page_find_head_request_locked(NFS_I(inode), page);
        spin_unlock(&inode->i_lock);
        return req;
 }
@@ -274,36 +290,246 @@ static void nfs_end_page_writeback(struct nfs_page *req)
                clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
 }
 
-static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
+
+/* nfs_page_group_clear_bits
+ *   @req - an nfs request
+ * clears all page group related bits from @req
+ */
+static void
+nfs_page_group_clear_bits(struct nfs_page *req)
+{
+       clear_bit(PG_TEARDOWN, &req->wb_flags);
+       clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
+       clear_bit(PG_UPTODATE, &req->wb_flags);
+       clear_bit(PG_WB_END, &req->wb_flags);
+       clear_bit(PG_REMOVE, &req->wb_flags);
+}
+
+
+/*
+ * nfs_unroll_locks_and_wait -  unlock all newly locked reqs and wait on @req
+ *
+ * this is a helper function for nfs_lock_and_join_requests
+ *
+ * @inode - inode associated with request page group, must be holding inode lock
+ * @head  - head request of page group, must be holding head lock
+ * @req   - request that couldn't lock and needs to wait on the req bit lock
+ * @nonblock - if true, don't actually wait
+ *
+ * NOTE: this must be called holding page_group bit lock and inode spin lock
+ *       and BOTH will be released before returning.
+ *
+ * returns 0 on success, < 0 on error.
+ */
+static int
+nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
+                         struct nfs_page *req, bool nonblock)
+       __releases(&inode->i_lock)
+{
+       struct nfs_page *tmp;
+       int ret;
+
+       /* relinquish all the locks successfully grabbed this run */
+       for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)
+               nfs_unlock_request(tmp);
+
+       WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
+
+       /* grab a ref on the request that will be waited on */
+       kref_get(&req->wb_kref);
+
+       nfs_page_group_unlock(head);
+       spin_unlock(&inode->i_lock);
+
+       /* release ref from nfs_page_find_head_request_locked */
+       nfs_release_request(head);
+
+       if (!nonblock)
+               ret = nfs_wait_on_request(req);
+       else
+               ret = -EAGAIN;
+       nfs_release_request(req);
+
+       return ret;
+}
+
+/*
+ * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
+ *
+ * @destroy_list - request list (using wb_this_page) terminated by @old_head
+ * @old_head - the old head of the list
+ *
+ * All subrequests must be locked and removed from all lists, so at this point
+ * they are only "active" in this function, and possibly in nfs_wait_on_request
+ * with a reference held by some other context.
+ */
+static void
+nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
+                                struct nfs_page *old_head)
+{
+       while (destroy_list) {
+               struct nfs_page *subreq = destroy_list;
+
+               destroy_list = (subreq->wb_this_page == old_head) ?
+                                  NULL : subreq->wb_this_page;
+
+               WARN_ON_ONCE(old_head != subreq->wb_head);
+
+               /* make sure old group is not used */
+               subreq->wb_head = subreq;
+               subreq->wb_this_page = subreq;
+
+               nfs_clear_request_commit(subreq);
+
+               /* subreq is now totally disconnected from page group or any
+                * write / commit lists. last chance to wake any waiters */
+               nfs_unlock_request(subreq);
+
+               if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
+                       /* release ref on old head request */
+                       nfs_release_request(old_head);
+
+                       nfs_page_group_clear_bits(subreq);
+
+                       /* release the PG_INODE_REF reference */
+                       if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
+                               nfs_release_request(subreq);
+                       else
+                               WARN_ON_ONCE(1);
+               } else {
+                       WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
+                       /* zombie requests have already released the last
+                        * reference and were waiting on the rest of the
+                        * group to complete. Since it's no longer part of a
+                        * group, simply free the request */
+                       nfs_page_group_clear_bits(subreq);
+                       nfs_free_request(subreq);
+               }
+       }
+}
+
+/*
+ * nfs_lock_and_join_requests - join all subreqs to the head req and return
+ *                              a locked reference, cancelling any pending
+ *                              operations for this page.
+ *
+ * @page - the page used to lookup the "page group" of nfs_page structures
+ * @nonblock - if true, don't block waiting for request locks
+ *
+ * This function joins all sub requests to the head request by first
+ * locking all requests in the group, cancelling any pending operations
+ * and finally updating the head request to cover the whole range covered by
+ * the (former) group.  All subrequests are removed from any write or commit
+ * lists, unlinked from the group and destroyed.
+ *
+ * Returns a locked, referenced pointer to the head request - which after
+ * this call is guaranteed to be the only request associated with the page.
+ * Returns NULL if no requests are found for @page, or a ERR_PTR if an
+ * error was encountered.
+ */
+static struct nfs_page *
+nfs_lock_and_join_requests(struct page *page, bool nonblock)
 {
        struct inode *inode = page_file_mapping(page)->host;
-       struct nfs_page *req;
+       struct nfs_page *head, *subreq;
+       struct nfs_page *destroy_list = NULL;
+       unsigned int total_bytes;
        int ret;
 
+try_again:
+       total_bytes = 0;
+
+       WARN_ON_ONCE(destroy_list);
+
        spin_lock(&inode->i_lock);
-       for (;;) {
-               req = nfs_page_find_request_locked(NFS_I(inode), page);
-               if (req == NULL)
-                       break;
-               if (nfs_lock_request(req))
-                       break;
-               /* Note: If we hold the page lock, as is the case in nfs_writepage,
-                *       then the call to nfs_lock_request() will always
-                *       succeed provided that someone hasn't already marked the
-                *       request as dirty (in which case we don't care).
-                */
+
+       /*
+        * A reference is taken only on the head request which acts as a
+        * reference to the whole page group - the group will not be destroyed
+        * until the head reference is released.
+        */
+       head = nfs_page_find_head_request_locked(NFS_I(inode), page);
+
+       if (!head) {
                spin_unlock(&inode->i_lock);
-               if (!nonblock)
-                       ret = nfs_wait_on_request(req);
-               else
-                       ret = -EAGAIN;
-               nfs_release_request(req);
-               if (ret != 0)
+               return NULL;
+       }
+
+       /* lock each request in the page group */
+       nfs_page_group_lock(head);
+       subreq = head;
+       do {
+               /*
+                * Subrequests are always contiguous, non overlapping
+                * and in order. If not, it's a programming error.
+                */
+               WARN_ON_ONCE(subreq->wb_offset !=
+                    (head->wb_offset + total_bytes));
+
+               /* keep track of how many bytes this group covers */
+               total_bytes += subreq->wb_bytes;
+
+               if (!nfs_lock_request(subreq)) {
+                       /* releases page group bit lock and
+                        * inode spin lock and all references */
+                       ret = nfs_unroll_locks_and_wait(inode, head,
+                               subreq, nonblock);
+
+                       if (ret == 0)
+                               goto try_again;
+
                        return ERR_PTR(ret);
-               spin_lock(&inode->i_lock);
+               }
+
+               subreq = subreq->wb_this_page;
+       } while (subreq != head);
+
+       /* Now that all requests are locked, make sure they aren't on any list.
+        * Commit list removal accounting is done after locks are dropped */
+       subreq = head;
+       do {
+               nfs_list_remove_request(subreq);
+               subreq = subreq->wb_this_page;
+       } while (subreq != head);
+
+       /* unlink subrequests from head, destroy them later */
+       if (head->wb_this_page != head) {
+               /* destroy list will be terminated by head */
+               destroy_list = head->wb_this_page;
+               head->wb_this_page = head;
+
+               /* change head request to cover whole range that
+                * the former page group covered */
+               head->wb_bytes = total_bytes;
        }
+
+       /*
+        * prepare head request to be added to new pgio descriptor
+        */
+       nfs_page_group_clear_bits(head);
+
+       /*
+        * some part of the group was still on the inode list - otherwise
+        * the group wouldn't be involved in async write.
+        * grab a reference for the head request, iff it needs one.
+        */
+       if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
+               kref_get(&head->wb_kref);
+
+       nfs_page_group_unlock(head);
+
+       /* drop lock to clear_request_commit the head req and clean up
+        * requests on destroy list */
        spin_unlock(&inode->i_lock);
-       return req;
+
+       nfs_destroy_unlinked_subrequests(destroy_list, head);
+
+       /* clean up commit list state */
+       nfs_clear_request_commit(head);
+
+       /* still holds ref on head from nfs_page_find_head_request_locked
+        * and still has lock on head from lock loop */
+       return head;
 }
 
 /*
@@ -316,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
        struct nfs_page *req;
        int ret = 0;
 
-       req = nfs_find_and_lock_request(page, nonblock);
+       req = nfs_lock_and_join_requests(page, nonblock);
        if (!req)
                goto out;
        ret = PTR_ERR(req);
@@ -448,7 +674,9 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
                set_page_private(req->wb_page, (unsigned long)req);
        }
        nfsi->npages++;
-       set_bit(PG_INODE_REF, &req->wb_flags);
+       /* this a head request for a page group - mark it as having an
+        * extra reference so sub groups can follow suit */
+       WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
        kref_get(&req->wb_kref);
        spin_unlock(&inode->i_lock);
 }
@@ -474,7 +702,9 @@ static void nfs_inode_remove_request(struct nfs_page *req)
                nfsi->npages--;
                spin_unlock(&inode->i_lock);
        }
-       nfs_release_request(req);
+
+       if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
+               nfs_release_request(req);
 }
 
 static void
@@ -638,7 +868,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
        struct nfs_commit_info cinfo;
        unsigned long bytes = 0;
-       bool do_destroy;
 
        if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
                goto out;
@@ -668,7 +897,6 @@ remove_req:
 next:
                nfs_unlock_request(req);
                nfs_end_page_writeback(req);
-               do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
                nfs_release_request(req);
        }
 out:
@@ -769,7 +997,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
        spin_lock(&inode->i_lock);
 
        for (;;) {
-               req = nfs_page_find_request_locked(NFS_I(inode), page);
+               req = nfs_page_find_head_request_locked(NFS_I(inode), page);
                if (req == NULL)
                        goto out_unlock;
 
@@ -877,7 +1105,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
         * dropped page.
         */
        do {
-               req = nfs_page_find_request(page);
+               req = nfs_page_find_head_request(page);
                if (req == NULL)
                        return 0;
                l_ctx = req->wb_lock_context;
@@ -1569,27 +1797,28 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
        struct nfs_page *req;
        int ret = 0;
 
-       for (;;) {
-               wait_on_page_writeback(page);
-               req = nfs_page_find_request(page);
-               if (req == NULL)
-                       break;
-               if (nfs_lock_request(req)) {
-                       nfs_clear_request_commit(req);
-                       nfs_inode_remove_request(req);
-                       /*
-                        * In case nfs_inode_remove_request has marked the
-                        * page as being dirty
-                        */
-                       cancel_dirty_page(page, PAGE_CACHE_SIZE);
-                       nfs_unlock_and_release_request(req);
-                       break;
-               }
-               ret = nfs_wait_on_request(req);
-               nfs_release_request(req);
-               if (ret < 0)
-                       break;
+       wait_on_page_writeback(page);
+
+       /* blocking call to cancel all requests and join to a single (head)
+        * request */
+       req = nfs_lock_and_join_requests(page, false);
+
+       if (IS_ERR(req)) {
+               ret = PTR_ERR(req);
+       } else if (req) {
+               /* all requests from this page have been cancelled by
+                * nfs_lock_and_join_requests, so just remove the head
+                * request from the inode / page_private pointer and
+                * release it */
+               nfs_inode_remove_request(req);
+               /*
+                * In case nfs_inode_remove_request has marked the
+                * page as being dirty
+                */
+               cancel_dirty_page(page, PAGE_CACHE_SIZE);
+               nfs_unlock_and_release_request(req);
        }
+
        return ret;
 }