NFS: Move the pnfs read code into pnfs.c
[cascardo/linux.git] / fs / nfs / pnfs.c
index ef535f2..9eca5a8 100644 (file)
@@ -28,6 +28,7 @@
  */
 
 #include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
 #include "internal.h"
 #include "pnfs.h"
 #include "iostat.h"
@@ -243,7 +244,7 @@ put_lseg_common(struct pnfs_layout_segment *lseg)
 {
        struct inode *inode = lseg->pls_layout->plh_inode;
 
-       BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+       WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
        list_del_init(&lseg->pls_list);
        if (list_empty(&lseg->pls_layout->plh_segs)) {
                set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
@@ -448,11 +449,20 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
 void
 pnfs_destroy_all_layouts(struct nfs_client *clp)
 {
+       struct nfs_server *server;
        struct pnfs_layout_hdr *lo;
        LIST_HEAD(tmp_list);
 
+       nfs4_deviceid_mark_client_invalid(clp);
+       nfs4_deviceid_purge_client(clp);
+
        spin_lock(&clp->cl_lock);
-       list_splice_init(&clp->cl_layouts, &tmp_list);
+       rcu_read_lock();
+       list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+               if (!list_empty(&server->layouts))
+                       list_splice_init(&server->layouts, &tmp_list);
+       }
+       rcu_read_unlock();
        spin_unlock(&clp->cl_lock);
 
        while (!list_empty(&tmp_list)) {
@@ -619,6 +629,57 @@ out_err_free:
        return NULL;
 }
 
+/* Initiates a LAYOUTRETURN(FILE) */
+int
+_pnfs_return_layout(struct inode *ino)
+{
+       struct pnfs_layout_hdr *lo = NULL;
+       struct nfs_inode *nfsi = NFS_I(ino);
+       LIST_HEAD(tmp_list);
+       struct nfs4_layoutreturn *lrp;
+       nfs4_stateid stateid;
+       int status = 0;
+
+       dprintk("--> %s\n", __func__);
+
+       spin_lock(&ino->i_lock);
+       lo = nfsi->layout;
+       if (!lo) {
+               spin_unlock(&ino->i_lock);
+               dprintk("%s: no layout to return\n", __func__);
+               return status;
+       }
+       stateid = nfsi->layout->plh_stateid;
+       /* Reference matched in nfs4_layoutreturn_release */
+       get_layout_hdr(lo);
+       mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
+       lo->plh_block_lgets++;
+       spin_unlock(&ino->i_lock);
+       pnfs_free_lseg_list(&tmp_list);
+
+       WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
+
+       lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
+       if (unlikely(lrp == NULL)) {
+               status = -ENOMEM;
+               set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags);
+               set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags);
+               put_layout_hdr(lo);
+               goto out;
+       }
+
+       lrp->args.stateid = stateid;
+       lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
+       lrp->args.inode = ino;
+       lrp->args.layout = lo;
+       lrp->clp = NFS_SERVER(ino)->nfs_client;
+
+       status = nfs4_proc_layoutreturn(lrp);
+out:
+       dprintk("<-- %s status: %d\n", __func__, status);
+       return status;
+}
+
 bool pnfs_roc(struct inode *ino)
 {
        struct pnfs_layout_hdr *lo;
@@ -842,7 +903,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
                        ret = get_lseg(lseg);
                        break;
                }
-               if (cmp_layout(range, &lseg->pls_range) > 0)
+               if (lseg->pls_range.offset > range->offset)
                        break;
        }
 
@@ -870,7 +931,8 @@ pnfs_update_layout(struct inode *ino,
        };
        unsigned pg_offset;
        struct nfs_inode *nfsi = NFS_I(ino);
-       struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
+       struct nfs_server *server = NFS_SERVER(ino);
+       struct nfs_client *clp = server->nfs_client;
        struct pnfs_layout_hdr *lo;
        struct pnfs_layout_segment *lseg = NULL;
        bool first = false;
@@ -914,7 +976,7 @@ pnfs_update_layout(struct inode *ino,
                 */
                spin_lock(&clp->cl_lock);
                BUG_ON(!list_empty(&lo->plh_layouts));
-               list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
+               list_add_tail(&lo->plh_layouts, &server->layouts);
                spin_unlock(&clp->cl_lock);
        }
 
@@ -923,7 +985,8 @@ pnfs_update_layout(struct inode *ino,
                arg.offset -= pg_offset;
                arg.length += pg_offset;
        }
-       arg.length = PAGE_CACHE_ALIGN(arg.length);
+       if (arg.length != NFS4_MAX_UINT64)
+               arg.length = PAGE_CACHE_ALIGN(arg.length);
 
        lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
        if (!lseg && first) {
@@ -941,6 +1004,7 @@ out_unlock:
        spin_unlock(&ino->i_lock);
        goto out;
 }
+EXPORT_SYMBOL_GPL(pnfs_update_layout);
 
 int
 pnfs_layout_process(struct nfs4_layoutget *lgp)
@@ -998,61 +1062,113 @@ out_forget_reply:
        goto out;
 }
 
-static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
-                            struct nfs_page *prev,
-                            struct nfs_page *req)
+void
+pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 {
-       if (pgio->pg_count == prev->wb_bytes) {
-               /* This is first coelesce call for a series of nfs_pages */
-               pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-                                                  prev->wb_context,
-                                                  req_offset(req),
-                                                  pgio->pg_count,
-                                                  IOMODE_READ,
-                                                  GFP_KERNEL);
-       } else if (pgio->pg_lseg &&
-                  req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
-                                               pgio->pg_lseg->pls_range.length))
-               return 0;
-       return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
+       BUG_ON(pgio->pg_lseg != NULL);
+
+       pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+                                          req->wb_context,
+                                          req_offset(req),
+                                          req->wb_bytes,
+                                          IOMODE_READ,
+                                          GFP_KERNEL);
+       /* If no lseg, fall back to read through mds */
+       if (pgio->pg_lseg == NULL)
+               nfs_pageio_init_read_mds(pgio, pgio->pg_inode);
+
 }
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
 
 void
+pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+       BUG_ON(pgio->pg_lseg != NULL);
+
+       pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+                                          req->wb_context,
+                                          req_offset(req),
+                                          req->wb_bytes,
+                                          IOMODE_RW,
+                                          GFP_NOFS);
+       /* If no lseg, fall back to write through mds */
+       if (pgio->pg_lseg == NULL)
+               nfs_pageio_init_write_mds(pgio, pgio->pg_inode, pgio->pg_ioflags);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
+
+bool
 pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
 {
-       struct pnfs_layoutdriver_type *ld;
+       struct nfs_server *server = NFS_SERVER(inode);
+       struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
 
-       ld = NFS_SERVER(inode)->pnfs_curr_ld;
-       pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
+       if (ld == NULL)
+               return false;
+       nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
+       return true;
 }
 
-static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
-                             struct nfs_page *prev,
-                             struct nfs_page *req)
+bool
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
 {
-       if (pgio->pg_count == prev->wb_bytes) {
-               /* This is first coelesce call for a series of nfs_pages */
-               pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-                                                  prev->wb_context,
-                                                  req_offset(req),
-                                                  pgio->pg_count,
-                                                  IOMODE_RW,
-                                                  GFP_NOFS);
-       } else if (pgio->pg_lseg &&
-                  req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
-                                               pgio->pg_lseg->pls_range.length))
-               return 0;
-       return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
+       struct nfs_server *server = NFS_SERVER(inode);
+       struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
+
+       if (ld == NULL)
+               return false;
+       nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
+       return true;
 }
 
-void
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+bool
+pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+                    struct nfs_page *req)
 {
-       struct pnfs_layoutdriver_type *ld;
+       if (pgio->pg_lseg == NULL)
+               return nfs_generic_pg_test(pgio, prev, req);
 
-       ld = NFS_SERVER(inode)->pnfs_curr_ld;
-       pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
+       /*
+        * Test if an nfs_page is fully contained in the pnfs_layout_range.
+        * Note that this test makes several assumptions:
+        * - that the previous nfs_page in the struct nfs_pageio_descriptor
+        *   is known to lie within the range.
+        * - that the nfs_page being tested is known to be contiguous with the
+        *   previous nfs_page.
+        * - that layout ranges are page aligned, so we only have to test the
+        *   start offset of the request.
+        *
+        * Please also note that 'end_offset' is actually the offset of the
+        * first byte that lies outside the pnfs_layout_range. FIXME?
+        *
+        */
+       return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
+                                        pgio->pg_lseg->pls_range.length);
 }
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
+
+/*
+ * Called by non rpc-based layout drivers
+ */
+int
+pnfs_ld_write_done(struct nfs_write_data *data)
+{
+       int status;
+
+       if (!data->pnfs_error) {
+               pnfs_set_layoutcommit(data);
+               data->mds_ops->rpc_call_done(&data->task, data);
+               data->mds_ops->rpc_release(data);
+               return 0;
+       }
+
+       dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
+               data->pnfs_error);
+       status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
+                                   data->mds_ops, NFS_FILE_SYNC);
+       return status ? : -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
 
 enum pnfs_try_status
 pnfs_try_to_write_data(struct nfs_write_data *wdata,
@@ -1078,18 +1194,55 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
        return trypnfs;
 }
 
+/*
+ * Called by non rpc-based layout drivers
+ */
+int
+pnfs_ld_read_done(struct nfs_read_data *data)
+{
+       int status;
+
+       if (!data->pnfs_error) {
+               __nfs4_read_done_cb(data);
+               data->mds_ops->rpc_call_done(&data->task, data);
+               data->mds_ops->rpc_release(data);
+               return 0;
+       }
+
+       dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
+               data->pnfs_error);
+       status = nfs_initiate_read(data, NFS_CLIENT(data->inode),
+                                  data->mds_ops);
+       return status ? : -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
+
+static void
+pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
+               struct nfs_read_data *data)
+{
+       list_splice_tail_init(&data->pages, &desc->pg_list);
+       if (data->req && list_empty(&data->req->wb_list))
+               nfs_list_add_request(data->req, &desc->pg_list);
+       nfs_pageio_reset_read_mds(desc);
+       desc->pg_recoalesce = 1;
+       nfs_readdata_release(data);
+}
+
 /*
  * Call the appropriate parallel I/O subsystem read function.
  */
-enum pnfs_try_status
+static enum pnfs_try_status
 pnfs_try_to_read_data(struct nfs_read_data *rdata,
-                      const struct rpc_call_ops *call_ops)
+                      const struct rpc_call_ops *call_ops,
+                      struct pnfs_layout_segment *lseg)
 {
        struct inode *inode = rdata->inode;
        struct nfs_server *nfss = NFS_SERVER(inode);
        enum pnfs_try_status trypnfs;
 
        rdata->mds_ops = call_ops;
+       rdata->lseg = get_lseg(lseg);
 
        dprintk("%s: Reading ino:%lu %u@%llu\n",
                __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
@@ -1105,6 +1258,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
        return trypnfs;
 }
 
+static void
+pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+       struct nfs_read_data *data;
+       const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+       struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+       desc->pg_lseg = NULL;
+       while (!list_empty(head)) {
+               enum pnfs_try_status trypnfs;
+
+               data = list_entry(head->next, struct nfs_read_data, list);
+               list_del_init(&data->list);
+
+               trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+               if (trypnfs == PNFS_NOT_ATTEMPTED)
+                       pnfs_read_through_mds(desc, data);
+       }
+       put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+       LIST_HEAD(head);
+       int ret;
+
+       ret = nfs_generic_pagein(desc, &head);
+       if (ret != 0) {
+               put_lseg(desc->pg_lseg);
+               desc->pg_lseg = NULL;
+               return ret;
+       }
+       pnfs_do_multiple_reads(desc, &head);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
+
 /*
  * Currently there is only one (whole file) write lseg.
  */
@@ -1122,7 +1313,7 @@ void
 pnfs_set_layoutcommit(struct nfs_write_data *wdata)
 {
        struct nfs_inode *nfsi = NFS_I(wdata->inode);
-       loff_t end_pos = wdata->args.offset + wdata->res.count;
+       loff_t end_pos = wdata->mds_offset + wdata->res.count;
        bool mark_as_dirty = false;
 
        spin_lock(&nfsi->vfs_inode.i_lock);