orangefs: delay freeing slot until cancel completes
authorAl Viro <viro@zeniv.linux.org.uk>
Fri, 12 Feb 2016 04:07:19 +0000 (23:07 -0500)
committerMike Marshall <hubcap@omnibond.com>
Fri, 19 Feb 2016 18:45:53 +0000 (13:45 -0500)
Make cancels reuse the aborted read/write op, to make sure they do not
fail on lack of memory.

Don't issue a cancel unless the daemon has seen our read/write, has not
replied and isn't being shut down.

If cancel *is* issued, don't wait for it to complete; stash the slot
in there and just have it freed when cancel is finally replied to or
purged (and delay dropping the reference until then, obviously).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
fs/orangefs/devorangefs-req.c
fs/orangefs/file.c
fs/orangefs/orangefs-cache.c
fs/orangefs/orangefs-kernel.h
fs/orangefs/orangefs-mod.c
fs/orangefs/orangefs-utils.c
fs/orangefs/waitqueue.c

index 37278f5..6a7df12 100644 (file)
@@ -438,6 +438,8 @@ wakeup:
                }
        }
 out:
+       if (unlikely(op_is_cancel(op)))
+               put_cancel(op);
        op_release(op);
        return ret;
 
@@ -546,6 +548,11 @@ int is_daemon_in_service(void)
        return in_service;
 }
 
+bool __is_daemon_in_service(void)
+{
+       return open_access_count == 1;
+}
+
 static inline long check_ioctl_command(unsigned int command)
 {
        /* Check for valid ioctl codes */
index 193671c..3b1e9e8 100644 (file)
@@ -181,17 +181,6 @@ populate_shared_memory:
        }
 
        if (ret < 0) {
-               /*
-                * XXX: needs to be optimized - we only need to cancel if it
-                * had been seen by daemon and not completed
-                */
-               if (!op_state_serviced(new_op)) {
-                       orangefs_cancel_op_in_progress(new_op->tag);
-               } else {
-                       complete(&new_op->done);
-               }
-               orangefs_bufmap_put(buffer_index);
-               buffer_index = -1;
                /*
                 * don't write an error to syslog on signaled operation
                 * termination unless we've got debugging turned on, as
@@ -207,7 +196,10 @@ populate_shared_memory:
                                type == ORANGEFS_IO_READ ?
                                        "read from" : "write to",
                                handle, ret);
-               goto out;
+               if (orangefs_cancel_op_in_progress(new_op))
+                       return ret;
+
+               goto done_copying;
        }
 
        /*
index 3b3de91..59ab0c2 100644 (file)
@@ -101,6 +101,15 @@ char *get_opname_string(struct orangefs_kernel_op_s *new_op)
        return "OP_UNKNOWN?";
 }
 
+void orangefs_new_tag(struct orangefs_kernel_op_s *op)
+{
+       spin_lock(&next_tag_value_lock);
+       op->tag = next_tag_value++;
+       if (next_tag_value == 0)
+               next_tag_value = 100;
+       spin_unlock(&next_tag_value_lock);
+}
+
 struct orangefs_kernel_op_s *op_alloc(__s32 type)
 {
        struct orangefs_kernel_op_s *new_op = NULL;
@@ -120,14 +129,9 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type)
                new_op->downcall.status = -1;
 
                new_op->op_state = OP_VFS_STATE_UNKNOWN;
-               new_op->tag = 0;
 
                /* initialize the op specific tag and upcall credentials */
-               spin_lock(&next_tag_value_lock);
-               new_op->tag = next_tag_value++;
-               if (next_tag_value == 0)
-                       next_tag_value = 100;
-               spin_unlock(&next_tag_value_lock);
+               orangefs_new_tag(new_op);
                new_op->upcall.type = type;
                new_op->attempts = 0;
                gossip_debug(GOSSIP_CACHE_DEBUG,
index a8cde90..3ceeeae 100644 (file)
@@ -190,9 +190,14 @@ struct orangefs_kernel_op_s {
        /*
         * Set uses_shared_memory to 1 if this operation uses shared memory.
         * If true, then a retry on the op must also get a new shared memory
-        * buffer and re-populate it.
+        * buffer and re-populate it.  Cancels don't care - it only matters
+        * for service_operation() retry logics and cancels don't go through
+        * it anymore.
         */
-       int uses_shared_memory;
+       union {
+               int uses_shared_memory;
+               int slot_to_free;
+       };
 
        struct orangefs_upcall_s upcall;
        struct orangefs_downcall_s downcall;
@@ -219,17 +224,13 @@ static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op)
        op->op_state = OP_VFS_STATE_SERVICED;
        wake_up_interruptible(&op->waitq);
 }
-static inline void set_op_state_purged(struct orangefs_kernel_op_s *op)
-{
-       op->op_state |= OP_VFS_STATE_PURGED;
-       wake_up_interruptible(&op->waitq);
-}
 
 #define op_state_waiting(op)     ((op)->op_state & OP_VFS_STATE_WAITING)
 #define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR)
 #define op_state_serviced(op)    ((op)->op_state & OP_VFS_STATE_SERVICED)
 #define op_state_purged(op)      ((op)->op_state & OP_VFS_STATE_PURGED)
 #define op_state_given_up(op)    ((op)->op_state & OP_VFS_STATE_GIVEN_UP)
+#define op_is_cancel(op)         ((op)->upcall.type == ORANGEFS_VFS_OP_CANCEL)
 
 static inline void get_op(struct orangefs_kernel_op_s *op)
 {
@@ -249,6 +250,27 @@ static inline void op_release(struct orangefs_kernel_op_s *op)
        }
 }
 
+extern void orangefs_bufmap_put(int);
+static inline void put_cancel(struct orangefs_kernel_op_s *op)
+{
+       orangefs_bufmap_put(op->slot_to_free);
+       op_release(op);
+}
+
+static inline void set_op_state_purged(struct orangefs_kernel_op_s *op)
+{
+       spin_lock(&op->lock);
+       if (unlikely(op_is_cancel(op))) {
+               list_del(&op->list);
+               spin_unlock(&op->lock);
+               put_cancel(op);
+       } else {
+               op->op_state |= OP_VFS_STATE_PURGED;
+               wake_up_interruptible(&op->waitq);
+               spin_unlock(&op->lock);
+       }
+}
+
 /* per inode private orangefs info */
 struct orangefs_inode_s {
        struct orangefs_object_kref refn;
@@ -448,6 +470,7 @@ static inline int match_handle(struct orangefs_khandle resp_handle,
 int op_cache_initialize(void);
 int op_cache_finalize(void);
 struct orangefs_kernel_op_s *op_alloc(__s32 type);
+void orangefs_new_tag(struct orangefs_kernel_op_s *op);
 char *get_opname_string(struct orangefs_kernel_op_s *new_op);
 
 int orangefs_inode_cache_initialize(void);
@@ -528,6 +551,7 @@ ssize_t orangefs_inode_read(struct inode *inode,
 int orangefs_dev_init(void);
 void orangefs_dev_cleanup(void);
 int is_daemon_in_service(void);
+bool __is_daemon_in_service(void);
 int fs_mount_pending(__s32 fsid);
 
 /*
@@ -562,7 +586,7 @@ void orangefs_set_signals(sigset_t *);
 
 int orangefs_unmount_sb(struct super_block *sb);
 
-int orangefs_cancel_op_in_progress(__u64 tag);
+bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op);
 
 static inline __u64 orangefs_convert_time_field(const struct timespec *ts)
 {
index 7639ab2..965959c 100644 (file)
@@ -260,14 +260,12 @@ void purge_inprogress_ops(void)
                                         next,
                                         &htable_ops_in_progress[i],
                                         list) {
-                       spin_lock(&op->lock);
                        gossip_debug(GOSSIP_INIT_DEBUG,
                                "pvfs2-client-core: purging in-progress op tag "
                                "%llu %s\n",
                                llu(op->tag),
                                get_opname_string(op));
                        set_op_state_purged(op);
-                       spin_unlock(&op->lock);
                }
                spin_unlock(&htable_ops_in_progress_lock);
        }
index fa3ed8a..08f9c2d 100644 (file)
@@ -688,38 +688,6 @@ int orangefs_unmount_sb(struct super_block *sb)
        return ret;
 }
 
-/*
- * NOTE: on successful cancellation, be sure to return -EINTR, as
- * that's the return value the caller expects
- */
-int orangefs_cancel_op_in_progress(__u64 tag)
-{
-       int ret = -EINVAL;
-       struct orangefs_kernel_op_s *new_op = NULL;
-
-       gossip_debug(GOSSIP_UTILS_DEBUG,
-                    "orangefs_cancel_op_in_progress called on tag %llu\n",
-                    llu(tag));
-
-       new_op = op_alloc(ORANGEFS_VFS_OP_CANCEL);
-       if (!new_op)
-               return -ENOMEM;
-       new_op->upcall.req.cancel.op_tag = tag;
-
-       gossip_debug(GOSSIP_UTILS_DEBUG,
-                    "Attempting ORANGEFS operation cancellation of tag %llu\n",
-                    llu(new_op->upcall.req.cancel.op_tag));
-
-       ret = service_operation(new_op, "orangefs_cancel", ORANGEFS_OP_CANCELLATION);
-
-       gossip_debug(GOSSIP_UTILS_DEBUG,
-                    "orangefs_cancel_op_in_progress: got return value of %d\n",
-                    ret);
-
-       op_release(new_op);
-       return ret;
-}
-
 void orangefs_make_bad_inode(struct inode *inode)
 {
        if (is_root_handle(inode)) {
index 191d886..3ea1665 100644 (file)
@@ -16,7 +16,6 @@
 #include "orangefs-kernel.h"
 #include "orangefs-bufmap.h"
 
-static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *);
 static int wait_for_matching_downcall(struct orangefs_kernel_op_s *);
 
 /*
@@ -36,25 +35,29 @@ void purge_waiting_ops(void)
                             "pvfs2-client-core: purging op tag %llu %s\n",
                             llu(op->tag),
                             get_opname_string(op));
-               spin_lock(&op->lock);
                set_op_state_purged(op);
-               spin_unlock(&op->lock);
        }
        spin_unlock(&orangefs_request_list_lock);
 }
 
 static inline void
-add_op_to_request_list(struct orangefs_kernel_op_s *op)
+__add_op_to_request_list(struct orangefs_kernel_op_s *op)
 {
-       spin_lock(&orangefs_request_list_lock);
        spin_lock(&op->lock);
        set_op_state_waiting(op);
        list_add_tail(&op->list, &orangefs_request_list);
-       spin_unlock(&orangefs_request_list_lock);
        spin_unlock(&op->lock);
        wake_up_interruptible(&orangefs_request_list_waitq);
 }
 
+static inline void
+add_op_to_request_list(struct orangefs_kernel_op_s *op)
+{
+       spin_lock(&orangefs_request_list_lock);
+       __add_op_to_request_list(op);
+       spin_unlock(&orangefs_request_list_lock);
+}
+
 static inline
 void add_priority_op_to_request_list(struct orangefs_kernel_op_s *op)
 {
@@ -159,15 +162,7 @@ retry_servicing:
        if (flags & ORANGEFS_OP_ASYNC)
                return 0;
 
-       if (flags & ORANGEFS_OP_CANCELLATION) {
-               gossip_debug(GOSSIP_WAIT_DEBUG,
-                            "%s:"
-                            "About to call wait_for_cancellation_downcall.\n",
-                            __func__);
-               ret = wait_for_cancellation_downcall(op);
-       } else {
-               ret = wait_for_matching_downcall(op);
-       }
+       ret = wait_for_matching_downcall(op);
 
        if (ret < 0) {
                /* failed to get matching downcall */
@@ -273,6 +268,36 @@ retry_servicing:
        return ret;
 }
 
+bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op)
+{
+       u64 tag = op->tag;
+       if (!op_state_in_progress(op))
+               return false;
+
+       op->slot_to_free = op->upcall.req.io.buf_index;
+       memset(&op->upcall, 0, sizeof(op->upcall));
+       memset(&op->downcall, 0, sizeof(op->downcall));
+       op->upcall.type = ORANGEFS_VFS_OP_CANCEL;
+       op->upcall.req.cancel.op_tag = tag;
+       op->downcall.type = ORANGEFS_VFS_OP_INVALID;
+       op->downcall.status = -1;
+       orangefs_new_tag(op);
+
+       spin_lock(&orangefs_request_list_lock);
+       /* orangefs_request_list_lock is enough of a barrier here */
+       if (!__is_daemon_in_service()) {
+               spin_unlock(&orangefs_request_list_lock);
+               return false;
+       }
+       __add_op_to_request_list(op);
+       spin_unlock(&orangefs_request_list_lock);
+
+       gossip_debug(GOSSIP_UTILS_DEBUG,
+                    "Attempting ORANGEFS operation cancellation of tag %llu\n",
+                    llu(tag));
+       return true;
+}
+
 static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op)
 {
        /*
@@ -426,81 +451,3 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op)
 
        return ret;
 }
-
-/*
- * similar to wait_for_matching_downcall(), but used in the special case
- * of I/O cancellations.
- *
- * Note we need a special wait function because if this is called we already
- *      know that a signal is pending in current and need to service the
- *      cancellation upcall anyway.  the only way to exit this is to either
- *      timeout or have the cancellation be serviced properly.
- */
-static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op)
-{
-       int ret = -EINVAL;
-       DEFINE_WAIT(wait_entry);
-
-       while (1) {
-               spin_lock(&op->lock);
-               prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);
-               if (op_state_serviced(op)) {
-                       gossip_debug(GOSSIP_WAIT_DEBUG,
-                                    "%s:op-state is SERVICED.\n",
-                                    __func__);
-                       spin_unlock(&op->lock);
-                       ret = 0;
-                       break;
-               }
-
-               if (signal_pending(current)) {
-                       gossip_debug(GOSSIP_WAIT_DEBUG,
-                                    "%s:operation interrupted by a signal (tag"
-                                    " %llu, op %p)\n",
-                                    __func__,
-                                    llu(op->tag),
-                                    op);
-                       orangefs_clean_up_interrupted_operation(op);
-                       ret = -EINTR;
-                       break;
-               }
-
-               gossip_debug(GOSSIP_WAIT_DEBUG,
-                            "%s:About to call schedule_timeout.\n",
-                            __func__);
-               spin_unlock(&op->lock);
-               ret = schedule_timeout(op_timeout_secs * HZ);
-
-               gossip_debug(GOSSIP_WAIT_DEBUG,
-                            "%s:Value returned from schedule_timeout(%d).\n",
-                            __func__,
-                            ret);
-               if (!ret) {
-                       gossip_debug(GOSSIP_WAIT_DEBUG,
-                                    "%s:*** operation timed out: %p\n",
-                                    __func__,
-                                    op);
-                       spin_lock(&op->lock);
-                       orangefs_clean_up_interrupted_operation(op);
-                       ret = -ETIMEDOUT;
-                       break;
-               }
-
-               gossip_debug(GOSSIP_WAIT_DEBUG,
-                            "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n",
-                            __func__);
-               ret = -ETIMEDOUT;
-               break;
-       }
-
-       spin_lock(&op->lock);
-       finish_wait(&op->waitq, &wait_entry);
-       spin_unlock(&op->lock);
-
-       gossip_debug(GOSSIP_WAIT_DEBUG,
-                    "%s:returning ret(%d)\n",
-                    __func__,
-                    ret);
-
-       return ret;
-}