cxlflash: Fix to avoid stall while waiting on TMF
authorMatthew R. Ochs <mrochs@linux.vnet.ibm.com>
Wed, 21 Oct 2015 20:13:21 +0000 (15:13 -0500)
committerJames Bottomley <JBottomley@Odin.com>
Fri, 30 Oct 2015 08:10:38 +0000 (17:10 +0900)
Borrowing the TMF waitq's spinlock causes a stall condition when
waiting for the TMF to complete. To remedy, introduce our own spin
lock to serialize TMF and use the appropriate wait services.

Also add a timeout while waiting for a TMF completion. When a TMF
times out, report back a failure such that a bigger hammer reset
can occur.

Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: James Bottomley <JBottomley@Odin.com>
drivers/scsi/cxlflash/common.h
drivers/scsi/cxlflash/main.c

index b038ac7..7a0cb5c 100644 (file)
@@ -124,6 +124,7 @@ struct cxlflash_cfg {
        struct list_head lluns; /* list of llun_info structs */
 
        wait_queue_head_t tmf_waitq;
+       spinlock_t tmf_slock;
        bool tmf_active;
        wait_queue_head_t reset_waitq;
        enum cxlflash_state state;
index 527ff85..110037d 100644 (file)
@@ -249,11 +249,10 @@ static void cmd_complete(struct afu_cmd *cmd)
                scp->scsi_done(scp);
 
                if (cmd_is_tmf) {
-                       spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+                       spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
                        cfg->tmf_active = false;
                        wake_up_all_locked(&cfg->tmf_waitq);
-                       spin_unlock_irqrestore(&cfg->tmf_waitq.lock,
-                                              lock_flags);
+                       spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
                }
        } else
                complete(&cmd->cevent);
@@ -420,6 +419,7 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
        struct device *dev = &cfg->dev->dev;
        ulong lock_flags;
        int rc = 0;
+       ulong to;
 
        cmd = cmd_checkout(afu);
        if (unlikely(!cmd)) {
@@ -428,15 +428,15 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
                goto out;
        }
 
-       /* If a Task Management Function is active, do not send one more.
-        */
-       spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+       /* When Task Management Function is active do not send another */
+       spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
        if (cfg->tmf_active)
-               wait_event_interruptible_locked_irq(cfg->tmf_waitq,
-                                                   !cfg->tmf_active);
+               wait_event_interruptible_lock_irq(cfg->tmf_waitq,
+                                                 !cfg->tmf_active,
+                                                 cfg->tmf_slock);
        cfg->tmf_active = true;
        cmd->cmd_tmf = true;
-       spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+       spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
 
        cmd->rcb.ctx_id = afu->ctx_hndl;
        cmd->rcb.port_sel = port_sel;
@@ -457,15 +457,24 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
        rc = send_cmd(afu, cmd);
        if (unlikely(rc)) {
                cmd_checkin(cmd);
-               spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+               spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
                cfg->tmf_active = false;
-               spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+               spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
                goto out;
        }
 
-       spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
-       wait_event_interruptible_locked_irq(cfg->tmf_waitq, !cfg->tmf_active);
-       spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+       spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
+       to = msecs_to_jiffies(5000);
+       to = wait_event_interruptible_lock_irq_timeout(cfg->tmf_waitq,
+                                                      !cfg->tmf_active,
+                                                      cfg->tmf_slock,
+                                                      to);
+       if (!to) {
+               cfg->tmf_active = false;
+               dev_err(dev, "%s: TMF timed out!\n", __func__);
+               rc = -1;
+       }
+       spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
 out:
        return rc;
 }
@@ -512,16 +521,17 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
                            get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
                            get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
 
-       /* If a Task Management Function is active, wait for it to complete
+       /*
+        * If a Task Management Function is active, wait for it to complete
         * before continuing with regular commands.
         */
-       spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+       spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
        if (cfg->tmf_active) {
-               spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+               spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
                rc = SCSI_MLQUEUE_HOST_BUSY;
                goto out;
        }
-       spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+       spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
 
        switch (cfg->state) {
        case STATE_RESET:
@@ -713,11 +723,12 @@ static void cxlflash_remove(struct pci_dev *pdev)
        /* If a Task Management Function is active, wait for it to complete
         * before continuing with remove.
         */
-       spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+       spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
        if (cfg->tmf_active)
-               wait_event_interruptible_locked_irq(cfg->tmf_waitq,
-                                                   !cfg->tmf_active);
-       spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+               wait_event_interruptible_lock_irq(cfg->tmf_waitq,
+                                                 !cfg->tmf_active,
+                                                 cfg->tmf_slock);
+       spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
 
        cfg->state = STATE_FAILTERM;
        cxlflash_stop_term_user_contexts(cfg);