#include <linux/module.h>
+#include <linux/kthread.h>
#include <linux/dlm.h>
#include <linux/sched.h>
#include <linux/raid/md_p.h>
struct dlm_lksb lksb;
char *name; /* lock name. */
uint32_t flags; /* flags to pass to dlm_lock() */
- struct completion completion; /* completion for synchronized locking */
+ wait_queue_head_t sync_locking; /* wait queue for synchronized locking */
+ bool sync_locking_done;
void (*bast)(void *arg, int mode); /* blocking AST function pointer*/
struct mddev *mddev; /* pointing back to mddev. */
int mode;
struct dlm_lock_resource *res;
res = arg;
- complete(&res->completion);
+ res->sync_locking_done = true;
+ wake_up(&res->sync_locking);
}
static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
0, sync_ast, res, res->bast);
if (ret)
return ret;
- wait_for_completion(&res->completion);
+ wait_event(res->sync_locking, res->sync_locking_done);
+ res->sync_locking_done = false;
if (res->lksb.sb_status == 0)
res->mode = mode;
return res->lksb.sb_status;
return dlm_lock_sync(res, DLM_LOCK_NL);
}
+/*
+ * An variation of dlm_lock_sync, which make lock request could
+ * be interrupted
+ */
+static int dlm_lock_sync_interruptible(struct dlm_lock_resource *res, int mode,
+ struct mddev *mddev)
+{
+ int ret = 0;
+
+ ret = dlm_lock(res->ls, mode, &res->lksb,
+ res->flags, res->name, strlen(res->name),
+ 0, sync_ast, res, res->bast);
+ if (ret)
+ return ret;
+
+ wait_event(res->sync_locking, res->sync_locking_done
+ || kthread_should_stop()
+ || test_bit(MD_CLOSING, &mddev->flags));
+ if (!res->sync_locking_done) {
+ /*
+ * the convert queue contains the lock request when request is
+ * interrupted, and sync_ast could still be run, so need to
+ * cancel the request and reset completion
+ */
+ ret = dlm_unlock(res->ls, res->lksb.sb_lkid, DLM_LKF_CANCEL,
+ &res->lksb, res);
+ res->sync_locking_done = false;
+ if (unlikely(ret != 0))
+ pr_info("failed to cancel previous lock request "
+ "%s return %d\n", res->name, ret);
+ return -EPERM;
+ } else
+ res->sync_locking_done = false;
+ if (res->lksb.sb_status == 0)
+ res->mode = mode;
+ return res->lksb.sb_status;
+}
+
static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
char *name, void (*bastfn)(void *arg, int mode), int with_lvb)
{
res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL);
if (!res)
return NULL;
- init_completion(&res->completion);
+ init_waitqueue_head(&res->sync_locking);
+ res->sync_locking_done = false;
res->ls = cinfo->lockspace;
res->mddev = mddev;
res->mode = DLM_LOCK_IV;
static void lockres_free(struct dlm_lock_resource *res)
{
- int ret;
+ int ret = 0;
if (!res)
return;
- /* cancel a lock request or a conversion request that is blocked */
- res->flags |= DLM_LKF_CANCEL;
-retry:
- ret = dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
- if (unlikely(ret != 0)) {
- pr_info("%s: failed to unlock %s return %d\n", __func__, res->name, ret);
-
- /* if a lock conversion is cancelled, then the lock is put
- * back to grant queue, need to ensure it is unlocked */
- if (ret == -DLM_ECANCEL)
- goto retry;
- }
- res->flags &= ~DLM_LKF_CANCEL;
- wait_for_completion(&res->completion);
+ /*
+ * use FORCEUNLOCK flag, so we can unlock even the lock is on the
+ * waiting or convert queue
+ */
+ ret = dlm_unlock(res->ls, res->lksb.sb_lkid, DLM_LKF_FORCEUNLOCK,
+ &res->lksb, res);
+ if (unlikely(ret != 0))
+ pr_err("failed to unlock %s return %d\n", res->name, ret);
+ else
+ wait_event(res->sync_locking, res->sync_locking_done);
kfree(res->name);
kfree(res->lksb.sb_lvbptr);
goto clear_bit;
}
- ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
+ ret = dlm_lock_sync_interruptible(bm_lockres, DLM_LOCK_PW, mddev);
if (ret) {
pr_err("md-cluster: Could not DLM lock %s: %d\n",
str, ret);
ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true);
if (ret) {
pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
- goto dlm_unlock;
+ goto clear_bit;
}
if (hi > 0) {
if (lo < mddev->recovery_cp)
md_wakeup_thread(mddev->thread);
}
}
-dlm_unlock:
- dlm_unlock_sync(bm_lockres);
clear_bit:
lockres_free(bm_lockres);
clear_bit(slot, &cinfo->recovery_map);
static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg)
{
- struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev,
- le32_to_cpu(msg->raid_slot));
+ struct md_rdev *rdev;
+ rcu_read_lock();
+ rdev = md_find_rdev_nr_rcu(mddev, le32_to_cpu(msg->raid_slot));
if (rdev) {
set_bit(ClusterRemove, &rdev->flags);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
else
pr_warn("%s: %d Could not find disk(%d) to REMOVE\n",
__func__, __LINE__, le32_to_cpu(msg->raid_slot));
+ rcu_read_unlock();
}
static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
{
- struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev,
- le32_to_cpu(msg->raid_slot));
+ struct md_rdev *rdev;
+ rcu_read_lock();
+ rdev = md_find_rdev_nr_rcu(mddev, le32_to_cpu(msg->raid_slot));
if (rdev && test_bit(Faulty, &rdev->flags))
clear_bit(Faulty, &rdev->flags);
else
pr_warn("%s: %d Could not find disk(%d) which is faulty",
__func__, __LINE__, le32_to_cpu(msg->raid_slot));
+ rcu_read_unlock();
}
static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
md_check_recovery(mddev);
}
- dlm_unlock_sync(bm_lockres);
lockres_free(bm_lockres);
}
out:
static int resync_start(struct mddev *mddev)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
- return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX);
+ return dlm_lock_sync_interruptible(cinfo->resync_lockres, DLM_LOCK_EX, mddev);
}
static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
if (cinfo->other_bitmap_lockres) {
for (i = 0; i < mddev->bitmap_info.nodes - 1; i++) {
if (cinfo->other_bitmap_lockres[i]) {
- dlm_unlock_sync(cinfo->other_bitmap_lockres[i]);
lockres_free(cinfo->other_bitmap_lockres[i]);
}
}