locking/qspinlock: Add comments
author     Peter Zijlstra <peterz@infradead.org>
           Wed, 8 Jun 2016 08:36:53 +0000 (10:36 +0200)
committer  Ingo Molnar <mingo@kernel.org>
           Wed, 8 Jun 2016 12:44:01 +0000 (14:44 +0200)
I figured we need to document the spin_is_locked() and
spin_unlock_wait() constraints somewhere.

Ideally 'someone' would rewrite Documentation/atomic_ops.txt and we
could find a place in there. But currently that document is stale to
the point of hardly being useful.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <waiman.long@hpe.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/locking/qspinlock.c

index ee7deb0..2f9153b 100644
@@ -267,6 +267,63 @@ static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
 #define queued_spin_lock_slowpath      native_queued_spin_lock_slowpath
 #endif
 
+/*
+ * Various notes on spin_is_locked() and spin_unlock_wait(), which are
+ * 'interesting' functions:
+ *
+ * PROBLEM: some architectures have an interesting issue with atomic ACQUIRE
+ * operations in that the ACQUIRE applies to the LOAD _not_ the STORE (ARM64,
+ * PPC). Also qspinlock has a similar issue per construction: the store that
+ * sets the locked byte can be unordered relative to acquiring the lock proper.
+ *
+ * This gets to be 'interesting' in the following cases, where the orderings
+ * marked /should/ below can end up not holding because of this issue.
+ *
+ *
+ * CASE 1:
+ *
+ * So the spin_is_locked() correctness issue comes from something like:
+ *
+ *   CPU0                              CPU1
+ *
+ *   global_lock();                    local_lock(i)
+ *     spin_lock(&G)                     spin_lock(&L[i])
+ *     for (i)                           if (!spin_is_locked(&G)) {
+ *       spin_unlock_wait(&L[i]);          smp_acquire__after_ctrl_dep();
+ *                                         return;
+ *                                       }
+ *                                       // deal with fail
+ *
+ * Where it is important that CPU1 sees G locked or CPU0 sees L[i] locked such
+ * that there is exclusion between the two critical sections.
+ *
+ * The load from spin_is_locked(&G) /should/ be constrained by the ACQUIRE from
+ * spin_lock(&L[i]), and similarly the load(s) from spin_unlock_wait(&L[i])
+ * /should/ be constrained by the ACQUIRE from spin_lock(&G).
+ *
+ * Similarly, later loads and stores are constrained by the ACQUIRE from the
+ * control dependency + smp_rmb(), i.e. smp_acquire__after_ctrl_dep().
+ *
+ *
+ * CASE 2:
+ *
+ * For spin_unlock_wait() there is a second correctness issue, namely:
+ *
+ *   CPU0                              CPU1
+ *
+ *   flag = set;
+ *   smp_mb();                         spin_lock(&l)
+ *   spin_unlock_wait(&l);             if (!flag)
+ *                                       // add to lockless list
+ *                                     spin_unlock(&l);
+ *   // iterate lockless list
+ *
+ * Which wants to ensure that CPU1 will stop adding entries to the list and
+ * that CPU0 will observe the last entry on the list (provided
+ * spin_unlock_wait() has ACQUIRE semantics and the like).
+ *
+ * Where the load of flag /should/ be ordered after the store that sets
+ * l locked.
+ */
+
 /*
  * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
  * issuing an _unordered_ store to set _Q_LOCKED_VAL.
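
As a reading aid (not part of the patch), here is a minimal sketch of the kind of code CASE 1 above describes: a global/local locking scheme whose fast path takes only a per-CPU lock and merely peeks at the global one. The names global, local_lock, do_global_work() and do_local_work() are invented for the illustration; spin_unlock_wait() is used as it existed at the time of this commit, and smp_acquire__after_ctrl_dep() upgrades the control dependency exactly as in the comment.

#include <linux/spinlock.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>

static DEFINE_SPINLOCK(global);
static DEFINE_PER_CPU(spinlock_t, local_lock);	/* assume each one is initialised at boot */

/* Slow path: take the global lock, then wait out every local holder. */
static void do_global_work(void)
{
	int cpu;

	spin_lock(&global);
	for_each_possible_cpu(cpu)
		spin_unlock_wait(per_cpu_ptr(&local_lock, cpu));
	/* ... work that must exclude every do_local_work() section ... */
	spin_unlock(&global);
}

/* Fast path: take only this CPU's lock and peek at the global one.
 * Preemption is assumed to be disabled by the caller. */
static bool do_local_work(void)
{
	spinlock_t *l = this_cpu_ptr(&local_lock);
	bool done = false;

	spin_lock(l);
	if (!spin_is_locked(&global)) {
		/* Turn the control dependency into ACQUIRE ordering. */
		smp_acquire__after_ctrl_dep();
		/* ... work that must exclude do_global_work() ... */
		done = true;
	}
	spin_unlock(l);
	return done;	/* false: caller falls back to taking &global */
}

The exclusion argument is the one in the comment: either do_local_work() sees &global locked and backs off, or do_global_work()'s spin_unlock_wait() sees the per-CPU lock held and waits, so the two critical sections never overlap.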
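And a similar sketch, under the same caveats, of the CASE 2 flag-plus-lockless-list pattern. stop_flag, lockless_list, add_entry() and drain_entries() are invented names; llist_add()/llist_del_all() and smp_mb() are the kernel primitives assumed for the lockless list and the full barrier.

#include <linux/spinlock.h>
#include <linux/llist.h>

static bool stop_flag;
static LLIST_HEAD(lockless_list);
static DEFINE_SPINLOCK(l);

/* CPU1 side: add an entry unless the flag has been raised. */
static void add_entry(struct llist_node *n)
{
	spin_lock(&l);
	if (!stop_flag)
		llist_add(n, &lockless_list);
	spin_unlock(&l);
}

/* CPU0 side: raise the flag, wait out any current holder, then drain. */
static void drain_entries(void)
{
	struct llist_node *n;

	stop_flag = true;
	smp_mb();			/* order the flag store vs. reading the lock */
	spin_unlock_wait(&l);		/* wait for a concurrent add_entry() */

	/*
	 * From here on, any new add_entry() must observe stop_flag and
	 * refrain from adding; everything added earlier is visible.
	 */
	for (n = llist_del_all(&lockless_list); n; n = n->next) {
		/* ... process entry ... */
	}
}

Once drain_entries() returns from spin_unlock_wait(), any later add_entry() must observe stop_flag set, so the list being drained is complete; this is precisely the ordering the comment says the flag load /should/ have against the store that locks l.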