diff --git a/kernel/futex.c b/kernel/futex.c
index 67dacaf..5f58927 100644
 #include "locking/rtmutex_common.h"
 
 /*
- * Basic futex operation and ordering guarantees:
+ * READ this before attempting to hack on futexes!
+ *
+ * Basic futex operation and ordering guarantees
+ * =============================================
  *
  * The waiter reads the futex value in user space and calls
  * futex_wait(). This function computes the hash bucket and acquires
  * sys_futex(WAIT, futex, val);
  *   futex_wait(futex, val);
  *
- *   waiters++;
+ *   waiters++; (a)
  *   mb(); (A) <-- paired with -.
  *                              |
  *   lock(hash_bucket(futex));  |
  *     unlock(hash_bucket(futex));
  *     schedule();                         if (waiters)
  *                                           lock(hash_bucket(futex));
- *                                           wake_waiters(futex);
- *                                           unlock(hash_bucket(futex));
+ *   else                                    wake_waiters(futex);
+ *     waiters--; (b)                        unlock(hash_bucket(futex));
  *
- * Where (A) orders the waiters increment and the futex value read -- this
- * is guaranteed by the head counter in the hb spinlock; and where (B)
- * orders the write to futex and the waiters read -- this is done by the
- * barriers in get_futex_key_refs(), through either ihold or atomic_inc,
- * depending on the futex type.
+ * Where (A) orders the waiters increment and the futex value read through
+ * atomic operations (see hb_waiters_inc); and where (B) orders the write
+ * to the futex and the waiters read -- this is done by the barriers in
+ * get_futex_key_refs(), through either ihold or atomic_inc, depending on the
+ * futex type.
  *
  * This yields the following case (where X:=waiters, Y:=futex):
  *
  *    X = Y = 0
  *
  *    w[X]=1          w[Y]=1
  *    MB              MB
  *    r[Y]=y          r[X]=x
  *
  * Which guarantees that x==0 && y==0 is impossible; which translates back into
  * the guarantee that we cannot both miss the futex variable change and the
  * enqueue.
+ *
+ * Note that a new waiter is accounted for in (a) even though the wait call
+ * may later return an error, in which case the count is backed out again
+ * in (b). Refer to the comment in queue_lock().
+ *
+ * Similarly, in order to account for waiters being requeued to another
+ * address, we always increment the waiters for the destination bucket
+ * before acquiring the lock, and decrement them again after releasing it -
+ * the code that actually moves the futex(es) between hash buckets
+ * (requeue_futex) does the additional required waiter count housekeeping.
+ * The increment and decrement pair with double_lock_hb() and
+ * double_unlock_hb(), respectively.
  */
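For reference, here is a minimal sketch of the waiter-accounting helpers this comment refers to. The names hb_waiters_inc(), hb_waiters_dec() and queue_lock() come from the comment itself; the struct layout, the CONFIG_SMP guard and the use of smp_mb__after_atomic() are illustrative assumptions, not a verbatim copy of the kernel source:

```c
struct futex_hash_bucket {
	atomic_t waiters;	/* explicit waiter count, read by wakers */
	spinlock_t lock;
	struct plist_head chain;
};

static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/* Full barrier (A): order the increment before the futex value read. */
	smp_mb__after_atomic();
#endif
}

static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}

/*
 * (a): account for the waiter *before* taking hb->lock, so a concurrent
 * waker cannot observe waiters == 0 while we are about to enqueue.
 */
static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
{
	struct futex_hash_bucket *hb = hash_futex(&q->key);

	hb_waiters_inc(hb);
	spin_lock(&hb->lock);
	return hb;
}

/* (b): backtrack from (a) when the wait is aborted before sleeping. */
static inline void queue_unlock(struct futex_hash_bucket *hb)
{
	spin_unlock(&hb->lock);
	hb_waiters_dec(hb);
}
```

The point of doing (a) before spin_lock() is that a waker which reads waiters == 0 is guaranteed that no waiter is blocked, or about to block, on that bucket, so it can safely skip taking the lock.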
 
 #ifndef CONFIG_HAVE_FUTEX_CMPXCHG
@@ -1452,6 +1466,7 @@ retry:
        hb2 = hash_futex(&key2);
 
 retry_private:
+       hb_waiters_inc(hb2);
        double_lock_hb(hb1, hb2);
 
        if (likely(cmpval != NULL)) {
@@ -1461,6 +1476,7 @@ retry_private:
 
                if (unlikely(ret)) {
                        double_unlock_hb(hb1, hb2);
+                       hb_waiters_dec(hb2);
 
                        ret = get_user(curval, uaddr1);
                        if (ret)
@@ -1510,6 +1526,7 @@ retry_private:
                        break;
                case -EFAULT:
                        double_unlock_hb(hb1, hb2);
+                       hb_waiters_dec(hb2);
                        put_futex_key(&key2);
                        put_futex_key(&key1);
                        ret = fault_in_user_writeable(uaddr2);
@@ -1519,6 +1536,7 @@ retry_private:
                case -EAGAIN:
                        /* The owner was exiting, try again. */
                        double_unlock_hb(hb1, hb2);
+                       hb_waiters_dec(hb2);
                        put_futex_key(&key2);
                        put_futex_key(&key1);
                        cond_resched();
@@ -1594,6 +1612,7 @@ retry_private:
 
 out_unlock:
        double_unlock_hb(hb1, hb2);
+       hb_waiters_dec(hb2);
 
        /*
         * drop_futex_key_refs() must be called outside the spinlocks. During
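The hunks above all follow the same pattern: account for the incoming waiters on the destination bucket before double_lock_hb(), and undo that accounting after every double_unlock_hb(), on the success path and on each error path alike. A condensed sketch of that locking skeleton, with do_requeue_work() as a hypothetical stand-in for the real futex_requeue() body:

```c
static int requeue_locking_skeleton(struct futex_hash_bucket *hb1,
				    struct futex_hash_bucket *hb2)
{
	int ret;

	/*
	 * Waiters moving to hb2 must be visible to wakers before anyone
	 * can take hb2->lock and inspect its waiter count.
	 */
	hb_waiters_inc(hb2);
	double_lock_hb(hb1, hb2);

	ret = do_requeue_work(hb1, hb2);	/* placeholder, not kernel code */

	double_unlock_hb(hb1, hb2);
	hb_waiters_dec(hb2);	/* every unlock path must undo the increment */
	return ret;
}
```

requeue_futex() itself then transfers the per-waiter accounting from the source bucket to the destination bucket as it moves each waiter, which is the "additional required waiter count housekeeping" the comment block mentions.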