]> git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - kernel/futex.c
printk: rename DEFAULT_MESSAGE_LOGLEVEL
[karo-tx-linux.git] / kernel / futex.c
index b632b5f3f09408e8e5cc8eadce4630a333db2750..d3a9d946d0b7f918e5622a7c6e2ef8c9fe88775c 100644 (file)
@@ -792,93 +792,90 @@ void exit_pi_state_list(struct task_struct *curr)
  * [10] There is no transient state which leaves owner and user space
  *     TID out of sync.
  */
-static int
-lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
-               union futex_key *key, struct futex_pi_state **ps)
+
+/*
+ * Validate that the existing waiter has a pi_state and sanity check
+ * the pi_state against the user space value. If correct, attach to
+ * it.
+ */
+static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
+                             struct futex_pi_state **ps)
 {
-       struct futex_pi_state *pi_state = NULL;
-       struct futex_q *this, *next;
-       struct task_struct *p;
        pid_t pid = uval & FUTEX_TID_MASK;
 
-       plist_for_each_entry_safe(this, next, &hb->chain, list) {
-               if (match_futex(&this->key, key)) {
-                       /*
-                        * Sanity check the waiter before increasing
-                        * the refcount and attaching to it.
-                        */
-                       pi_state = this->pi_state;
-                       /*
-                        * Userspace might have messed up non-PI and
-                        * PI futexes [3]
-                        */
-                       if (unlikely(!pi_state))
-                               return -EINVAL;
+       /*
+        * Userspace might have messed up non-PI and PI futexes [3]
+        */
+       if (unlikely(!pi_state))
+               return -EINVAL;
 
-                       WARN_ON(!atomic_read(&pi_state->refcount));
+       WARN_ON(!atomic_read(&pi_state->refcount));
 
+       /*
+        * Handle the owner died case:
+        */
+       if (uval & FUTEX_OWNER_DIED) {
+               /*
+                * exit_pi_state_list sets owner to NULL and wakes the
+                * topmost waiter. The task which acquires the
+                * pi_state->rt_mutex will fixup owner.
+                */
+               if (!pi_state->owner) {
                        /*
-                        * Handle the owner died case:
+                        * No pi state owner, but the user space TID
+                        * is not 0. Inconsistent state. [5]
                         */
-                       if (uval & FUTEX_OWNER_DIED) {
-                               /*
-                                * exit_pi_state_list sets owner to NULL and
-                                * wakes the topmost waiter. The task which
-                                * acquires the pi_state->rt_mutex will fixup
-                                * owner.
-                                */
-                               if (!pi_state->owner) {
-                                       /*
-                                        * No pi state owner, but the user
-                                        * space TID is not 0. Inconsistent
-                                        * state. [5]
-                                        */
-                                       if (pid)
-                                               return -EINVAL;
-                                       /*
-                                        * Take a ref on the state and
-                                        * return. [4]
-                                        */
-                                       goto out_state;
-                               }
-
-                               /*
-                                * If TID is 0, then either the dying owner
-                                * has not yet executed exit_pi_state_list()
-                                * or some waiter acquired the rtmutex in the
-                                * pi state, but did not yet fixup the TID in
-                                * user space.
-                                *
-                                * Take a ref on the state and return. [6]
-                                */
-                               if (!pid)
-                                       goto out_state;
-                       } else {
-                               /*
-                                * If the owner died bit is not set,
-                                * then the pi_state must have an
-                                * owner. [7]
-                                */
-                               if (!pi_state->owner)
-                                       return -EINVAL;
-                       }
-
+                       if (pid)
+                               return -EINVAL;
                        /*
-                        * Bail out if user space manipulated the
-                        * futex value. If pi state exists then the
-                        * owner TID must be the same as the user
-                        * space TID. [9/10]
+                        * Take a ref on the state and return success. [4]
                         */
-                       if (pid != task_pid_vnr(pi_state->owner))
-                               return -EINVAL;
-
-               out_state:
-                       atomic_inc(&pi_state->refcount);
-                       *ps = pi_state;
-                       return 0;
+                       goto out_state;
                }
+
+               /*
+                * If TID is 0, then either the dying owner has not
+                * yet executed exit_pi_state_list() or some waiter
+                * acquired the rtmutex in the pi state, but did not
+                * yet fixup the TID in user space.
+                *
+                * Take a ref on the state and return success. [6]
+                */
+               if (!pid)
+                       goto out_state;
+       } else {
+               /*
+                * If the owner died bit is not set, then the pi_state
+                * must have an owner. [7]
+                */
+               if (!pi_state->owner)
+                       return -EINVAL;
        }
 
+       /*
+        * Bail out if user space manipulated the futex value. If pi
+        * state exists then the owner TID must be the same as the
+        * user space TID. [9/10]
+        */
+       if (pid != task_pid_vnr(pi_state->owner))
+               return -EINVAL;
+out_state:
+       atomic_inc(&pi_state->refcount);
+       *ps = pi_state;
+       return 0;
+}
+
+/*
+ * Lookup the task for the TID provided from user space and attach to
+ * it after doing proper sanity checks.
+ */
+static int attach_to_pi_owner(u32 uval, union futex_key *key,
+                             struct futex_pi_state **ps)
+{
+       pid_t pid = uval & FUTEX_TID_MASK;
+       struct futex_pi_state *pi_state;
+       struct task_struct *p;
+
        /*
         * We are the first waiter - try to look up the real owner and attach
         * the new pi_state to it, but bail out when TID = 0 [1]
@@ -920,7 +917,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
        pi_state = alloc_pi_state();
 
        /*
-        * Initialize the pi_mutex in locked state and make 'p'
+        * Initialize the pi_mutex in locked state and make @p
         * the owner of it:
         */
        rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
@@ -940,6 +937,36 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
        return 0;
 }
 
+static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
+                          union futex_key *key, struct futex_pi_state **ps)
+{
+       struct futex_q *match = futex_top_waiter(hb, key);
+
+       /*
+        * If there is a waiter on that futex, validate it and
+        * attach to the pi_state when the validation succeeds.
+        */
+       if (match)
+               return attach_to_pi_state(uval, match->pi_state, ps);
+
+       /*
+        * We are the first waiter - try to look up the owner based on
+        * @uval and attach to it.
+        */
+       return attach_to_pi_owner(uval, key, ps);
+}
+
+static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
+{
+       u32 uninitialized_var(curval);
+
+       if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
+               return -EFAULT;
+
+       /* If user space value changed, let the caller retry */
+       return curval != uval ? -EAGAIN : 0;
+}
+
 /**
  * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
  * @uaddr:             the pi futex user address
@@ -963,113 +990,69 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
                                struct futex_pi_state **ps,
                                struct task_struct *task, int set_waiters)
 {
-       int lock_taken, ret, force_take = 0;
-       u32 uval, newval, curval, vpid = task_pid_vnr(task);
-
-retry:
-       ret = lock_taken = 0;
+       u32 uval, newval, vpid = task_pid_vnr(task);
+       struct futex_q *match;
+       int ret;
 
        /*
-        * To avoid races, we attempt to take the lock here again
-        * (by doing a 0 -> TID atomic cmpxchg), while holding all
-        * the locks. It will most likely not succeed.
+        * Read the user space value first so we can validate a few
+        * things before proceeding further.
         */
-       newval = vpid;
-       if (set_waiters)
-               newval |= FUTEX_WAITERS;
-
-       if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
+       if (get_futex_value_locked(&uval, uaddr))
                return -EFAULT;
 
        /*
         * Detect deadlocks.
         */
-       if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
+       if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
                return -EDEADLK;
 
        /*
-        * Surprise - we got the lock, but we do not trust user space at all.
-        */
-       if (unlikely(!curval)) {
-               /*
-                * We verify whether there is kernel state for this
-                * futex. If not, we can safely assume, that the 0 ->
-                * TID transition is correct. If state exists, we do
-                * not bother to fixup the user space state as it was
-                * corrupted already.
-                */
-               return futex_top_waiter(hb, key) ? -EINVAL : 1;
-       }
-
-       uval = curval;
-
-       /*
-        * Set the FUTEX_WAITERS flag, so the owner will know it has someone
-        * to wake at the next unlock.
+        * Lookup existing state first. If it exists, try to attach to
+        * its pi_state.
         */
-       newval = curval | FUTEX_WAITERS;
+       match = futex_top_waiter(hb, key);
+       if (match)
+               return attach_to_pi_state(uval, match->pi_state, ps);
 
        /*
-        * Should we force take the futex? See below.
+        * No waiter and user TID is 0. We are here because the
+        * waiters or the owner died bit is set or called from
+        * requeue_cmp_pi or for whatever reason something took the
+        * syscall.
         */
-       if (unlikely(force_take)) {
+       if (!(uval & FUTEX_TID_MASK)) {
                /*
-                * Keep the OWNER_DIED and the WAITERS bit and set the
-                * new TID value.
+                * We take over the futex. No other waiters and the user space
+                * TID is 0. We preserve the owner died bit.
                 */
-               newval = (curval & ~FUTEX_TID_MASK) | vpid;
-               force_take = 0;
-               lock_taken = 1;
-       }
+               newval = uval & FUTEX_OWNER_DIED;
+               newval |= vpid;
 
-       if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
-               return -EFAULT;
-       if (unlikely(curval != uval))
-               goto retry;
+               /* The futex requeue_pi code can enforce the waiters bit */
+               if (set_waiters)
+                       newval |= FUTEX_WAITERS;
+
+               ret = lock_pi_update_atomic(uaddr, uval, newval);
+               /* If the take over worked, return 1 */
+               return ret < 0 ? ret : 1;
+       }
 
        /*
-        * We took the lock due to forced take over.
+        * First waiter. Set the waiters bit before attaching ourself to
+        * the owner. If owner tries to unlock, it will be forced into
+        * the kernel and blocked on hb->lock.
         */
-       if (unlikely(lock_taken))
-               return 1;
-
+       newval = uval | FUTEX_WAITERS;
+       ret = lock_pi_update_atomic(uaddr, uval, newval);
+       if (ret)
+               return ret;
        /*
-        * We dont have the lock. Look up the PI state (or create it if
-        * we are the first waiter):
+        * If the update of the user space value succeeded, we try to
+        * attach to the owner. If that fails, no harm done, we only
+        * set the FUTEX_WAITERS bit in the user space variable.
         */
-       ret = lookup_pi_state(uval, hb, key, ps);
-
-       if (unlikely(ret)) {
-               switch (ret) {
-               case -ESRCH:
-                       /*
-                        * We failed to find an owner for this
-                        * futex. So we have no pi_state to block
-                        * on. This can happen in two cases:
-                        *
-                        * 1) The owner died
-                        * 2) A stale FUTEX_WAITERS bit
-                        *
-                        * Re-read the futex value.
-                        */
-                       if (get_futex_value_locked(&curval, uaddr))
-                               return -EFAULT;
-
-                       /*
-                        * If the owner died or we have a stale
-                        * WAITERS bit the owner TID in the user space
-                        * futex is 0.
-                        */
-                       if (!(curval & FUTEX_TID_MASK)) {
-                               force_take = 1;
-                               goto retry;
-                       }
-               default:
-                       break;
-               }
-       }
-
-       return ret;
+       return attach_to_pi_owner(uval, key, ps);
 }
 
 /**
@@ -1186,22 +1169,6 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
        return 0;
 }
 
-static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
-{
-       u32 uninitialized_var(oldval);
-
-       /*
-        * There is no waiter, so we unlock the futex. The owner died
-        * bit has not to be preserved here. We are the owner:
-        */
-       if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
-               return -EFAULT;
-       if (oldval != uval)
-               return -EAGAIN;
-
-       return 0;
-}
-
 /*
  * Express the locking dependencies for lockdep:
  */
@@ -1659,7 +1626,12 @@ retry_private:
                                goto retry;
                        goto out;
                case -EAGAIN:
-                       /* The owner was exiting, try again. */
+                       /*
+                        * Two reasons for this:
+                        * - Owner is exiting and we just wait for the
+                        *   exit to complete.
+                        * - The user space value changed.
+                        */
                        double_unlock_hb(hb1, hb2);
                        hb_waiters_dec(hb2);
                        put_futex_key(&key2);
@@ -1718,7 +1690,7 @@ retry_private:
                        this->pi_state = pi_state;
                        ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
                                                        this->rt_waiter,
-                                                       this->task, 1);
+                                                       this->task);
                        if (ret == 1) {
                                /* We got the lock. */
                                requeue_pi_wake_futex(this, &key2, hb2);
@@ -2316,8 +2288,10 @@ retry_private:
                        goto uaddr_faulted;
                case -EAGAIN:
                        /*
-                        * Task is exiting and we just wait for the
-                        * exit to complete.
+                        * Two reasons for this:
+                        * - Task is exiting and we just wait for the
+                        *   exit to complete.
+                        * - The user space value changed.
                         */
                        queue_unlock(hb);
                        put_futex_key(&q.key);
@@ -2337,9 +2311,9 @@ retry_private:
        /*
         * Block on the PI mutex:
         */
-       if (!trylock)
-               ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
-       else {
+       if (!trylock) {
+               ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to);
+       } else {
                ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
                /* Fixup the trylock return value: */
                ret = ret ? 0 : -EWOULDBLOCK;
@@ -2401,10 +2375,10 @@ uaddr_faulted:
  */
 static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
 {
-       struct futex_hash_bucket *hb;
-       struct futex_q *this, *next;
+       u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current);
        union futex_key key = FUTEX_KEY_INIT;
-       u32 uval, vpid = task_pid_vnr(current);
+       struct futex_hash_bucket *hb;
+       struct futex_q *match;
        int ret;
 
 retry:
@@ -2417,57 +2391,47 @@ retry:
                return -EPERM;
 
        ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
-       if (unlikely(ret != 0))
-               goto out;
+       if (ret)
+               return ret;
 
        hb = hash_futex(&key);
        spin_lock(&hb->lock);
 
        /*
-        * To avoid races, try to do the TID -> 0 atomic transition
-        * again. If it succeeds then we can return without waking
-        * anyone else up. We only try this if neither the waiters nor
-        * the owner died bit are set.
-        */
-       if (!(uval & ~FUTEX_TID_MASK) &&
-           cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
-               goto pi_faulted;
-       /*
-        * Rare case: we managed to release the lock atomically,
-        * no need to wake anyone else up:
-        */
-       if (unlikely(uval == vpid))
-               goto out_unlock;
-
-       /*
-        * Ok, other tasks may need to be woken up - check waiters
-        * and do the wakeup if necessary:
+        * Check waiters first. We do not trust user space values at
+        * all and we at least want to know if user space fiddled
+        * with the futex value instead of blindly unlocking.
         */
-       plist_for_each_entry_safe(this, next, &hb->chain, list) {
-               if (!match_futex (&this->key, &key))
-                       continue;
-               ret = wake_futex_pi(uaddr, uval, this);
+       match = futex_top_waiter(hb, &key);
+       if (match) {
+               ret = wake_futex_pi(uaddr, uval, match);
                /*
-                * The atomic access to the futex value
-                * generated a pagefault, so retry the
-                * user-access and the wakeup:
+                * The atomic access to the futex value generated a
+                * pagefault, so retry the user-access and the wakeup:
                 */
                if (ret == -EFAULT)
                        goto pi_faulted;
                goto out_unlock;
        }
+
        /*
-        * No waiters - kernel unlocks the futex:
+        * We have no kernel internal state, i.e. no waiters in the
+        * kernel. Waiters which are about to queue themselves are stuck
+        * on hb->lock. So we can safely ignore them. We preserve
+        * neither the WAITERS bit nor the OWNER_DIED one. We are the
+        * owner.
         */
-       ret = unlock_futex_pi(uaddr, uval);
-       if (ret == -EFAULT)
+       if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))
                goto pi_faulted;
 
+       /*
+        * If uval has changed, let user space handle it.
+        */
+       ret = (curval == uval) ? 0 : -EAGAIN;
+
 out_unlock:
        spin_unlock(&hb->lock);
        put_futex_key(&key);
-
-out:
        return ret;
 
 pi_faulted:
@@ -2669,7 +2633,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                 */
                WARN_ON(!q.pi_state);
                pi_mutex = &q.pi_state->pi_mutex;
-               ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
+               ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
                debug_rt_mutex_free_waiter(&rt_waiter);
 
                spin_lock(q.lock_ptr);