Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author: Linus Torvalds <torvalds@linux-foundation.org>
Wed, 4 Nov 2015 00:10:43 +0000 (16:10 -0800)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Wed, 4 Nov 2015 00:10:43 +0000 (16:10 -0800)
Pull locking changes from Ingo Molnar:
 "The main changes in this cycle were:

   - More gradual enhancements to atomic ops: new atomic*_read_ctrl()
     ops, synchronize atomic_{read,set}() ordering requirements between
     architectures, add atomic_long_t bitops.  (Peter Zijlstra)

   - Add _{relaxed|acquire|release}() variants for inc/dec atomics and
     use them in various locking primitives: mutex, rtmutex, mcs, rwsem.
     This enables weakly ordered architectures (such as arm64) to make
     use of more locking related optimizations.  (Davidlohr Bueso)

   - Implement atomic[64]_{inc,dec}_relaxed() on ARM.  (Will Deacon)

   - Futex kernel data cache footprint micro-optimization.  (Rasmus
     Villemoes)

   - pvqspinlock runtime overhead micro-optimization.  (Waiman Long)

   - misc smaller fixlets"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  ARM, locking/atomics: Implement _relaxed variants of atomic[64]_{inc,dec}
  locking/rwsem: Use acquire/release semantics
  locking/mcs: Use acquire/release semantics
  locking/rtmutex: Use acquire/release semantics
  locking/mutex: Use acquire/release semantics
  locking/asm-generic: Add _{relaxed|acquire|release}() variants for inc/dec atomics
  atomic: Implement atomic_read_ctrl()
  atomic, arch: Audit atomic_{read,set}()
  atomic: Add atomic_long_t bitops
  futex: Force hot variables into a single cache line
  locking/pvqspinlock: Kick the PV CPU unconditionally when _Q_SLOW_VAL
  locking/osq: Relax atomic semantics
  locking/qrwlock: Rename ->lock to ->wait_lock
  locking/Documentation/lockstat: Fix typo - lokcing -> locking
  locking/atomics, cmpxchg: Privatize the inclusion of asm/cmpxchg.h

44 files changed:
Documentation/atomic_ops.txt
Documentation/locking/lockstat.txt
Documentation/memory-barriers.txt
arch/alpha/include/asm/atomic.h
arch/arc/include/asm/atomic.h
arch/arm/include/asm/atomic.h
arch/arm64/include/asm/atomic.h
arch/avr32/include/asm/atomic.h
arch/frv/include/asm/atomic.h
arch/h8300/include/asm/atomic.h
arch/hexagon/include/asm/atomic.h
arch/ia64/include/asm/atomic.h
arch/m32r/include/asm/atomic.h
arch/m68k/include/asm/atomic.h
arch/metag/include/asm/atomic_lnkget.h
arch/metag/include/asm/atomic_lock1.h
arch/mips/include/asm/atomic.h
arch/mn10300/include/asm/atomic.h
arch/parisc/include/asm/atomic.h
arch/sh/include/asm/atomic.h
arch/sparc/include/asm/atomic_64.h
arch/tile/include/asm/atomic.h
arch/tile/include/asm/atomic_64.h
arch/x86/include/asm/atomic.h
arch/x86/include/asm/atomic64_64.h
arch/xtensa/include/asm/atomic.h
drivers/net/ethernet/sfc/mcdi.c
drivers/phy/phy-rcar-gen2.c
drivers/staging/speakup/selection.c
include/asm-generic/atomic-long.h
include/asm-generic/atomic.h
include/asm-generic/mutex-dec.h
include/asm-generic/mutex-xchg.h
include/asm-generic/qrwlock_types.h
include/asm-generic/rwsem.h
include/linux/atomic.h
kernel/futex.c
kernel/locking/mcs_spinlock.h
kernel/locking/mutex.c
kernel/locking/osq_lock.c
kernel/locking/qrwlock.c
kernel/locking/qspinlock_paravirt.h
kernel/locking/rtmutex.c
kernel/locking/rwsem-xadd.c

index b19fc34..c9d1cac 100644 (file)
@@ -542,6 +542,10 @@ The routines xchg() and cmpxchg() must provide the same exact
 memory-barrier semantics as the atomic and bit operations returning
 values.
 
+Note: If someone wants to use xchg(), cmpxchg() and their variants,
+linux/atomic.h should be included rather than asm/cmpxchg.h, unless
+the code is in arch/* and can take care of itself.
+
 Spinlocks and rwlocks have memory barrier expectations as well.
 The rule to follow is simple:
 
index 568bbba..5786ad2 100644 (file)
@@ -12,7 +12,7 @@ Because things like lock contention can severely impact performance.
 - HOW
 
 Lockdep already has hooks in the lock functions and maps lock instances to
-lock classes. We build on that (see Documentation/lokcing/lockdep-design.txt).
+lock classes. We build on that (see Documentation/locking/lockdep-design.txt).
 The graph below shows the relation between the lock functions and the various
 hooks therein.
 
index 8e7cf9a..b5fe765 100644 (file)
@@ -637,7 +637,8 @@ as follows:
        b = p;  /* BUG: Compiler and CPU can both reorder!!! */
 
 Finally, the READ_ONCE_CTRL() includes an smp_read_barrier_depends()
-that DEC Alpha needs in order to respect control depedencies.
+that DEC Alpha needs in order to respect control dependencies. Alternatively
+use one of atomic{,64}_read_ctrl().
 
 So don't leave out the READ_ONCE_CTRL().
 
@@ -796,9 +797,9 @@ site: https://www.cl.cam.ac.uk/~pes20/ppcmem/index.html.
 
 In summary:
 
-  (*) Control dependencies must be headed by READ_ONCE_CTRL().
-      Or, as a much less preferable alternative, interpose
-      smp_read_barrier_depends() between a READ_ONCE() and the
+  (*) Control dependencies must be headed by READ_ONCE_CTRL(),
+      atomic{,64}_read_ctrl(). Or, as a much less preferable alternative,
+      interpose smp_read_barrier_depends() between a READ_ONCE() and the
       control-dependent write.
 
   (*) Control dependencies can order prior loads against later stores.
@@ -820,10 +821,10 @@ In summary:
       and WRITE_ONCE() can help to preserve the needed conditional.
 
   (*) Control dependencies require that the compiler avoid reordering the
-      dependency into nonexistence.  Careful use of READ_ONCE_CTRL()
-      or smp_read_barrier_depends() can help to preserve your control
-      dependency.  Please see the Compiler Barrier section for more
-      information.
+      dependency into nonexistence.  Careful use of READ_ONCE_CTRL(),
+      atomic{,64}_read_ctrl() or smp_read_barrier_depends() can help to
+      preserve your control dependency.  Please see the Compiler Barrier
+      section for more information.
 
   (*) Control dependencies pair normally with other types of barriers.
 
index e8c9560..572b228 100644 (file)
 #define ATOMIC_INIT(i)         { (i) }
 #define ATOMIC64_INIT(i)       { (i) }
 
-#define atomic_read(v)         ACCESS_ONCE((v)->counter)
-#define atomic64_read(v)       ACCESS_ONCE((v)->counter)
+#define atomic_read(v)         READ_ONCE((v)->counter)
+#define atomic64_read(v)       READ_ONCE((v)->counter)
 
-#define atomic_set(v,i)                ((v)->counter = (i))
-#define atomic64_set(v,i)      ((v)->counter = (i))
+#define atomic_set(v,i)                WRITE_ONCE((v)->counter, (i))
+#define atomic64_set(v,i)      WRITE_ONCE((v)->counter, (i))
 
 /*
  * To get proper branch prediction for the main line, we must branch
index c3ecda0..7730d30 100644 (file)
 #include <asm/barrier.h>
 #include <asm/smp.h>
 
-#define atomic_read(v)  ((v)->counter)
+#define atomic_read(v)  READ_ONCE((v)->counter)
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
-#define atomic_set(v, i) (((v)->counter) = (i))
+#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
 #ifdef CONFIG_ARC_STAR_9000923308
 
@@ -107,7 +107,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)          \
 #ifndef CONFIG_SMP
 
  /* violating atomic_xxx API locking protocol in UP for optimization sake */
-#define atomic_set(v, i) (((v)->counter) = (i))
+#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
 #else
 
@@ -125,7 +125,7 @@ static inline void atomic_set(atomic_t *v, int i)
        unsigned long flags;
 
        atomic_ops_lock(flags);
-       v->counter = i;
+       WRITE_ONCE(v->counter, i);
        atomic_ops_unlock(flags);
 }
 
index fe3ef39..9e10c45 100644 (file)
@@ -27,8 +27,8 @@
  * strex/ldrex monitor on some implementations. The reason we can use it for
  * atomic_set() is the clrex or dummy strex done on every exception return.
  */
-#define atomic_read(v) ACCESS_ONCE((v)->counter)
-#define atomic_set(v,i)        (((v)->counter) = (i))
+#define atomic_read(v) READ_ONCE((v)->counter)
+#define atomic_set(v,i)        WRITE_ONCE(((v)->counter), (i))
 
 #if __LINUX_ARM_ARCH__ >= 6
 
@@ -210,8 +210,8 @@ ATOMIC_OP(xor, ^=, eor)
 
 #define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0)
 #define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
-#define atomic_inc_return(v)    (atomic_add_return(1, v))
-#define atomic_dec_return(v)    (atomic_sub_return(1, v))
+#define atomic_inc_return_relaxed(v)    (atomic_add_return_relaxed(1, v))
+#define atomic_dec_return_relaxed(v)    (atomic_sub_return_relaxed(1, v))
 #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
 
 #define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0)
@@ -442,11 +442,11 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 
 #define atomic64_add_negative(a, v)    (atomic64_add_return((a), (v)) < 0)
 #define atomic64_inc(v)                        atomic64_add(1LL, (v))
-#define atomic64_inc_return(v)         atomic64_add_return(1LL, (v))
+#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1LL, (v))
 #define atomic64_inc_and_test(v)       (atomic64_inc_return(v) == 0)
 #define atomic64_sub_and_test(a, v)    (atomic64_sub_return((a), (v)) == 0)
 #define atomic64_dec(v)                        atomic64_sub(1LL, (v))
-#define atomic64_dec_return(v)         atomic64_sub_return(1LL, (v))
+#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1LL, (v))
 #define atomic64_dec_and_test(v)       (atomic64_dec_return((v)) == 0)
 #define atomic64_inc_not_zero(v)       atomic64_add_unless((v), 1LL, 0LL)
 
index 35a6778..1e247ac 100644 (file)
@@ -54,7 +54,7 @@
 #define ATOMIC_INIT(i) { (i) }
 
 #define atomic_read(v)                 READ_ONCE((v)->counter)
-#define atomic_set(v, i)               (((v)->counter) = (i))
+#define atomic_set(v, i)               WRITE_ONCE(((v)->counter), (i))
 #define atomic_xchg(v, new)            xchg(&((v)->counter), (new))
 #define atomic_cmpxchg(v, old, new)    cmpxchg(&((v)->counter), (old), (new))
 
index 97c9bdf..d74fd8c 100644 (file)
@@ -19,8 +19,8 @@
 
 #define ATOMIC_INIT(i)  { (i) }
 
-#define atomic_read(v)         ACCESS_ONCE((v)->counter)
-#define atomic_set(v, i)       (((v)->counter) = i)
+#define atomic_read(v)         READ_ONCE((v)->counter)
+#define atomic_set(v, i)       WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP_RETURN(op, asm_op, asm_con)                          \
 static inline int __atomic_##op##_return(int i, atomic_t *v)           \
index 0da689d..64f02d4 100644 (file)
@@ -32,8 +32,8 @@
  */
 
 #define ATOMIC_INIT(i)         { (i) }
-#define atomic_read(v)         ACCESS_ONCE((v)->counter)
-#define atomic_set(v, i)       (((v)->counter) = (i))
+#define atomic_read(v)         READ_ONCE((v)->counter)
+#define atomic_set(v, i)       WRITE_ONCE(((v)->counter), (i))
 
 static inline int atomic_inc_return(atomic_t *v)
 {
index 702ee53..4435a44 100644 (file)
@@ -11,8 +11,8 @@
 
 #define ATOMIC_INIT(i) { (i) }
 
-#define atomic_read(v)         ACCESS_ONCE((v)->counter)
-#define atomic_set(v, i)       (((v)->counter) = i)
+#define atomic_read(v)         READ_ONCE((v)->counter)
+#define atomic_set(v, i)       WRITE_ONCE(((v)->counter), (i))
 
 #include <linux/kernel.h>
 
index 811d61f..55696c4 100644 (file)
@@ -48,7 +48,7 @@ static inline void atomic_set(atomic_t *v, int new)
  *
  * Assumes all word reads on our architecture are atomic.
  */
-#define atomic_read(v)         ((v)->counter)
+#define atomic_read(v)         READ_ONCE((v)->counter)
 
 /**
  * atomic_xchg - atomic
index be4beeb..8dfb5f6 100644 (file)
 #define ATOMIC_INIT(i)         { (i) }
 #define ATOMIC64_INIT(i)       { (i) }
 
-#define atomic_read(v)         ACCESS_ONCE((v)->counter)
-#define atomic64_read(v)       ACCESS_ONCE((v)->counter)
+#define atomic_read(v)         READ_ONCE((v)->counter)
+#define atomic64_read(v)       READ_ONCE((v)->counter)
 
-#define atomic_set(v,i)                (((v)->counter) = (i))
-#define atomic64_set(v,i)      (((v)->counter) = (i))
+#define atomic_set(v,i)                WRITE_ONCE(((v)->counter), (i))
+#define atomic64_set(v,i)      WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP(op, c_op)                                            \
 static __inline__ int                                                  \
index 025e2a1..ea35160 100644 (file)
@@ -28,7 +28,7 @@
  *
  * Atomically reads the value of @v.
  */
-#define atomic_read(v) ACCESS_ONCE((v)->counter)
+#define atomic_read(v) READ_ONCE((v)->counter)
 
 /**
  * atomic_set - set atomic variable
@@ -37,7 +37,7 @@
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v,i)        (((v)->counter) = (i))
+#define atomic_set(v,i)        WRITE_ONCE(((v)->counter), (i))
 
 #ifdef CONFIG_CHIP_M32700_TS1
 #define __ATOMIC_CLOBBER       , "r4"
index 039fac1..4858178 100644 (file)
@@ -17,8 +17,8 @@
 
 #define ATOMIC_INIT(i) { (i) }
 
-#define atomic_read(v)         ACCESS_ONCE((v)->counter)
-#define atomic_set(v, i)       (((v)->counter) = i)
+#define atomic_read(v)         READ_ONCE((v)->counter)
+#define atomic_set(v, i)       WRITE_ONCE(((v)->counter), (i))
 
 /*
  * The ColdFire parts cannot do some immediate to memory operations,
index 21c4c26..a625818 100644 (file)
@@ -3,7 +3,7 @@
 
 #define ATOMIC_INIT(i) { (i) }
 
-#define atomic_set(v, i)               ((v)->counter = (i))
+#define atomic_set(v, i)               WRITE_ONCE((v)->counter, (i))
 
 #include <linux/compiler.h>
 
index f8efe38..0295d9b 100644 (file)
@@ -10,7 +10,7 @@
 
 static inline int atomic_read(const atomic_t *v)
 {
-       return (v)->counter;
+       return READ_ONCE((v)->counter);
 }
 
 /*
index 4c42fd9..f82d3af 100644 (file)
@@ -30,7 +30,7 @@
  *
  * Atomically reads the value of @v.
  */
-#define atomic_read(v)         ACCESS_ONCE((v)->counter)
+#define atomic_read(v)         READ_ONCE((v)->counter)
 
 /*
  * atomic_set - set atomic variable
@@ -39,7 +39,7 @@
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v, i)               ((v)->counter = (i))
+#define atomic_set(v, i)       WRITE_ONCE((v)->counter, (i))
 
 #define ATOMIC_OP(op, c_op, asm_op)                                          \
 static __inline__ void atomic_##op(int i, atomic_t * v)                              \
@@ -315,14 +315,14 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
  * @v: pointer of type atomic64_t
  *
  */
-#define atomic64_read(v)       ACCESS_ONCE((v)->counter)
+#define atomic64_read(v)       READ_ONCE((v)->counter)
 
 /*
  * atomic64_set - set atomic variable
  * @v: pointer of type atomic64_t
  * @i: required value
  */
-#define atomic64_set(v, i)     ((v)->counter = (i))
+#define atomic64_set(v, i)     WRITE_ONCE((v)->counter, (i))
 
 #define ATOMIC64_OP(op, c_op, asm_op)                                        \
 static __inline__ void atomic64_##op(long i, atomic64_t * v)                 \
index 375e591..ce318d5 100644 (file)
@@ -34,7 +34,7 @@
  *
  * Atomically reads the value of @v.  Note that the guaranteed
  */
-#define atomic_read(v) (ACCESS_ONCE((v)->counter))
+#define atomic_read(v) READ_ONCE((v)->counter)
 
 /**
  * atomic_set - set atomic variable
@@ -43,7 +43,7 @@
  *
  * Atomically sets the value of @v to @i.  Note that the guaranteed
  */
-#define atomic_set(v, i) (((v)->counter) = (i))
+#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP(op)                                                  \
 static inline void atomic_##op(int i, atomic_t *v)                     \
index 2536965..1d10999 100644 (file)
@@ -67,7 +67,7 @@ static __inline__ void atomic_set(atomic_t *v, int i)
 
 static __inline__ int atomic_read(const atomic_t *v)
 {
-       return ACCESS_ONCE((v)->counter);
+       return READ_ONCE((v)->counter);
 }
 
 /* exported interface */
index 05b9f74..c399e1c 100644 (file)
@@ -14,8 +14,8 @@
 
 #define ATOMIC_INIT(i) { (i) }
 
-#define atomic_read(v)         ACCESS_ONCE((v)->counter)
-#define atomic_set(v,i)                ((v)->counter = (i))
+#define atomic_read(v)         READ_ONCE((v)->counter)
+#define atomic_set(v,i)                WRITE_ONCE((v)->counter, (i))
 
 #if defined(CONFIG_GUSA_RB)
 #include <asm/atomic-grb.h>
index 917084a..f2fbf9e 100644 (file)
 #define ATOMIC_INIT(i)         { (i) }
 #define ATOMIC64_INIT(i)       { (i) }
 
-#define atomic_read(v)         ACCESS_ONCE((v)->counter)
-#define atomic64_read(v)       ACCESS_ONCE((v)->counter)
+#define atomic_read(v)         READ_ONCE((v)->counter)
+#define atomic64_read(v)       READ_ONCE((v)->counter)
 
-#define atomic_set(v, i)       (((v)->counter) = i)
-#define atomic64_set(v, i)     (((v)->counter) = i)
+#define atomic_set(v, i)       WRITE_ONCE(((v)->counter), (i))
+#define atomic64_set(v, i)     WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP(op)                                                  \
 void atomic_##op(int, atomic_t *);                                     \
index 7097984..9fc0107 100644 (file)
@@ -34,7 +34,7 @@
  */
 static inline int atomic_read(const atomic_t *v)
 {
-       return ACCESS_ONCE(v->counter);
+       return READ_ONCE(v->counter);
 }
 
 /**
index 096a56d..51cabc2 100644 (file)
@@ -24,7 +24,7 @@
 
 /* First, the 32-bit atomic ops that are "real" on our 64-bit platform. */
 
-#define atomic_set(v, i) ((v)->counter = (i))
+#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i))
 
 /*
  * The smp_mb() operations throughout are to support the fact that
@@ -82,8 +82,8 @@ static inline void atomic_xor(int i, atomic_t *v)
 
 #define ATOMIC64_INIT(i)       { (i) }
 
-#define atomic64_read(v)               ((v)->counter)
-#define atomic64_set(v, i) ((v)->counter = (i))
+#define atomic64_read(v)       READ_ONCE((v)->counter)
+#define atomic64_set(v, i)     WRITE_ONCE((v)->counter, (i))
 
 static inline void atomic64_add(long i, atomic64_t *v)
 {
index fb52aa6..ae5fb83 100644 (file)
@@ -24,7 +24,7 @@
  */
 static __always_inline int atomic_read(const atomic_t *v)
 {
-       return ACCESS_ONCE((v)->counter);
+       return READ_ONCE((v)->counter);
 }
 
 /**
@@ -36,7 +36,7 @@ static __always_inline int atomic_read(const atomic_t *v)
  */
 static __always_inline void atomic_set(atomic_t *v, int i)
 {
-       v->counter = i;
+       WRITE_ONCE(v->counter, i);
 }
 
 /**
index 50e33ef..0373510 100644 (file)
@@ -18,7 +18,7 @@
  */
 static inline long atomic64_read(const atomic64_t *v)
 {
-       return ACCESS_ONCE((v)->counter);
+       return READ_ONCE((v)->counter);
 }
 
 /**
@@ -30,7 +30,7 @@ static inline long atomic64_read(const atomic64_t *v)
  */
 static inline void atomic64_set(atomic64_t *v, long i)
 {
-       v->counter = i;
+       WRITE_ONCE(v->counter, i);
 }
 
 /**
index 93795d0..fd8017c 100644 (file)
@@ -47,7 +47,7 @@
  *
  * Atomically reads the value of @v.
  */
-#define atomic_read(v)         ACCESS_ONCE((v)->counter)
+#define atomic_read(v)         READ_ONCE((v)->counter)
 
 /**
  * atomic_set - set atomic variable
@@ -56,7 +56,7 @@
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v,i)                ((v)->counter = (i))
+#define atomic_set(v,i)                WRITE_ONCE((v)->counter, (i))
 
 #if XCHAL_HAVE_S32C1I
 #define ATOMIC_OP(op)                                                  \
index 98d172b..a9b9460 100644 (file)
@@ -9,7 +9,7 @@
 
 #include <linux/delay.h>
 #include <linux/moduleparam.h>
-#include <asm/cmpxchg.h>
+#include <linux/atomic.h>
 #include "net_driver.h"
 #include "nic.h"
 #include "io.h"
index 6e0d9fa..c7a0599 100644 (file)
@@ -17,8 +17,7 @@
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
-
-#include <asm/cmpxchg.h>
+#include <linux/atomic.h>
 
 #define USBHS_LPSTS                    0x02
 #define USBHS_UGCTRL                   0x80
index 98af3b1..aa5ab6c 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/workqueue.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
-#include <asm/cmpxchg.h>
+#include <linux/atomic.h>
 
 #include "speakup.h"
 
index a94cbeb..f91093c 100644 (file)
@@ -35,7 +35,7 @@ typedef atomic_t atomic_long_t;
 #endif
 
 #define ATOMIC_LONG_READ_OP(mo)                                                \
-static inline long atomic_long_read##mo(atomic_long_t *l)              \
+static inline long atomic_long_read##mo(const atomic_long_t *l)                \
 {                                                                      \
        ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;              \
                                                                        \
@@ -43,6 +43,7 @@ static inline long atomic_long_read##mo(atomic_long_t *l)             \
 }
 ATOMIC_LONG_READ_OP()
 ATOMIC_LONG_READ_OP(_acquire)
+ATOMIC_LONG_READ_OP(_ctrl)
 
 #undef ATOMIC_LONG_READ_OP
 
@@ -112,19 +113,23 @@ static inline void atomic_long_dec(atomic_long_t *l)
        ATOMIC_LONG_PFX(_dec)(v);
 }
 
-static inline void atomic_long_add(long i, atomic_long_t *l)
-{
-       ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
-
-       ATOMIC_LONG_PFX(_add)(i, v);
+#define ATOMIC_LONG_OP(op)                                             \
+static inline void                                                     \
+atomic_long_##op(long i, atomic_long_t *l)                             \
+{                                                                      \
+       ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;              \
+                                                                       \
+       ATOMIC_LONG_PFX(_##op)(i, v);                                   \
 }
 
-static inline void atomic_long_sub(long i, atomic_long_t *l)
-{
-       ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
+ATOMIC_LONG_OP(add)
+ATOMIC_LONG_OP(sub)
+ATOMIC_LONG_OP(and)
+ATOMIC_LONG_OP(or)
+ATOMIC_LONG_OP(xor)
+ATOMIC_LONG_OP(andnot)
 
-       ATOMIC_LONG_PFX(_sub)(i, v);
-}
+#undef ATOMIC_LONG_OP
 
 static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
 {
@@ -154,19 +159,24 @@ static inline int atomic_long_add_negative(long i, atomic_long_t *l)
        return ATOMIC_LONG_PFX(_add_negative)(i, v);
 }
 
-static inline long atomic_long_inc_return(atomic_long_t *l)
-{
-       ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
-
-       return (long)ATOMIC_LONG_PFX(_inc_return)(v);
-}
-
-static inline long atomic_long_dec_return(atomic_long_t *l)
-{
-       ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
-
-       return (long)ATOMIC_LONG_PFX(_dec_return)(v);
+#define ATOMIC_LONG_INC_DEC_OP(op, mo)                                 \
+static inline long                                                     \
+atomic_long_##op##_return##mo(atomic_long_t *l)                                \
+{                                                                      \
+       ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;              \
+                                                                       \
+       return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(v);            \
 }
+ATOMIC_LONG_INC_DEC_OP(inc,)
+ATOMIC_LONG_INC_DEC_OP(inc, _relaxed)
+ATOMIC_LONG_INC_DEC_OP(inc, _acquire)
+ATOMIC_LONG_INC_DEC_OP(inc, _release)
+ATOMIC_LONG_INC_DEC_OP(dec,)
+ATOMIC_LONG_INC_DEC_OP(dec, _relaxed)
+ATOMIC_LONG_INC_DEC_OP(dec, _acquire)
+ATOMIC_LONG_INC_DEC_OP(dec, _release)
+
+#undef ATOMIC_LONG_INC_DEC_OP
 
 static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
 {
index d4d7e33..74f1a37 100644 (file)
@@ -127,7 +127,7 @@ ATOMIC_OP(xor, ^)
  * Atomically reads the value of @v.
  */
 #ifndef atomic_read
-#define atomic_read(v) ACCESS_ONCE((v)->counter)
+#define atomic_read(v) READ_ONCE((v)->counter)
 #endif
 
 /**
@@ -137,7 +137,7 @@ ATOMIC_OP(xor, ^)
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v, i) (((v)->counter) = (i))
+#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
 #include <linux/irqflags.h>
 
index d4f9fb4..fd694cf 100644 (file)
@@ -20,7 +20,7 @@
 static inline void
 __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-       if (unlikely(atomic_dec_return(count) < 0))
+       if (unlikely(atomic_dec_return_acquire(count) < 0))
                fail_fn(count);
 }
 
@@ -35,7 +35,7 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_lock_retval(atomic_t *count)
 {
-       if (unlikely(atomic_dec_return(count) < 0))
+       if (unlikely(atomic_dec_return_acquire(count) < 0))
                return -1;
        return 0;
 }
@@ -56,7 +56,7 @@ __mutex_fastpath_lock_retval(atomic_t *count)
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-       if (unlikely(atomic_inc_return(count) <= 0))
+       if (unlikely(atomic_inc_return_release(count) <= 0))
                fail_fn(count);
 }
 
@@ -80,7 +80,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-       if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+       if (likely(atomic_cmpxchg_acquire(count, 1, 0) == 1))
                return 1;
        return 0;
 }
index f169ec0..a6b4a7b 100644 (file)
@@ -31,7 +31,7 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
                 * to ensure that any waiting tasks are woken up by the
                 * unlock slow path.
                 */
-               if (likely(atomic_xchg(count, -1) != 1))
+               if (likely(atomic_xchg_acquire(count, -1) != 1))
                        fail_fn(count);
 }
 
@@ -46,7 +46,7 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_lock_retval(atomic_t *count)
 {
-       if (unlikely(atomic_xchg(count, 0) != 1))
+       if (unlikely(atomic_xchg_acquire(count, 0) != 1))
                if (likely(atomic_xchg(count, -1) != 1))
                        return -1;
        return 0;
@@ -67,7 +67,7 @@ __mutex_fastpath_lock_retval(atomic_t *count)
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-       if (unlikely(atomic_xchg(count, 1) != 0))
+       if (unlikely(atomic_xchg_release(count, 1) != 0))
                fail_fn(count);
 }
 
@@ -91,7 +91,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-       int prev = atomic_xchg(count, 0);
+       int prev = atomic_xchg_acquire(count, 0);
 
        if (unlikely(prev < 0)) {
                /*
@@ -105,7 +105,7 @@ __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
                 *   owner's unlock path needlessly, but that's not a problem
                 *   in practice. ]
                 */
-               prev = atomic_xchg(count, prev);
+               prev = atomic_xchg_acquire(count, prev);
                if (prev < 0)
                        prev = 0;
        }
index 4d76f24..0abc6b6 100644 (file)
 
 typedef struct qrwlock {
        atomic_t                cnts;
-       arch_spinlock_t         lock;
+       arch_spinlock_t         wait_lock;
 } arch_rwlock_t;
 
 #define        __ARCH_RW_LOCK_UNLOCKED {               \
        .cnts = ATOMIC_INIT(0),                 \
-       .lock = __ARCH_SPIN_LOCK_UNLOCKED,      \
+       .wait_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
 }
 
 #endif /* __ASM_GENERIC_QRWLOCK_TYPES_H */
index d48bf5a..d6d5dc9 100644 (file)
@@ -33,7 +33,7 @@
  */
 static inline void __down_read(struct rw_semaphore *sem)
 {
-       if (unlikely(atomic_long_inc_return((atomic_long_t *)&sem->count) <= 0))
+       if (unlikely(atomic_long_inc_return_acquire((atomic_long_t *)&sem->count) <= 0))
                rwsem_down_read_failed(sem);
 }
 
@@ -42,7 +42,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
        long tmp;
 
        while ((tmp = sem->count) >= 0) {
-               if (tmp == cmpxchg(&sem->count, tmp,
+               if (tmp == cmpxchg_acquire(&sem->count, tmp,
                                   tmp + RWSEM_ACTIVE_READ_BIAS)) {
                        return 1;
                }
@@ -57,7 +57,7 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
 {
        long tmp;
 
-       tmp = atomic_long_add_return(RWSEM_ACTIVE_WRITE_BIAS,
+       tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
                                     (atomic_long_t *)&sem->count);
        if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
                rwsem_down_write_failed(sem);
@@ -72,7 +72,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
 {
        long tmp;
 
-       tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
+       tmp = cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
                      RWSEM_ACTIVE_WRITE_BIAS);
        return tmp == RWSEM_UNLOCKED_VALUE;
 }
@@ -84,7 +84,7 @@ static inline void __up_read(struct rw_semaphore *sem)
 {
        long tmp;
 
-       tmp = atomic_long_dec_return((atomic_long_t *)&sem->count);
+       tmp = atomic_long_dec_return_release((atomic_long_t *)&sem->count);
        if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
                rwsem_wake(sem);
 }
@@ -94,7 +94,7 @@ static inline void __up_read(struct rw_semaphore *sem)
  */
 static inline void __up_write(struct rw_semaphore *sem)
 {
-       if (unlikely(atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS,
+       if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
                                 (atomic_long_t *)&sem->count) < 0))
                rwsem_wake(sem);
 }
@@ -114,7 +114,14 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
 {
        long tmp;
 
-       tmp = atomic_long_add_return(-RWSEM_WAITING_BIAS,
+       /*
+        * When downgrading from exclusive to shared ownership,
+        * anything inside the write-locked region cannot leak
+        * into the read side. In contrast, anything in the
+        * read-locked region is ok to be re-ordered into the
+        * write side. As such, rely on RELEASE semantics.
+        */
+       tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS,
                                     (atomic_long_t *)&sem->count);
        if (tmp < 0)
                rwsem_downgrade_wake(sem);
index 00a5763..27e580d 100644 (file)
@@ -4,6 +4,15 @@
 #include <asm/atomic.h>
 #include <asm/barrier.h>
 
+#ifndef atomic_read_ctrl /*
+ * atomic_read_ctrl - read an atomic_t for use in a control dependency
+ * @v: atomic variable to read
+ *
+ * Generic fallback, compiled only when no atomic_read_ctrl macro has been
+ * defined before this point. The value is loaded with atomic_read() and
+ * smp_read_barrier_depends() is issued before the value is handed back,
+ * so the barrier always sits between the load and whatever the caller
+ * does with the result.
+ *
+ * Returns the value that was loaded from @v.
+ */
+static inline int atomic_read_ctrl(const atomic_t *v)
+{
+       int val = atomic_read(v);
+       smp_read_barrier_depends(); /* Enforce control dependency. */
+       return val;
+}
+#endif
+
 /*
  * Relaxed variants of xchg, cmpxchg and some atomic operations.
  *
 #endif
 #endif /* atomic_add_return_relaxed */
 
+/*
+ * atomic_inc_return_relaxed
+ *
+ * Supplies whichever ordering variants of atomic_inc_return() are missing.
+ *
+ * No atomic_inc_return_relaxed defined: the _relaxed, _acquire and _release
+ * names all become plain aliases of atomic_inc_return.
+ *
+ * atomic_inc_return_relaxed already defined: each of _acquire, _release and
+ * atomic_inc_return itself that is still undefined is generated through
+ * __atomic_op_acquire(), __atomic_op_release() and __atomic_op_fence()
+ * respectively. NOTE(review): those helpers are defined elsewhere;
+ * presumably they wrap the _relaxed op with the matching barriers -- confirm.
+ */
+#ifndef atomic_inc_return_relaxed
+#define  atomic_inc_return_relaxed     atomic_inc_return
+#define  atomic_inc_return_acquire     atomic_inc_return
+#define  atomic_inc_return_release     atomic_inc_return
+
+#else /* atomic_inc_return_relaxed */
+
+#ifndef atomic_inc_return_acquire
+#define  atomic_inc_return_acquire(...)                                        \
+       __atomic_op_acquire(atomic_inc_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_inc_return_release
+#define  atomic_inc_return_release(...)                                        \
+       __atomic_op_release(atomic_inc_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_inc_return
+#define  atomic_inc_return(...)                                                \
+       __atomic_op_fence(atomic_inc_return, __VA_ARGS__)
+#endif
+#endif /* atomic_inc_return_relaxed */
+
 /* atomic_sub_return_relaxed */
 #ifndef atomic_sub_return_relaxed
 #define  atomic_sub_return_relaxed     atomic_sub_return
 #endif
 #endif /* atomic_sub_return_relaxed */
 
+/*
+ * atomic_dec_return_relaxed
+ *
+ * Supplies whichever ordering variants of atomic_dec_return() are missing.
+ *
+ * No atomic_dec_return_relaxed defined: the _relaxed, _acquire and _release
+ * names all become plain aliases of atomic_dec_return.
+ *
+ * atomic_dec_return_relaxed already defined: each of _acquire, _release and
+ * atomic_dec_return itself that is still undefined is generated through
+ * __atomic_op_acquire(), __atomic_op_release() and __atomic_op_fence()
+ * respectively. NOTE(review): those helpers are defined elsewhere;
+ * presumably they wrap the _relaxed op with the matching barriers -- confirm.
+ */
+#ifndef atomic_dec_return_relaxed
+#define  atomic_dec_return_relaxed     atomic_dec_return
+#define  atomic_dec_return_acquire     atomic_dec_return
+#define  atomic_dec_return_release     atomic_dec_return
+
+#else /* atomic_dec_return_relaxed */
+
+#ifndef atomic_dec_return_acquire
+#define  atomic_dec_return_acquire(...)                                        \
+       __atomic_op_acquire(atomic_dec_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_dec_return_release
+#define  atomic_dec_return_release(...)                                        \
+       __atomic_op_release(atomic_dec_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_dec_return
+#define  atomic_dec_return(...)                                                \
+       __atomic_op_fence(atomic_dec_return, __VA_ARGS__)
+#endif
+#endif /* atomic_dec_return_relaxed */
+
 /* atomic_xchg_relaxed */
 #ifndef atomic_xchg_relaxed
 #define  atomic_xchg_relaxed           atomic_xchg
 #endif
 #endif /* atomic64_add_return_relaxed */
 
+/*
+ * atomic64_inc_return_relaxed
+ *
+ * Supplies whichever ordering variants of atomic64_inc_return() are missing.
+ *
+ * No atomic64_inc_return_relaxed defined: the _relaxed, _acquire and
+ * _release names all become plain aliases of atomic64_inc_return.
+ *
+ * atomic64_inc_return_relaxed already defined: each of _acquire, _release
+ * and atomic64_inc_return itself that is still undefined is generated
+ * through __atomic_op_acquire(), __atomic_op_release() and
+ * __atomic_op_fence() respectively. NOTE(review): those helpers are defined
+ * elsewhere; presumably they wrap the _relaxed op with the matching
+ * barriers -- confirm.
+ */
+#ifndef atomic64_inc_return_relaxed
+#define  atomic64_inc_return_relaxed   atomic64_inc_return
+#define  atomic64_inc_return_acquire   atomic64_inc_return
+#define  atomic64_inc_return_release   atomic64_inc_return
+
+#else /* atomic64_inc_return_relaxed */
+
+#ifndef atomic64_inc_return_acquire
+#define  atomic64_inc_return_acquire(...)                              \
+       __atomic_op_acquire(atomic64_inc_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_inc_return_release
+#define  atomic64_inc_return_release(...)                              \
+       __atomic_op_release(atomic64_inc_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_inc_return
+#define  atomic64_inc_return(...)                                      \
+       __atomic_op_fence(atomic64_inc_return, __VA_ARGS__)
+#endif
+#endif /* atomic64_inc_return_relaxed */
+
+
 /* atomic64_sub_return_relaxed */
 #ifndef atomic64_sub_return_relaxed
 #define  atomic64_sub_return_relaxed   atomic64_sub_return
 #endif
 #endif /* atomic64_sub_return_relaxed */
 
+/*
+ * atomic64_dec_return_relaxed
+ *
+ * Supplies whichever ordering variants of atomic64_dec_return() are missing.
+ *
+ * No atomic64_dec_return_relaxed defined: the _relaxed, _acquire and
+ * _release names all become plain aliases of atomic64_dec_return.
+ *
+ * atomic64_dec_return_relaxed already defined: each of _acquire, _release
+ * and atomic64_dec_return itself that is still undefined is generated
+ * through __atomic_op_acquire(), __atomic_op_release() and
+ * __atomic_op_fence() respectively. NOTE(review): those helpers are defined
+ * elsewhere; presumably they wrap the _relaxed op with the matching
+ * barriers -- confirm.
+ */
+#ifndef atomic64_dec_return_relaxed
+#define  atomic64_dec_return_relaxed   atomic64_dec_return
+#define  atomic64_dec_return_acquire   atomic64_dec_return
+#define  atomic64_dec_return_release   atomic64_dec_return
+
+#else /* atomic64_dec_return_relaxed */
+
+#ifndef atomic64_dec_return_acquire
+#define  atomic64_dec_return_acquire(...)                              \
+       __atomic_op_acquire(atomic64_dec_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_dec_return_release
+#define  atomic64_dec_return_release(...)                              \
+       __atomic_op_release(atomic64_dec_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_dec_return
+#define  atomic64_dec_return(...)                                      \
+       __atomic_op_fence(atomic64_dec_return, __VA_ARGS__)
+#endif
+#endif /* atomic64_dec_return_relaxed */
+
 /* atomic64_xchg_relaxed */
 #ifndef atomic64_xchg_relaxed
 #define  atomic64_xchg_relaxed         atomic64_xchg
@@ -451,11 +557,19 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
-#include <asm-generic/atomic-long.h>
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
 
+#ifndef atomic64_read_ctrl /*
+ * atomic64_read_ctrl - read an atomic64_t for use in a control dependency
+ * @v: 64-bit atomic variable to read
+ *
+ * Generic fallback, compiled only when no atomic64_read_ctrl macro has been
+ * defined before this point. The value is loaded with atomic64_read() and
+ * smp_read_barrier_depends() is issued before the value is handed back,
+ * so the barrier always sits between the load and whatever the caller
+ * does with the result.
+ *
+ * Returns the value that was loaded from @v.
+ */
+static inline long long atomic64_read_ctrl(const atomic64_t *v)
+{
+       long long val = atomic64_read(v);
+       smp_read_barrier_depends(); /* Enforce control dependency. */
+       return val;
+}
+#endif
+
 #ifndef atomic64_andnot
 static inline void atomic64_andnot(long long i, atomic64_t *v)
 {
@@ -463,4 +577,6 @@ static inline void atomic64_andnot(long long i, atomic64_t *v)
 }
 #endif
 
+#include <asm-generic/atomic-long.h>
+
 #endif /* _LINUX_ATOMIC_H */
index 6e443ef..dfc86e9 100644 (file)
@@ -255,9 +255,18 @@ struct futex_hash_bucket {
        struct plist_head chain;
 } ____cacheline_aligned_in_smp;
 
-static unsigned long __read_mostly futex_hashsize;
+/*
+ * The base of the bucket array and its size are always used together
+ * (after initialization only in hash_futex()), so ensure that they
+ * reside in the same cacheline.
+ *
+ * Both values live in one anonymous struct aligned to 2*sizeof(long).
+ * The futex_queues and futex_hashsize macros below expand to the struct
+ * members, so code written against those two names needs no change.
+ *
+ * NOTE(review): the same-cacheline guarantee assumes the struct is exactly
+ * 2*sizeof(long) bytes (pointer and long of equal size) and that a
+ * cacheline is at least that large -- confirm for the target arch.
+ */
+static struct {
+       struct futex_hash_bucket *queues;       /* base of the bucket array */
+       unsigned long            hashsize;      /* size of that array */
+} __futex_data __read_mostly __aligned(2*sizeof(long));
+#define futex_queues   (__futex_data.queues)
+#define futex_hashsize (__futex_data.hashsize)
 
-static struct futex_hash_bucket *futex_queues;
 
 /*
  * Fault injections for futexes.
index fd91aaa..5b9102a 100644 (file)
@@ -67,7 +67,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
        node->locked = 0;
        node->next   = NULL;
 
-       prev = xchg(lock, node);
+       prev = xchg_acquire(lock, node);
        if (likely(prev == NULL)) {
                /*
                 * Lock acquired, don't need to set node->locked to 1. Threads
@@ -98,7 +98,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
                /*
                 * Release the lock by setting it to NULL
                 */
-               if (likely(cmpxchg(lock, node, NULL) == node))
+               if (likely(cmpxchg_release(lock, node, NULL) == node))
                        return;
                /* Wait until the next pointer is set */
                while (!(next = READ_ONCE(node->next)))
index 4cccea6..0551c21 100644 (file)
@@ -277,7 +277,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
 static inline bool mutex_try_to_acquire(struct mutex *lock)
 {
        return !mutex_is_locked(lock) &&
-               (atomic_cmpxchg(&lock->count, 1, 0) == 1);
+               (atomic_cmpxchg_acquire(&lock->count, 1, 0) == 1);
 }
 
 /*
@@ -529,7 +529,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
         * Once more, try to acquire the lock. Only try-lock the mutex if
         * it is unlocked to reduce unnecessary xchg() operations.
         */
-       if (!mutex_is_locked(lock) && (atomic_xchg(&lock->count, 0) == 1))
+       if (!mutex_is_locked(lock) &&
+           (atomic_xchg_acquire(&lock->count, 0) == 1))
                goto skip_wait;
 
        debug_mutex_lock_common(lock, &waiter);
@@ -553,7 +554,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
                 * non-negative in order to avoid unnecessary xchg operations:
                 */
                if (atomic_read(&lock->count) >= 0 &&
-                   (atomic_xchg(&lock->count, -1) == 1))
+                   (atomic_xchg_acquire(&lock->count, -1) == 1))
                        break;
 
                /*
@@ -867,7 +868,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
 
        spin_lock_mutex(&lock->wait_lock, flags);
 
-       prev = atomic_xchg(&lock->count, -1);
+       prev = atomic_xchg_acquire(&lock->count, -1);
        if (likely(prev == 1)) {
                mutex_set_owner(lock);
                mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
index dc85ee2..d092a0c 100644 (file)
@@ -50,7 +50,7 @@ osq_wait_next(struct optimistic_spin_queue *lock,
 
        for (;;) {
                if (atomic_read(&lock->tail) == curr &&
-                   atomic_cmpxchg(&lock->tail, curr, old) == curr) {
+                   atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) {
                        /*
                         * We were the last queued, we moved @lock back. @prev
                         * will now observe @lock and will complete its
@@ -92,7 +92,11 @@ bool osq_lock(struct optimistic_spin_queue *lock)
        node->next = NULL;
        node->cpu = curr;
 
-       old = atomic_xchg(&lock->tail, curr);
+       /*
+        * ACQUIRE semantics, pairs with corresponding RELEASE
+        * in unlock() uncontended, or fastpath.
+        */
+       old = atomic_xchg_acquire(&lock->tail, curr);
        if (old == OSQ_UNLOCKED_VAL)
                return true;
 
@@ -184,7 +188,8 @@ void osq_unlock(struct optimistic_spin_queue *lock)
        /*
         * Fast path for the uncontended case.
         */
-       if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
+       if (likely(atomic_cmpxchg_release(&lock->tail, curr,
+                                         OSQ_UNLOCKED_VAL) == curr))
                return;
 
        /*
index f17a3e3..fec0823 100644 (file)
@@ -86,7 +86,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
        /*
         * Put the reader into the wait queue
         */
-       arch_spin_lock(&lock->lock);
+       arch_spin_lock(&lock->wait_lock);
 
        /*
         * The ACQUIRE semantics of the following spinning code ensure
@@ -99,7 +99,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
        /*
         * Signal the next one in queue to become queue head
         */
-       arch_spin_unlock(&lock->lock);
+       arch_spin_unlock(&lock->wait_lock);
 }
 EXPORT_SYMBOL(queued_read_lock_slowpath);
 
@@ -112,7 +112,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
        u32 cnts;
 
        /* Put the writer into the wait queue */
-       arch_spin_lock(&lock->lock);
+       arch_spin_lock(&lock->wait_lock);
 
        /* Try to acquire the lock directly if no reader is present */
        if (!atomic_read(&lock->cnts) &&
@@ -144,6 +144,6 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
                cpu_relax_lowlatency();
        }
 unlock:
-       arch_spin_unlock(&lock->lock);
+       arch_spin_unlock(&lock->wait_lock);
 }
 EXPORT_SYMBOL(queued_write_lock_slowpath);
index c8e6e9a..f0450ff 100644 (file)
@@ -267,7 +267,6 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
                }
 
                if (!lp) { /* ONCE */
-                       WRITE_ONCE(pn->state, vcpu_hashed);
                        lp = pv_hash(lock, pn);
 
                        /*
@@ -275,11 +274,9 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
                         * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
                         * we'll be sure to be able to observe our hash entry.
                         *
-                        *   [S] pn->state
                         *   [S] <hash>                 [Rmw] l->locked == _Q_SLOW_VAL
                         *       MB                           RMB
                         * [RmW] l->locked = _Q_SLOW_VAL  [L] <unhash>
-                        *                                [L] pn->state
                         *
                         * Matches the smp_rmb() in __pv_queued_spin_unlock().
                         */
@@ -364,8 +361,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
         * vCPU is harmless other than the additional latency in completing
         * the unlock.
         */
-       if (READ_ONCE(node->state) == vcpu_hashed)
-               pv_kick(node->cpu);
+       pv_kick(node->cpu);
 }
 /*
  * Include the architecture specific callee-save thunk of the
index 7781d80..bbb72b4 100644 (file)
@@ -74,14 +74,23 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
  * set up.
  */
 #ifndef CONFIG_DEBUG_RT_MUTEXES
-# define rt_mutex_cmpxchg(l,c,n)       (cmpxchg(&l->owner, c, n) == c)
+/*
+ * Try to switch l->owner from @c to @n using the cmpxchg() flavour named in
+ * the macro; each evaluates to non-zero when the old value was @c, i.e. the
+ * exchange went through. Arguments are parenthesised so that arbitrary
+ * expressions can be passed; note that @c is still evaluated twice.
+ */
+# define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&(l)->owner, (c), (n)) == (c))
+# define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&(l)->owner, (c), (n)) == (c))
+# define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&(l)->owner, (c), (n)) == (c))
+
+/*
+ * Set RT_MUTEX_HAS_WAITERS in lock->owner, retrying until the cmpxchg()
+ * succeeds against an unchanged owner word.
+ *
+ * Callers must hold the ->wait_lock -- which is the whole purpose as we force
+ * all future threads that attempt to [Rmw] the lock to the slowpath. As such
+ * relaxed semantics suffice for the cmpxchg() loop itself; the barrier after
+ * the loop orders the update against the caller's subsequent accesses.
+ */
 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 {
        unsigned long owner, *p = (unsigned long *) &lock->owner;
 
        do {
                owner = *p;
-       } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
+       } while (cmpxchg_relaxed(p, owner,
+                                owner | RT_MUTEX_HAS_WAITERS) != owner);
+
+       /*
+        * The loop is relaxed to avoid back-to-back ACQUIRE operations under
+        * contention, but the slow path inspects the lock state right after
+        * this call. Without a barrier those later accesses may be reordered
+        * before the waiters bit is set on weakly ordered CPUs.
+        */
+       smp_mb__after_atomic();
 }
 
 /*
@@ -121,11 +130,14 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
         *                                      lock(wait_lock);
         *                                      acquire(lock);
         */
-       return rt_mutex_cmpxchg(lock, owner, NULL);
+       return rt_mutex_cmpxchg_release(lock, owner, NULL);
 }
 
 #else
-# define rt_mutex_cmpxchg(l,c,n)       (0)
+# define rt_mutex_cmpxchg_relaxed(l,c,n)       (0)
+# define rt_mutex_cmpxchg_acquire(l,c,n)       (0)
+# define rt_mutex_cmpxchg_release(l,c,n)       (0)
+
 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 {
        lock->owner = (struct task_struct *)
@@ -1321,7 +1333,7 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state,
                                struct hrtimer_sleeper *timeout,
                                enum rtmutex_chainwalk chwalk))
 {
-       if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
+       if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
                rt_mutex_deadlock_account_lock(lock, current);
                return 0;
        } else
@@ -1337,7 +1349,7 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
                                      enum rtmutex_chainwalk chwalk))
 {
        if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
-           likely(rt_mutex_cmpxchg(lock, NULL, current))) {
+           likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
                rt_mutex_deadlock_account_lock(lock, current);
                return 0;
        } else
@@ -1348,7 +1360,7 @@ static inline int
 rt_mutex_fasttrylock(struct rt_mutex *lock,
                     int (*slowfn)(struct rt_mutex *lock))
 {
-       if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
+       if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
                rt_mutex_deadlock_account_lock(lock, current);
                return 1;
        }
@@ -1362,7 +1374,7 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
 {
        WAKE_Q(wake_q);
 
-       if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
+       if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
                rt_mutex_deadlock_account_unlock(current);
 
        } else {
@@ -1484,7 +1496,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock);
 bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
                                   struct wake_q_head *wqh)
 {
-       if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
+       if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
                rt_mutex_deadlock_account_unlock(current);
                return false;
        }
index 0f18971..a4d4de0 100644 (file)
@@ -262,7 +262,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
         * to reduce unnecessary expensive cmpxchg() operations.
         */
        if (count == RWSEM_WAITING_BIAS &&
-           cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
+           cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS,
                    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
                if (!list_is_singular(&sem->wait_list))
                        rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
@@ -285,7 +285,8 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
                if (!(count == 0 || count == RWSEM_WAITING_BIAS))
                        return false;
 
-               old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
+               old = cmpxchg_acquire(&sem->count, count,
+                                     count + RWSEM_ACTIVE_WRITE_BIAS);
                if (old == count) {
                        rwsem_set_owner(sem);
                        return true;