Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Mon, 25 Jul 2016 19:41:29 +0000 (12:41 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Mon, 25 Jul 2016 19:41:29 +0000 (12:41 -0700)
Pull locking updates from Ingo Molnar:
 "The locking tree was busier in this cycle than the usual pattern - a
  couple of major projects happened to coincide.

  The main changes are:

   - implement the atomic_fetch_{add,sub,and,or,xor}() API natively
     across all SMP architectures (Peter Zijlstra)

   - add atomic_fetch_{inc,dec}() as well, using the generic primitives
     (Davidlohr Bueso)

   - optimize various aspects of rwsems (Jason Low, Davidlohr Bueso,
     Waiman Long)

   - optimize smp_cond_load_acquire() on arm64 and implement LSE based
     atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
     on arm64 (Will Deacon)

   - introduce smp_acquire__after_ctrl_dep() and fix various barrier
     mis-uses and bugs (Peter Zijlstra)

   - after discovering ancient spin_unlock_wait() barrier bugs in its
     implementation and usage, strengthen its semantics and update/fix
     usage sites (Peter Zijlstra)

   - optimize mutex_trylock() fastpath (Peter Zijlstra)

   - ... misc fixes and cleanups"

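The naming distinction is worth spelling out: atomic_add_return() yields
the value *after* the operation, while the new atomic_fetch_add() yields
the value *before* it.  A minimal illustrative sketch (not from the
patches; the demo function is hypothetical):

	#include <linux/atomic.h>

	static void fetch_vs_return_demo(void)
	{
		atomic_t v = ATOMIC_INIT(5);

		int after  = atomic_add_return(3, &v);	/* v == 8,  after  == 8 */
		int before = atomic_fetch_add(3, &v);	/* v == 11, before == 8 */

		(void)after;
		(void)before;
	}
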
* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (67 commits)
  locking/atomic: Introduce inc/dec variants for the atomic_fetch_$op() API
  locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire()
  locking/static_keys: Fix non static symbol Sparse warning
  locking/qspinlock: Use __this_cpu_dec() instead of full-blown this_cpu_dec()
  locking/atomic, arch/tile: Fix tilepro build
  locking/atomic, arch/m68k: Remove comment
  locking/atomic, arch/arc: Fix build
  locking/Documentation: Clarify limited control-dependency scope
  locking/atomic, arch/rwsem: Employ atomic_long_fetch_add()
  locking/atomic, arch/qrwlock: Employ atomic_fetch_add_acquire()
  locking/atomic, arch/mips: Convert to _relaxed atomics
  locking/atomic, arch/alpha: Convert to _relaxed atomics
  locking/atomic: Remove the deprecated atomic_{set,clear}_mask() functions
  locking/atomic: Remove linux/atomic.h:atomic_fetch_or()
  locking/atomic: Implement atomic{,64,_long}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
  locking/atomic: Fix atomic64_relaxed() bits
  locking/atomic, arch/xtensa: Implement atomic_fetch_{add,sub,and,or,xor}()
  locking/atomic, arch/x86: Implement atomic{,64}_fetch_{add,sub,and,or,xor}()
  locking/atomic, arch/tile: Implement atomic{,64}_fetch_{add,sub,and,or,xor}()
  locking/atomic, arch/sparc: Implement atomic{,64}_fetch_{add,sub,and,or,xor}()
  ...

108 files changed:
Documentation/memory-barriers.txt
MAINTAINERS
arch/alpha/include/asm/atomic.h
arch/alpha/include/asm/rwsem.h
arch/alpha/include/asm/spinlock.h
arch/arc/include/asm/atomic.h
arch/arc/include/asm/spinlock.h
arch/arm/include/asm/atomic.h
arch/arm/include/asm/spinlock.h
arch/arm64/include/asm/atomic.h
arch/arm64/include/asm/atomic_ll_sc.h
arch/arm64/include/asm/atomic_lse.h
arch/arm64/include/asm/barrier.h
arch/arm64/include/asm/cmpxchg.h
arch/avr32/include/asm/atomic.h
arch/blackfin/include/asm/atomic.h
arch/blackfin/include/asm/spinlock.h
arch/blackfin/kernel/bfin_ksyms.c
arch/blackfin/mach-bf561/atomic.S
arch/frv/include/asm/atomic.h
arch/frv/include/asm/atomic_defs.h
arch/h8300/include/asm/atomic.h
arch/hexagon/include/asm/atomic.h
arch/hexagon/include/asm/spinlock.h
arch/ia64/include/asm/atomic.h
arch/ia64/include/asm/mutex.h
arch/ia64/include/asm/rwsem.h
arch/ia64/include/asm/spinlock.h
arch/m32r/include/asm/atomic.h
arch/m32r/include/asm/spinlock.h
arch/m68k/include/asm/atomic.h
arch/metag/include/asm/atomic_lnkget.h
arch/metag/include/asm/atomic_lock1.h
arch/metag/include/asm/spinlock.h
arch/mips/include/asm/atomic.h
arch/mips/include/asm/spinlock.h
arch/mn10300/include/asm/atomic.h
arch/mn10300/include/asm/spinlock.h
arch/parisc/include/asm/atomic.h
arch/parisc/include/asm/spinlock.h
arch/powerpc/include/asm/atomic.h
arch/powerpc/include/asm/mutex.h
arch/s390/include/asm/atomic.h
arch/s390/include/asm/rwsem.h
arch/s390/include/asm/spinlock.h
arch/sh/include/asm/atomic-grb.h
arch/sh/include/asm/atomic-irq.h
arch/sh/include/asm/atomic-llsc.h
arch/sh/include/asm/spinlock.h
arch/sparc/include/asm/atomic_32.h
arch/sparc/include/asm/atomic_64.h
arch/sparc/include/asm/spinlock_32.h
arch/sparc/include/asm/spinlock_64.h
arch/sparc/lib/atomic32.c
arch/sparc/lib/atomic_64.S
arch/sparc/lib/ksyms.c
arch/tile/include/asm/atomic.h
arch/tile/include/asm/atomic_32.h
arch/tile/include/asm/atomic_64.h
arch/tile/include/asm/barrier.h
arch/tile/include/asm/bitops_32.h
arch/tile/include/asm/futex.h
arch/tile/lib/atomic_32.c
arch/tile/lib/atomic_asm_32.S
arch/tile/lib/spinlock_32.c
arch/tile/lib/spinlock_64.c
arch/x86/include/asm/atomic.h
arch/x86/include/asm/atomic64_32.h
arch/x86/include/asm/atomic64_64.h
arch/x86/include/asm/mutex_32.h
arch/x86/include/asm/mutex_64.h
arch/x86/include/asm/rwsem.h
arch/xtensa/include/asm/atomic.h
arch/xtensa/include/asm/spinlock.h
include/asm-generic/atomic-long.h
include/asm-generic/atomic.h
include/asm-generic/atomic64.h
include/asm-generic/barrier.h
include/asm-generic/mutex-dec.h
include/asm-generic/mutex-xchg.h
include/asm-generic/qspinlock.h
include/asm-generic/rwsem.h
include/linux/atomic.h
include/linux/compiler.h
include/linux/percpu-refcount.h
include/linux/rwsem.h
include/linux/spinlock_up.h
ipc/sem.c
kernel/exit.c
kernel/jump_label.c
kernel/locking/lockdep.c
kernel/locking/mutex-debug.h
kernel/locking/mutex.h
kernel/locking/qrwlock.c
kernel/locking/qspinlock.c
kernel/locking/qspinlock_paravirt.h
kernel/locking/rtmutex.c
kernel/locking/rwsem-xadd.c
kernel/locking/rwsem.c
kernel/locking/rwsem.h
kernel/rcu/tree_exp.h
kernel/sched/core.c
kernel/sched/sched.h
kernel/smp.c
kernel/task_work.c
lib/atomic64.c
lib/atomic64_test.c
net/netfilter/nf_conntrack_core.c

index 147ae8ec836f85666110634ff5565f4016de1d80..a4d0a99de04da76d62bc0ceebae88fd5283473e4 100644 (file)
@@ -806,6 +806,41 @@ out-guess your code.  More generally, although READ_ONCE() does force
 the compiler to actually emit code for a given load, it does not force
 the compiler to use the results.
 
+In addition, control dependencies apply only to the then-clause and
+else-clause of the if-statement in question.  In particular, they do
+not necessarily apply to code following the if-statement:
+
+       q = READ_ONCE(a);
+       if (q) {
+               WRITE_ONCE(b, p);
+       } else {
+               WRITE_ONCE(b, r);
+       }
+       WRITE_ONCE(c, 1);  /* BUG: No ordering against the read from "a". */
+
+It is tempting to argue that there in fact is ordering because the
+compiler cannot reorder volatile accesses and also cannot reorder
+the writes to "b" with the condition.  Unfortunately for this line
+of reasoning, the compiler might compile the two writes to "b" as
+conditional-move instructions, as in this fanciful pseudo-assembly
+language:
+
+       ld r1,a
+       ld r2,p
+       ld r3,r
+       cmp r1,$0
+       cmov,ne r4,r2
+       cmov,eq r4,r3
+       st r4,b
+       st $1,c
+
+A weakly ordered CPU would have no dependency of any sort between the load
+from "a" and the store to "c".  The control dependencies would extend
+only to the pair of cmov instructions and the store depending on them.
+In short, control dependencies apply only to the stores in the then-clause
+and else-clause of the if-statement in question (including functions
+invoked by those two clauses), not to code following that if-statement.
+
 Finally, control dependencies do -not- provide transitivity.  This is
 demonstrated by two related examples, with the initial values of
 x and y both being zero:
@@ -869,6 +904,12 @@ In summary:
       atomic{,64}_read() can help to preserve your control dependency.
       Please see the COMPILER BARRIER section for more information.
 
+  (*) Control dependencies apply only to the then-clause and else-clause
+      of the if-statement containing the control dependency, including
+      any functions that these two clauses call.  Control dependencies
+      do -not- apply to code following the if-statement containing the
+      control dependency.
+
   (*) Control dependencies pair normally with other types of barriers.
 
   (*) Control dependencies do -not- provide transitivity.  If you
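Where ordering against code after the if-statement really is required,
the control dependency has to be upgraded explicitly.  One conventional
fix for the BUG example above (a sketch, not part of this patch) is a
full barrier between the if-statement and the later store:

	q = READ_ONCE(a);
	if (q) {
		WRITE_ONCE(b, p);
	} else {
		WRITE_ONCE(b, r);
	}
	smp_mb();          /* orders the READ_ONCE(a) above ... */
	WRITE_ONCE(c, 1);  /* ... against this store */
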
index 452beeeb7fbdca19b432144f0fb4aa28f6fd13c6..09bb92fb80c8b4292bc7e5565e191b22854cd433 100644 (file)
@@ -7024,15 +7024,23 @@ Q:      http://patchwork.linuxtv.org/project/linux-media/list/
 S:     Maintained
 F:     drivers/media/usb/dvb-usb-v2/lmedm04*
 
-LOCKDEP AND LOCKSTAT
+LOCKING PRIMITIVES
 M:     Peter Zijlstra <peterz@infradead.org>
 M:     Ingo Molnar <mingo@redhat.com>
 L:     linux-kernel@vger.kernel.org
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/locking
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
 S:     Maintained
-F:     Documentation/locking/lockdep*.txt
-F:     Documentation/locking/lockstat.txt
+F:     Documentation/locking/
 F:     include/linux/lockdep.h
+F:     include/linux/spinlock*.h
+F:     arch/*/include/asm/spinlock*.h
+F:     include/linux/rwlock*.h
+F:     include/linux/mutex*.h
+F:     arch/*/include/asm/mutex*.h
+F:     include/linux/rwsem*.h
+F:     arch/*/include/asm/rwsem.h
+F:     include/linux/seqlock.h
+F:     lib/locking*.[ch]
 F:     kernel/locking/
 
 LOGICAL DISK MANAGER SUPPORT (LDM, Windows 2000/XP/Vista Dynamic Disks)
index 572b228c44c7a80aec6eed925715c8d2ebacb00d..498933a7df97e9831ed3957349777c817f64a2b1 100644 (file)
@@ -46,10 +46,9 @@ static __inline__ void atomic_##op(int i, atomic_t * v)                      \
 }                                                                      \
 
 #define ATOMIC_OP_RETURN(op, asm_op)                                   \
-static inline int atomic_##op##_return(int i, atomic_t *v)             \
+static inline int atomic_##op##_return_relaxed(int i, atomic_t *v)     \
 {                                                                      \
        long temp, result;                                              \
-       smp_mb();                                                       \
        __asm__ __volatile__(                                           \
        "1:     ldl_l %0,%1\n"                                          \
        "       " #asm_op " %0,%3,%2\n"                                 \
@@ -61,7 +60,23 @@ static inline int atomic_##op##_return(int i, atomic_t *v)           \
        ".previous"                                                     \
        :"=&r" (temp), "=m" (v->counter), "=&r" (result)                \
        :"Ir" (i), "m" (v->counter) : "memory");                        \
-       smp_mb();                                                       \
+       return result;                                                  \
+}
+
+#define ATOMIC_FETCH_OP(op, asm_op)                                    \
+static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v)      \
+{                                                                      \
+       long temp, result;                                              \
+       __asm__ __volatile__(                                           \
+       "1:     ldl_l %2,%1\n"                                          \
+       "       " #asm_op " %2,%3,%0\n"                                 \
+       "       stl_c %0,%1\n"                                          \
+       "       beq %0,2f\n"                                            \
+       ".subsection 2\n"                                               \
+       "2:     br 1b\n"                                                \
+       ".previous"                                                     \
+       :"=&r" (temp), "=m" (v->counter), "=&r" (result)                \
+       :"Ir" (i), "m" (v->counter) : "memory");                        \
        return result;                                                  \
 }
 
@@ -82,10 +97,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)         \
 }                                                                      \
 
 #define ATOMIC64_OP_RETURN(op, asm_op)                                 \
-static __inline__ long atomic64_##op##_return(long i, atomic64_t * v)  \
+static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v)  \
 {                                                                      \
        long temp, result;                                              \
-       smp_mb();                                                       \
        __asm__ __volatile__(                                           \
        "1:     ldq_l %0,%1\n"                                          \
        "       " #asm_op " %0,%3,%2\n"                                 \
@@ -97,34 +111,77 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v)      \
        ".previous"                                                     \
        :"=&r" (temp), "=m" (v->counter), "=&r" (result)                \
        :"Ir" (i), "m" (v->counter) : "memory");                        \
-       smp_mb();                                                       \
+       return result;                                                  \
+}
+
+#define ATOMIC64_FETCH_OP(op, asm_op)                                  \
+static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v)   \
+{                                                                      \
+       long temp, result;                                              \
+       __asm__ __volatile__(                                           \
+       "1:     ldq_l %2,%1\n"                                          \
+       "       " #asm_op " %2,%3,%0\n"                                 \
+       "       stq_c %0,%1\n"                                          \
+       "       beq %0,2f\n"                                            \
+       ".subsection 2\n"                                               \
+       "2:     br 1b\n"                                                \
+       ".previous"                                                     \
+       :"=&r" (temp), "=m" (v->counter), "=&r" (result)                \
+       :"Ir" (i), "m" (v->counter) : "memory");                        \
        return result;                                                  \
 }
 
 #define ATOMIC_OPS(op)                                                 \
        ATOMIC_OP(op, op##l)                                            \
        ATOMIC_OP_RETURN(op, op##l)                                     \
+       ATOMIC_FETCH_OP(op, op##l)                                      \
        ATOMIC64_OP(op, op##q)                                          \
-       ATOMIC64_OP_RETURN(op, op##q)
+       ATOMIC64_OP_RETURN(op, op##q)                                   \
+       ATOMIC64_FETCH_OP(op, op##q)
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+#define atomic_add_return_relaxed      atomic_add_return_relaxed
+#define atomic_sub_return_relaxed      atomic_sub_return_relaxed
+#define atomic_fetch_add_relaxed       atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed       atomic_fetch_sub_relaxed
+
+#define atomic64_add_return_relaxed    atomic64_add_return_relaxed
+#define atomic64_sub_return_relaxed    atomic64_sub_return_relaxed
+#define atomic64_fetch_add_relaxed     atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed     atomic64_fetch_sub_relaxed
+
 #define atomic_andnot atomic_andnot
 #define atomic64_andnot atomic64_andnot
 
-ATOMIC_OP(and, and)
-ATOMIC_OP(andnot, bic)
-ATOMIC_OP(or, bis)
-ATOMIC_OP(xor, xor)
-ATOMIC64_OP(and, and)
-ATOMIC64_OP(andnot, bic)
-ATOMIC64_OP(or, bis)
-ATOMIC64_OP(xor, xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, asm)                                            \
+       ATOMIC_OP(op, asm)                                              \
+       ATOMIC_FETCH_OP(op, asm)                                        \
+       ATOMIC64_OP(op, asm)                                            \
+       ATOMIC64_FETCH_OP(op, asm)
+
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(andnot, bic)
+ATOMIC_OPS(or, bis)
+ATOMIC_OPS(xor, xor)
+
+#define atomic_fetch_and_relaxed       atomic_fetch_and_relaxed
+#define atomic_fetch_andnot_relaxed    atomic_fetch_andnot_relaxed
+#define atomic_fetch_or_relaxed                atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed       atomic_fetch_xor_relaxed
+
+#define atomic64_fetch_and_relaxed     atomic64_fetch_and_relaxed
+#define atomic64_fetch_andnot_relaxed  atomic64_fetch_andnot_relaxed
+#define atomic64_fetch_or_relaxed      atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed     atomic64_fetch_xor_relaxed
 
 #undef ATOMIC_OPS
+#undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
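Note that alpha now defines only the _relaxed forms; the fully ordered,
_acquire and _release variants are generated by the <linux/atomic.h>
wrappers.  The full-barrier fallback there looks roughly like this
(quoted from memory, so treat it as a sketch rather than the exact
header text):

	#define __atomic_op_fence(op, args...)				\
	({								\
		typeof(op##_relaxed(args)) __ret;			\
		smp_mb__before_atomic();				\
		__ret = op##_relaxed(args);				\
		smp_mb__after_atomic();					\
		__ret;							\
	})
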
index 0131a7058778ed67fd32da6595052a0287b742fa..77873d0ad2937b130ab0a8fa737b5a61b821d087 100644 (file)
@@ -25,8 +25,8 @@ static inline void __down_read(struct rw_semaphore *sem)
 {
        long oldcount;
 #ifndef        CONFIG_SMP
-       oldcount = sem->count;
-       sem->count += RWSEM_ACTIVE_READ_BIAS;
+       oldcount = sem->count.counter;
+       sem->count.counter += RWSEM_ACTIVE_READ_BIAS;
 #else
        long temp;
        __asm__ __volatile__(
@@ -52,13 +52,13 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 {
        long old, new, res;
 
-       res = sem->count;
+       res = atomic_long_read(&sem->count);
        do {
                new = res + RWSEM_ACTIVE_READ_BIAS;
                if (new <= 0)
                        break;
                old = res;
-               res = cmpxchg(&sem->count, old, new);
+               res = atomic_long_cmpxchg(&sem->count, old, new);
        } while (res != old);
        return res >= 0 ? 1 : 0;
 }
@@ -67,8 +67,8 @@ static inline long ___down_write(struct rw_semaphore *sem)
 {
        long oldcount;
 #ifndef        CONFIG_SMP
-       oldcount = sem->count;
-       sem->count += RWSEM_ACTIVE_WRITE_BIAS;
+       oldcount = sem->count.counter;
+       sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS;
 #else
        long temp;
        __asm__ __volatile__(
@@ -106,7 +106,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem)
  */
 static inline int __down_write_trylock(struct rw_semaphore *sem)
 {
-       long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
+       long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
                           RWSEM_ACTIVE_WRITE_BIAS);
        if (ret == RWSEM_UNLOCKED_VALUE)
                return 1;
@@ -117,8 +117,8 @@ static inline void __up_read(struct rw_semaphore *sem)
 {
        long oldcount;
 #ifndef        CONFIG_SMP
-       oldcount = sem->count;
-       sem->count -= RWSEM_ACTIVE_READ_BIAS;
+       oldcount = sem->count.counter;
+       sem->count.counter -= RWSEM_ACTIVE_READ_BIAS;
 #else
        long temp;
        __asm__ __volatile__(
@@ -142,8 +142,8 @@ static inline void __up_write(struct rw_semaphore *sem)
 {
        long count;
 #ifndef        CONFIG_SMP
-       sem->count -= RWSEM_ACTIVE_WRITE_BIAS;
-       count = sem->count;
+       sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS;
+       count = sem->count.counter;
 #else
        long temp;
        __asm__ __volatile__(
@@ -171,8 +171,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
 {
        long oldcount;
 #ifndef        CONFIG_SMP
-       oldcount = sem->count;
-       sem->count -= RWSEM_WAITING_BIAS;
+       oldcount = sem->count.counter;
+       sem->count.counter -= RWSEM_WAITING_BIAS;
 #else
        long temp;
        __asm__ __volatile__(
@@ -191,47 +191,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
                rwsem_downgrade_wake(sem);
 }
 
-static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem)
-{
-#ifndef        CONFIG_SMP
-       sem->count += val;
-#else
-       long temp;
-       __asm__ __volatile__(
-       "1:     ldq_l   %0,%1\n"
-       "       addq    %0,%2,%0\n"
-       "       stq_c   %0,%1\n"
-       "       beq     %0,2f\n"
-       ".subsection 2\n"
-       "2:     br      1b\n"
-       ".previous"
-       :"=&r" (temp), "=m" (sem->count)
-       :"Ir" (val), "m" (sem->count));
-#endif
-}
-
-static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
-{
-#ifndef        CONFIG_SMP
-       sem->count += val;
-       return sem->count;
-#else
-       long ret, temp;
-       __asm__ __volatile__(
-       "1:     ldq_l   %0,%1\n"
-       "       addq    %0,%3,%2\n"
-       "       addq    %0,%3,%0\n"
-       "       stq_c   %2,%1\n"
-       "       beq     %2,2f\n"
-       ".subsection 2\n"
-       "2:     br      1b\n"
-       ".previous"
-       :"=&r" (ret), "=m" (sem->count), "=&r" (temp)
-       :"Ir" (val), "m" (sem->count));
-
-       return ret;
-#endif
-}
-
 #endif /* __KERNEL__ */
 #endif /* _ALPHA_RWSEM_H */
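The hand-rolled rwsem_atomic_add()/rwsem_atomic_update() helpers can go
because sem->count is now an atomic_long_t and the generic rwsem code
calls atomic_long_add(), atomic_long_fetch_add() and friends directly.
The cmpxchg retry idiom from __down_read_trylock() above, isolated as a
sketch (the helper name is hypothetical):

	static inline int rwsem_try_read_lock(struct rw_semaphore *sem)
	{
		long old = atomic_long_read(&sem->count);

		for (;;) {
			long new = old + RWSEM_ACTIVE_READ_BIAS;
			long prev;

			if (new <= 0)
				return 0;	/* writer active; give up */
			prev = atomic_long_cmpxchg(&sem->count, old, new);
			if (prev == old)
				return 1;	/* got the read lock */
			old = prev;		/* lost a race; retry */
		}
	}
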
index fed9c6f44c191295d36f4ab8a9a9352883a1c5b1..a40b9fc0c6c3cafbffbf61fdde57c47afb68250d 100644 (file)
@@ -3,6 +3,8 @@
 
 #include <linux/kernel.h>
 #include <asm/current.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 
 /*
  * Simple spin lock operations.  There are two variants, one clears IRQ's
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 #define arch_spin_is_locked(x) ((x)->lock != 0)
-#define arch_spin_unlock_wait(x) \
-               do { cpu_relax(); } while ((x)->lock)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+       smp_cond_load_acquire(&lock->lock, !VAL);
+}
 
 static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
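arch_spin_unlock_wait() now builds on smp_cond_load_acquire(), which is
introduced this cycle.  Its generic fallback (in include/linux/compiler.h
at this point; quoted from memory, so a sketch) is roughly:

	#define smp_cond_load_acquire(ptr, cond_expr) ({		\
		typeof(ptr) __PTR = (ptr);				\
		typeof(*ptr) VAL;					\
		for (;;) {						\
			VAL = READ_ONCE(*__PTR);			\
			if (cond_expr)					\
				break;					\
			cpu_relax();					\
		}							\
		smp_acquire__after_ctrl_dep();				\
		VAL;							\
	})

So smp_cond_load_acquire(&lock->lock, !VAL) spins until lock->lock
reads as zero and then provides ACQUIRE ordering - the strengthened
semantics that the audit of spin_unlock_wait() callers found to be
needed.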
index dd683995bc9d119136807adcbf71b189714fe359..4e3c1b6b0806bf1087b36f21cf96e5dfb60715b1 100644 (file)
@@ -67,6 +67,33 @@ static inline int atomic_##op##_return(int i, atomic_t *v)           \
        return val;                                                     \
 }
 
+#define ATOMIC_FETCH_OP(op, c_op, asm_op)                              \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       unsigned int val, orig;                                         \
+                                                                       \
+       /*                                                              \
+        * Explicit full memory barrier needed before/after as          \
+        * LLOCK/SCOND themselves don't provide any such semantics      \
+        */                                                             \
+       smp_mb();                                                       \
+                                                                       \
+       __asm__ __volatile__(                                           \
+       "1:     llock   %[orig], [%[ctr]]               \n"             \
+       "       " #asm_op " %[val], %[orig], %[i]       \n"             \
+       "       scond   %[val], [%[ctr]]                \n"             \
+       "                                               \n"             \
+       : [val] "=&r"   (val),                                          \
+         [orig] "=&r" (orig)                                           \
+       : [ctr] "r"     (&v->counter),                                  \
+         [i]   "ir"    (i)                                             \
+       : "cc");                                                        \
+                                                                       \
+       smp_mb();                                                       \
+                                                                       \
+       return orig;                                                    \
+}
+
 #else  /* !CONFIG_ARC_HAS_LLSC */
 
 #ifndef CONFIG_SMP
@@ -129,25 +156,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v)                \
        return temp;                                                    \
 }
 
+#define ATOMIC_FETCH_OP(op, c_op, asm_op)                              \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       unsigned long flags;                                            \
+       unsigned long orig;                                             \
+                                                                       \
+       /*                                                              \
+        * spin lock/unlock provides the needed smp_mb() before/after   \
+        */                                                             \
+       atomic_ops_lock(flags);                                         \
+       orig = v->counter;                                              \
+       v->counter c_op i;                                              \
+       atomic_ops_unlock(flags);                                       \
+                                                                       \
+       return orig;                                                    \
+}
+
 #endif /* !CONFIG_ARC_HAS_LLSC */
 
 #define ATOMIC_OPS(op, c_op, asm_op)                                   \
        ATOMIC_OP(op, c_op, asm_op)                                     \
-       ATOMIC_OP_RETURN(op, c_op, asm_op)
+       ATOMIC_OP_RETURN(op, c_op, asm_op)                              \
+       ATOMIC_FETCH_OP(op, c_op, asm_op)
 
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
 #define atomic_andnot atomic_andnot
 
-ATOMIC_OP(and, &=, and)
-ATOMIC_OP(andnot, &= ~, bic)
-ATOMIC_OP(or, |=, or)
-ATOMIC_OP(xor, ^=, xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op)                                   \
+       ATOMIC_OP(op, c_op, asm_op)                                     \
+       ATOMIC_FETCH_OP(op, c_op, asm_op)
 
-#undef SCOND_FAIL_RETRY_VAR_DEF
-#undef SCOND_FAIL_RETRY_ASM
-#undef SCOND_FAIL_RETRY_VARS
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(andnot, &= ~, bic)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, xor)
 
 #else /* CONFIG_ARC_PLAT_EZNPS */
 
@@ -208,22 +254,51 @@ static inline int atomic_##op##_return(int i, atomic_t *v)                \
        return temp;                                                    \
 }
 
+#define ATOMIC_FETCH_OP(op, c_op, asm_op)                              \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       unsigned int temp = i;                                          \
+                                                                       \
+       /* Explicit full memory barrier needed before/after */          \
+       smp_mb();                                                       \
+                                                                       \
+       __asm__ __volatile__(                                           \
+       "       mov r2, %0\n"                                           \
+       "       mov r3, %1\n"                                           \
+       "       .word %2\n"                                             \
+       "       mov %0, r2"                                             \
+       : "+r"(temp)                                                    \
+       : "r"(&v->counter), "i"(asm_op)                                 \
+       : "r2", "r3", "memory");                                        \
+                                                                       \
+       smp_mb();                                                       \
+                                                                       \
+       return temp;                                                    \
+}
+
 #define ATOMIC_OPS(op, c_op, asm_op)                                   \
        ATOMIC_OP(op, c_op, asm_op)                                     \
-       ATOMIC_OP_RETURN(op, c_op, asm_op)
+       ATOMIC_OP_RETURN(op, c_op, asm_op)                              \
+       ATOMIC_FETCH_OP(op, c_op, asm_op)
 
 ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3)
 #define atomic_sub(i, v) atomic_add(-(i), (v))
 #define atomic_sub_return(i, v) atomic_add_return(-(i), (v))
 
-ATOMIC_OP(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op)                                   \
+       ATOMIC_OP(op, c_op, asm_op)                                     \
+       ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
 #define atomic_andnot(mask, v) atomic_and(~(mask), (v))
-ATOMIC_OP(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
-ATOMIC_OP(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
+ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
+ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
 
 #endif /* CONFIG_ARC_PLAT_EZNPS */
 
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
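Expanding the spinlock-backed variant by hand makes the pattern
concrete; ATOMIC_FETCH_OP(add, +=, add) from the !CONFIG_ARC_HAS_LLSC
path above generates (modulo the macro plumbing) roughly:

	static inline int atomic_fetch_add(int i, atomic_t *v)
	{
		unsigned long flags;
		unsigned long orig;

		/* spin lock/unlock provides the needed smp_mb() before/after */
		atomic_ops_lock(flags);
		orig = v->counter;
		v->counter += i;	/* the "+=" passed in as c_op */
		atomic_ops_unlock(flags);

		return orig;		/* the pre-operation value */
	}
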
index cded4a9b543822d5ff9e2099fb05c67bdfc8e84a..233d5ffe6ec779eb376e5beaf5a031fce6fa97c1 100644 (file)
 
 #define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__)
 #define arch_spin_lock_flags(lock, flags)      arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
-       do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+       smp_cond_load_acquire(&lock->slock, !VAL);
+}
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
index 9e10c4567eb4dd3c898486f5fc34777c3760adc9..66d0e215a773cb66d3baaa5a08cbc91056fe00be 100644 (file)
@@ -77,8 +77,36 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v)   \
        return result;                                                  \
 }
 
+#define ATOMIC_FETCH_OP(op, c_op, asm_op)                              \
+static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v)      \
+{                                                                      \
+       unsigned long tmp;                                              \
+       int result, val;                                                \
+                                                                       \
+       prefetchw(&v->counter);                                         \
+                                                                       \
+       __asm__ __volatile__("@ atomic_fetch_" #op "\n"                 \
+"1:    ldrex   %0, [%4]\n"                                             \
+"      " #asm_op "     %1, %0, %5\n"                                   \
+"      strex   %2, %1, [%4]\n"                                         \
+"      teq     %2, #0\n"                                               \
+"      bne     1b"                                                     \
+       : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter)  \
+       : "r" (&v->counter), "Ir" (i)                                   \
+       : "cc");                                                        \
+                                                                       \
+       return result;                                                  \
+}
+
 #define atomic_add_return_relaxed      atomic_add_return_relaxed
 #define atomic_sub_return_relaxed      atomic_sub_return_relaxed
+#define atomic_fetch_add_relaxed       atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed       atomic_fetch_sub_relaxed
+
+#define atomic_fetch_and_relaxed       atomic_fetch_and_relaxed
+#define atomic_fetch_andnot_relaxed    atomic_fetch_andnot_relaxed
+#define atomic_fetch_or_relaxed                atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed       atomic_fetch_xor_relaxed
 
 static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new)
 {
@@ -159,6 +187,20 @@ static inline int atomic_##op##_return(int i, atomic_t *v)         \
        return val;                                                     \
 }
 
+#define ATOMIC_FETCH_OP(op, c_op, asm_op)                              \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       unsigned long flags;                                            \
+       int val;                                                        \
+                                                                       \
+       raw_local_irq_save(flags);                                      \
+       val = v->counter;                                               \
+       v->counter c_op i;                                              \
+       raw_local_irq_restore(flags);                                   \
+                                                                       \
+       return val;                                                     \
+}
+
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
        int ret;
@@ -187,19 +229,26 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 
 #define ATOMIC_OPS(op, c_op, asm_op)                                   \
        ATOMIC_OP(op, c_op, asm_op)                                     \
-       ATOMIC_OP_RETURN(op, c_op, asm_op)
+       ATOMIC_OP_RETURN(op, c_op, asm_op)                              \
+       ATOMIC_FETCH_OP(op, c_op, asm_op)
 
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
 #define atomic_andnot atomic_andnot
 
-ATOMIC_OP(and, &=, and)
-ATOMIC_OP(andnot, &= ~, bic)
-ATOMIC_OP(or,  |=, orr)
-ATOMIC_OP(xor, ^=, eor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op)                                   \
+       ATOMIC_OP(op, c_op, asm_op)                                     \
+       ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(andnot, &= ~, bic)
+ATOMIC_OPS(or,  |=, orr)
+ATOMIC_OPS(xor, ^=, eor)
 
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
@@ -317,24 +366,61 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v)                \
        return result;                                                  \
 }
 
+#define ATOMIC64_FETCH_OP(op, op1, op2)                                        \
+static inline long long                                                        \
+atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v)              \
+{                                                                      \
+       long long result, val;                                          \
+       unsigned long tmp;                                              \
+                                                                       \
+       prefetchw(&v->counter);                                         \
+                                                                       \
+       __asm__ __volatile__("@ atomic64_fetch_" #op "\n"               \
+"1:    ldrexd  %0, %H0, [%4]\n"                                        \
+"      " #op1 " %Q1, %Q0, %Q5\n"                                       \
+"      " #op2 " %R1, %R0, %R5\n"                                       \
+"      strexd  %2, %1, %H1, [%4]\n"                                    \
+"      teq     %2, #0\n"                                               \
+"      bne     1b"                                                     \
+       : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter)  \
+       : "r" (&v->counter), "r" (i)                                    \
+       : "cc");                                                        \
+                                                                       \
+       return result;                                                  \
+}
+
 #define ATOMIC64_OPS(op, op1, op2)                                     \
        ATOMIC64_OP(op, op1, op2)                                       \
-       ATOMIC64_OP_RETURN(op, op1, op2)
+       ATOMIC64_OP_RETURN(op, op1, op2)                                \
+       ATOMIC64_FETCH_OP(op, op1, op2)
 
 ATOMIC64_OPS(add, adds, adc)
 ATOMIC64_OPS(sub, subs, sbc)
 
 #define atomic64_add_return_relaxed    atomic64_add_return_relaxed
 #define atomic64_sub_return_relaxed    atomic64_sub_return_relaxed
+#define atomic64_fetch_add_relaxed     atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed     atomic64_fetch_sub_relaxed
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, op1, op2)                                     \
+       ATOMIC64_OP(op, op1, op2)                                       \
+       ATOMIC64_FETCH_OP(op, op1, op2)
 
 #define atomic64_andnot atomic64_andnot
 
-ATOMIC64_OP(and, and, and)
-ATOMIC64_OP(andnot, bic, bic)
-ATOMIC64_OP(or,  orr, orr)
-ATOMIC64_OP(xor, eor, eor)
+ATOMIC64_OPS(and, and, and)
+ATOMIC64_OPS(andnot, bic, bic)
+ATOMIC64_OPS(or,  orr, orr)
+ATOMIC64_OPS(xor, eor, eor)
+
+#define atomic64_fetch_and_relaxed     atomic64_fetch_and_relaxed
+#define atomic64_fetch_andnot_relaxed  atomic64_fetch_andnot_relaxed
+#define atomic64_fetch_or_relaxed      atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed     atomic64_fetch_xor_relaxed
 
 #undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
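The 64-bit variants rely on GCC's ARM operand modifiers to address the
two halves of a register pair; a comment-only summary of my reading of
the templates above (not part of the patch):

	/*
	 * ldrexd/strexd load and store a 64-bit register pair atomically.
	 * In the asm templates:
	 *   %Q1 - the low  32-bit half of 64-bit operand 1
	 *   %R1 - the high 32-bit half of 64-bit operand 1
	 *   %H1 - the second register of the pair
	 * so "adds %Q1, %Q0, %Q5" / "adc %R1, %R0, %R5" together form a
	 * 64-bit add with the carry propagated between the halves.
	 */
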
index 0fa418463f49e95696c6d6b7ea9ac8d3d2798647..4bec4544207243d477b2e326b50119cc842526fe 100644 (file)
@@ -6,6 +6,8 @@
 #endif
 
 #include <linux/prefetch.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 
 /*
  * sev and wfe are ARMv6K extensions.  Uniprocessor ARMv6 may not have the K
@@ -50,8 +52,21 @@ static inline void dsb_sev(void)
  * memory.
  */
 
-#define arch_spin_unlock_wait(lock) \
-       do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+       u16 owner = READ_ONCE(lock->tickets.owner);
+
+       for (;;) {
+               arch_spinlock_t tmp = READ_ONCE(*lock);
+
+               if (tmp.tickets.owner == tmp.tickets.next ||
+                   tmp.tickets.owner != owner)
+                       break;
+
+               wfe();
+       }
+       smp_acquire__after_ctrl_dep();
+}
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
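The loop ends with smp_acquire__after_ctrl_dep(), also new this cycle:
the READ_ONCE() feeding the conditional break forms a control
dependency, which orders later stores but not later loads.  The generic
definition upgrades that to full ACQUIRE ordering (quoted from memory;
a sketch):

	/*
	 * A control dependency already orders subsequent stores against
	 * the load; adding a read barrier orders subsequent loads too,
	 * which together gives ACQUIRE semantics.
	 */
	#define smp_acquire__after_ctrl_dep()	smp_rmb()
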
index f3a3586a421c8869d6e88219c686491d71e2ecd5..c0235e0ff8493fc516109d1e2e5cbaa647fd27dc 100644 (file)
 #define atomic_dec_return_release(v)   atomic_sub_return_release(1, (v))
 #define atomic_dec_return(v)           atomic_sub_return(1, (v))
 
+#define atomic_fetch_add_relaxed       atomic_fetch_add_relaxed
+#define atomic_fetch_add_acquire       atomic_fetch_add_acquire
+#define atomic_fetch_add_release       atomic_fetch_add_release
+#define atomic_fetch_add               atomic_fetch_add
+
+#define atomic_fetch_sub_relaxed       atomic_fetch_sub_relaxed
+#define atomic_fetch_sub_acquire       atomic_fetch_sub_acquire
+#define atomic_fetch_sub_release       atomic_fetch_sub_release
+#define atomic_fetch_sub               atomic_fetch_sub
+
+#define atomic_fetch_and_relaxed       atomic_fetch_and_relaxed
+#define atomic_fetch_and_acquire       atomic_fetch_and_acquire
+#define atomic_fetch_and_release       atomic_fetch_and_release
+#define atomic_fetch_and               atomic_fetch_and
+
+#define atomic_fetch_andnot_relaxed    atomic_fetch_andnot_relaxed
+#define atomic_fetch_andnot_acquire    atomic_fetch_andnot_acquire
+#define atomic_fetch_andnot_release    atomic_fetch_andnot_release
+#define atomic_fetch_andnot            atomic_fetch_andnot
+
+#define atomic_fetch_or_relaxed                atomic_fetch_or_relaxed
+#define atomic_fetch_or_acquire                atomic_fetch_or_acquire
+#define atomic_fetch_or_release                atomic_fetch_or_release
+#define atomic_fetch_or                        atomic_fetch_or
+
+#define atomic_fetch_xor_relaxed       atomic_fetch_xor_relaxed
+#define atomic_fetch_xor_acquire       atomic_fetch_xor_acquire
+#define atomic_fetch_xor_release       atomic_fetch_xor_release
+#define atomic_fetch_xor               atomic_fetch_xor
+
 #define atomic_xchg_relaxed(v, new)    xchg_relaxed(&((v)->counter), (new))
 #define atomic_xchg_acquire(v, new)    xchg_acquire(&((v)->counter), (new))
 #define atomic_xchg_release(v, new)    xchg_release(&((v)->counter), (new))
 #define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v))
 #define atomic64_dec_return(v)         atomic64_sub_return(1, (v))
 
+#define atomic64_fetch_add_relaxed     atomic64_fetch_add_relaxed
+#define atomic64_fetch_add_acquire     atomic64_fetch_add_acquire
+#define atomic64_fetch_add_release     atomic64_fetch_add_release
+#define atomic64_fetch_add             atomic64_fetch_add
+
+#define atomic64_fetch_sub_relaxed     atomic64_fetch_sub_relaxed
+#define atomic64_fetch_sub_acquire     atomic64_fetch_sub_acquire
+#define atomic64_fetch_sub_release     atomic64_fetch_sub_release
+#define atomic64_fetch_sub             atomic64_fetch_sub
+
+#define atomic64_fetch_and_relaxed     atomic64_fetch_and_relaxed
+#define atomic64_fetch_and_acquire     atomic64_fetch_and_acquire
+#define atomic64_fetch_and_release     atomic64_fetch_and_release
+#define atomic64_fetch_and             atomic64_fetch_and
+
+#define atomic64_fetch_andnot_relaxed  atomic64_fetch_andnot_relaxed
+#define atomic64_fetch_andnot_acquire  atomic64_fetch_andnot_acquire
+#define atomic64_fetch_andnot_release  atomic64_fetch_andnot_release
+#define atomic64_fetch_andnot          atomic64_fetch_andnot
+
+#define atomic64_fetch_or_relaxed      atomic64_fetch_or_relaxed
+#define atomic64_fetch_or_acquire      atomic64_fetch_or_acquire
+#define atomic64_fetch_or_release      atomic64_fetch_or_release
+#define atomic64_fetch_or              atomic64_fetch_or
+
+#define atomic64_fetch_xor_relaxed     atomic64_fetch_xor_relaxed
+#define atomic64_fetch_xor_acquire     atomic64_fetch_xor_acquire
+#define atomic64_fetch_xor_release     atomic64_fetch_xor_release
+#define atomic64_fetch_xor             atomic64_fetch_xor
+
 #define atomic64_xchg_relaxed          atomic_xchg_relaxed
 #define atomic64_xchg_acquire          atomic_xchg_acquire
 #define atomic64_xchg_release          atomic_xchg_release
index f61c84f6ba021aa9cd149c8b927680d86f6328b7..f819fdcff1accf694cc0ec7428096fe41aef7f89 100644 (file)
@@ -77,26 +77,57 @@ __LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v))              \
 }                                                                      \
 __LL_SC_EXPORT(atomic_##op##_return##name);
 
+#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op)            \
+__LL_SC_INLINE int                                                     \
+__LL_SC_PREFIX(atomic_fetch_##op##name(int i, atomic_t *v))            \
+{                                                                      \
+       unsigned long tmp;                                              \
+       int val, result;                                                \
+                                                                       \
+       asm volatile("// atomic_fetch_" #op #name "\n"                  \
+"      prfm    pstl1strm, %3\n"                                        \
+"1:    ld" #acq "xr    %w0, %3\n"                                      \
+"      " #asm_op "     %w1, %w0, %w4\n"                                \
+"      st" #rel "xr    %w2, %w1, %3\n"                                 \
+"      cbnz    %w2, 1b\n"                                              \
+"      " #mb                                                           \
+       : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)   \
+       : "Ir" (i)                                                      \
+       : cl);                                                          \
+                                                                       \
+       return result;                                                  \
+}                                                                      \
+__LL_SC_EXPORT(atomic_fetch_##op##name);
+
 #define ATOMIC_OPS(...)                                                        \
        ATOMIC_OP(__VA_ARGS__)                                          \
-       ATOMIC_OP_RETURN(        , dmb ish,  , l, "memory", __VA_ARGS__)
-
-#define ATOMIC_OPS_RLX(...)                                            \
-       ATOMIC_OPS(__VA_ARGS__)                                         \
+       ATOMIC_OP_RETURN(        , dmb ish,  , l, "memory", __VA_ARGS__)\
        ATOMIC_OP_RETURN(_relaxed,        ,  ,  ,         , __VA_ARGS__)\
        ATOMIC_OP_RETURN(_acquire,        , a,  , "memory", __VA_ARGS__)\
-       ATOMIC_OP_RETURN(_release,        ,  , l, "memory", __VA_ARGS__)
+       ATOMIC_OP_RETURN(_release,        ,  , l, "memory", __VA_ARGS__)\
+       ATOMIC_FETCH_OP (        , dmb ish,  , l, "memory", __VA_ARGS__)\
+       ATOMIC_FETCH_OP (_relaxed,        ,  ,  ,         , __VA_ARGS__)\
+       ATOMIC_FETCH_OP (_acquire,        , a,  , "memory", __VA_ARGS__)\
+       ATOMIC_FETCH_OP (_release,        ,  , l, "memory", __VA_ARGS__)
 
-ATOMIC_OPS_RLX(add, add)
-ATOMIC_OPS_RLX(sub, sub)
+ATOMIC_OPS(add, add)
+ATOMIC_OPS(sub, sub)
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(...)                                                        \
+       ATOMIC_OP(__VA_ARGS__)                                          \
+       ATOMIC_FETCH_OP (        , dmb ish,  , l, "memory", __VA_ARGS__)\
+       ATOMIC_FETCH_OP (_relaxed,        ,  ,  ,         , __VA_ARGS__)\
+       ATOMIC_FETCH_OP (_acquire,        , a,  , "memory", __VA_ARGS__)\
+       ATOMIC_FETCH_OP (_release,        ,  , l, "memory", __VA_ARGS__)
 
-ATOMIC_OP(and, and)
-ATOMIC_OP(andnot, bic)
-ATOMIC_OP(or, orr)
-ATOMIC_OP(xor, eor)
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(andnot, bic)
+ATOMIC_OPS(or, orr)
+ATOMIC_OPS(xor, eor)
 
-#undef ATOMIC_OPS_RLX
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
@@ -140,26 +171,57 @@ __LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v))       \
 }                                                                      \
 __LL_SC_EXPORT(atomic64_##op##_return##name);
 
+#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op)          \
+__LL_SC_INLINE long                                                    \
+__LL_SC_PREFIX(atomic64_fetch_##op##name(long i, atomic64_t *v))       \
+{                                                                      \
+       long result, val;                                               \
+       unsigned long tmp;                                              \
+                                                                       \
+       asm volatile("// atomic64_fetch_" #op #name "\n"                \
+"      prfm    pstl1strm, %3\n"                                        \
+"1:    ld" #acq "xr    %0, %3\n"                                       \
+"      " #asm_op "     %1, %0, %4\n"                                   \
+"      st" #rel "xr    %w2, %1, %3\n"                                  \
+"      cbnz    %w2, 1b\n"                                              \
+"      " #mb                                                           \
+       : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)   \
+       : "Ir" (i)                                                      \
+       : cl);                                                          \
+                                                                       \
+       return result;                                                  \
+}                                                                      \
+__LL_SC_EXPORT(atomic64_fetch_##op##name);
+
 #define ATOMIC64_OPS(...)                                              \
        ATOMIC64_OP(__VA_ARGS__)                                        \
-       ATOMIC64_OP_RETURN(, dmb ish,  , l, "memory", __VA_ARGS__)
-
-#define ATOMIC64_OPS_RLX(...)                                          \
-       ATOMIC64_OPS(__VA_ARGS__)                                       \
+       ATOMIC64_OP_RETURN(, dmb ish,  , l, "memory", __VA_ARGS__)      \
        ATOMIC64_OP_RETURN(_relaxed,,  ,  ,         , __VA_ARGS__)      \
        ATOMIC64_OP_RETURN(_acquire,, a,  , "memory", __VA_ARGS__)      \
-       ATOMIC64_OP_RETURN(_release,,  , l, "memory", __VA_ARGS__)
+       ATOMIC64_OP_RETURN(_release,,  , l, "memory", __VA_ARGS__)      \
+       ATOMIC64_FETCH_OP (, dmb ish,  , l, "memory", __VA_ARGS__)      \
+       ATOMIC64_FETCH_OP (_relaxed,,  ,  ,         , __VA_ARGS__)      \
+       ATOMIC64_FETCH_OP (_acquire,, a,  , "memory", __VA_ARGS__)      \
+       ATOMIC64_FETCH_OP (_release,,  , l, "memory", __VA_ARGS__)
 
-ATOMIC64_OPS_RLX(add, add)
-ATOMIC64_OPS_RLX(sub, sub)
+ATOMIC64_OPS(add, add)
+ATOMIC64_OPS(sub, sub)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(...)                                              \
+       ATOMIC64_OP(__VA_ARGS__)                                        \
+       ATOMIC64_FETCH_OP (, dmb ish,  , l, "memory", __VA_ARGS__)      \
+       ATOMIC64_FETCH_OP (_relaxed,,  ,  ,         , __VA_ARGS__)      \
+       ATOMIC64_FETCH_OP (_acquire,, a,  , "memory", __VA_ARGS__)      \
+       ATOMIC64_FETCH_OP (_release,,  , l, "memory", __VA_ARGS__)
 
-ATOMIC64_OP(and, and)
-ATOMIC64_OP(andnot, bic)
-ATOMIC64_OP(or, orr)
-ATOMIC64_OP(xor, eor)
+ATOMIC64_OPS(and, and)
+ATOMIC64_OPS(andnot, bic)
+ATOMIC64_OPS(or, orr)
+ATOMIC64_OPS(xor, eor)
 
-#undef ATOMIC64_OPS_RLX
 #undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
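In C terms, each generated LL/SC fetch op is a load-exclusive/
store-exclusive retry loop.  A sketch, with hypothetical
load_exclusive()/store_exclusive() helpers standing in for the
ld{a}xr/st{l}xr instructions:

	static inline int atomic_fetch_add_sketch(int i, atomic_t *v)
	{
		int old;

		do {
			old = load_exclusive(&v->counter);	  /* ld{a}xr */
			/* store fails if the line was touched meanwhile */
		} while (!store_exclusive(&v->counter, old + i)); /* st{l}xr */

		/* the fully ordered variant additionally issues "dmb ish" */
		return old;
	}

In the LSE file that follows, note how the missing instructions are
derived: fetch_and is built from LDCLR by first inverting the operand
(x & y == x & ~(~y), hence the "mvn"), and fetch_sub from LDADD by
negating it, since LSE has no fetch-AND or fetch-SUB encoding.
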
index 39c1d340fec59136b8ddd6e3ac3f39354289189f..b5890be8f257e5b13bf5191f27b6f71f334004de 100644 (file)
 #endif
 
 #define __LL_SC_ATOMIC(op)     __LL_SC_CALL(atomic_##op)
-
-static inline void atomic_andnot(int i, atomic_t *v)
-{
-       register int w0 asm ("w0") = i;
-       register atomic_t *x1 asm ("x1") = v;
-
-       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot),
-       "       stclr   %w[i], %[v]\n")
-       : [i] "+r" (w0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : __LL_SC_CLOBBERS);
+#define ATOMIC_OP(op, asm_op)                                          \
+static inline void atomic_##op(int i, atomic_t *v)                     \
+{                                                                      \
+       register int w0 asm ("w0") = i;                                 \
+       register atomic_t *x1 asm ("x1") = v;                           \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op),          \
+"      " #asm_op "     %w[i], %[v]\n")                                 \
+       : [i] "+r" (w0), [v] "+Q" (v->counter)                          \
+       : "r" (x1)                                                      \
+       : __LL_SC_CLOBBERS);                                            \
 }
 
-static inline void atomic_or(int i, atomic_t *v)
-{
-       register int w0 asm ("w0") = i;
-       register atomic_t *x1 asm ("x1") = v;
+ATOMIC_OP(andnot, stclr)
+ATOMIC_OP(or, stset)
+ATOMIC_OP(xor, steor)
+ATOMIC_OP(add, stadd)
 
-       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or),
-       "       stset   %w[i], %[v]\n")
-       : [i] "+r" (w0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : __LL_SC_CLOBBERS);
-}
+#undef ATOMIC_OP
 
-static inline void atomic_xor(int i, atomic_t *v)
-{
-       register int w0 asm ("w0") = i;
-       register atomic_t *x1 asm ("x1") = v;
-
-       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor),
-       "       steor   %w[i], %[v]\n")
-       : [i] "+r" (w0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : __LL_SC_CLOBBERS);
+#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...)                   \
+static inline int atomic_fetch_##op##name(int i, atomic_t *v)          \
+{                                                                      \
+       register int w0 asm ("w0") = i;                                 \
+       register atomic_t *x1 asm ("x1") = v;                           \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
+       /* LL/SC */                                                     \
+       __LL_SC_ATOMIC(fetch_##op##name),                               \
+       /* LSE atomics */                                               \
+"      " #asm_op #mb " %w[i], %w[i], %[v]")                            \
+       : [i] "+r" (w0), [v] "+Q" (v->counter)                          \
+       : "r" (x1)                                                      \
+       : __LL_SC_CLOBBERS, ##cl);                                      \
+                                                                       \
+       return w0;                                                      \
 }
 
-static inline void atomic_add(int i, atomic_t *v)
-{
-       register int w0 asm ("w0") = i;
-       register atomic_t *x1 asm ("x1") = v;
+#define ATOMIC_FETCH_OPS(op, asm_op)                                   \
+       ATOMIC_FETCH_OP(_relaxed,   , op, asm_op)                       \
+       ATOMIC_FETCH_OP(_acquire,  a, op, asm_op, "memory")             \
+       ATOMIC_FETCH_OP(_release,  l, op, asm_op, "memory")             \
+       ATOMIC_FETCH_OP(        , al, op, asm_op, "memory")
 
-       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add),
-       "       stadd   %w[i], %[v]\n")
-       : [i] "+r" (w0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : __LL_SC_CLOBBERS);
-}
+ATOMIC_FETCH_OPS(andnot, ldclr)
+ATOMIC_FETCH_OPS(or, ldset)
+ATOMIC_FETCH_OPS(xor, ldeor)
+ATOMIC_FETCH_OPS(add, ldadd)
+
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_FETCH_OPS
 
 #define ATOMIC_OP_ADD_RETURN(name, mb, cl...)                          \
 static inline int atomic_add_return##name(int i, atomic_t *v)          \
@@ -119,6 +122,33 @@ static inline void atomic_and(int i, atomic_t *v)
        : __LL_SC_CLOBBERS);
 }
 
+#define ATOMIC_FETCH_OP_AND(name, mb, cl...)                           \
+static inline int atomic_fetch_and##name(int i, atomic_t *v)           \
+{                                                                      \
+       register int w0 asm ("w0") = i;                                 \
+       register atomic_t *x1 asm ("x1") = v;                           \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
+       /* LL/SC */                                                     \
+       "       nop\n"                                                  \
+       __LL_SC_ATOMIC(fetch_and##name),                                \
+       /* LSE atomics */                                               \
+       "       mvn     %w[i], %w[i]\n"                                 \
+       "       ldclr" #mb "    %w[i], %w[i], %[v]")                    \
+       : [i] "+r" (w0), [v] "+Q" (v->counter)                          \
+       : "r" (x1)                                                      \
+       : __LL_SC_CLOBBERS, ##cl);                                      \
+                                                                       \
+       return w0;                                                      \
+}
+
+ATOMIC_FETCH_OP_AND(_relaxed,   )
+ATOMIC_FETCH_OP_AND(_acquire,  a, "memory")
+ATOMIC_FETCH_OP_AND(_release,  l, "memory")
+ATOMIC_FETCH_OP_AND(        , al, "memory")
+
+#undef ATOMIC_FETCH_OP_AND
+
 static inline void atomic_sub(int i, atomic_t *v)
 {
        register int w0 asm ("w0") = i;
@@ -164,57 +194,87 @@ ATOMIC_OP_SUB_RETURN(_release,  l, "memory")
 ATOMIC_OP_SUB_RETURN(        , al, "memory")
 
 #undef ATOMIC_OP_SUB_RETURN
-#undef __LL_SC_ATOMIC
-
-#define __LL_SC_ATOMIC64(op)   __LL_SC_CALL(atomic64_##op)
-
-static inline void atomic64_andnot(long i, atomic64_t *v)
-{
-       register long x0 asm ("x0") = i;
-       register atomic64_t *x1 asm ("x1") = v;
 
-       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot),
-       "       stclr   %[i], %[v]\n")
-       : [i] "+r" (x0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : __LL_SC_CLOBBERS);
+#define ATOMIC_FETCH_OP_SUB(name, mb, cl...)                           \
+static inline int atomic_fetch_sub##name(int i, atomic_t *v)           \
+{                                                                      \
+       register int w0 asm ("w0") = i;                                 \
+       register atomic_t *x1 asm ("x1") = v;                           \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
+       /* LL/SC */                                                     \
+       "       nop\n"                                                  \
+       __LL_SC_ATOMIC(fetch_sub##name),                                \
+       /* LSE atomics */                                               \
+       "       neg     %w[i], %w[i]\n"                                 \
+       "       ldadd" #mb "    %w[i], %w[i], %[v]")                    \
+       : [i] "+r" (w0), [v] "+Q" (v->counter)                          \
+       : "r" (x1)                                                      \
+       : __LL_SC_CLOBBERS, ##cl);                                      \
+                                                                       \
+       return w0;                                                      \
 }
 
-static inline void atomic64_or(long i, atomic64_t *v)
-{
-       register long x0 asm ("x0") = i;
-       register atomic64_t *x1 asm ("x1") = v;
+ATOMIC_FETCH_OP_SUB(_relaxed,   )
+ATOMIC_FETCH_OP_SUB(_acquire,  a, "memory")
+ATOMIC_FETCH_OP_SUB(_release,  l, "memory")
+ATOMIC_FETCH_OP_SUB(        , al, "memory")
 
-       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or),
-       "       stset   %[i], %[v]\n")
-       : [i] "+r" (x0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : __LL_SC_CLOBBERS);
+#undef ATOMIC_FETCH_OP_SUB
+#undef __LL_SC_ATOMIC
+
+#define __LL_SC_ATOMIC64(op)   __LL_SC_CALL(atomic64_##op)
+#define ATOMIC64_OP(op, asm_op)                                                \
+static inline void atomic64_##op(long i, atomic64_t *v)                        \
+{                                                                      \
+       register long x0 asm ("x0") = i;                                \
+       register atomic64_t *x1 asm ("x1") = v;                         \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op),        \
+"      " #asm_op "     %[i], %[v]\n")                                  \
+       : [i] "+r" (x0), [v] "+Q" (v->counter)                          \
+       : "r" (x1)                                                      \
+       : __LL_SC_CLOBBERS);                                            \
 }
 
-static inline void atomic64_xor(long i, atomic64_t *v)
-{
-       register long x0 asm ("x0") = i;
-       register atomic64_t *x1 asm ("x1") = v;
+ATOMIC64_OP(andnot, stclr)
+ATOMIC64_OP(or, stset)
+ATOMIC64_OP(xor, steor)
+ATOMIC64_OP(add, stadd)
 
-       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor),
-       "       steor   %[i], %[v]\n")
-       : [i] "+r" (x0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : __LL_SC_CLOBBERS);
+#undef ATOMIC64_OP
+
+#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...)                 \
+static inline long atomic64_fetch_##op##name(long i, atomic64_t *v)    \
+{                                                                      \
+       register long x0 asm ("x0") = i;                                \
+       register atomic64_t *x1 asm ("x1") = v;                         \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
+       /* LL/SC */                                                     \
+       __LL_SC_ATOMIC64(fetch_##op##name),                             \
+       /* LSE atomics */                                               \
+"      " #asm_op #mb " %[i], %[i], %[v]")                              \
+       : [i] "+r" (x0), [v] "+Q" (v->counter)                          \
+       : "r" (x1)                                                      \
+       : __LL_SC_CLOBBERS, ##cl);                                      \
+                                                                       \
+       return x0;                                                      \
 }
 
-static inline void atomic64_add(long i, atomic64_t *v)
-{
-       register long x0 asm ("x0") = i;
-       register atomic64_t *x1 asm ("x1") = v;
+#define ATOMIC64_FETCH_OPS(op, asm_op)                                 \
+       ATOMIC64_FETCH_OP(_relaxed,   , op, asm_op)                     \
+       ATOMIC64_FETCH_OP(_acquire,  a, op, asm_op, "memory")           \
+       ATOMIC64_FETCH_OP(_release,  l, op, asm_op, "memory")           \
+       ATOMIC64_FETCH_OP(        , al, op, asm_op, "memory")
 
-       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add),
-       "       stadd   %[i], %[v]\n")
-       : [i] "+r" (x0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : __LL_SC_CLOBBERS);
-}
+ATOMIC64_FETCH_OPS(andnot, ldclr)
+ATOMIC64_FETCH_OPS(or, ldset)
+ATOMIC64_FETCH_OPS(xor, ldeor)
+ATOMIC64_FETCH_OPS(add, ldadd)
+
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_FETCH_OPS
 
 #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...)                                \
 static inline long atomic64_add_return##name(long i, atomic64_t *v)    \
@@ -260,6 +320,33 @@ static inline void atomic64_and(long i, atomic64_t *v)
        : __LL_SC_CLOBBERS);
 }
 
+#define ATOMIC64_FETCH_OP_AND(name, mb, cl...)                         \
+static inline long atomic64_fetch_and##name(long i, atomic64_t *v)     \
+{                                                                      \
+       register long x0 asm ("w0") = i;                                \
+       register atomic64_t *x1 asm ("x1") = v;                         \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
+       /* LL/SC */                                                     \
+       "       nop\n"                                                  \
+       __LL_SC_ATOMIC64(fetch_and##name),                              \
+       /* LSE atomics */                                               \
+       "       mvn     %[i], %[i]\n"                                   \
+       "       ldclr" #mb "    %[i], %[i], %[v]")                      \
+       : [i] "+r" (x0), [v] "+Q" (v->counter)                          \
+       : "r" (x1)                                                      \
+       : __LL_SC_CLOBBERS, ##cl);                                      \
+                                                                       \
+       return x0;                                                      \
+}
+
+ATOMIC64_FETCH_OP_AND(_relaxed,   )
+ATOMIC64_FETCH_OP_AND(_acquire,  a, "memory")
+ATOMIC64_FETCH_OP_AND(_release,  l, "memory")
+ATOMIC64_FETCH_OP_AND(        , al, "memory")
+
+#undef ATOMIC64_FETCH_OP_AND
+
 static inline void atomic64_sub(long i, atomic64_t *v)
 {
        register long x0 asm ("x0") = i;
@@ -306,6 +393,33 @@ ATOMIC64_OP_SUB_RETURN(        , al, "memory")
 
 #undef ATOMIC64_OP_SUB_RETURN
 
+#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...)                         \
+static inline long atomic64_fetch_sub##name(long i, atomic64_t *v)     \
+{                                                                      \
+       register long x0 asm ("w0") = i;                                \
+       register atomic64_t *x1 asm ("x1") = v;                         \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
+       /* LL/SC */                                                     \
+       "       nop\n"                                                  \
+       __LL_SC_ATOMIC64(fetch_sub##name),                              \
+       /* LSE atomics */                                               \
+       "       neg     %[i], %[i]\n"                                   \
+       "       ldadd" #mb "    %[i], %[i], %[v]")                      \
+       : [i] "+r" (x0), [v] "+Q" (v->counter)                          \
+       : "r" (x1)                                                      \
+       : __LL_SC_CLOBBERS, ##cl);                                      \
+                                                                       \
+       return x0;                                                      \
+}
+
+ATOMIC64_FETCH_OP_SUB(_relaxed,   )
+ATOMIC64_FETCH_OP_SUB(_acquire,  a, "memory")
+ATOMIC64_FETCH_OP_SUB(_release,  l, "memory")
+ATOMIC64_FETCH_OP_SUB(        , al, "memory")
+
+#undef ATOMIC64_FETCH_OP_SUB
+
 static inline long atomic64_dec_if_positive(atomic64_t *v)
 {
        register long x0 asm ("x0") = (long)v;
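A note on the API this file now provides: the LSE fetch_*() family returns the value the counter held before the operation, where the existing *_return() family returns the new value; the _relaxed/_acquire/_release suffixes select the memory ordering and the bare name is fully ordered. A caller-side sketch (generic kernel C, illustration only; example() is a hypothetical function, not part of this diff):

atomic_t refs = ATOMIC_INIT(1);

void example(void)
{
	int old = atomic_fetch_add(1, &refs);	/* old == 1, refs now 2 */
	int new = atomic_add_return(1, &refs);	/* new == 3, refs now 3 */

	/* relaxed variant: same atomic RMW, no ordering guarantee */
	atomic_fetch_add_relaxed(1, &refs);
}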
index dae5c49618db38021b418379bcb9fe52529ec862..4eea7f618dcefff4be6e3f4d99c503cba5e7c779 100644 (file)
@@ -91,6 +91,19 @@ do {                                                                 \
        __u.__val;                                                      \
 })
 
+#define smp_cond_load_acquire(ptr, cond_expr)                          \
+({                                                                     \
+       typeof(ptr) __PTR = (ptr);                                      \
+       typeof(*ptr) VAL;                                               \
+       for (;;) {                                                      \
+               VAL = smp_load_acquire(__PTR);                          \
+               if (cond_expr)                                          \
+                       break;                                          \
+               __cmpwait_relaxed(__PTR, VAL);                          \
+       }                                                               \
+       VAL;                                                            \
+})
+
 #include <asm-generic/barrier.h>
 
 #endif /* __ASSEMBLY__ */
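smp_cond_load_acquire() loops loading *ptr (each load bound to VAL in cond_expr) until the condition holds, then returns that value with ACQUIRE ordering; between iterations this arm64 version parks the CPU in __cmpwait_relaxed() rather than busy-spinning. A hypothetical waiter, as a sketch (flag and wait_for_flag() are illustrative names, not from this diff):

static int flag;

static int wait_for_flag(void)
{
	/* returns the first observed non-zero value, ACQUIRE-ordered
	 * against the store that set it */
	return smp_cond_load_acquire(&flag, VAL != 0);
}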
index 510c7b4044547f82750ca9295e6d35bdeb0b67bf..bd86a79491bce84d34576d261214216282265d9a 100644 (file)
@@ -224,4 +224,55 @@ __CMPXCHG_GEN(_mb)
        __ret;                                                          \
 })
 
+#define __CMPWAIT_CASE(w, sz, name)                                    \
+static inline void __cmpwait_case_##name(volatile void *ptr,           \
+                                        unsigned long val)             \
+{                                                                      \
+       unsigned long tmp;                                              \
+                                                                       \
+       asm volatile(                                                   \
+       "       ldxr" #sz "\t%" #w "[tmp], %[v]\n"              \
+       "       eor     %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n"     \
+       "       cbnz    %" #w "[tmp], 1f\n"                             \
+       "       wfe\n"                                                  \
+       "1:"                                                            \
+       : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr)           \
+       : [val] "r" (val));                                             \
+}
+
+__CMPWAIT_CASE(w, b, 1);
+__CMPWAIT_CASE(w, h, 2);
+__CMPWAIT_CASE(w,  , 4);
+__CMPWAIT_CASE( ,  , 8);
+
+#undef __CMPWAIT_CASE
+
+#define __CMPWAIT_GEN(sfx)                                             \
+static inline void __cmpwait##sfx(volatile void *ptr,                  \
+                                 unsigned long val,                    \
+                                 int size)                             \
+{                                                                      \
+       switch (size) {                                                 \
+       case 1:                                                         \
+               return __cmpwait_case##sfx##_1(ptr, (u8)val);           \
+       case 2:                                                         \
+               return __cmpwait_case##sfx##_2(ptr, (u16)val);          \
+       case 4:                                                         \
+               return __cmpwait_case##sfx##_4(ptr, val);               \
+       case 8:                                                         \
+               return __cmpwait_case##sfx##_8(ptr, val);               \
+       default:                                                        \
+               BUILD_BUG();                                            \
+       }                                                               \
+                                                                       \
+       unreachable();                                                  \
+}
+
+__CMPWAIT_GEN()
+
+#undef __CMPWAIT_GEN
+
+#define __cmpwait_relaxed(ptr, val) \
+       __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
+
 #endif /* __ASM_CMPXCHG_H */
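__cmpwait_relaxed() arms the exclusive monitor with LDXR and sits in WFE until *ptr is observed to change from val, returning immediately if it already differs; it may also return spuriously, which is harmless because smp_cond_load_acquire() re-tests in its loop. Roughly, as a portable sketch of the contract only (the real implementation is the asm above):

static inline void cmpwait_sketch(volatile unsigned long *ptr,
				  unsigned long val)
{
	if (*ptr != val)
		return;		/* already changed: no wait */
	cpu_relax();		/* stand-in for WFE; spurious returns allowed */
}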
index d74fd8ce980aeb327b831fe8c7b4d862b6421524..3d5ce38a6f0b17fbaff74866a1d9b9bb5a8723b2 100644 (file)
@@ -41,21 +41,49 @@ static inline int __atomic_##op##_return(int i, atomic_t *v)                \
        return result;                                                  \
 }
 
+#define ATOMIC_FETCH_OP(op, asm_op, asm_con)                           \
+static inline int __atomic_fetch_##op(int i, atomic_t *v)              \
+{                                                                      \
+       int result, val;                                                \
+                                                                       \
+       asm volatile(                                                   \
+               "/* atomic_fetch_" #op " */\n"                          \
+               "1:     ssrf    5\n"                                    \
+               "       ld.w    %0, %3\n"                               \
+               "       mov     %1, %0\n"                               \
+               "       " #asm_op "     %1, %4\n"                       \
+               "       stcond  %2, %1\n"                               \
+               "       brne    1b"                                     \
+               : "=&r" (result), "=&r" (val), "=o" (v->counter)        \
+               : "m" (v->counter), #asm_con (i)                        \
+               : "cc");                                                \
+                                                                       \
+       return result;                                                  \
+}
+
 ATOMIC_OP_RETURN(sub, sub, rKs21)
 ATOMIC_OP_RETURN(add, add, r)
+ATOMIC_FETCH_OP (sub, sub, rKs21)
+ATOMIC_FETCH_OP (add, add, r)
 
-#define ATOMIC_OP(op, asm_op)                                          \
+#define ATOMIC_OPS(op, asm_op)                                         \
 ATOMIC_OP_RETURN(op, asm_op, r)                                                \
 static inline void atomic_##op(int i, atomic_t *v)                     \
 {                                                                      \
        (void)__atomic_##op##_return(i, v);                             \
+}                                                                      \
+ATOMIC_FETCH_OP(op, asm_op, r)                                         \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                \
+{                                                                      \
+       return __atomic_fetch_##op(i, v);                               \
 }
 
-ATOMIC_OP(and, and)
-ATOMIC_OP(or, or)
-ATOMIC_OP(xor, eor)
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(or, or)
+ATOMIC_OPS(xor, eor)
 
-#undef ATOMIC_OP
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 
 /*
@@ -87,6 +115,14 @@ static inline int atomic_add_return(int i, atomic_t *v)
        return __atomic_add_return(i, v);
 }
 
+static inline int atomic_fetch_add(int i, atomic_t *v)
+{
+       if (IS_21BIT_CONST(i))
+               return __atomic_fetch_sub(-i, v);
+
+       return __atomic_fetch_add(i, v);
+}
+
 /*
  * atomic_sub_return - subtract the atomic variable
  * @i: integer value to subtract
@@ -102,6 +138,14 @@ static inline int atomic_sub_return(int i, atomic_t *v)
        return __atomic_add_return(-i, v);
 }
 
+static inline int atomic_fetch_sub(int i, atomic_t *v)
+{
+       if (IS_21BIT_CONST(i))
+               return __atomic_fetch_sub(i, v);
+
+       return __atomic_fetch_add(-i, v);
+}
+
 /*
  * __atomic_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
index 1c1c42330c99c9e432991429778625be6e5235df..63c7deceeeb6e8f69c5d77e1b518174401229767 100644 (file)
@@ -17,6 +17,7 @@
 
 asmlinkage int __raw_uncached_fetch_asm(const volatile int *ptr);
 asmlinkage int __raw_atomic_add_asm(volatile int *ptr, int value);
+asmlinkage int __raw_atomic_xadd_asm(volatile int *ptr, int value);
 
 asmlinkage int __raw_atomic_and_asm(volatile int *ptr, int value);
 asmlinkage int __raw_atomic_or_asm(volatile int *ptr, int value);
@@ -28,10 +29,17 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value);
 #define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i)
 #define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i))
 
+#define atomic_fetch_add(i, v) __raw_atomic_xadd_asm(&(v)->counter, i)
+#define atomic_fetch_sub(i, v) __raw_atomic_xadd_asm(&(v)->counter, -(i))
+
 #define atomic_or(i, v)  (void)__raw_atomic_or_asm(&(v)->counter, i)
 #define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i)
 #define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i)
 
+#define atomic_fetch_or(i, v)  __raw_atomic_or_asm(&(v)->counter, i)
+#define atomic_fetch_and(i, v) __raw_atomic_and_asm(&(v)->counter, i)
+#define atomic_fetch_xor(i, v) __raw_atomic_xor_asm(&(v)->counter, i)
+
 #endif
 
 #include <asm-generic/atomic.h>
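Since __raw_atomic_xadd_asm() returns the old value of a signed add, one primitive covers both directions: fetch_sub(i) is fetch_add with the operand negated, exactly as the #defines above spell out. The identity, as a sketch (fetch_sub_via_xadd() is an illustrative name):

static int fetch_sub_via_xadd(atomic_t *v, int i)
{
	/* same old value as atomic_fetch_sub(i, v) */
	return atomic_fetch_add(-i, v);
}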
index 490c7caa02d9337b7b91ae8db6934320e0e91715..c58f4a83ed6f343f6ef45962884352d6368f2dc8 100644 (file)
@@ -12,6 +12,8 @@
 #else
 
 #include <linux/atomic.h>
+#include <asm/processor.h>
+#include <asm/barrier.h>
 
 asmlinkage int __raw_spin_is_locked_asm(volatile int *ptr);
 asmlinkage void __raw_spin_lock_asm(volatile int *ptr);
@@ -48,8 +50,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 
 static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
-       while (arch_spin_is_locked(lock))
-               cpu_relax();
+       smp_cond_load_acquire(&lock->lock, !VAL);
 }
 
 static inline int arch_read_can_lock(arch_rwlock_t *rw)
index a401c27b69b4a3feb825059d299ce27be64ba191..68096e8f787f7c3c3644bdd702fbeea05cf9eab3 100644 (file)
@@ -84,6 +84,7 @@ EXPORT_SYMBOL(insl_16);
 
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(__raw_atomic_add_asm);
+EXPORT_SYMBOL(__raw_atomic_xadd_asm);
 EXPORT_SYMBOL(__raw_atomic_and_asm);
 EXPORT_SYMBOL(__raw_atomic_or_asm);
 EXPORT_SYMBOL(__raw_atomic_xor_asm);
index 26fccb5568b97e4f4234c4e12a5f8cc8189bec56..1e2989c5d6b2bec2fb0ad4bdaacd5948b7a9b133 100644 (file)
@@ -605,6 +605,28 @@ ENTRY(___raw_atomic_add_asm)
        rts;
 ENDPROC(___raw_atomic_add_asm)
 
+/*
+ * r0 = ptr
+ * r1 = value
+ *
+ * ADD a signed value to a 32bit word and return the old value atomically.
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_atomic_xadd_asm)
+       p1 = r0;
+       r3 = r1;
+       [--sp] = rets;
+       call _get_core_lock;
+       r3 = [p1];
+       r2 = r1 + r3;
+       [p1] = r2;
+       r1 = p1;
+       call _put_core_lock;
+       r0 = r3;
+       rets = [sp++];
+       rts;
+ENDPROC(___raw_atomic_xadd_asm)
+
 /*
  * r0 = ptr
  * r1 = mask
@@ -618,10 +640,9 @@ ENTRY(___raw_atomic_and_asm)
        r3 = r1;
        [--sp] = rets;
        call _get_core_lock;
-       r2 = [p1];
-       r3 = r2 & r3;
-       [p1] = r3;
-       r3 = r2;
+       r3 = [p1];
+       r2 = r2 & r3;
+       [p1] = r2;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
@@ -642,10 +663,9 @@ ENTRY(___raw_atomic_or_asm)
        r3 = r1;
        [--sp] = rets;
        call _get_core_lock;
-       r2 = [p1];
-       r3 = r2 | r3;
-       [p1] = r3;
-       r3 = r2;
+       r3 = [p1];
+       r2 = r1 | r3;
+       [p1] = r2;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
@@ -666,10 +686,9 @@ ENTRY(___raw_atomic_xor_asm)
        r3 = r1;
        [--sp] = rets;
        call _get_core_lock;
-       r2 = [p1];
-       r3 = r2 ^ r3;
-       [p1] = r3;
-       r3 = r2;
+       r3 = [p1];
+       r2 = r1 ^ r3;
+       [p1] = r2;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
index 64f02d451aa8c915c81eebbd4c34f5808449a97b..1c2a5e264fc71cfd52f2acb0b24ddb1aff792be7 100644 (file)
@@ -60,16 +60,6 @@ static inline int atomic_add_negative(int i, atomic_t *v)
        return atomic_add_return(i, v) < 0;
 }
 
-static inline void atomic_add(int i, atomic_t *v)
-{
-       atomic_add_return(i, v);
-}
-
-static inline void atomic_sub(int i, atomic_t *v)
-{
-       atomic_sub_return(i, v);
-}
-
 static inline void atomic_inc(atomic_t *v)
 {
        atomic_inc_return(v);
@@ -136,16 +126,6 @@ static inline long long atomic64_add_negative(long long i, atomic64_t *v)
        return atomic64_add_return(i, v) < 0;
 }
 
-static inline void atomic64_add(long long i, atomic64_t *v)
-{
-       atomic64_add_return(i, v);
-}
-
-static inline void atomic64_sub(long long i, atomic64_t *v)
-{
-       atomic64_sub_return(i, v);
-}
-
 static inline void atomic64_inc(atomic64_t *v)
 {
        atomic64_inc_return(v);
@@ -182,11 +162,19 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 }
 
 #define ATOMIC_OP(op)                                                  \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       return __atomic32_fetch_##op(i, &v->counter);                   \
+}                                                                      \
 static inline void atomic_##op(int i, atomic_t *v)                     \
 {                                                                      \
        (void)__atomic32_fetch_##op(i, &v->counter);                    \
 }                                                                      \
                                                                        \
+static inline long long atomic64_fetch_##op(long long i, atomic64_t *v)        \
+{                                                                      \
+       return __atomic64_fetch_##op(i, &v->counter);                   \
+}                                                                      \
 static inline void atomic64_##op(long long i, atomic64_t *v)           \
 {                                                                      \
        (void)__atomic64_fetch_##op(i, &v->counter);                    \
@@ -195,6 +183,8 @@ static inline void atomic64_##op(long long i, atomic64_t *v)                \
 ATOMIC_OP(or)
 ATOMIC_OP(and)
 ATOMIC_OP(xor)
+ATOMIC_OP(add)
+ATOMIC_OP(sub)
 
 #undef ATOMIC_OP
 
index 36e126d2f801adbedc2340acfedde9141156027e..d4912c88b829dbd8115ad25336101cd47d0f8567 100644 (file)
@@ -162,6 +162,8 @@ ATOMIC_EXPORT(__atomic64_fetch_##op);
 ATOMIC_FETCH_OP(or)
 ATOMIC_FETCH_OP(and)
 ATOMIC_FETCH_OP(xor)
+ATOMIC_FETCH_OP(add)
+ATOMIC_FETCH_OP(sub)
 
 ATOMIC_OP_RETURN(add)
 ATOMIC_OP_RETURN(sub)
index 4435a445ae7ec7f8d5fa88edc649d34f6dfdd257..349a47a918db7252229445838357990d7ce8c598 100644 (file)
@@ -28,6 +28,19 @@ static inline int atomic_##op##_return(int i, atomic_t *v)   \
        return ret;                                             \
 }
 
+#define ATOMIC_FETCH_OP(op, c_op)                              \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                \
+{                                                              \
+       h8300flags flags;                                       \
+       int ret;                                                \
+                                                               \
+       flags = arch_local_irq_save();                          \
+       ret = v->counter;                                       \
+       v->counter c_op i;                                      \
+       arch_local_irq_restore(flags);                          \
+       return ret;                                             \
+}
+
 #define ATOMIC_OP(op, c_op)                                    \
 static inline void atomic_##op(int i, atomic_t *v)             \
 {                                                              \
@@ -41,17 +54,21 @@ static inline void atomic_##op(int i, atomic_t *v)          \
 ATOMIC_OP_RETURN(add, +=)
 ATOMIC_OP_RETURN(sub, -=)
 
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or,  |=)
-ATOMIC_OP(xor, ^=)
+#define ATOMIC_OPS(op, c_op)                                   \
+       ATOMIC_OP(op, c_op)                                     \
+       ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &=)
+ATOMIC_OPS(or,  |=)
+ATOMIC_OPS(xor, ^=)
+ATOMIC_OPS(add, +=)
+ATOMIC_OPS(sub, -=)
 
+#undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_add(i, v)               (void)atomic_add_return(i, v)
 #define atomic_add_negative(a, v)      (atomic_add_return((a), (v)) < 0)
-
-#define atomic_sub(i, v)               (void)atomic_sub_return(i, v)
 #define atomic_sub_and_test(i, v)      (atomic_sub_return(i, v) == 0)
 
 #define atomic_inc_return(v)           atomic_add_return(1, v)
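This port has no SMP, so an interrupts-off read-modify-write is atomic enough; ATOMIC_FETCH_OP snapshots the counter before applying c_op and returns the snapshot. Hand-expanded for add, as a sketch:

static inline int atomic_fetch_add(int i, atomic_t *v)
{
	h8300flags flags;
	int ret;

	flags = arch_local_irq_save();	/* UP: masking IRQs is sufficient */
	ret = v->counter;		/* old value */
	v->counter += i;
	arch_local_irq_restore(flags);
	return ret;
}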
index 55696c4100d468208c757f30be47806c877dfab9..a62ba368b27d1ddf6f750efd1dcaa2b51419d1d8 100644 (file)
@@ -110,7 +110,7 @@ static inline void atomic_##op(int i, atomic_t *v)                  \
        );                                                              \
 }                                                                      \
 
-#define ATOMIC_OP_RETURN(op)                                                   \
+#define ATOMIC_OP_RETURN(op)                                           \
 static inline int atomic_##op##_return(int i, atomic_t *v)             \
 {                                                                      \
        int output;                                                     \
@@ -127,16 +127,37 @@ static inline int atomic_##op##_return(int i, atomic_t *v)                \
        return output;                                                  \
 }
 
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op)                                            \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       int output, val;                                                \
+                                                                       \
+       __asm__ __volatile__ (                                          \
+               "1:     %0 = memw_locked(%2);\n"                        \
+               "       %1 = "#op "(%0,%3);\n"                          \
+               "       memw_locked(%2,P3)=%1;\n"                       \
+               "       if !P3 jump 1b;\n"                              \
+               : "=&r" (output), "=&r" (val)                           \
+               : "r" (&v->counter), "r" (i)                            \
+               : "memory", "p3"                                        \
+       );                                                              \
+       return output;                                                  \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
 
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
index 12ca4ebc033855c93879c81bafc65af93b9bde97..a1c55788c5d6591eac4786b2dc0fcdd3164eb65c 100644 (file)
@@ -23,6 +23,8 @@
 #define _ASM_SPINLOCK_H
 
 #include <asm/irqflags.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 
 /*
  * This file is pulled in for SMP builds.
@@ -176,8 +178,12 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
  * SMP spinlocks are intended to allow only a single CPU at the lock
  */
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(lock) \
-       do {while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+       smp_cond_load_acquire(&lock->lock, !VAL);
+}
+
 #define arch_spin_is_locked(x) ((x)->lock != 0)
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
index 8dfb5f6f6c352c45126656216edb228e15026be9..f565ad3761427afb14625ee612bdb6067c8d28c2 100644 (file)
@@ -42,8 +42,27 @@ ia64_atomic_##op (int i, atomic_t *v)                                        \
        return new;                                                     \
 }
 
-ATOMIC_OP(add, +)
-ATOMIC_OP(sub, -)
+#define ATOMIC_FETCH_OP(op, c_op)                                      \
+static __inline__ int                                                  \
+ia64_atomic_fetch_##op (int i, atomic_t *v)                            \
+{                                                                      \
+       __s32 old, new;                                                 \
+       CMPXCHG_BUGCHECK_DECL                                           \
+                                                                       \
+       do {                                                            \
+               CMPXCHG_BUGCHECK(v);                                    \
+               old = atomic_read(v);                                   \
+               new = old c_op i;                                       \
+       } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \
+       return old;                                                     \
+}
+
+#define ATOMIC_OPS(op, c_op)                                           \
+       ATOMIC_OP(op, c_op)                                             \
+       ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(add, +)
+ATOMIC_OPS(sub, -)
 
 #define atomic_add_return(i,v)                                         \
 ({                                                                     \
@@ -69,14 +88,44 @@ ATOMIC_OP(sub, -)
                : ia64_atomic_sub(__ia64_asr_i, v);                     \
 })
 
-ATOMIC_OP(and, &)
-ATOMIC_OP(or, |)
-ATOMIC_OP(xor, ^)
+#define atomic_fetch_add(i,v)                                          \
+({                                                                     \
+       int __ia64_aar_i = (i);                                         \
+       (__builtin_constant_p(i)                                        \
+        && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)           \
+            || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)           \
+            || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)           \
+            || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))         \
+               ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq)       \
+               : ia64_atomic_fetch_add(__ia64_aar_i, v);               \
+})
+
+#define atomic_fetch_sub(i,v)                                          \
+({                                                                     \
+       int __ia64_asr_i = (i);                                         \
+       (__builtin_constant_p(i)                                        \
+        && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)          \
+            || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)          \
+            || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)          \
+            || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))        \
+               ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq)      \
+               : ia64_atomic_fetch_sub(__ia64_asr_i, v);               \
+})
+
+ATOMIC_FETCH_OP(and, &)
+ATOMIC_FETCH_OP(or, |)
+ATOMIC_FETCH_OP(xor, ^)
+
+#define atomic_and(i,v)        (void)ia64_atomic_fetch_and(i,v)
+#define atomic_or(i,v) (void)ia64_atomic_fetch_or(i,v)
+#define atomic_xor(i,v)        (void)ia64_atomic_fetch_xor(i,v)
 
-#define atomic_and(i,v)        (void)ia64_atomic_and(i,v)
-#define atomic_or(i,v) (void)ia64_atomic_or(i,v)
-#define atomic_xor(i,v)        (void)ia64_atomic_xor(i,v)
+#define atomic_fetch_and(i,v)  ia64_atomic_fetch_and(i,v)
+#define atomic_fetch_or(i,v)   ia64_atomic_fetch_or(i,v)
+#define atomic_fetch_xor(i,v)  ia64_atomic_fetch_xor(i,v)
 
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP
 
 #define ATOMIC64_OP(op, c_op)                                          \
@@ -94,8 +143,27 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v)                         \
        return new;                                                     \
 }
 
-ATOMIC64_OP(add, +)
-ATOMIC64_OP(sub, -)
+#define ATOMIC64_FETCH_OP(op, c_op)                                    \
+static __inline__ long                                                 \
+ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v)                      \
+{                                                                      \
+       __s64 old, new;                                                 \
+       CMPXCHG_BUGCHECK_DECL                                           \
+                                                                       \
+       do {                                                            \
+               CMPXCHG_BUGCHECK(v);                                    \
+               old = atomic64_read(v);                                 \
+               new = old c_op i;                                       \
+       } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \
+       return old;                                                     \
+}
+
+#define ATOMIC64_OPS(op, c_op)                                         \
+       ATOMIC64_OP(op, c_op)                                           \
+       ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(add, +)
+ATOMIC64_OPS(sub, -)
 
 #define atomic64_add_return(i,v)                                       \
 ({                                                                     \
@@ -121,14 +189,44 @@ ATOMIC64_OP(sub, -)
                : ia64_atomic64_sub(__ia64_asr_i, v);                   \
 })
 
-ATOMIC64_OP(and, &)
-ATOMIC64_OP(or, |)
-ATOMIC64_OP(xor, ^)
+#define atomic64_fetch_add(i,v)                                                \
+({                                                                     \
+       long __ia64_aar_i = (i);                                        \
+       (__builtin_constant_p(i)                                        \
+        && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)           \
+            || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)           \
+            || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)           \
+            || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))         \
+               ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq)       \
+               : ia64_atomic64_fetch_add(__ia64_aar_i, v);             \
+})
+
+#define atomic64_fetch_sub(i,v)                                                \
+({                                                                     \
+       long __ia64_asr_i = (i);                                        \
+       (__builtin_constant_p(i)                                        \
+        && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)          \
+            || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)          \
+            || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)          \
+            || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))        \
+               ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq)      \
+               : ia64_atomic64_fetch_sub(__ia64_asr_i, v);             \
+})
+
+ATOMIC64_FETCH_OP(and, &)
+ATOMIC64_FETCH_OP(or, |)
+ATOMIC64_FETCH_OP(xor, ^)
+
+#define atomic64_and(i,v)      (void)ia64_atomic64_fetch_and(i,v)
+#define atomic64_or(i,v)       (void)ia64_atomic64_fetch_or(i,v)
+#define atomic64_xor(i,v)      (void)ia64_atomic64_fetch_xor(i,v)
 
-#define atomic64_and(i,v)      (void)ia64_atomic64_and(i,v)
-#define atomic64_or(i,v)       (void)ia64_atomic64_or(i,v)
-#define atomic64_xor(i,v)      (void)ia64_atomic64_xor(i,v)
+#define atomic64_fetch_and(i,v)        ia64_atomic64_fetch_and(i,v)
+#define atomic64_fetch_or(i,v) ia64_atomic64_fetch_or(i,v)
+#define atomic64_fetch_xor(i,v)        ia64_atomic64_fetch_xor(i,v)
 
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP
 
 #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new))
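ia64_atomic_fetch_<op>() is the standard compare-and-swap retry loop: sample the counter, compute old c_op i, and retry until ia64_cmpxchg() confirms no other CPU intervened; the fetchadd fast path in atomic_fetch_add()/atomic_fetch_sub() is reserved for the immediates the fetchadd instruction can encode (±1, ±4, ±8, ±16). The loop hand-expanded for add, without the BUGCHECK plumbing, as a sketch (fetch_add_cas_loop() is an illustrative name):

static int fetch_add_cas_loop(int i, atomic_t *v)
{
	__s32 old, new;

	do {
		old = atomic_read(v);
		new = old + i;
	} while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old);

	return old;	/* fetch_*() returns the pre-op value */
}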
index f41e66d65e31c4b6c1eb15e9bdb0ee71d2018378..28cb819e0ff93adadf6ffbb1974cdcd1a5a9d8fc 100644 (file)
@@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-       if (cmpxchg_acq(count, 1, 0) == 1)
+       if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1)
                return 1;
        return 0;
 }
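The one-line change above turns the trylock fastpath into test-and-test-and-set: a plain atomic_read() filters out the already-locked case without a read-for-ownership bus transaction, and the expensive cmpxchg_acq() only runs when it has a chance of succeeding. The pattern in general form, as a sketch (trylock_pattern() is an illustrative name):

static inline int trylock_pattern(atomic_t *count)
{
	if (atomic_read(count) != 1)
		return 0;			/* read-only: no cacheline bouncing */
	return cmpxchg_acq(count, 1, 0) == 1;	/* still racy, hence cmpxchg */
}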
index 8b23e070b8440e0ce0989ec07bb416c55ad86e35..8fa98dd303b4b4733f862cce39cca2b82235f1b7 100644 (file)
@@ -40,7 +40,7 @@
 static inline void
 __down_read (struct rw_semaphore *sem)
 {
-       long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1);
+       long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1);
 
        if (result < 0)
                rwsem_down_read_failed(sem);
@@ -55,9 +55,9 @@ ___down_write (struct rw_semaphore *sem)
        long old, new;
 
        do {
-               old = sem->count;
+               old = atomic_long_read(&sem->count);
                new = old + RWSEM_ACTIVE_WRITE_BIAS;
-       } while (cmpxchg_acq(&sem->count, old, new) != old);
+       } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old);
 
        return old;
 }
@@ -85,7 +85,7 @@ __down_write_killable (struct rw_semaphore *sem)
 static inline void
 __up_read (struct rw_semaphore *sem)
 {
-       long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1);
+       long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1);
 
        if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0)
                rwsem_wake(sem);
@@ -100,9 +100,9 @@ __up_write (struct rw_semaphore *sem)
        long old, new;
 
        do {
-               old = sem->count;
+               old = atomic_long_read(&sem->count);
                new = old - RWSEM_ACTIVE_WRITE_BIAS;
-       } while (cmpxchg_rel(&sem->count, old, new) != old);
+       } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
 
        if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0)
                rwsem_wake(sem);
@@ -115,8 +115,8 @@ static inline int
 __down_read_trylock (struct rw_semaphore *sem)
 {
        long tmp;
-       while ((tmp = sem->count) >= 0) {
-               if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) {
+       while ((tmp = atomic_long_read(&sem->count)) >= 0) {
+               if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) {
                        return 1;
                }
        }
@@ -129,8 +129,8 @@ __down_read_trylock (struct rw_semaphore *sem)
 static inline int
 __down_write_trylock (struct rw_semaphore *sem)
 {
-       long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE,
-                             RWSEM_ACTIVE_WRITE_BIAS);
+       long tmp = atomic_long_cmpxchg_acquire(&sem->count,
+                       RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS);
        return tmp == RWSEM_UNLOCKED_VALUE;
 }
 
@@ -143,19 +143,12 @@ __downgrade_write (struct rw_semaphore *sem)
        long old, new;
 
        do {
-               old = sem->count;
+               old = atomic_long_read(&sem->count);
                new = old - RWSEM_WAITING_BIAS;
-       } while (cmpxchg_rel(&sem->count, old, new) != old);
+       } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
 
        if (old < 0)
                rwsem_downgrade_wake(sem);
 }
 
-/*
- * Implement atomic add functionality.  These used to be "inline" functions, but GCC v3.1
- * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd.
- */
-#define rwsem_atomic_add(delta, sem)   atomic64_add(delta, (atomic64_t *)(&(sem)->count))
-#define rwsem_atomic_update(delta, sem)        atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
-
 #endif /* _ASM_IA64_RWSEM_H */
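With sem->count now an atomic_long_t, the open-coded cmpxchg loops read and update it through the atomic_long_*() accessors instead of casting the field; the retry structure itself is untouched. Its shape, as a sketch (rwsem_count_add() is an illustrative name; __up_write() uses the release-ordered form shown):

static void rwsem_count_add(struct rw_semaphore *sem, long delta)
{
	long old, new;

	do {
		old = atomic_long_read(&sem->count);
		new = old + delta;
	} while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
}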
index 45698cd15b7b85a19a59e26ea8c91f9255f6454e..ca9e76149a4aa16974047820829e6c017afe77dd 100644 (file)
@@ -15,6 +15,8 @@
 
 #include <linux/atomic.h>
 #include <asm/intrinsics.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 
 #define arch_spin_lock_init(x)                 ((x)->lock = 0)
 
@@ -86,6 +88,8 @@ static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock)
                        return;
                cpu_relax();
        }
+
+       smp_acquire__after_ctrl_dep();
 }
 
 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
index ea35160d632bfc073e15c912262d2190d05371b7..640cc1c7099f293700fe8d2ecbb4535da547632b 100644 (file)
@@ -89,16 +89,44 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v)              \
        return result;                                                  \
 }
 
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op)                                            \
+static __inline__ int atomic_fetch_##op(int i, atomic_t *v)            \
+{                                                                      \
+       unsigned long flags;                                            \
+       int result, val;                                                \
+                                                                       \
+       local_irq_save(flags);                                          \
+       __asm__ __volatile__ (                                          \
+               "# atomic_fetch_" #op "         \n\t"                   \
+               DCACHE_CLEAR("%0", "r4", "%2")                          \
+               M32R_LOCK" %1, @%2;             \n\t"                   \
+               "mv %0, %1                      \n\t"                   \
+               #op " %1, %3;                   \n\t"                   \
+               M32R_UNLOCK" %1, @%2;           \n\t"                   \
+               : "=&r" (result), "=&r" (val)                           \
+               : "r" (&v->counter), "r" (i)                            \
+               : "memory"                                              \
+               __ATOMIC_CLOBBER                                        \
+       );                                                              \
+       local_irq_restore(flags);                                       \
+                                                                       \
+       return result;                                                  \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
 
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
index fa13694eaae35f29233e3a477c5d6e2aebc95f5e..323c7fc953cdefa3e0b89efaafbe8aace9ed41c0 100644 (file)
@@ -13,6 +13,8 @@
 #include <linux/atomic.h>
 #include <asm/dcache_clear.h>
 #include <asm/page.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
 
 #define arch_spin_is_locked(x)         (*(volatile int *)(&(x)->slock) <= 0)
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
-               do { cpu_relax(); } while (arch_spin_is_locked(x))
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+       smp_cond_load_acquire(&lock->slock, VAL > 0);
+}
 
 /**
  * arch_spin_trylock - Try spin lock and return a result
index 4858178260f90435810c8201e220d0bee16e446e..cf4c3a7b1a45d2bfac701d4e6a8a44e2f7f260d4 100644 (file)
@@ -53,6 +53,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v)           \
        return t;                                                       \
 }
 
+#define ATOMIC_FETCH_OP(op, c_op, asm_op)                              \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       int t, tmp;                                                     \
+                                                                       \
+       __asm__ __volatile__(                                           \
+                       "1:     movel %2,%1\n"                          \
+                       "       " #asm_op "l %3,%1\n"                   \
+                       "       casl %2,%1,%0\n"                        \
+                       "       jne 1b"                                 \
+                       : "+m" (*v), "=&d" (t), "=&d" (tmp)             \
+                       : "g" (i), "2" (atomic_read(v)));               \
+       return tmp;                                                     \
+}
+
 #else
 
 #define ATOMIC_OP_RETURN(op, c_op, asm_op)                             \
@@ -68,20 +83,41 @@ static inline int atomic_##op##_return(int i, atomic_t * v)         \
        return t;                                                       \
 }
 
+#define ATOMIC_FETCH_OP(op, c_op, asm_op)                              \
+static inline int atomic_fetch_##op(int i, atomic_t * v)               \
+{                                                                      \
+       unsigned long flags;                                            \
+       int t;                                                          \
+                                                                       \
+       local_irq_save(flags);                                          \
+       t = v->counter;                                                 \
+       v->counter c_op i;                                              \
+       local_irq_restore(flags);                                       \
+                                                                       \
+       return t;                                                       \
+}
+
 #endif /* CONFIG_RMW_INSNS */
 
 #define ATOMIC_OPS(op, c_op, asm_op)                                   \
        ATOMIC_OP(op, c_op, asm_op)                                     \
-       ATOMIC_OP_RETURN(op, c_op, asm_op)
+       ATOMIC_OP_RETURN(op, c_op, asm_op)                              \
+       ATOMIC_FETCH_OP(op, c_op, asm_op)
 
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
-ATOMIC_OP(and, &=, and)
-ATOMIC_OP(or, |=, or)
-ATOMIC_OP(xor, ^=, eor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op)                                   \
+       ATOMIC_OP(op, c_op, asm_op)                                     \
+       ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, eor)
 
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
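In the CONFIG_RMW_INSNS variant above, casl both compares and, on failure, reloads the expected value into its first operand, so the loop never re-reads the counter by hand. The same loop written against the generic atomic_cmpxchg() API, as a sketch (fetch_op_cas() is an illustrative name, shown for add):

static inline int fetch_op_cas(int i, atomic_t *v)
{
	int old = atomic_read(v);

	for (;;) {
		int seen = atomic_cmpxchg(v, old, old + i);
		if (seen == old)
			return old;	/* fetch_*() returns the old value */
		old = seen;		/* casl-style: reuse the observed value */
	}
}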
 
index 88fa25fae8bd9ca3065558df052891c1d68d45cd..def2c642f05308b84815fe5bc61b748d882e3d86 100644 (file)
@@ -69,16 +69,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v)          \
        return result;                                                  \
 }
 
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op)                                            \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       int result, temp;                                               \
+                                                                       \
+       smp_mb();                                                       \
+                                                                       \
+       asm volatile (                                                  \
+               "1:     LNKGETD %1, [%2]\n"                             \
+               "       " #op " %0, %1, %3\n"                           \
+               "       LNKSETD [%2], %0\n"                             \
+               "       DEFR    %0, TXSTAT\n"                           \
+               "       ANDT    %0, %0, #HI(0x3f000000)\n"              \
+               "       CMPT    %0, #HI(0x02000000)\n"                  \
+               "       BNZ 1b\n"                                       \
+               : "=&d" (temp), "=&d" (result)                          \
+               : "da" (&v->counter), "bd" (i)                          \
+               : "cc");                                                \
+                                                                       \
+       smp_mb();                                                       \
+                                                                       \
+       return result;                                                  \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
 
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
index 0295d9b8d5bf2730dc9fe3fed7ab523444b2354e..6c1380a8a0d4a437dfb2a4146deada934bcdb354 100644 (file)
@@ -64,15 +64,40 @@ static inline int atomic_##op##_return(int i, atomic_t *v)          \
        return result;                                                  \
 }
 
-#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op)
+#define ATOMIC_FETCH_OP(op, c_op)                                      \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       unsigned long result;                                           \
+       unsigned long flags;                                            \
+                                                                       \
+       __global_lock1(flags);                                          \
+       result = v->counter;                                            \
+       fence();                                                        \
+       v->counter c_op i;                                              \
+       __global_unlock1(flags);                                        \
+                                                                       \
+       return result;                                                  \
+}
+
+#define ATOMIC_OPS(op, c_op)                                           \
+       ATOMIC_OP(op, c_op)                                             \
+       ATOMIC_OP_RETURN(op, c_op)                                      \
+       ATOMIC_FETCH_OP(op, c_op)
 
 ATOMIC_OPS(add, +=)
 ATOMIC_OPS(sub, -=)
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or, |=)
-ATOMIC_OP(xor, ^=)
 
 #undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op)                                           \
+       ATOMIC_OP(op, c_op)                                             \
+       ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &=)
+ATOMIC_OPS(or, |=)
+ATOMIC_OPS(xor, ^=)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
index 86a7cf3d1386ca6000e111780a87662056b2a427..c0c7a22be1aeff1769308825d9d739837c8baaaa 100644 (file)
@@ -1,14 +1,24 @@
 #ifndef __ASM_SPINLOCK_H
 #define __ASM_SPINLOCK_H
 
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
 #ifdef CONFIG_METAG_ATOMICITY_LOCK1
 #include <asm/spinlock_lock1.h>
 #else
 #include <asm/spinlock_lnkget.h>
 #endif
 
-#define arch_spin_unlock_wait(lock) \
-       do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+/*
+ * both lock1 and lnkget are test-and-set spinlocks with 0 unlocked and 1
+ * locked.
+ */
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+       smp_cond_load_acquire(&lock->lock, !VAL);
+}
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
index 835b402e4574fa1a27da780da9ae398bbf3bb276..0ab176bdb8e8107e3a8b700215ce9a96471b3ee9 100644 (file)
@@ -66,7 +66,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v)                             \
                        "       " #asm_op " %0, %2                      \n"   \
                        "       sc      %0, %1                          \n"   \
                        "       .set    mips0                           \n"   \
-                       : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)      \
+                       : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)  \
                        : "Ir" (i));                                          \
                } while (unlikely(!temp));                                    \
        } else {                                                              \
@@ -79,12 +79,10 @@ static __inline__ void atomic_##op(int i, atomic_t * v)                           \
 }
 
 #define ATOMIC_OP_RETURN(op, c_op, asm_op)                                   \
-static __inline__ int atomic_##op##_return(int i, atomic_t * v)                      \
+static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v)              \
 {                                                                            \
        int result;                                                           \
                                                                              \
-       smp_mb__before_llsc();                                                \
-                                                                             \
        if (kernel_uses_llsc && R10000_LLSC_WAR) {                            \
                int temp;                                                     \
                                                                              \
@@ -125,23 +123,84 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v)                 \
                raw_local_irq_restore(flags);                                 \
        }                                                                     \
                                                                              \
-       smp_llsc_mb();                                                        \
+       return result;                                                        \
+}
+
+#define ATOMIC_FETCH_OP(op, c_op, asm_op)                                    \
+static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v)       \
+{                                                                            \
+       int result;                                                           \
+                                                                             \
+       if (kernel_uses_llsc && R10000_LLSC_WAR) {                            \
+               int temp;                                                     \
+                                                                             \
+               __asm__ __volatile__(                                         \
+               "       .set    arch=r4000                              \n"   \
+               "1:     ll      %1, %2          # atomic_fetch_" #op "  \n"   \
+               "       " #asm_op " %0, %1, %3                          \n"   \
+               "       sc      %0, %2                                  \n"   \
+               "       beqzl   %0, 1b                                  \n"   \
+               "       move    %0, %1                                  \n"   \
+               "       .set    mips0                                   \n"   \
+               : "=&r" (result), "=&r" (temp),                               \
+                 "+" GCC_OFF_SMALL_ASM() (v->counter)                        \
+               : "Ir" (i));                                                  \
+       } else if (kernel_uses_llsc) {                                        \
+               int temp;                                                     \
+                                                                             \
+               do {                                                          \
+                       __asm__ __volatile__(                                 \
+                       "       .set    "MIPS_ISA_LEVEL"                \n"   \
+                       "       ll      %1, %2  # atomic_fetch_" #op "  \n"   \
+                       "       " #asm_op " %0, %1, %3                  \n"   \
+                       "       sc      %0, %2                          \n"   \
+                       "       .set    mips0                           \n"   \
+                       : "=&r" (result), "=&r" (temp),                       \
+                         "+" GCC_OFF_SMALL_ASM() (v->counter)                \
+                       : "Ir" (i));                                          \
+               } while (unlikely(!result));                                  \
+                                                                             \
+               result = temp;                                                \
+       } else {                                                              \
+               unsigned long flags;                                          \
+                                                                             \
+               raw_local_irq_save(flags);                                    \
+               result = v->counter;                                          \
+               v->counter c_op i;                                            \
+               raw_local_irq_restore(flags);                                 \
+       }                                                                     \
                                                                              \
        return result;                                                        \
 }
 
 #define ATOMIC_OPS(op, c_op, asm_op)                                         \
        ATOMIC_OP(op, c_op, asm_op)                                           \
-       ATOMIC_OP_RETURN(op, c_op, asm_op)
+       ATOMIC_OP_RETURN(op, c_op, asm_op)                                    \
+       ATOMIC_FETCH_OP(op, c_op, asm_op)
 
 ATOMIC_OPS(add, +=, addu)
 ATOMIC_OPS(sub, -=, subu)
 
-ATOMIC_OP(and, &=, and)
-ATOMIC_OP(or, |=, or)
-ATOMIC_OP(xor, ^=, xor)
+#define atomic_add_return_relaxed      atomic_add_return_relaxed
+#define atomic_sub_return_relaxed      atomic_sub_return_relaxed
+#define atomic_fetch_add_relaxed       atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed       atomic_fetch_sub_relaxed
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op)                                         \
+       ATOMIC_OP(op, c_op, asm_op)                                           \
+       ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, xor)
+
+#define atomic_fetch_and_relaxed       atomic_fetch_and_relaxed
+#define atomic_fetch_or_relaxed        atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed       atomic_fetch_xor_relaxed
 
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
@@ -362,12 +421,10 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)                    \
 }
 
 #define ATOMIC64_OP_RETURN(op, c_op, asm_op)                                 \
-static __inline__ long atomic64_##op##_return(long i, atomic64_t * v)        \
+static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
 {                                                                            \
        long result;                                                          \
                                                                              \
-       smp_mb__before_llsc();                                                \
-                                                                             \
        if (kernel_uses_llsc && R10000_LLSC_WAR) {                            \
                long temp;                                                    \
                                                                              \
@@ -409,22 +466,85 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v)           \
                raw_local_irq_restore(flags);                                 \
        }                                                                     \
                                                                              \
-       smp_llsc_mb();                                                        \
+       return result;                                                        \
+}
+
+#define ATOMIC64_FETCH_OP(op, c_op, asm_op)                                  \
+static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v)  \
+{                                                                            \
+       long result;                                                          \
+                                                                             \
+       if (kernel_uses_llsc && R10000_LLSC_WAR) {                            \
+               long temp;                                                    \
+                                                                             \
+               __asm__ __volatile__(                                         \
+               "       .set    arch=r4000                              \n"   \
+               "1:     lld     %1, %2          # atomic64_fetch_" #op "\n"   \
+               "       " #asm_op " %0, %1, %3                          \n"   \
+               "       scd     %0, %2                                  \n"   \
+               "       beqzl   %0, 1b                                  \n"   \
+               "       move    %0, %1                                  \n"   \
+               "       .set    mips0                                   \n"   \
+               : "=&r" (result), "=&r" (temp),                               \
+                 "+" GCC_OFF_SMALL_ASM() (v->counter)                        \
+               : "Ir" (i));                                                  \
+       } else if (kernel_uses_llsc) {                                        \
+               long temp;                                                    \
+                                                                             \
+               do {                                                          \
+                       __asm__ __volatile__(                                 \
+                       "       .set    "MIPS_ISA_LEVEL"                \n"   \
+                       "       lld     %1, %2  # atomic64_fetch_" #op "\n"   \
+                       "       " #asm_op " %0, %1, %3                  \n"   \
+                       "       scd     %0, %2                          \n"   \
+                       "       .set    mips0                           \n"   \
+                       : "=&r" (result), "=&r" (temp),                       \
+                         "+" GCC_OFF_SMALL_ASM() (v->counter)                \
+                       : "Ir" (i));                                          \
+               } while (unlikely(!result));                                  \
+                                                                             \
+               result = temp;                                                \
+       } else {                                                              \
+               unsigned long flags;                                          \
+                                                                             \
+               raw_local_irq_save(flags);                                    \
+               result = v->counter;                                          \
+               v->counter c_op i;                                            \
+               raw_local_irq_restore(flags);                                 \
+       }                                                                     \
                                                                              \
        return result;                                                        \
 }
 
 #define ATOMIC64_OPS(op, c_op, asm_op)                                       \
        ATOMIC64_OP(op, c_op, asm_op)                                         \
-       ATOMIC64_OP_RETURN(op, c_op, asm_op)
+       ATOMIC64_OP_RETURN(op, c_op, asm_op)                                  \
+       ATOMIC64_FETCH_OP(op, c_op, asm_op)
 
 ATOMIC64_OPS(add, +=, daddu)
 ATOMIC64_OPS(sub, -=, dsubu)
-ATOMIC64_OP(and, &=, and)
-ATOMIC64_OP(or, |=, or)
-ATOMIC64_OP(xor, ^=, xor)
+
+#define atomic64_add_return_relaxed    atomic64_add_return_relaxed
+#define atomic64_sub_return_relaxed    atomic64_sub_return_relaxed
+#define atomic64_fetch_add_relaxed     atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed     atomic64_fetch_sub_relaxed
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, c_op, asm_op)                                       \
+       ATOMIC64_OP(op, c_op, asm_op)                                         \
+       ATOMIC64_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC64_OPS(and, &=, and)
+ATOMIC64_OPS(or, |=, or)
+ATOMIC64_OPS(xor, ^=, xor)
+
+#define atomic64_fetch_and_relaxed     atomic64_fetch_and_relaxed
+#define atomic64_fetch_or_relaxed      atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed     atomic64_fetch_xor_relaxed
 
 #undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
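  [Editor's note: the MIPS hunks above drop the explicit barriers, rename the
  return forms to _relaxed, and add fetch variants. As a reading aid, here is
  a minimal C11 model of the two return conventions involved; it illustrates
  the contract the LL/SC assembly implements, not the assembly itself, and
  the names are hypothetical.]

    #include <stdatomic.h>

    /* fetch_*: returns the counter value *before* the operation */
    static int model_fetch_add(atomic_int *v, int i)
    {
            return atomic_fetch_add_explicit(v, i, memory_order_relaxed);
    }

    /* *_return: returns the value *after* the operation (old + i) */
    static int model_add_return(atomic_int *v, int i)
    {
            return atomic_fetch_add_explicit(v, i, memory_order_relaxed) + i;
    }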
index 40196bebe849a0c825cc8a61e40b03440d14f31e..f485afe51514765710eaf990b674e92378effe61 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/compiler.h>
 
 #include <asm/barrier.h>
+#include <asm/processor.h>
 #include <asm/compiler.h>
 #include <asm/war.h>
 
@@ -48,8 +49,22 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 }
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
-       while (arch_spin_is_locked(x)) { cpu_relax(); }
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+       u16 owner = READ_ONCE(lock->h.serving_now);
+       smp_rmb();
+       for (;;) {
+               arch_spinlock_t tmp = READ_ONCE(*lock);
+
+               if (tmp.h.serving_now == tmp.h.ticket ||
+                   tmp.h.serving_now != owner)
+                       break;
+
+               cpu_relax();
+       }
+       smp_acquire__after_ctrl_dep();
+}
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
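  [Editor's note: the new MIPS arch_spin_unlock_wait() snapshots the current
  owner, then spins until the lock is free or ownership has changed hands at
  least once, finishing with a control-dependency acquire barrier. Below is a
  stand-alone C11 approximation of that shape, with a hypothetical packed
  ticket-lock layout; the kernel version additionally issues smp_rmb() after
  the owner snapshot.]

    #include <stdatomic.h>
    #include <stdint.h>

    struct ticket_lock {
            _Atomic uint32_t word;  /* low 16 bits: serving_now, high 16: ticket */
    };

    static void model_unlock_wait(struct ticket_lock *lock)
    {
            uint16_t owner = (uint16_t)atomic_load_explicit(&lock->word,
                                                            memory_order_relaxed);
            for (;;) {
                    uint32_t w = atomic_load_explicit(&lock->word,
                                                      memory_order_relaxed);
                    uint16_t serving = (uint16_t)w;
                    uint16_t ticket  = (uint16_t)(w >> 16);

                    /* free now, or the owner we sampled has moved on */
                    if (serving == ticket || serving != owner)
                            break;
            }
            /* stand-in for smp_acquire__after_ctrl_dep() */
            atomic_thread_fence(memory_order_acquire);
    }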
index ce318d5ab23b06484e063d59e564bef6a2673700..36389efd45e8a2e97ae65d6f3630df5b7621dc35 100644 (file)
@@ -84,16 +84,41 @@ static inline int atomic_##op##_return(int i, atomic_t *v)          \
        return retval;                                                  \
 }
 
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op)                                            \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       int retval, status;                                             \
+                                                                       \
+       asm volatile(                                                   \
+               "1:     mov     %4,(_AAR,%3)    \n"                     \
+               "       mov     (_ADR,%3),%1    \n"                     \
+               "       mov     %1,%0           \n"                     \
+               "       " #op " %5,%0           \n"                     \
+               "       mov     %0,(_ADR,%3)    \n"                     \
+               "       mov     (_ADR,%3),%0    \n"     /* flush */     \
+               "       mov     (_ASR,%3),%0    \n"                     \
+               "       or      %0,%0           \n"                     \
+               "       bne     1b              \n"                     \
+               : "=&r"(status), "=&r"(retval), "=m"(v->counter)        \
+               : "a"(ATOMIC_OPS_BASE_ADDR), "r"(&v->counter), "r"(i)   \
+               : "memory", "cc");                                      \
+       return retval;                                                  \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
 
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
index 1ae580f3893344fe7ad99c71b4c5fa19c0692870..9c7b8f7942d8e9aadf3008875774b55a37959ae9 100644 (file)
@@ -12,6 +12,8 @@
 #define _ASM_SPINLOCK_H
 
 #include <linux/atomic.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 #include <asm/rwlock.h>
 #include <asm/page.h>
 
  */
 
 #define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) != 0)
-#define arch_spin_unlock_wait(x) do { barrier(); } while (arch_spin_is_locked(x))
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+       smp_cond_load_acquire(&lock->slock, !VAL);
+}
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
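  [Editor's note: several conversions in this merge replace open-coded "spin
  until unlocked" loops with smp_cond_load_acquire(ptr, cond), where VAL
  names the freshly loaded value inside cond. A rough stand-alone model in
  GNU C11 follows (statement expressions are a GCC extension, as in the
  kernel); it is illustrative, not the kernel macro.]

    #include <stdatomic.h>

    #define model_cond_load_acquire(ptr, cond_expr) ({              \
            __typeof__(*(ptr)) VAL;                                 \
            for (;;) {                                              \
                    VAL = atomic_load_explicit((ptr),               \
                                    memory_order_relaxed);          \
                    if (cond_expr)          /* VAL visible here */  \
                            break;                                  \
            }                                                       \
            atomic_thread_fence(memory_order_acquire);              \
            VAL;                                                    \
    })

    /* usage shape mirroring the hunk above:
     * model_cond_load_acquire(&lock->slock, !VAL); */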
index 1d109990a02242aafb76c19da65a2f74d34367b9..5394b9c5f914fdc4339224a3950e9d1c4ccd6bbb 100644 (file)
@@ -121,16 +121,39 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v)            \
        return ret;                                                     \
 }
 
-#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op)
+#define ATOMIC_FETCH_OP(op, c_op)                                      \
+static __inline__ int atomic_fetch_##op(int i, atomic_t *v)            \
+{                                                                      \
+       unsigned long flags;                                            \
+       int ret;                                                        \
+                                                                       \
+       _atomic_spin_lock_irqsave(v, flags);                            \
+       ret = v->counter;                                               \
+       v->counter c_op i;                                              \
+       _atomic_spin_unlock_irqrestore(v, flags);                       \
+                                                                       \
+       return ret;                                                     \
+}
+
+#define ATOMIC_OPS(op, c_op)                                           \
+       ATOMIC_OP(op, c_op)                                             \
+       ATOMIC_OP_RETURN(op, c_op)                                      \
+       ATOMIC_FETCH_OP(op, c_op)
 
 ATOMIC_OPS(add, +=)
 ATOMIC_OPS(sub, -=)
 
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or, |=)
-ATOMIC_OP(xor, ^=)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op)                                           \
+       ATOMIC_OP(op, c_op)                                             \
+       ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &=)
+ATOMIC_OPS(or, |=)
+ATOMIC_OPS(xor, ^=)
 
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
@@ -185,15 +208,39 @@ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v)        \
        return ret;                                                     \
 }
 
-#define ATOMIC64_OPS(op, c_op) ATOMIC64_OP(op, c_op) ATOMIC64_OP_RETURN(op, c_op)
+#define ATOMIC64_FETCH_OP(op, c_op)                                    \
+static __inline__ s64 atomic64_fetch_##op(s64 i, atomic64_t *v)                \
+{                                                                      \
+       unsigned long flags;                                            \
+       s64 ret;                                                        \
+                                                                       \
+       _atomic_spin_lock_irqsave(v, flags);                            \
+       ret = v->counter;                                               \
+       v->counter c_op i;                                              \
+       _atomic_spin_unlock_irqrestore(v, flags);                       \
+                                                                       \
+       return ret;                                                     \
+}
+
+#define ATOMIC64_OPS(op, c_op)                                         \
+       ATOMIC64_OP(op, c_op)                                           \
+       ATOMIC64_OP_RETURN(op, c_op)                                    \
+       ATOMIC64_FETCH_OP(op, c_op)
 
 ATOMIC64_OPS(add, +=)
 ATOMIC64_OPS(sub, -=)
-ATOMIC64_OP(and, &=)
-ATOMIC64_OP(or, |=)
-ATOMIC64_OP(xor, ^=)
 
 #undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, c_op)                                         \
+       ATOMIC64_OP(op, c_op)                                           \
+       ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &=)
+ATOMIC64_OPS(or, |=)
+ATOMIC64_OPS(xor, ^=)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
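  [Editor's note: parisc's fallback path serializes each atomic through a
  spinlock picked by hashing the variable's address (_atomic_spin_lock_irqsave
  above). Here is a user-space sketch of that fallback; the table size, shift,
  and names are made up for illustration, and the array initializer uses a GNU
  range designator.]

    #include <pthread.h>
    #include <stdint.h>

    #define NLOCKS 16
    static pthread_mutex_t hashed_locks[NLOCKS] = {
            [0 ... NLOCKS - 1] = PTHREAD_MUTEX_INITIALIZER
    };

    /* pick a lock by address so unrelated counters rarely contend */
    static pthread_mutex_t *lock_for(void *addr)
    {
            return &hashed_locks[((uintptr_t)addr >> 4) % NLOCKS];
    }

    static int fallback_fetch_add(int *counter, int i)
    {
            pthread_mutex_t *m = lock_for(counter);
            int old;

            pthread_mutex_lock(m);
            old = *counter;         /* fetch form: pre-op value */
            *counter += i;
            pthread_mutex_unlock(m);
            return old;
    }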
index 64f2992e439fcd81811e7a44d322bdc5aafd9ff1..e32936cd7f1017a418bf1f086cfc819640616260 100644 (file)
@@ -13,8 +13,13 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x)
 }
 
 #define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0)
-#define arch_spin_unlock_wait(x) \
-               do { cpu_relax(); } while (arch_spin_is_locked(x))
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *x)
+{
+       volatile unsigned int *a = __ldcw_align(x);
+
+       smp_cond_load_acquire(a, VAL);
+}
 
 static inline void arch_spin_lock_flags(arch_spinlock_t *x,
                                         unsigned long flags)
index ae0751ef8788fd55f7c4d6b37a8fbb73d82d317f..f08d567e0ca4dfb75d560afa6b72473643f12c56 100644 (file)
@@ -78,21 +78,53 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v)  \
        return t;                                                       \
 }
 
+#define ATOMIC_FETCH_OP_RELAXED(op, asm_op)                            \
+static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v)      \
+{                                                                      \
+       int res, t;                                                     \
+                                                                       \
+       __asm__ __volatile__(                                           \
+"1:    lwarx   %0,0,%4         # atomic_fetch_" #op "_relaxed\n"       \
+       #asm_op " %1,%3,%0\n"                                           \
+       PPC405_ERR77(0, %4)                                             \
+"      stwcx.  %1,0,%4\n"                                              \
+"      bne-    1b\n"                                                   \
+       : "=&r" (res), "=&r" (t), "+m" (v->counter)                     \
+       : "r" (a), "r" (&v->counter)                                    \
+       : "cc");                                                        \
+                                                                       \
+       return res;                                                     \
+}
+
 #define ATOMIC_OPS(op, asm_op)                                         \
        ATOMIC_OP(op, asm_op)                                           \
-       ATOMIC_OP_RETURN_RELAXED(op, asm_op)
+       ATOMIC_OP_RETURN_RELAXED(op, asm_op)                            \
+       ATOMIC_FETCH_OP_RELAXED(op, asm_op)
 
 ATOMIC_OPS(add, add)
 ATOMIC_OPS(sub, subf)
 
-ATOMIC_OP(and, and)
-ATOMIC_OP(or, or)
-ATOMIC_OP(xor, xor)
-
 #define atomic_add_return_relaxed atomic_add_return_relaxed
 #define atomic_sub_return_relaxed atomic_sub_return_relaxed
 
+#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, asm_op)                                         \
+       ATOMIC_OP(op, asm_op)                                           \
+       ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(or, or)
+ATOMIC_OPS(xor, xor)
+
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+#define atomic_fetch_or_relaxed  atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP_RELAXED
 #undef ATOMIC_OP_RETURN_RELAXED
 #undef ATOMIC_OP
 
@@ -329,20 +361,53 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v)                     \
        return t;                                                       \
 }
 
+#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op)                          \
+static inline long                                                     \
+atomic64_fetch_##op##_relaxed(long a, atomic64_t *v)                   \
+{                                                                      \
+       long res, t;                                                    \
+                                                                       \
+       __asm__ __volatile__(                                           \
+"1:    ldarx   %0,0,%4         # atomic64_fetch_" #op "_relaxed\n"     \
+       #asm_op " %1,%3,%0\n"                                           \
+"      stdcx.  %1,0,%4\n"                                              \
+"      bne-    1b\n"                                                   \
+       : "=&r" (res), "=&r" (t), "+m" (v->counter)                     \
+       : "r" (a), "r" (&v->counter)                                    \
+       : "cc");                                                        \
+                                                                       \
+       return res;                                                     \
+}
+
 #define ATOMIC64_OPS(op, asm_op)                                       \
        ATOMIC64_OP(op, asm_op)                                         \
-       ATOMIC64_OP_RETURN_RELAXED(op, asm_op)
+       ATOMIC64_OP_RETURN_RELAXED(op, asm_op)                          \
+       ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
 
 ATOMIC64_OPS(add, add)
 ATOMIC64_OPS(sub, subf)
-ATOMIC64_OP(and, and)
-ATOMIC64_OP(or, or)
-ATOMIC64_OP(xor, xor)
 
 #define atomic64_add_return_relaxed atomic64_add_return_relaxed
 #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
 
+#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, asm_op)                                       \
+       ATOMIC64_OP(op, asm_op)                                         \
+       ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
+
+ATOMIC64_OPS(and, and)
+ATOMIC64_OPS(or, or)
+ATOMIC64_OPS(xor, xor)
+
+#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+#define atomic64_fetch_or_relaxed  atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
+
 #undef ATOMIC64_OPS

+#undef ATOMIC64_FETCH_OP_RELAXED
 #undef ATOMIC64_OP_RETURN_RELAXED
 #undef ATOMIC64_OP
 
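  [Editor's note: powerpc supplies only the _relaxed fetch ops here; the fully
  ordered names are synthesized by the generic atomic layer, which brackets
  the relaxed op with barriers. A C11 sketch of that layering under
  hypothetical names; the real kernel macros emit lwsync/sync via
  smp_mb__before/after_atomic().]

    #include <stdatomic.h>

    static int fetch_add_relaxed_model(atomic_int *v, int i)
    {
            return atomic_fetch_add_explicit(v, i, memory_order_relaxed);
    }

    static int fetch_add_ordered_model(atomic_int *v, int i)
    {
            int old;

            atomic_thread_fence(memory_order_seq_cst);  /* before-barrier */
            old = fetch_add_relaxed_model(v, i);
            atomic_thread_fence(memory_order_seq_cst);  /* after-barrier */
            return old;
    }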
index 127ab23e1f6ccdbe038b6cfdd84c569742968e27..078155fa118992f1f456d9643786f7ccf7d19741 100644 (file)
@@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-       if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1))
+       if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1))
                return 1;
        return 0;
 }
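  [Editor's note: the one-line trylock change above adds a plain read before
  the cmpxchg, so a contended trylock fails without dirtying the cache line.
  A generic C11 sketch of the pattern; names are hypothetical.]

    #include <stdatomic.h>
    #include <stdbool.h>

    static bool trylock_fast(atomic_int *count)         /* 1 = unlocked */
    {
            int expected = 1;

            /* cheap test first: a cmpxchg on a held lock would still
             * pull the line exclusive just to fail */
            if (atomic_load_explicit(count, memory_order_relaxed) != 1)
                    return false;

            return atomic_compare_exchange_strong(count, &expected, 0);
    }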
index 911064aa59b2f25c5c1dc7a7ac846a560ea99b1e..d28cc2f5b7b2c706fe46ea8c93b5d59fa4ebc263 100644 (file)
@@ -93,6 +93,11 @@ static inline int atomic_add_return(int i, atomic_t *v)
        return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i;
 }
 
+static inline int atomic_fetch_add(int i, atomic_t *v)
+{
+       return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER);
+}
+
 static inline void atomic_add(int i, atomic_t *v)
 {
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -114,22 +119,27 @@ static inline void atomic_add(int i, atomic_t *v)
 #define atomic_inc_and_test(_v)                (atomic_add_return(1, _v) == 0)
 #define atomic_sub(_i, _v)             atomic_add(-(int)(_i), _v)
 #define atomic_sub_return(_i, _v)      atomic_add_return(-(int)(_i), _v)
+#define atomic_fetch_sub(_i, _v)       atomic_fetch_add(-(int)(_i), _v)
 #define atomic_sub_and_test(_i, _v)    (atomic_sub_return(_i, _v) == 0)
 #define atomic_dec(_v)                 atomic_sub(1, _v)
 #define atomic_dec_return(_v)          atomic_sub_return(1, _v)
 #define atomic_dec_and_test(_v)                (atomic_sub_return(1, _v) == 0)
 
-#define ATOMIC_OP(op, OP)                                              \
+#define ATOMIC_OPS(op, OP)                                             \
 static inline void atomic_##op(int i, atomic_t *v)                     \
 {                                                                      \
        __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_NO_BARRIER);        \
+}                                                                      \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       return __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_BARRIER);    \
 }
 
-ATOMIC_OP(and, AND)
-ATOMIC_OP(or, OR)
-ATOMIC_OP(xor, XOR)
+ATOMIC_OPS(and, AND)
+ATOMIC_OPS(or, OR)
+ATOMIC_OPS(xor, XOR)
 
-#undef ATOMIC_OP
+#undef ATOMIC_OPS
 
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
@@ -236,6 +246,11 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v)
        return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i;
 }
 
+static inline long long atomic64_fetch_add(long long i, atomic64_t *v)
+{
+       return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER);
+}
+
 static inline void atomic64_add(long long i, atomic64_t *v)
 {
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -264,17 +279,21 @@ static inline long long atomic64_cmpxchg(atomic64_t *v,
        return old;
 }
 
-#define ATOMIC64_OP(op, OP)                                            \
+#define ATOMIC64_OPS(op, OP)                                           \
 static inline void atomic64_##op(long i, atomic64_t *v)                        \
 {                                                                      \
        __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER);  \
+}                                                                      \
+static inline long atomic64_fetch_##op(long i, atomic64_t *v)          \
+{                                                                      \
+       return __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_BARRIER); \
 }
 
-ATOMIC64_OP(and, AND)
-ATOMIC64_OP(or, OR)
-ATOMIC64_OP(xor, XOR)
+ATOMIC64_OPS(and, AND)
+ATOMIC64_OPS(or, OR)
+ATOMIC64_OPS(xor, XOR)
 
-#undef ATOMIC64_OP
+#undef ATOMIC64_OPS
 #undef __ATOMIC64_LOOP
 
 static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
@@ -315,6 +334,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
 #define atomic64_inc_return(_v)                atomic64_add_return(1, _v)
 #define atomic64_inc_and_test(_v)      (atomic64_add_return(1, _v) == 0)
 #define atomic64_sub_return(_i, _v)    atomic64_add_return(-(long long)(_i), _v)
+#define atomic64_fetch_sub(_i, _v)     atomic64_fetch_add(-(long long)(_i), _v)
 #define atomic64_sub(_i, _v)           atomic64_add(-(long long)(_i), _v)
 #define atomic64_sub_and_test(_i, _v)  (atomic64_sub_return(_i, _v) == 0)
 #define atomic64_dec(_v)               atomic64_sub(1, _v)
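  [Editor's note: as on several other architectures in this series, s390
  derives fetch_sub from fetch_add by negating the operand; the (int) and
  (long long) casts guard against unsigned arguments. A one-line model of the
  same derivation, names hypothetical:]

    #include <stdatomic.h>

    static int s390_style_fetch_add(atomic_int *v, int i)
    {
            return atomic_fetch_add_explicit(v, i, memory_order_seq_cst);
    }

    /* negate through a signed cast, exactly as the #define above does */
    #define s390_style_fetch_sub(i, v) s390_style_fetch_add((v), -(int)(i))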
index c75e4471e618826a385c6aef2955b79c757ab9e7..597e7e96b59e8b9bb2948d9ae43799641abcc375 100644 (file)
@@ -207,41 +207,4 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
                rwsem_downgrade_wake(sem);
 }
 
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
-{
-       signed long old, new;
-
-       asm volatile(
-               "       lg      %0,%2\n"
-               "0:     lgr     %1,%0\n"
-               "       agr     %1,%4\n"
-               "       csg     %0,%1,%2\n"
-               "       jl      0b"
-               : "=&d" (old), "=&d" (new), "=Q" (sem->count)
-               : "Q" (sem->count), "d" (delta)
-               : "cc", "memory");
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
-{
-       signed long old, new;
-
-       asm volatile(
-               "       lg      %0,%2\n"
-               "0:     lgr     %1,%0\n"
-               "       agr     %1,%4\n"
-               "       csg     %0,%1,%2\n"
-               "       jl      0b"
-               : "=&d" (old), "=&d" (new), "=Q" (sem->count)
-               : "Q" (sem->count), "d" (delta)
-               : "cc", "memory");
-       return new;
-}
-
 #endif /* _S390_RWSEM_H */
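  [Editor's note: these s390 helpers appear to go away because the generic
  rwsem code in this merge operates on an atomic_long_t count directly (see
  the "Employ atomic_long_fetch_add()" commit in the series); that reading is
  inferred from the series, not stated in this hunk. A simplified model of
  the replacement shape:]

    #include <stdatomic.h>

    struct rwsem_model {
            _Atomic long count;     /* real struct has wait list, owner, ... */
    };

    /* rwsem_atomic_update() returned the *new* value: old + delta */
    static long rwsem_update_model(struct rwsem_model *sem, long delta)
    {
            return atomic_fetch_add_explicit(&sem->count, delta,
                                             memory_order_seq_cst) + delta;
    }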
index 63ebf37d31438a647b8d38177b9cb107995e3e99..7e9e09f600fa5932849948de3bc4f66ea871cba2 100644 (file)
@@ -10,6 +10,8 @@
 #define __ASM_SPINLOCK_H
 
 #include <linux/smp.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 
 #define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
 
@@ -97,6 +99,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
        while (arch_spin_is_locked(lock))
                arch_spin_relax(lock);
+       smp_acquire__after_ctrl_dep();
 }
 
 /*
index b94df40e5f2d2b701fdb7c0858b06ab745f6b65e..d755e96c3064c132317eb2357b36fc106f1b68b8 100644 (file)
@@ -43,16 +43,42 @@ static inline int atomic_##op##_return(int i, atomic_t *v)          \
        return tmp;                                                     \
 }
 
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op)                                            \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       int res, tmp;                                                   \
+                                                                       \
+       __asm__ __volatile__ (                                          \
+               "   .align 2              \n\t"                         \
+               "   mova    1f,   r0      \n\t" /* r0 = end point */    \
+               "   mov    r15,   r1      \n\t" /* r1 = saved sp */     \
+               "   mov    #-6,   r15     \n\t" /* LOGIN: r15 = size */ \
+               "   mov.l  @%2,   %0      \n\t" /* load old value */    \
+               "   mov     %0,   %1      \n\t" /* save old value */    \
+               " " #op "   %3,   %0      \n\t" /* $op */               \
+               "   mov.l   %0,   @%2     \n\t" /* store new value */   \
+               "1: mov     r1,   r15     \n\t" /* LOGOUT */            \
+               : "=&r" (tmp), "=&r" (res), "+r"  (v)                   \
+               : "r"   (i)                                             \
+               : "memory" , "r0", "r1");                               \
+                                                                       \
+       return res;                                                     \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
 
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
index 23fcdad5773eea21cd58715dd76ed974fcdf15ee..8e2da5fa017830364296d4ac16773ee20dc1c4fe 100644 (file)
@@ -33,15 +33,38 @@ static inline int atomic_##op##_return(int i, atomic_t *v)          \
        return temp;                                                    \
 }
 
-#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op)
+#define ATOMIC_FETCH_OP(op, c_op)                                      \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       unsigned long temp, flags;                                      \
+                                                                       \
+       raw_local_irq_save(flags);                                      \
+       temp = v->counter;                                              \
+       v->counter c_op i;                                              \
+       raw_local_irq_restore(flags);                                   \
+                                                                       \
+       return temp;                                                    \
+}
+
+#define ATOMIC_OPS(op, c_op)                                           \
+       ATOMIC_OP(op, c_op)                                             \
+       ATOMIC_OP_RETURN(op, c_op)                                      \
+       ATOMIC_FETCH_OP(op, c_op)
 
 ATOMIC_OPS(add, +=)
 ATOMIC_OPS(sub, -=)
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or, |=)
-ATOMIC_OP(xor, ^=)
 
 #undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op)                                           \
+       ATOMIC_OP(op, c_op)                                             \
+       ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &=)
+ATOMIC_OPS(or, |=)
+ATOMIC_OPS(xor, ^=)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
index 33d34b16d4d67bb15ed129d1233c110640de9d7b..caea2c45f6c2ac43ee6c134c1b4c9a7ebc8295e3 100644 (file)
@@ -48,15 +48,39 @@ static inline int atomic_##op##_return(int i, atomic_t *v)          \
        return temp;                                                    \
 }
 
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op)                                            \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       unsigned long res, temp;                                        \
+                                                                       \
+       __asm__ __volatile__ (                                          \
+"1:    movli.l @%3, %0         ! atomic_fetch_" #op "  \n"             \
+"      mov %0, %1                                      \n"             \
+"      " #op " %2, %0                                  \n"             \
+"      movco.l %0, @%3                                 \n"             \
+"      bf      1b                                      \n"             \
+"      synco                                           \n"             \
+       : "=&z" (temp), "=&r" (res)                                     \
+       : "r" (i), "r" (&v->counter)                                    \
+       : "t");                                                         \
+                                                                       \
+       return res;                                                     \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
 
 #undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
index bdc0f3b6c56afdf23d6b628128e6171cc2552b74..416834b60ad0cce9584e296a110f35e6c8481f23 100644 (file)
 #error "Need movli.l/movco.l for spinlocks"
 #endif
 
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
  */
 
 #define arch_spin_is_locked(x)         ((x)->lock <= 0)
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
-       do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+       smp_cond_load_acquire(&lock->lock, VAL > 0);
+}
 
 /*
  * Simple spin lock operations.  There are two variants, one clears IRQ's
index 7dcbebbcaec6f0c742f941e580bc6564b1158c13..ee3f11c43cdaa2ef6ab6fe444130e8dd2a8a5541 100644 (file)
 #define ATOMIC_INIT(i)  { (i) }
 
 int atomic_add_return(int, atomic_t *);
-void atomic_and(int, atomic_t *);
-void atomic_or(int, atomic_t *);
-void atomic_xor(int, atomic_t *);
+int atomic_fetch_add(int, atomic_t *);
+int atomic_fetch_and(int, atomic_t *);
+int atomic_fetch_or(int, atomic_t *);
+int atomic_fetch_xor(int, atomic_t *);
 int atomic_cmpxchg(atomic_t *, int, int);
 int atomic_xchg(atomic_t *, int);
 int __atomic_add_unless(atomic_t *, int, int);
@@ -35,7 +36,13 @@ void atomic_set(atomic_t *, int);
 #define atomic_inc(v)          ((void)atomic_add_return(        1, (v)))
 #define atomic_dec(v)          ((void)atomic_add_return(       -1, (v)))
 
+#define atomic_and(i, v)       ((void)atomic_fetch_and((i), (v)))
+#define atomic_or(i, v)                ((void)atomic_fetch_or((i), (v)))
+#define atomic_xor(i, v)       ((void)atomic_fetch_xor((i), (v)))
+
 #define atomic_sub_return(i, v)        (atomic_add_return(-(int)(i), (v)))
+#define atomic_fetch_sub(i, v)  (atomic_fetch_add (-(int)(i), (v)))
+
 #define atomic_inc_return(v)   (atomic_add_return(        1, (v)))
 #define atomic_dec_return(v)   (atomic_add_return(       -1, (v)))
 
index f2fbf9e16fafca66c4aa01145645f1748775cde2..24827a3f733a9b23e74509a16089a1790c8a16e8 100644 (file)
@@ -28,16 +28,24 @@ void atomic64_##op(long, atomic64_t *);
 int atomic_##op##_return(int, atomic_t *);                             \
 long atomic64_##op##_return(long, atomic64_t *);
 
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op)                                            \
+int atomic_fetch_##op(int, atomic_t *);                                        \
+long atomic64_fetch_##op(long, atomic64_t *);
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
 
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
index bcc98fc35281c73bed1ae1e35cb4d56aade44794..d9c5876c61215494df0238992da09c03f5d82211 100644 (file)
@@ -9,12 +9,15 @@
 #ifndef __ASSEMBLY__
 
 #include <asm/psr.h>
+#include <asm/barrier.h>
 #include <asm/processor.h> /* for cpu_relax */
 
 #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
 
-#define arch_spin_unlock_wait(lock) \
-       do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+       smp_cond_load_acquire(&lock->lock, !VAL);
+}
 
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
index 9689176949781fc5ae8985a794def4521e6ba72b..87990b7c6b0d693eb4c715f33047e5d8cfa0e5c6 100644 (file)
@@ -8,6 +8,9 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/processor.h>
+#include <asm/barrier.h>
+
 /* To get debugging spinlocks which detect and catch
  * deadlock situations, set CONFIG_DEBUG_SPINLOCK
  * and rebuild your kernel.
 
 #define arch_spin_is_locked(lp)        ((lp)->lock != 0)
 
-#define arch_spin_unlock_wait(lp)      \
-       do {    rmb();                  \
-       } while((lp)->lock)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+       smp_cond_load_acquire(&lock->lock, !VAL);
+}
 
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
index b9d63c0a7aabc2c726ff54c617f99bdb8b3ca74a..2c373329d5cb8dc8dc87c934feb8dbf64ecf9ce6 100644 (file)
@@ -27,39 +27,44 @@ static DEFINE_SPINLOCK(dummy);
 
 #endif /* SMP */
 
-#define ATOMIC_OP_RETURN(op, c_op)                                     \
-int atomic_##op##_return(int i, atomic_t *v)                           \
+#define ATOMIC_FETCH_OP(op, c_op)                                      \
+int atomic_fetch_##op(int i, atomic_t *v)                              \
 {                                                                      \
        int ret;                                                        \
        unsigned long flags;                                            \
        spin_lock_irqsave(ATOMIC_HASH(v), flags);                       \
                                                                        \
-       ret = (v->counter c_op i);                                      \
+       ret = v->counter;                                               \
+       v->counter c_op i;                                              \
                                                                        \
        spin_unlock_irqrestore(ATOMIC_HASH(v), flags);                  \
        return ret;                                                     \
 }                                                                      \
-EXPORT_SYMBOL(atomic_##op##_return);
+EXPORT_SYMBOL(atomic_fetch_##op);
 
-#define ATOMIC_OP(op, c_op)                                            \
-void atomic_##op(int i, atomic_t *v)                                   \
+#define ATOMIC_OP_RETURN(op, c_op)                                     \
+int atomic_##op##_return(int i, atomic_t *v)                           \
 {                                                                      \
+       int ret;                                                        \
        unsigned long flags;                                            \
        spin_lock_irqsave(ATOMIC_HASH(v), flags);                       \
                                                                        \
-       v->counter c_op i;                                              \
+       ret = (v->counter c_op i);                                      \
                                                                        \
        spin_unlock_irqrestore(ATOMIC_HASH(v), flags);                  \
+       return ret;                                                     \
 }                                                                      \
-EXPORT_SYMBOL(atomic_##op);
+EXPORT_SYMBOL(atomic_##op##_return);
 
 ATOMIC_OP_RETURN(add, +=)
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or, |=)
-ATOMIC_OP(xor, ^=)
 
+ATOMIC_FETCH_OP(add, +=)
+ATOMIC_FETCH_OP(and, &=)
+ATOMIC_FETCH_OP(or, |=)
+ATOMIC_FETCH_OP(xor, ^=)
+
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
 
 int atomic_xchg(atomic_t *v, int new)
 {
index d6b0363f345bfbd41aeb4a7a287ee651404f2259..a5c5a0279cccc7b3487e9361500cf8162a3c89ad 100644 (file)
@@ -9,10 +9,11 @@
 
        .text
 
-       /* Two versions of the atomic routines, one that
+       /* Three versions of the atomic routines, one that
         * does not return a value and does not perform
-        * memory barriers, and a second which returns
-        * a value and does the barriers.
+        * memory barriers, and two which return
+        * a value, the new and old value respectively,
+        * and do the barriers.
         */
 
 #define ATOMIC_OP(op)                                                  \
@@ -43,15 +44,34 @@ ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \
 2:     BACKOFF_SPIN(%o2, %o3, 1b);                                     \
 ENDPROC(atomic_##op##_return);
 
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op)                                            \
+ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */       \
+       BACKOFF_SETUP(%o2);                                             \
+1:     lduw    [%o1], %g1;                                             \
+       op      %g1, %o0, %g7;                                          \
+       cas     [%o1], %g1, %g7;                                        \
+       cmp     %g1, %g7;                                               \
+       bne,pn  %icc, BACKOFF_LABEL(2f, 1b);                            \
+        nop;                                                           \
+       retl;                                                           \
+        sra    %g1, 0, %o0;                                            \
+2:     BACKOFF_SPIN(%o2, %o3, 1b);                                     \
+ENDPROC(atomic_fetch_##op);
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
 
 #undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
@@ -83,15 +103,34 @@ ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */      \
 2:     BACKOFF_SPIN(%o2, %o3, 1b);                                     \
 ENDPROC(atomic64_##op##_return);
 
-#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op)
+#define ATOMIC64_FETCH_OP(op)                                          \
+ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */     \
+       BACKOFF_SETUP(%o2);                                             \
+1:     ldx     [%o1], %g1;                                             \
+       op      %g1, %o0, %g7;                                          \
+       casx    [%o1], %g1, %g7;                                        \
+       cmp     %g1, %g7;                                               \
+       bne,pn  %xcc, BACKOFF_LABEL(2f, 1b);                            \
+        nop;                                                           \
+       retl;                                                           \
+        mov    %g1, %o0;                                               \
+2:     BACKOFF_SPIN(%o2, %o3, 1b);                                     \
+ENDPROC(atomic64_fetch_##op);
+
+#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op)
 
 ATOMIC64_OPS(add)
 ATOMIC64_OPS(sub)
-ATOMIC64_OP(and)
-ATOMIC64_OP(or)
-ATOMIC64_OP(xor)
 
 #undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_FETCH_OP(op)
+
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+ATOMIC64_OPS(xor)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
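  [Editor's note: sparc64 builds each fetch op as a compare-and-swap retry
  loop (cas/casx above). The same loop in portable C11, shown for one op with
  hypothetical names:]

    #include <stdatomic.h>

    static int cas_fetch_and(atomic_int *v, int i)
    {
            int old = atomic_load_explicit(v, memory_order_relaxed);

            /* retry until we install (old & i) over the old we read;
             * a failed CAS reloads 'old' with the current value */
            while (!atomic_compare_exchange_weak(v, &old, old & i))
                    ;
            return old;     /* fetch form: pre-op value */
    }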
index 8eb454cfe05c9f17a3d084306b085f6dde095ee3..de5e97817bdb6be31fd95f63030112035b766b27 100644 (file)
@@ -107,15 +107,24 @@ EXPORT_SYMBOL(atomic64_##op);
 EXPORT_SYMBOL(atomic_##op##_return);                                   \
 EXPORT_SYMBOL(atomic64_##op##_return);
 
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op)                                            \
+EXPORT_SYMBOL(atomic_fetch_##op);                                      \
+EXPORT_SYMBOL(atomic64_fetch_##op);
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
 
 #undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
index 9fc0107a9c5e557f226b62da9a5e086a4f7b4272..8dda3c8ff5ab5f4b0930af466b068a23b7ffcd04 100644 (file)
@@ -46,6 +46,8 @@ static inline int atomic_read(const atomic_t *v)
  */
 #define atomic_sub_return(i, v)                atomic_add_return((int)(-(i)), (v))
 
+#define atomic_fetch_sub(i, v)         atomic_fetch_add(-(int)(i), (v))
+
 /**
  * atomic_sub - subtract integer from atomic variable
  * @i: integer value to subtract
index d320ce253d8630d1dee48a366de0cf3b7823c0ab..a93774255136d4c250065407d2c295ac7f2088e7 100644 (file)
@@ -34,18 +34,29 @@ static inline void atomic_add(int i, atomic_t *v)
        _atomic_xchg_add(&v->counter, i);
 }
 
-#define ATOMIC_OP(op)                                                  \
-unsigned long _atomic_##op(volatile unsigned long *p, unsigned long mask); \
+#define ATOMIC_OPS(op)                                                 \
+unsigned long _atomic_fetch_##op(volatile unsigned long *p, unsigned long mask); \
 static inline void atomic_##op(int i, atomic_t *v)                     \
 {                                                                      \
-       _atomic_##op((unsigned long *)&v->counter, i);                  \
+       _atomic_fetch_##op((unsigned long *)&v->counter, i);            \
+}                                                                      \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       smp_mb();                                                       \
+       return _atomic_fetch_##op((unsigned long *)&v->counter, i);     \
 }
 
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
 
-#undef ATOMIC_OP
+#undef ATOMIC_OPS
+
+static inline int atomic_fetch_add(int i, atomic_t *v)
+{
+       smp_mb();
+       return _atomic_xchg_add(&v->counter, i);
+}
 
 /**
  * atomic_add_return - add integer and return
@@ -126,16 +137,29 @@ static inline void atomic64_add(long long i, atomic64_t *v)
        _atomic64_xchg_add(&v->counter, i);
 }
 
-#define ATOMIC64_OP(op)                                                \
-long long _atomic64_##op(long long *v, long long n);           \
+#define ATOMIC64_OPS(op)                                       \
+long long _atomic64_fetch_##op(long long *v, long long n);     \
 static inline void atomic64_##op(long long i, atomic64_t *v)   \
 {                                                              \
-       _atomic64_##op(&v->counter, i);                         \
+       _atomic64_fetch_##op(&v->counter, i);                   \
+}                                                              \
+static inline long long atomic64_fetch_##op(long long i, atomic64_t *v)        \
+{                                                              \
+       smp_mb();                                               \
+       return _atomic64_fetch_##op(&v->counter, i);            \
 }
 
-ATOMIC64_OP(and)
-ATOMIC64_OP(or)
-ATOMIC64_OP(xor)
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+ATOMIC64_OPS(xor)
+
+#undef ATOMIC64_OPS
+
+static inline long long atomic64_fetch_add(long long i, atomic64_t *v)
+{
+       smp_mb();
+       return _atomic64_xchg_add(&v->counter, i);
+}
 
 /**
  * atomic64_add_return - add integer and return
@@ -186,6 +210,7 @@ static inline void atomic64_set(atomic64_t *v, long long n)
 #define atomic64_inc_return(v)         atomic64_add_return(1LL, (v))
 #define atomic64_inc_and_test(v)       (atomic64_inc_return(v) == 0)
 #define atomic64_sub_return(i, v)      atomic64_add_return(-(i), (v))
+#define atomic64_fetch_sub(i, v)       atomic64_fetch_add(-(i), (v))
 #define atomic64_sub_and_test(a, v)    (atomic64_sub_return((a), (v)) == 0)
 #define atomic64_sub(i, v)             atomic64_add(-(i), (v))
 #define atomic64_dec(v)                        atomic64_sub(1LL, (v))
@@ -193,7 +218,6 @@ static inline void atomic64_set(atomic64_t *v, long long n)
 #define atomic64_dec_and_test(v)       (atomic64_dec_return((v)) == 0)
 #define atomic64_inc_not_zero(v)       atomic64_add_unless((v), 1LL, 0LL)
 
-
 #endif /* !__ASSEMBLY__ */
 
 /*
@@ -242,16 +266,16 @@ struct __get_user {
        unsigned long val;
        int err;
 };
-extern struct __get_user __atomic_cmpxchg(volatile int *p,
+extern struct __get_user __atomic32_cmpxchg(volatile int *p,
                                          int *lock, int o, int n);
-extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
+extern struct __get_user __atomic32_xchg(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_xchg_add(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_xchg_add_unless(volatile int *p,
                                                  int *lock, int o, int n);
-extern struct __get_user __atomic_or(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_and(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_or(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_and(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_andn(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_xor(volatile int *p, int *lock, int n);
 extern long long __atomic64_cmpxchg(volatile long long *p, int *lock,
                                        long long o, long long n);
 extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n);
@@ -259,9 +283,9 @@ extern long long __atomic64_xchg_add(volatile long long *p, int *lock,
                                        long long n);
 extern long long __atomic64_xchg_add_unless(volatile long long *p,
                                        int *lock, long long o, long long n);
-extern long long __atomic64_and(volatile long long *p, int *lock, long long n);
-extern long long __atomic64_or(volatile long long *p, int *lock, long long n);
-extern long long __atomic64_xor(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_fetch_and(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_fetch_or(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_fetch_xor(volatile long long *p, int *lock, long long n);
 
 /* Return failure from the atomic wrappers. */
 struct __get_user __atomic_bad_address(int __user *addr);
index b0531a623653a702544c475c110e8d19cb7b10eb..4cefa0c9fd81303d30d65a27e0e60adc43a4860f 100644 (file)
  * on any routine which updates memory and returns a value.
  */
 
-static inline void atomic_add(int i, atomic_t *v)
-{
-       __insn_fetchadd4((void *)&v->counter, i);
-}
-
 /*
  * Note a subtlety of the locking here.  We are required to provide a
  * full memory barrier before and after the operation.  However, we
@@ -59,28 +54,39 @@ static inline int atomic_add_return(int i, atomic_t *v)
        return val;
 }
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+#define ATOMIC_OPS(op)                                                 \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       int val;                                                        \
+       smp_mb();                                                       \
+       val = __insn_fetch##op##4((void *)&v->counter, i);              \
+       smp_mb();                                                       \
+       return val;                                                     \
+}                                                                      \
+static inline void atomic_##op(int i, atomic_t *v)                     \
+{                                                                      \
+       __insn_fetch##op##4((void *)&v->counter, i);                    \
+}
+
+ATOMIC_OPS(add)
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+
+#undef ATOMIC_OPS
+
+static inline int atomic_fetch_xor(int i, atomic_t *v)
 {
        int guess, oldval = v->counter;
+       smp_mb();
        do {
-               if (oldval == u)
-                       break;
                guess = oldval;
-               oldval = cmpxchg(&v->counter, guess, guess + a);
+               __insn_mtspr(SPR_CMPEXCH_VALUE, guess);
+               oldval = __insn_cmpexch4(&v->counter, guess ^ i);
        } while (guess != oldval);
+       smp_mb();
        return oldval;
 }
 
-static inline void atomic_and(int i, atomic_t *v)
-{
-       __insn_fetchand4((void *)&v->counter, i);
-}
-
-static inline void atomic_or(int i, atomic_t *v)
-{
-       __insn_fetchor4((void *)&v->counter, i);
-}
-
 static inline void atomic_xor(int i, atomic_t *v)
 {
        int guess, oldval = v->counter;
@@ -91,6 +97,18 @@ static inline void atomic_xor(int i, atomic_t *v)
        } while (guess != oldval);
 }
 
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+       int guess, oldval = v->counter;
+       do {
+               if (oldval == u)
+                       break;
+               guess = oldval;
+               oldval = cmpxchg(&v->counter, guess, guess + a);
+       } while (guess != oldval);
+       return oldval;
+}
+
 /* Now the true 64-bit operations. */
 
 #define ATOMIC64_INIT(i)       { (i) }
@@ -98,11 +116,6 @@ static inline void atomic_xor(int i, atomic_t *v)
 #define atomic64_read(v)       READ_ONCE((v)->counter)
 #define atomic64_set(v, i)     WRITE_ONCE((v)->counter, (i))
 
-static inline void atomic64_add(long i, atomic64_t *v)
-{
-       __insn_fetchadd((void *)&v->counter, i);
-}
-
 static inline long atomic64_add_return(long i, atomic64_t *v)
 {
        int val;
@@ -112,26 +125,37 @@ static inline long atomic64_add_return(long i, atomic64_t *v)
        return val;
 }
 
-static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
+#define ATOMIC64_OPS(op)                                               \
+static inline long atomic64_fetch_##op(long i, atomic64_t *v)          \
+{                                                                      \
+       long val;                                                       \
+       smp_mb();                                                       \
+       val = __insn_fetch##op((void *)&v->counter, i);                 \
+       smp_mb();                                                       \
+       return val;                                                     \
+}                                                                      \
+static inline void atomic64_##op(long i, atomic64_t *v)                        \
+{                                                                      \
+       __insn_fetch##op((void *)&v->counter, i);                       \
+}
+
+ATOMIC64_OPS(add)
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+
+#undef ATOMIC64_OPS
+
+static inline long atomic64_fetch_xor(long i, atomic64_t *v)
 {
        long guess, oldval = v->counter;
+       smp_mb();
        do {
-               if (oldval == u)
-                       break;
                guess = oldval;
-               oldval = cmpxchg(&v->counter, guess, guess + a);
+               __insn_mtspr(SPR_CMPEXCH_VALUE, guess);
+               oldval = __insn_cmpexch(&v->counter, guess ^ i);
        } while (guess != oldval);
-       return oldval != u;
-}
-
-static inline void atomic64_and(long i, atomic64_t *v)
-{
-       __insn_fetchand((void *)&v->counter, i);
-}
-
-static inline void atomic64_or(long i, atomic64_t *v)
-{
-       __insn_fetchor((void *)&v->counter, i);
+       smp_mb();
+       return oldval;
 }
 
 static inline void atomic64_xor(long i, atomic64_t *v)
@@ -144,7 +168,20 @@ static inline void atomic64_xor(long i, atomic64_t *v)
        } while (guess != oldval);
 }
 
+static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
+{
+       long guess, oldval = v->counter;
+       do {
+               if (oldval == u)
+                       break;
+               guess = oldval;
+               oldval = cmpxchg(&v->counter, guess, guess + a);
+       } while (guess != oldval);
+       return oldval != u;
+}
+
 #define atomic64_sub_return(i, v)      atomic64_add_return(-(i), (v))
+#define atomic64_fetch_sub(i, v)       atomic64_fetch_add(-(i), (v))
 #define atomic64_sub(i, v)             atomic64_add(-(i), (v))
 #define atomic64_inc_return(v)         atomic64_add_return(1, (v))
 #define atomic64_dec_return(v)         atomic64_sub_return(1, (v))
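
The xor paths above have no single fetch instruction on tile, so they loop on compare-exchange between two full barriers. A portable sketch of that retry pattern, with C11 fences standing in for the smp_mb() pair (illustrative, not the tile code):

#include <stdatomic.h>

static long fetch_xor_sketch(_Atomic long *counter, long i)
{
        long guess, oldval = atomic_load_explicit(counter, memory_order_relaxed);

        atomic_thread_fence(memory_order_seq_cst);              /* smp_mb() */
        do {
                guess = oldval;
                /* on failure, oldval is refreshed to the current value */
                atomic_compare_exchange_strong_explicit(counter, &oldval,
                                guess ^ i, memory_order_relaxed,
                                memory_order_relaxed);
        } while (guess != oldval);
        atomic_thread_fence(memory_order_seq_cst);              /* smp_mb() */
        return oldval;                                          /* pre-xor value */
}
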
index d55222806c2f7d7ee96f135da21ec787d1123610..4c419ab95ab772d0b3190c9d93222348f647aae9 100644 (file)
@@ -87,6 +87,13 @@ mb_incoherent(void)
 #define __smp_mb__after_atomic()       __smp_mb()
 #endif
 
+/*

+ * The TILE architecture does not do speculative reads; a control
+ * dependency therefore also orders against later loads, already giving
+ * LOAD->{LOAD,STORE} ordering, so the additional RMB can be forgone.
+ */
+#define smp_acquire__after_ctrl_dep()  barrier()
+
 #include <asm-generic/barrier.h>
 
 #endif /* !__ASSEMBLY__ */
index bbf7b666f21df85cd511fbdde71edf403041644d..d1406a95f6b7d9412b92f844da013cc73bd0121f 100644 (file)
@@ -19,9 +19,9 @@
 #include <asm/barrier.h>
 
 /* Tile-specific routines to support <asm/bitops.h>. */
-unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask);
-unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask);
-unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask);
 
 /**
  * set_bit - Atomically set a bit in memory
@@ -35,7 +35,7 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask);
  */
 static inline void set_bit(unsigned nr, volatile unsigned long *addr)
 {
-       _atomic_or(addr + BIT_WORD(nr), BIT_MASK(nr));
+       _atomic_fetch_or(addr + BIT_WORD(nr), BIT_MASK(nr));
 }
 
 /**
@@ -54,7 +54,7 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr)
  */
 static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
 {
-       _atomic_andn(addr + BIT_WORD(nr), BIT_MASK(nr));
+       _atomic_fetch_andn(addr + BIT_WORD(nr), BIT_MASK(nr));
 }
 
 /**
@@ -69,7 +69,7 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
  */
 static inline void change_bit(unsigned nr, volatile unsigned long *addr)
 {
-       _atomic_xor(addr + BIT_WORD(nr), BIT_MASK(nr));
+       _atomic_fetch_xor(addr + BIT_WORD(nr), BIT_MASK(nr));
 }
 
 /**
@@ -85,7 +85,7 @@ static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr)
        unsigned long mask = BIT_MASK(nr);
        addr += BIT_WORD(nr);
        smp_mb();  /* barrier for proper semantics */
-       return (_atomic_or(addr, mask) & mask) != 0;
+       return (_atomic_fetch_or(addr, mask) & mask) != 0;
 }
 
 /**
@@ -101,7 +101,7 @@ static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr)
        unsigned long mask = BIT_MASK(nr);
        addr += BIT_WORD(nr);
        smp_mb();  /* barrier for proper semantics */
-       return (_atomic_andn(addr, mask) & mask) != 0;
+       return (_atomic_fetch_andn(addr, mask) & mask) != 0;
 }
 
 /**
@@ -118,7 +118,7 @@ static inline int test_and_change_bit(unsigned nr,
        unsigned long mask = BIT_MASK(nr);
        addr += BIT_WORD(nr);
        smp_mb();  /* barrier for proper semantics */
-       return (_atomic_xor(addr, mask) & mask) != 0;
+       return (_atomic_fetch_xor(addr, mask) & mask) != 0;
 }
 
 #include <asm-generic/bitops/ext2-atomic.h>
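
The bitops above now share the fetch-style helpers: the test_and_* variants simply inspect the returned old word. A self-contained sketch of that derivation (C11 atomics; names are illustrative):

#include <stdatomic.h>
#include <limits.h>

#define BITS_PER_LONG_SK (CHAR_BIT * sizeof(unsigned long))

static int test_and_set_bit_sketch(unsigned nr, _Atomic unsigned long *addr)
{
        unsigned long mask = 1UL << (nr % BITS_PER_LONG_SK);

        addr += nr / BITS_PER_LONG_SK;
        /* the old word tells us whether the bit was already set */
        return (atomic_fetch_or(addr, mask) & mask) != 0;
}
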
index 1a6ef1b69cb13cd4a8560c007a16a77cbbde36cf..e64a1b75fc386c4786d6a4988a4f5eb71bbfbd27 100644 (file)
                ret = gu.err;                                           \
        }
 
-#define __futex_set() __futex_call(__atomic_xchg)
-#define __futex_add() __futex_call(__atomic_xchg_add)
-#define __futex_or() __futex_call(__atomic_or)
-#define __futex_andn() __futex_call(__atomic_andn)
-#define __futex_xor() __futex_call(__atomic_xor)
+#define __futex_set() __futex_call(__atomic32_xchg)
+#define __futex_add() __futex_call(__atomic32_xchg_add)
+#define __futex_or() __futex_call(__atomic32_fetch_or)
+#define __futex_andn() __futex_call(__atomic32_fetch_andn)
+#define __futex_xor() __futex_call(__atomic32_fetch_xor)
 
 #define __futex_cmpxchg()                                              \
        {                                                               \
-               struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \
-                                                       lock, oldval, oparg); \
+               struct __get_user gu = __atomic32_cmpxchg((u32 __force *)uaddr, \
+                                                         lock, oldval, oparg); \
                val = gu.val;                                           \
                ret = gu.err;                                           \
        }
index 298df1e9912a15b18c4de770a41f25ba14d20d73..f8128800dbf57661661270c60e30614a5c0f42c9 100644 (file)
@@ -61,13 +61,13 @@ static inline int *__atomic_setup(volatile void *v)
 
 int _atomic_xchg(int *v, int n)
 {
-       return __atomic_xchg(v, __atomic_setup(v), n).val;
+       return __atomic32_xchg(v, __atomic_setup(v), n).val;
 }
 EXPORT_SYMBOL(_atomic_xchg);
 
 int _atomic_xchg_add(int *v, int i)
 {
-       return __atomic_xchg_add(v, __atomic_setup(v), i).val;
+       return __atomic32_xchg_add(v, __atomic_setup(v), i).val;
 }
 EXPORT_SYMBOL(_atomic_xchg_add);
 
@@ -78,39 +78,39 @@ int _atomic_xchg_add_unless(int *v, int a, int u)
         * to use the first argument consistently as the "old value"
         * in the assembly, as is done for _atomic_cmpxchg().
         */
-       return __atomic_xchg_add_unless(v, __atomic_setup(v), u, a).val;
+       return __atomic32_xchg_add_unless(v, __atomic_setup(v), u, a).val;
 }
 EXPORT_SYMBOL(_atomic_xchg_add_unless);
 
 int _atomic_cmpxchg(int *v, int o, int n)
 {
-       return __atomic_cmpxchg(v, __atomic_setup(v), o, n).val;
+       return __atomic32_cmpxchg(v, __atomic_setup(v), o, n).val;
 }
 EXPORT_SYMBOL(_atomic_cmpxchg);
 
-unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask)
 {
-       return __atomic_or((int *)p, __atomic_setup(p), mask).val;
+       return __atomic32_fetch_or((int *)p, __atomic_setup(p), mask).val;
 }
-EXPORT_SYMBOL(_atomic_or);
+EXPORT_SYMBOL(_atomic_fetch_or);
 
-unsigned long _atomic_and(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask)
 {
-       return __atomic_and((int *)p, __atomic_setup(p), mask).val;
+       return __atomic32_fetch_and((int *)p, __atomic_setup(p), mask).val;
 }
-EXPORT_SYMBOL(_atomic_and);
+EXPORT_SYMBOL(_atomic_fetch_and);
 
-unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask)
 {
-       return __atomic_andn((int *)p, __atomic_setup(p), mask).val;
+       return __atomic32_fetch_andn((int *)p, __atomic_setup(p), mask).val;
 }
-EXPORT_SYMBOL(_atomic_andn);
+EXPORT_SYMBOL(_atomic_fetch_andn);
 
-unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask)
 {
-       return __atomic_xor((int *)p, __atomic_setup(p), mask).val;
+       return __atomic32_fetch_xor((int *)p, __atomic_setup(p), mask).val;
 }
-EXPORT_SYMBOL(_atomic_xor);
+EXPORT_SYMBOL(_atomic_fetch_xor);
 
 
 long long _atomic64_xchg(long long *v, long long n)
@@ -142,23 +142,23 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n)
 }
 EXPORT_SYMBOL(_atomic64_cmpxchg);
 
-long long _atomic64_and(long long *v, long long n)
+long long _atomic64_fetch_and(long long *v, long long n)
 {
-       return __atomic64_and(v, __atomic_setup(v), n);
+       return __atomic64_fetch_and(v, __atomic_setup(v), n);
 }
-EXPORT_SYMBOL(_atomic64_and);
+EXPORT_SYMBOL(_atomic64_fetch_and);
 
-long long _atomic64_or(long long *v, long long n)
+long long _atomic64_fetch_or(long long *v, long long n)
 {
-       return __atomic64_or(v, __atomic_setup(v), n);
+       return __atomic64_fetch_or(v, __atomic_setup(v), n);
 }
-EXPORT_SYMBOL(_atomic64_or);
+EXPORT_SYMBOL(_atomic64_fetch_or);
 
-long long _atomic64_xor(long long *v, long long n)
+long long _atomic64_fetch_xor(long long *v, long long n)
 {
-       return __atomic64_xor(v, __atomic_setup(v), n);
+       return __atomic64_fetch_xor(v, __atomic_setup(v), n);
 }
-EXPORT_SYMBOL(_atomic64_xor);
+EXPORT_SYMBOL(_atomic64_fetch_xor);
 
 /*
  * If any of the atomic or futex routines hit a bad address (not in
index f611265633d6958d1f21514a479f59ea53b20d0c..1a70e6c0f25936f62d8e95a77ec9e97ac1b66dc9 100644 (file)
@@ -172,15 +172,20 @@ STD_ENTRY_SECTION(__atomic\name, .text.atomic)
        .endif
        .endm
 
-atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
-atomic_op _xchg, 32, "move r24, r2"
-atomic_op _xchg_add, 32, "add r24, r22, r2"
-atomic_op _xchg_add_unless, 32, \
+
+/*
+ * Use __atomic32 prefix to avoid collisions with GCC builtin __atomic functions.
+ */
+
+atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
+atomic_op 32_xchg, 32, "move r24, r2"
+atomic_op 32_xchg_add, 32, "add r24, r22, r2"
+atomic_op 32_xchg_add_unless, 32, \
        "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
-atomic_op _or, 32, "or r24, r22, r2"
-atomic_op _and, 32, "and r24, r22, r2"
-atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2"
-atomic_op _xor, 32, "xor r24, r22, r2"
+atomic_op 32_fetch_or, 32, "or r24, r22, r2"
+atomic_op 32_fetch_and, 32, "and r24, r22, r2"
+atomic_op 32_fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2"
+atomic_op 32_fetch_xor, 32, "xor r24, r22, r2"
 
 atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
        { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
@@ -192,9 +197,9 @@ atomic_op 64_xchg_add_unless, 64, \
        { bbns r26, 3f; add r24, r22, r4 }; \
        { bbns r27, 3f; add r25, r23, r5 }; \
        slt_u r26, r24, r22; add r25, r25, r26"
-atomic_op 64_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
-atomic_op 64_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
-atomic_op 64_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"
+atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
+atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
+atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"
 
        jrp     lr              /* happy backtracer */
 
index 88c2a53362e738110913134b840e1abe01df9fd9..076c6cc431136fc8475b0fa0b768bb7a6083dc66 100644 (file)
@@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
        do {
                delay_backoff(iterations++);
        } while (READ_ONCE(lock->current_ticket) == curr);
+
+       /*
+        * The TILE architecture doesn't do read speculation; therefore
+        * a control dependency guarantees a LOAD->{LOAD,STORE} order.
+        */
+       barrier();
 }
 EXPORT_SYMBOL(arch_spin_unlock_wait);
 
index c8d1f94ff1fe00e13f30a6c0e3ae51563a3226fa..a4b5b2cbce9337bdc775a92c087b3c5cf3876965 100644 (file)
@@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
        do {
                delay_backoff(iterations++);
        } while (arch_spin_current(READ_ONCE(lock->lock)) == curr);
+
+       /*
+        * The TILE architecture doesn't do read speculation; therefore
+        * a control dependency guarantees a LOAD->{LOAD,STORE} order.
+        */
+       barrier();
 }
 EXPORT_SYMBOL(arch_spin_unlock_wait);
 
index 3e86742881984006555fa433d9988a2c4eaa7b4f..a58b998111053f2be1654780cd11a6eee164507c 100644 (file)
@@ -171,6 +171,16 @@ static __always_inline int atomic_sub_return(int i, atomic_t *v)
 #define atomic_inc_return(v)  (atomic_add_return(1, v))
 #define atomic_dec_return(v)  (atomic_sub_return(1, v))
 
+static __always_inline int atomic_fetch_add(int i, atomic_t *v)
+{
+       return xadd(&v->counter, i);
+}
+
+static __always_inline int atomic_fetch_sub(int i, atomic_t *v)
+{
+       return xadd(&v->counter, -i);
+}
+
 static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
        return cmpxchg(&v->counter, old, new);
@@ -190,10 +200,29 @@ static inline void atomic_##op(int i, atomic_t *v)                        \
                        : "memory");                                    \
 }
 
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#define ATOMIC_FETCH_OP(op, c_op)                                      \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                \
+{                                                                      \
+       int old, val = atomic_read(v);                                  \
+       for (;;) {                                                      \
+               old = atomic_cmpxchg(v, val, val c_op i);               \
+               if (old == val)                                         \
+                       break;                                          \
+               val = old;                                              \
+       }                                                               \
+       return old;                                                     \
+}
+
+#define ATOMIC_OPS(op, c_op)                                           \
+       ATOMIC_OP(op)                                                   \
+       ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &)
+ATOMIC_OPS(or , |)
+ATOMIC_OPS(xor, ^)
 
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP
 
 /**
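
On x86, add/sub get their old value for free from XADD, while and/or/xor fall back to a cmpxchg loop. Hand-expanding ATOMIC_FETCH_OP(or, |) from the hunk above gives roughly:

static inline int atomic_fetch_or(int i, atomic_t *v)
{
        int old, val = atomic_read(v);

        for (;;) {
                /* cmpxchg returns the value it found at v->counter */
                old = atomic_cmpxchg(v, val, val | i);
                if (old == val)
                        break;          /* our update landed; old is pre-OR */
                val = old;              /* raced; retry against fresh value */
        }
        return old;
}
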
index a984111135b16e9c7c51d2cde681aa3b310a6a6b..71d7705fb303ba5238f4f812fdb071277addbce8 100644 (file)
@@ -320,10 +320,29 @@ static inline void atomic64_##op(long long i, atomic64_t *v)              \
                c = old;                                                \
 }
 
-ATOMIC64_OP(and, &)
-ATOMIC64_OP(or, |)
-ATOMIC64_OP(xor, ^)
+#define ATOMIC64_FETCH_OP(op, c_op)                                    \
+static inline long long atomic64_fetch_##op(long long i, atomic64_t *v)        \
+{                                                                      \
+       long long old, c = 0;                                           \
+       while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c)           \
+               c = old;                                                \
+       return old;                                                     \
+}
+
+ATOMIC64_FETCH_OP(add, +)
+
+#define atomic64_fetch_sub(i, v)       atomic64_fetch_add(-(i), (v))
+
+#define ATOMIC64_OPS(op, c_op)                                         \
+       ATOMIC64_OP(op, c_op)                                           \
+       ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &)
+ATOMIC64_OPS(or, |)
+ATOMIC64_OPS(xor, ^)
 
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP
 
 #endif /* _ASM_X86_ATOMIC64_32_H */
index 037351022f5483c99a3f0a42dd4646f1eb0a3055..70eed0e14553e69d7f4099e21fa94efe03900a36 100644 (file)
@@ -158,6 +158,16 @@ static inline long atomic64_sub_return(long i, atomic64_t *v)
        return atomic64_add_return(-i, v);
 }
 
+static inline long atomic64_fetch_add(long i, atomic64_t *v)
+{
+       return xadd(&v->counter, i);
+}
+
+static inline long atomic64_fetch_sub(long i, atomic64_t *v)
+{
+       return xadd(&v->counter, -i);
+}
+
 #define atomic64_inc_return(v)  (atomic64_add_return(1, (v)))
 #define atomic64_dec_return(v)  (atomic64_sub_return(1, (v)))
 
@@ -229,10 +239,29 @@ static inline void atomic64_##op(long i, atomic64_t *v)                   \
                        : "memory");                                    \
 }
 
-ATOMIC64_OP(and)
-ATOMIC64_OP(or)
-ATOMIC64_OP(xor)
+#define ATOMIC64_FETCH_OP(op, c_op)                                    \
+static inline long atomic64_fetch_##op(long i, atomic64_t *v)          \
+{                                                                      \
+       long old, val = atomic64_read(v);                               \
+       for (;;) {                                                      \
+               old = atomic64_cmpxchg(v, val, val c_op i);             \
+               if (old == val)                                         \
+                       break;                                          \
+               val = old;                                              \
+       }                                                               \
+       return old;                                                     \
+}
+
+#define ATOMIC64_OPS(op, c_op)                                         \
+       ATOMIC64_OP(op)                                                 \
+       ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &)
+ATOMIC64_OPS(or, |)
+ATOMIC64_OPS(xor, ^)
 
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP
 
 #endif /* _ASM_X86_ATOMIC64_64_H */
index 85e6cda45a0297e20204fe926757e46b647faa2b..e9355a84fc675b936c624bcd00d5640ed43f64b8 100644 (file)
@@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count,
                                           int (*fail_fn)(atomic_t *))
 {
        /* cmpxchg because it never induces a false contention state. */
-       if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+       if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
                return 1;
 
        return 0;
index 07537a44216ec9b2eed302183af7c57d8949a5a0..d9850758464eee05d898eb7c10b5b99cf13c3669 100644 (file)
@@ -118,10 +118,10 @@ do {                                                              \
 static inline int __mutex_fastpath_trylock(atomic_t *count,
                                           int (*fail_fn)(atomic_t *))
 {
-       if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+       if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
                return 1;
-       else
-               return 0;
+
+       return 0;
 }
 
 #endif /* _ASM_X86_MUTEX_64_H */
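
The added atomic_read() turns the trylock fastpath into a test-and-test-and-set: a failing plain load leaves the cache line shared, whereas a failing cmpxchg would still have pulled it exclusive. The same shape in portable form (a sketch using <stdatomic.h>, not the kernel implementation):

#include <stdatomic.h>

static int trylock_sketch(_Atomic int *count)
{
        int expected = 1;

        /* cheap read-only test first: no cache-line ownership on failure */
        if (atomic_load_explicit(count, memory_order_relaxed) != 1)
                return 0;

        /* only now attempt the expensive atomic 1 -> 0 transition */
        return atomic_compare_exchange_strong(count, &expected, 0);
}
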
index 453744c1d34752c20988cf513ff1eef75c3fa657..089ced4edbbce70193eaa2e5e45154c39d492430 100644 (file)
@@ -213,23 +213,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
                     : "memory", "cc");
 }
 
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
-{
-       asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
-                    : "+m" (sem->count)
-                    : "er" (delta));
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
-{
-       return delta + xadd(&sem->count, delta);
-}
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_RWSEM_H */
index fd8017ce298afcc54aedae0763e36d71377a02b5..e7a23f2a519af7be68a9db30b4c112fb344efdda 100644 (file)
@@ -98,6 +98,26 @@ static inline int atomic_##op##_return(int i, atomic_t * v)          \
        return result;                                                  \
 }
 
+#define ATOMIC_FETCH_OP(op)                                            \
+static inline int atomic_fetch_##op(int i, atomic_t * v)               \
+{                                                                      \
+       unsigned long tmp;                                              \
+       int result;                                                     \
+                                                                       \
+       __asm__ __volatile__(                                           \
+                       "1:     l32i    %1, %3, 0\n"                    \
+                       "       wsr     %1, scompare1\n"                \
+                       "       " #op " %0, %1, %2\n"                   \
+                       "       s32c1i  %0, %3, 0\n"                    \
+                       "       bne     %0, %1, 1b\n"                   \
+                       : "=&a" (result), "=&a" (tmp)                   \
+                       : "a" (i), "a" (v)                              \
+                       : "memory"                                      \
+                       );                                              \
+                                                                       \
+       return result;                                                  \
+}
+
 #else /* XCHAL_HAVE_S32C1I */
 
 #define ATOMIC_OP(op)                                                  \
@@ -138,18 +158,42 @@ static inline int atomic_##op##_return(int i, atomic_t * v)               \
        return vval;                                                    \
 }
 
+#define ATOMIC_FETCH_OP(op)                                            \
+static inline int atomic_fetch_##op(int i, atomic_t * v)               \
+{                                                                      \
+       unsigned int tmp, vval;                                         \
+                                                                       \
+       __asm__ __volatile__(                                           \
+                       "       rsil    a15,"__stringify(TOPLEVEL)"\n"  \
+                       "       l32i    %0, %3, 0\n"                    \
+                       "       " #op " %1, %0, %2\n"                   \
+                       "       s32i    %1, %3, 0\n"                    \
+                       "       wsr     a15, ps\n"                      \
+                       "       rsync\n"                                \
+                       : "=&a" (vval), "=&a" (tmp)                     \
+                       : "a" (i), "a" (v)                              \
+                       : "a15", "memory"                               \
+                       );                                              \
+                                                                       \
+       return vval;                                                    \
+}
+
 #endif /* XCHAL_HAVE_S32C1I */
 
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) ATOMIC_OP_RETURN(op)
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
 
 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
index 1d95fa5dcd10f0121f394e22fb108682c17741aa..a36221cf63637fc2e066850e937bc138698af973 100644 (file)
@@ -11,6 +11,9 @@
 #ifndef _XTENSA_SPINLOCK_H
 #define _XTENSA_SPINLOCK_H
 
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
 /*
  * spinlock
  *
  */
 
 #define arch_spin_is_locked(x) ((x)->slock != 0)
-#define arch_spin_unlock_wait(lock) \
-       do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+       smp_cond_load_acquire(&lock->slock, !VAL);
+}
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
index 5e1f345b58ddf3529b074bab33d62bdefcdae5da..288cc9e963953ae182e6b8760663d7b712e834c6 100644 (file)
@@ -112,6 +112,62 @@ static __always_inline void atomic_long_dec(atomic_long_t *l)
        ATOMIC_LONG_PFX(_dec)(v);
 }
 
+#define ATOMIC_LONG_FETCH_OP(op, mo)                                   \
+static inline long                                                     \
+atomic_long_fetch_##op##mo(long i, atomic_long_t *l)                   \
+{                                                                      \
+       ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;              \
+                                                                       \
+       return (long)ATOMIC_LONG_PFX(_fetch_##op##mo)(i, v);            \
+}
+
+ATOMIC_LONG_FETCH_OP(add, )
+ATOMIC_LONG_FETCH_OP(add, _relaxed)
+ATOMIC_LONG_FETCH_OP(add, _acquire)
+ATOMIC_LONG_FETCH_OP(add, _release)
+ATOMIC_LONG_FETCH_OP(sub, )
+ATOMIC_LONG_FETCH_OP(sub, _relaxed)
+ATOMIC_LONG_FETCH_OP(sub, _acquire)
+ATOMIC_LONG_FETCH_OP(sub, _release)
+ATOMIC_LONG_FETCH_OP(and, )
+ATOMIC_LONG_FETCH_OP(and, _relaxed)
+ATOMIC_LONG_FETCH_OP(and, _acquire)
+ATOMIC_LONG_FETCH_OP(and, _release)
+ATOMIC_LONG_FETCH_OP(andnot, )
+ATOMIC_LONG_FETCH_OP(andnot, _relaxed)
+ATOMIC_LONG_FETCH_OP(andnot, _acquire)
+ATOMIC_LONG_FETCH_OP(andnot, _release)
+ATOMIC_LONG_FETCH_OP(or, )
+ATOMIC_LONG_FETCH_OP(or, _relaxed)
+ATOMIC_LONG_FETCH_OP(or, _acquire)
+ATOMIC_LONG_FETCH_OP(or, _release)
+ATOMIC_LONG_FETCH_OP(xor, )
+ATOMIC_LONG_FETCH_OP(xor, _relaxed)
+ATOMIC_LONG_FETCH_OP(xor, _acquire)
+ATOMIC_LONG_FETCH_OP(xor, _release)
+
+#undef ATOMIC_LONG_FETCH_OP
+
+#define ATOMIC_LONG_FETCH_INC_DEC_OP(op, mo)                                   \
+static inline long                                                     \
+atomic_long_fetch_##op##mo(atomic_long_t *l)                           \
+{                                                                      \
+       ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;              \
+                                                                       \
+       return (long)ATOMIC_LONG_PFX(_fetch_##op##mo)(v);               \
+}
+
+ATOMIC_LONG_FETCH_INC_DEC_OP(inc,)
+ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _relaxed)
+ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _acquire)
+ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _release)
+ATOMIC_LONG_FETCH_INC_DEC_OP(dec,)
+ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _relaxed)
+ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _acquire)
+ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _release)
+
+#undef ATOMIC_LONG_FETCH_INC_DEC_OP
+
 #define ATOMIC_LONG_OP(op)                                             \
 static __always_inline void                                            \
 atomic_long_##op(long i, atomic_long_t *l)                             \
@@ -124,9 +180,9 @@ atomic_long_##op(long i, atomic_long_t *l)                          \
 ATOMIC_LONG_OP(add)
 ATOMIC_LONG_OP(sub)
 ATOMIC_LONG_OP(and)
+ATOMIC_LONG_OP(andnot)
 ATOMIC_LONG_OP(or)
 ATOMIC_LONG_OP(xor)
-ATOMIC_LONG_OP(andnot)
 
 #undef ATOMIC_LONG_OP
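
With ATOMIC_LONG_PFX selecting the atomic64_ or atomic_ namespace to match BITS_PER_LONG, an invocation such as ATOMIC_LONG_FETCH_OP(add, _acquire) expands (on a 64-bit build) to approximately:

static inline long atomic_long_fetch_add_acquire(long i, atomic_long_t *l)
{
        atomic64_t *v = (atomic64_t *)l;

        return (long)atomic64_fetch_add_acquire(i, v);
}
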
 
index 74f1a3704d7a1ddf61707b6da03fc2348521e4a1..9ed8b987185b45b1157993abf4b0fe5a6c0b23a8 100644 (file)
@@ -61,6 +61,18 @@ static inline int atomic_##op##_return(int i, atomic_t *v)           \
        return c c_op i;                                                \
 }
 
+#define ATOMIC_FETCH_OP(op, c_op)                                      \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       int c, old;                                                     \
+                                                                       \
+       c = v->counter;                                                 \
+       while ((old = cmpxchg(&v->counter, c, c c_op i)) != c)          \
+               c = old;                                                \
+                                                                       \
+       return c;                                                       \
+}
+
 #else
 
 #include <linux/irqflags.h>
@@ -88,6 +100,20 @@ static inline int atomic_##op##_return(int i, atomic_t *v)          \
        return ret;                                                     \
 }
 
+#define ATOMIC_FETCH_OP(op, c_op)                                      \
+static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       unsigned long flags;                                            \
+       int ret;                                                        \
+                                                                       \
+       raw_local_irq_save(flags);                                      \
+       ret = v->counter;                                               \
+       v->counter = v->counter c_op i;                                 \
+       raw_local_irq_restore(flags);                                   \
+                                                                       \
+       return ret;                                                     \
+}
+
 #endif /* CONFIG_SMP */
 
 #ifndef atomic_add_return
@@ -98,6 +124,26 @@ ATOMIC_OP_RETURN(add, +)
 ATOMIC_OP_RETURN(sub, -)
 #endif
 
+#ifndef atomic_fetch_add
+ATOMIC_FETCH_OP(add, +)
+#endif
+
+#ifndef atomic_fetch_sub
+ATOMIC_FETCH_OP(sub, -)
+#endif
+
+#ifndef atomic_fetch_and
+ATOMIC_FETCH_OP(and, &)
+#endif
+
+#ifndef atomic_fetch_or
+ATOMIC_FETCH_OP(or, |)
+#endif
+
+#ifndef atomic_fetch_xor
+ATOMIC_FETCH_OP(xor, ^)
+#endif
+
 #ifndef atomic_and
 ATOMIC_OP(and, &)
 #endif
@@ -110,6 +156,7 @@ ATOMIC_OP(or, |)
 ATOMIC_OP(xor, ^)
 #endif
 
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
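
On a !CONFIG_SMP build the second ATOMIC_FETCH_OP template above is used, so atomic_fetch_add() reduces to an irq-protected read-modify-write; expanded by hand:

static inline int atomic_fetch_add(int i, atomic_t *v)
{
        unsigned long flags;
        int ret;

        raw_local_irq_save(flags);      /* whole RMW is atomic w.r.t. irqs */
        ret = v->counter;
        v->counter = v->counter + i;
        raw_local_irq_restore(flags);

        return ret;                     /* the pre-add value */
}
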
 
index d48e78ccad3dd8aedb3b020d46bdb00312c86772..dad68bf46c77012e144857898093b99dabea1cd7 100644 (file)
@@ -27,16 +27,23 @@ extern void  atomic64_##op(long long a, atomic64_t *v);
 #define ATOMIC64_OP_RETURN(op)                                         \
 extern long long atomic64_##op##_return(long long a, atomic64_t *v);
 
-#define ATOMIC64_OPS(op)       ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op)
+#define ATOMIC64_FETCH_OP(op)                                          \
+extern long long atomic64_fetch_##op(long long a, atomic64_t *v);
+
+#define ATOMIC64_OPS(op)       ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op)
 
 ATOMIC64_OPS(add)
 ATOMIC64_OPS(sub)
 
-ATOMIC64_OP(and)
-ATOMIC64_OP(or)
-ATOMIC64_OP(xor)
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op)       ATOMIC64_OP(op) ATOMIC64_FETCH_OP(op)
+
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+ATOMIC64_OPS(xor)
 
 #undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
index 1cceca146905013dbe2c346020ad73fe3df68ce5..fe297b599b0a1caa8696b8b7c5a5ed1ddab16556 100644 (file)
@@ -194,7 +194,7 @@ do {                                                                        \
 })
 #endif
 
-#endif
+#endif /* CONFIG_SMP */
 
 /* Barriers for virtual machine guests when talking to an SMP host */
 #define virt_mb() __smp_mb()
@@ -207,5 +207,44 @@ do {                                                                       \
 #define virt_store_release(p, v) __smp_store_release(p, v)
 #define virt_load_acquire(p) __smp_load_acquire(p)
 
+/**
+ * smp_acquire__after_ctrl_dep() - Provide ACQUIRE ordering after a control dependency
+ *
+ * A control dependency provides LOAD->STORE ordering; the additional RMB
+ * provides LOAD->LOAD ordering. Together they provide LOAD->{LOAD,STORE}
+ * ordering, i.e. (load-)ACQUIRE semantics.
+ *
+ * Architectures that do not do load speculation can have this be barrier().
+ */
+#ifndef smp_acquire__after_ctrl_dep
+#define smp_acquire__after_ctrl_dep()          smp_rmb()
+#endif
+
+/**
+ * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
+ * @ptr: pointer to the variable to wait on
+ * @cond: boolean expression to wait for
+ *
+ * Equivalent to using smp_load_acquire() on the condition variable, but it
+ * employs the control dependency of the wait to reduce the barrier needed
+ * on many platforms.
+ *
+ * Because C lacks lambda expressions, the value of *ptr is loaded into the
+ * pre-named variable @VAL for use in @cond.
+ */
+#ifndef smp_cond_load_acquire
+#define smp_cond_load_acquire(ptr, cond_expr) ({               \
+       typeof(ptr) __PTR = (ptr);                              \
+       typeof(*ptr) VAL;                                       \
+       for (;;) {                                              \
+               VAL = READ_ONCE(*__PTR);                        \
+               if (cond_expr)                                  \
+                       break;                                  \
+               cpu_relax();                                    \
+       }                                                       \
+       smp_acquire__after_ctrl_dep();                          \
+       VAL;                                                    \
+})
+#endif
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __ASM_GENERIC_BARRIER_H */
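
A hypothetical consumer illustrating the VAL convention; the struct and field names are made up for the example and are not kernel API:

struct demo {
        unsigned int ready;
        int payload;
};

static int wait_for_data(struct demo *d)
{
        /* spin until d->ready becomes non-zero; VAL names each fresh load.
         * The implied ACQUIRE pairs with the producer's
         * smp_store_release(&d->ready, 1). */
        smp_cond_load_acquire(&d->ready, VAL);

        return d->payload;      /* ordered after the load of ready */
}
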
index fd694cfd678af712b2c3ca1055331df24a8db8ba..c54829d3de3700ce6df65494b30f3dd1b32f63fe 100644 (file)
@@ -80,7 +80,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-       if (likely(atomic_cmpxchg_acquire(count, 1, 0) == 1))
+       if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1))
                return 1;
        return 0;
 }
index a6b4a7bd6ac9770356e066c51f295c6b9c33793f..3269ec4e195fbaba5b44fa61b74896ff285ac989 100644 (file)
@@ -91,8 +91,12 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-       int prev = atomic_xchg_acquire(count, 0);
+       int prev;
 
+       if (atomic_read(count) != 1)
+               return 0;
+
+       prev = atomic_xchg_acquire(count, 0);
        if (unlikely(prev < 0)) {
                /*
                 * The lock was marked contended so we must restore that
index 05f05f17a7c2e4fc14f8fa858abc9f7d7715c7b9..9f0681bf1e87d6ec0b2a224168fccc944286f29e 100644 (file)
@@ -111,10 +111,9 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
 static __always_inline void queued_spin_unlock(struct qspinlock *lock)
 {
        /*
-        * smp_mb__before_atomic() in order to guarantee release semantics
+        * unlock() needs release semantics:
         */
-       smp_mb__before_atomic();
-       atomic_sub(_Q_LOCKED_VAL, &lock->val);
+       (void)atomic_sub_return_release(_Q_LOCKED_VAL, &lock->val);
 }
 #endif
 
index 3fc94a046bf584374b15070f68074c64a3daaf14..5be122e3d32605ad9e0f809b23ba5349d5118de4 100644 (file)
@@ -41,8 +41,8 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 {
        long tmp;
 
-       while ((tmp = sem->count) >= 0) {
-               if (tmp == cmpxchg_acquire(&sem->count, tmp,
+       while ((tmp = atomic_long_read(&sem->count)) >= 0) {
+               if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp,
                                   tmp + RWSEM_ACTIVE_READ_BIAS)) {
                        return 1;
                }
@@ -79,7 +79,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
 {
        long tmp;
 
-       tmp = cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
+       tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
                      RWSEM_ACTIVE_WRITE_BIAS);
        return tmp == RWSEM_UNLOCKED_VALUE;
 }
@@ -106,14 +106,6 @@ static inline void __up_write(struct rw_semaphore *sem)
                rwsem_wake(sem);
 }
 
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
-{
-       atomic_long_add(delta, (atomic_long_t *)&sem->count);
-}
-
 /*
  * downgrade write lock to read lock
  */
@@ -134,13 +126,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
                rwsem_downgrade_wake(sem);
 }
 
-/*
- * implement exchange and add functionality
- */
-static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
-{
-       return atomic_long_add_return(delta, (atomic_long_t *)&sem->count);
-}
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_GENERIC_RWSEM_H */
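
With sem->count now handled through the atomic_long_*() API, the bespoke rwsem_atomic_add()/rwsem_atomic_update() helpers removed above become unnecessary. A sketch of what a former rwsem_atomic_update() call site reduces to (assuming sem->count is an atomic_long_t, as elsewhere in this series):

/* formerly: rwsem_atomic_update(delta, sem) */
static inline long rwsem_update_sketch(struct rw_semaphore *sem, long delta)
{
        return atomic_long_add_return(delta, &sem->count);
}
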
index e451534fe54d9184fbdaab4c8684c1e5a288ff39..e71835bf60a977a37277d44c6357a02e70c3a41d 100644 (file)
 #endif
 #endif /* atomic_dec_return_relaxed */
 
-/* atomic_xchg_relaxed */
-#ifndef atomic_xchg_relaxed
-#define  atomic_xchg_relaxed           atomic_xchg
-#define  atomic_xchg_acquire           atomic_xchg
-#define  atomic_xchg_release           atomic_xchg
 
-#else /* atomic_xchg_relaxed */
+/* atomic_fetch_add_relaxed */
+#ifndef atomic_fetch_add_relaxed
+#define atomic_fetch_add_relaxed       atomic_fetch_add
+#define atomic_fetch_add_acquire       atomic_fetch_add
+#define atomic_fetch_add_release       atomic_fetch_add
 
-#ifndef atomic_xchg_acquire
-#define  atomic_xchg_acquire(...)                                      \
-       __atomic_op_acquire(atomic_xchg, __VA_ARGS__)
+#else /* atomic_fetch_add_relaxed */
+
+#ifndef atomic_fetch_add_acquire
+#define atomic_fetch_add_acquire(...)                                  \
+       __atomic_op_acquire(atomic_fetch_add, __VA_ARGS__)
 #endif
 
-#ifndef atomic_xchg_release
-#define  atomic_xchg_release(...)                                      \
-       __atomic_op_release(atomic_xchg, __VA_ARGS__)
+#ifndef atomic_fetch_add_release
+#define atomic_fetch_add_release(...)                                  \
+       __atomic_op_release(atomic_fetch_add, __VA_ARGS__)
 #endif
 
-#ifndef atomic_xchg
-#define  atomic_xchg(...)                                              \
-       __atomic_op_fence(atomic_xchg, __VA_ARGS__)
+#ifndef atomic_fetch_add
+#define atomic_fetch_add(...)                                          \
+       __atomic_op_fence(atomic_fetch_add, __VA_ARGS__)
+#endif
+#endif /* atomic_fetch_add_relaxed */
+
+/* atomic_fetch_inc_relaxed */
+#ifndef atomic_fetch_inc_relaxed
+
+#ifndef atomic_fetch_inc
+#define atomic_fetch_inc(v)            atomic_fetch_add(1, (v))
+#define atomic_fetch_inc_relaxed(v)    atomic_fetch_add_relaxed(1, (v))
+#define atomic_fetch_inc_acquire(v)    atomic_fetch_add_acquire(1, (v))
+#define atomic_fetch_inc_release(v)    atomic_fetch_add_release(1, (v))
+#else /* atomic_fetch_inc */
+#define atomic_fetch_inc_relaxed       atomic_fetch_inc
+#define atomic_fetch_inc_acquire       atomic_fetch_inc
+#define atomic_fetch_inc_release       atomic_fetch_inc
+#endif /* atomic_fetch_inc */
+
+#else /* atomic_fetch_inc_relaxed */
+
+#ifndef atomic_fetch_inc_acquire
+#define atomic_fetch_inc_acquire(...)                                  \
+       __atomic_op_acquire(atomic_fetch_inc, __VA_ARGS__)
 #endif
-#endif /* atomic_xchg_relaxed */
 
-/* atomic_cmpxchg_relaxed */
-#ifndef atomic_cmpxchg_relaxed
-#define  atomic_cmpxchg_relaxed                atomic_cmpxchg
-#define  atomic_cmpxchg_acquire                atomic_cmpxchg
-#define  atomic_cmpxchg_release                atomic_cmpxchg
+#ifndef atomic_fetch_inc_release
+#define atomic_fetch_inc_release(...)                                  \
+       __atomic_op_release(atomic_fetch_inc, __VA_ARGS__)
+#endif
 
-#else /* atomic_cmpxchg_relaxed */
+#ifndef atomic_fetch_inc
+#define atomic_fetch_inc(...)                                          \
+       __atomic_op_fence(atomic_fetch_inc, __VA_ARGS__)
+#endif
+#endif /* atomic_fetch_inc_relaxed */
 
-#ifndef atomic_cmpxchg_acquire
-#define  atomic_cmpxchg_acquire(...)                                   \
-       __atomic_op_acquire(atomic_cmpxchg, __VA_ARGS__)
+/* atomic_fetch_sub_relaxed */
+#ifndef atomic_fetch_sub_relaxed
+#define atomic_fetch_sub_relaxed       atomic_fetch_sub
+#define atomic_fetch_sub_acquire       atomic_fetch_sub
+#define atomic_fetch_sub_release       atomic_fetch_sub
+
+#else /* atomic_fetch_sub_relaxed */
+
+#ifndef atomic_fetch_sub_acquire
+#define atomic_fetch_sub_acquire(...)                                  \
+       __atomic_op_acquire(atomic_fetch_sub, __VA_ARGS__)
 #endif
 
-#ifndef atomic_cmpxchg_release
-#define  atomic_cmpxchg_release(...)                                   \
-       __atomic_op_release(atomic_cmpxchg, __VA_ARGS__)
+#ifndef atomic_fetch_sub_release
+#define atomic_fetch_sub_release(...)                                  \
+       __atomic_op_release(atomic_fetch_sub, __VA_ARGS__)
 #endif
 
-#ifndef atomic_cmpxchg
-#define  atomic_cmpxchg(...)                                           \
-       __atomic_op_fence(atomic_cmpxchg, __VA_ARGS__)
+#ifndef atomic_fetch_sub
+#define atomic_fetch_sub(...)                                          \
+       __atomic_op_fence(atomic_fetch_sub, __VA_ARGS__)
+#endif
+#endif /* atomic_fetch_sub_relaxed */
+
+/* atomic_fetch_dec_relaxed */
+#ifndef atomic_fetch_dec_relaxed
+
+#ifndef atomic_fetch_dec
+#define atomic_fetch_dec(v)            atomic_fetch_sub(1, (v))
+#define atomic_fetch_dec_relaxed(v)    atomic_fetch_sub_relaxed(1, (v))
+#define atomic_fetch_dec_acquire(v)    atomic_fetch_sub_acquire(1, (v))
+#define atomic_fetch_dec_release(v)    atomic_fetch_sub_release(1, (v))
+#else /* atomic_fetch_dec */
+#define atomic_fetch_dec_relaxed       atomic_fetch_dec
+#define atomic_fetch_dec_acquire       atomic_fetch_dec
+#define atomic_fetch_dec_release       atomic_fetch_dec
+#endif /* atomic_fetch_dec */
+
+#else /* atomic_fetch_dec_relaxed */
+
+#ifndef atomic_fetch_dec_acquire
+#define atomic_fetch_dec_acquire(...)                                  \
+       __atomic_op_acquire(atomic_fetch_dec, __VA_ARGS__)
 #endif
-#endif /* atomic_cmpxchg_relaxed */
 
-#ifndef atomic64_read_acquire
-#define  atomic64_read_acquire(v)      smp_load_acquire(&(v)->counter)
+#ifndef atomic_fetch_dec_release
+#define atomic_fetch_dec_release(...)                                  \
+       __atomic_op_release(atomic_fetch_dec, __VA_ARGS__)
 #endif
 
-#ifndef atomic64_set_release
-#define  atomic64_set_release(v, i)    smp_store_release(&(v)->counter, (i))
+#ifndef atomic_fetch_dec
+#define atomic_fetch_dec(...)                                          \
+       __atomic_op_fence(atomic_fetch_dec, __VA_ARGS__)
 #endif
+#endif /* atomic_fetch_dec_relaxed */
 
-/* atomic64_add_return_relaxed */
-#ifndef atomic64_add_return_relaxed
-#define  atomic64_add_return_relaxed   atomic64_add_return
-#define  atomic64_add_return_acquire   atomic64_add_return
-#define  atomic64_add_return_release   atomic64_add_return
+/* atomic_fetch_or_relaxed */
+#ifndef atomic_fetch_or_relaxed
+#define atomic_fetch_or_relaxed        atomic_fetch_or
+#define atomic_fetch_or_acquire        atomic_fetch_or
+#define atomic_fetch_or_release        atomic_fetch_or
 
-#else /* atomic64_add_return_relaxed */
+#else /* atomic_fetch_or_relaxed */
 
-#ifndef atomic64_add_return_acquire
-#define  atomic64_add_return_acquire(...)                              \
-       __atomic_op_acquire(atomic64_add_return, __VA_ARGS__)
+#ifndef atomic_fetch_or_acquire
+#define atomic_fetch_or_acquire(...)                                   \
+       __atomic_op_acquire(atomic_fetch_or, __VA_ARGS__)
 #endif
 
-#ifndef atomic64_add_return_release
-#define  atomic64_add_return_release(...)                              \
-       __atomic_op_release(atomic64_add_return, __VA_ARGS__)
+#ifndef atomic_fetch_or_release
+#define atomic_fetch_or_release(...)                                   \
+       __atomic_op_release(atomic_fetch_or, __VA_ARGS__)
 #endif
 
-#ifndef atomic64_add_return
-#define  atomic64_add_return(...)                                      \
-       __atomic_op_fence(atomic64_add_return, __VA_ARGS__)
+#ifndef atomic_fetch_or
+#define atomic_fetch_or(...)                                           \
+       __atomic_op_fence(atomic_fetch_or, __VA_ARGS__)
 #endif
-#endif /* atomic64_add_return_relaxed */
+#endif /* atomic_fetch_or_relaxed */
 
-/* atomic64_inc_return_relaxed */
-#ifndef atomic64_inc_return_relaxed
-#define  atomic64_inc_return_relaxed   atomic64_inc_return
-#define  atomic64_inc_return_acquire   atomic64_inc_return
-#define  atomic64_inc_return_release   atomic64_inc_return
+/* atomic_fetch_and_relaxed */
+#ifndef atomic_fetch_and_relaxed
+#define atomic_fetch_and_relaxed       atomic_fetch_and
+#define atomic_fetch_and_acquire       atomic_fetch_and
+#define atomic_fetch_and_release       atomic_fetch_and
 
-#else /* atomic64_inc_return_relaxed */
+#else /* atomic_fetch_and_relaxed */
 
-#ifndef atomic64_inc_return_acquire
-#define  atomic64_inc_return_acquire(...)                              \
-       __atomic_op_acquire(atomic64_inc_return, __VA_ARGS__)
+#ifndef atomic_fetch_and_acquire
+#define atomic_fetch_and_acquire(...)                                  \
+       __atomic_op_acquire(atomic_fetch_and, __VA_ARGS__)
 #endif
 
-#ifndef atomic64_inc_return_release
-#define  atomic64_inc_return_release(...)                              \
-       __atomic_op_release(atomic64_inc_return, __VA_ARGS__)
+#ifndef atomic_fetch_and_release
+#define atomic_fetch_and_release(...)                                  \
+       __atomic_op_release(atomic_fetch_and, __VA_ARGS__)
 #endif
 
-#ifndef atomic64_inc_return
-#define  atomic64_inc_return(...)                                      \
-       __atomic_op_fence(atomic64_inc_return, __VA_ARGS__)
+#ifndef atomic_fetch_and
+#define atomic_fetch_and(...)                                          \
+       __atomic_op_fence(atomic_fetch_and, __VA_ARGS__)
 #endif
-#endif /* atomic64_inc_return_relaxed */
-
+#endif /* atomic_fetch_and_relaxed */
 
-/* atomic64_sub_return_relaxed */
-#ifndef atomic64_sub_return_relaxed
-#define  atomic64_sub_return_relaxed   atomic64_sub_return
-#define  atomic64_sub_return_acquire   atomic64_sub_return
-#define  atomic64_sub_return_release   atomic64_sub_return
+#ifdef atomic_andnot
+/* atomic_fetch_andnot_relaxed */
+#ifndef atomic_fetch_andnot_relaxed
+#define atomic_fetch_andnot_relaxed    atomic_fetch_andnot
+#define atomic_fetch_andnot_acquire    atomic_fetch_andnot
+#define atomic_fetch_andnot_release    atomic_fetch_andnot
 
-#else /* atomic64_sub_return_relaxed */
+#else /* atomic_fetch_andnot_relaxed */
 
-#ifndef atomic64_sub_return_acquire
-#define  atomic64_sub_return_acquire(...)                              \
-       __atomic_op_acquire(atomic64_sub_return, __VA_ARGS__)
+#ifndef atomic_fetch_andnot_acquire
+#define atomic_fetch_andnot_acquire(...)                                       \
+       __atomic_op_acquire(atomic_fetch_andnot, __VA_ARGS__)
 #endif
 
-#ifndef atomic64_sub_return_release
-#define  atomic64_sub_return_release(...)                              \
-       __atomic_op_release(atomic64_sub_return, __VA_ARGS__)
+#ifndef atomic_fetch_andnot_release
+#define atomic_fetch_andnot_release(...)                                       \
+       __atomic_op_release(atomic_fetch_andnot, __VA_ARGS__)
 #endif
 
-#ifndef atomic64_sub_return
-#define  atomic64_sub_return(...)                                      \
-       __atomic_op_fence(atomic64_sub_return, __VA_ARGS__)
+#ifndef atomic_fetch_andnot
+#define atomic_fetch_andnot(...)                                               \
+       __atomic_op_fence(atomic_fetch_andnot, __VA_ARGS__)
 #endif
-#endif /* atomic64_sub_return_relaxed */
+#endif /* atomic_fetch_andnot_relaxed */
+#endif /* atomic_andnot */
 
-/* atomic64_dec_return_relaxed */
-#ifndef atomic64_dec_return_relaxed
-#define  atomic64_dec_return_relaxed   atomic64_dec_return
-#define  atomic64_dec_return_acquire   atomic64_dec_return
-#define  atomic64_dec_return_release   atomic64_dec_return
+/* atomic_fetch_xor_relaxed */
+#ifndef atomic_fetch_xor_relaxed
+#define atomic_fetch_xor_relaxed       atomic_fetch_xor
+#define atomic_fetch_xor_acquire       atomic_fetch_xor
+#define atomic_fetch_xor_release       atomic_fetch_xor
 
-#else /* atomic64_dec_return_relaxed */
+#else /* atomic_fetch_xor_relaxed */
 
-#ifndef atomic64_dec_return_acquire
-#define  atomic64_dec_return_acquire(...)                              \
-       __atomic_op_acquire(atomic64_dec_return, __VA_ARGS__)
+#ifndef atomic_fetch_xor_acquire
+#define atomic_fetch_xor_acquire(...)                                  \
+       __atomic_op_acquire(atomic_fetch_xor, __VA_ARGS__)
 #endif
 
-#ifndef atomic64_dec_return_release
-#define  atomic64_dec_return_release(...)                              \
-       __atomic_op_release(atomic64_dec_return, __VA_ARGS__)
+#ifndef atomic_fetch_xor_release
+#define atomic_fetch_xor_release(...)                                  \
+       __atomic_op_release(atomic_fetch_xor, __VA_ARGS__)
 #endif
 
-#ifndef atomic64_dec_return
-#define  atomic64_dec_return(...)                                      \
-       __atomic_op_fence(atomic64_dec_return, __VA_ARGS__)
+#ifndef atomic_fetch_xor
+#define atomic_fetch_xor(...)                                          \
+       __atomic_op_fence(atomic_fetch_xor, __VA_ARGS__)
 #endif
-#endif /* atomic64_dec_return_relaxed */
+#endif /* atomic_fetch_xor_relaxed */
 
-/* atomic64_xchg_relaxed */
-#ifndef atomic64_xchg_relaxed
-#define  atomic64_xchg_relaxed         atomic64_xchg
-#define  atomic64_xchg_acquire         atomic64_xchg
-#define  atomic64_xchg_release         atomic64_xchg
 
-#else /* atomic64_xchg_relaxed */
+/* atomic_xchg_relaxed */
+#ifndef atomic_xchg_relaxed
+#define  atomic_xchg_relaxed           atomic_xchg
+#define  atomic_xchg_acquire           atomic_xchg
+#define  atomic_xchg_release           atomic_xchg
 
-#ifndef atomic64_xchg_acquire
-#define  atomic64_xchg_acquire(...)                                    \
-       __atomic_op_acquire(atomic64_xchg, __VA_ARGS__)
+#else /* atomic_xchg_relaxed */
+
+#ifndef atomic_xchg_acquire
+#define  atomic_xchg_acquire(...)                                      \
+       __atomic_op_acquire(atomic_xchg, __VA_ARGS__)
 #endif
 
-#ifndef atomic64_xchg_release
-#define  atomic64_xchg_release(...)                                    \
-       __atomic_op_release(atomic64_xchg, __VA_ARGS__)
+#ifndef atomic_xchg_release
+#define  atomic_xchg_release(...)                                      \
+       __atomic_op_release(atomic_xchg, __VA_ARGS__)
 #endif
 
-#ifndef atomic64_xchg
-#define  atomic64_xchg(...)                                            \
-       __atomic_op_fence(atomic64_xchg, __VA_ARGS__)
+#ifndef atomic_xchg
+#define  atomic_xchg(...)                                              \
+       __atomic_op_fence(atomic_xchg, __VA_ARGS__)
 #endif
-#endif /* atomic64_xchg_relaxed */
+#endif /* atomic_xchg_relaxed */
 
-/* atomic64_cmpxchg_relaxed */
-#ifndef atomic64_cmpxchg_relaxed
-#define  atomic64_cmpxchg_relaxed      atomic64_cmpxchg
-#define  atomic64_cmpxchg_acquire      atomic64_cmpxchg
-#define  atomic64_cmpxchg_release      atomic64_cmpxchg
+/* atomic_cmpxchg_relaxed */
+#ifndef atomic_cmpxchg_relaxed
+#define  atomic_cmpxchg_relaxed                atomic_cmpxchg
+#define  atomic_cmpxchg_acquire                atomic_cmpxchg
+#define  atomic_cmpxchg_release                atomic_cmpxchg
 
-#else /* atomic64_cmpxchg_relaxed */
+#else /* atomic_cmpxchg_relaxed */
 
-#ifndef atomic64_cmpxchg_acquire
-#define  atomic64_cmpxchg_acquire(...)                                 \
-       __atomic_op_acquire(atomic64_cmpxchg, __VA_ARGS__)
+#ifndef atomic_cmpxchg_acquire
+#define  atomic_cmpxchg_acquire(...)                                   \
+       __atomic_op_acquire(atomic_cmpxchg, __VA_ARGS__)
 #endif
 
-#ifndef atomic64_cmpxchg_release
-#define  atomic64_cmpxchg_release(...)                                 \
-       __atomic_op_release(atomic64_cmpxchg, __VA_ARGS__)
+#ifndef atomic_cmpxchg_release
+#define  atomic_cmpxchg_release(...)                                   \
+       __atomic_op_release(atomic_cmpxchg, __VA_ARGS__)
 #endif
 
-#ifndef atomic64_cmpxchg
-#define  atomic64_cmpxchg(...)                                         \
-       __atomic_op_fence(atomic64_cmpxchg, __VA_ARGS__)
+#ifndef atomic_cmpxchg
+#define  atomic_cmpxchg(...)                                           \
+       __atomic_op_fence(atomic_cmpxchg, __VA_ARGS__)
 #endif
-#endif /* atomic64_cmpxchg_relaxed */
+#endif /* atomic_cmpxchg_relaxed */
 
 /* cmpxchg_relaxed */
 #ifndef cmpxchg_relaxed
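
Every family above follows the same template: if an architecture supplies only the _relaxed form, the acquire/release/fully-ordered variants are synthesized by the __atomic_op_*() wrappers defined near the top of this header, which have roughly this shape:

#define __atomic_op_acquire(op, args...)                                \
({                                                                      \
        typeof(op##_relaxed(args)) __ret = op##_relaxed(args);          \
        smp_mb__after_atomic();         /* upgrade to ACQUIRE */        \
        __ret;                                                          \
})

#define __atomic_op_fence(op, args...)                                  \
({                                                                      \
        typeof(op##_relaxed(args)) __ret;                               \
        smp_mb__before_atomic();                                        \
        __ret = op##_relaxed(args);                                     \
        smp_mb__after_atomic();         /* fully ordered */             \
        __ret;                                                          \
})
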
@@ -463,18 +522,28 @@ static inline void atomic_andnot(int i, atomic_t *v)
 {
        atomic_and(~i, v);
 }
-#endif
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
+static inline int atomic_fetch_andnot(int i, atomic_t *v)
 {
-       atomic_andnot(mask, v);
+       return atomic_fetch_and(~i, v);
 }
 
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
+static inline int atomic_fetch_andnot_relaxed(int i, atomic_t *v)
 {
-       atomic_or(mask, v);
+       return atomic_fetch_and_relaxed(~i, v);
 }
 
+static inline int atomic_fetch_andnot_acquire(int i, atomic_t *v)
+{
+       return atomic_fetch_and_acquire(~i, v);
+}
+
+static inline int atomic_fetch_andnot_release(int i, atomic_t *v)
+{
+       return atomic_fetch_and_release(~i, v);
+}
+#endif
+
 /**
  * atomic_inc_not_zero_hint - increment if not null
  * @v: pointer of type atomic_t
@@ -558,36 +627,400 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
-/**
- * atomic_fetch_or - perform *p |= mask and return old value of *p
- * @mask: mask to OR on the atomic_t
- * @p: pointer to atomic_t
- */
-#ifndef atomic_fetch_or
-static inline int atomic_fetch_or(int mask, atomic_t *p)
-{
-       int old, val = atomic_read(p);
+#ifdef CONFIG_GENERIC_ATOMIC64
+#include <asm-generic/atomic64.h>
+#endif
 
-       for (;;) {
-               old = atomic_cmpxchg(p, val, val | mask);
-               if (old == val)
-                       break;
-               val = old;
-       }
+#ifndef atomic64_read_acquire
+#define  atomic64_read_acquire(v)      smp_load_acquire(&(v)->counter)
+#endif
 
-       return old;
-}
+#ifndef atomic64_set_release
+#define  atomic64_set_release(v, i)    smp_store_release(&(v)->counter, (i))
 #endif
 
-#ifdef CONFIG_GENERIC_ATOMIC64
-#include <asm-generic/atomic64.h>
+/* atomic64_add_return_relaxed */
+#ifndef atomic64_add_return_relaxed
+#define  atomic64_add_return_relaxed   atomic64_add_return
+#define  atomic64_add_return_acquire   atomic64_add_return
+#define  atomic64_add_return_release   atomic64_add_return
+
+#else /* atomic64_add_return_relaxed */
+
+#ifndef atomic64_add_return_acquire
+#define  atomic64_add_return_acquire(...)                              \
+       __atomic_op_acquire(atomic64_add_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_add_return_release
+#define  atomic64_add_return_release(...)                              \
+       __atomic_op_release(atomic64_add_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_add_return
+#define  atomic64_add_return(...)                                      \
+       __atomic_op_fence(atomic64_add_return, __VA_ARGS__)
+#endif
+#endif /* atomic64_add_return_relaxed */
+
+/* atomic64_inc_return_relaxed */
+#ifndef atomic64_inc_return_relaxed
+#define  atomic64_inc_return_relaxed   atomic64_inc_return
+#define  atomic64_inc_return_acquire   atomic64_inc_return
+#define  atomic64_inc_return_release   atomic64_inc_return
+
+#else /* atomic64_inc_return_relaxed */
+
+#ifndef atomic64_inc_return_acquire
+#define  atomic64_inc_return_acquire(...)                              \
+       __atomic_op_acquire(atomic64_inc_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_inc_return_release
+#define  atomic64_inc_return_release(...)                              \
+       __atomic_op_release(atomic64_inc_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_inc_return
+#define  atomic64_inc_return(...)                                      \
+       __atomic_op_fence(atomic64_inc_return, __VA_ARGS__)
+#endif
+#endif /* atomic64_inc_return_relaxed */
+
+
+/* atomic64_sub_return_relaxed */
+#ifndef atomic64_sub_return_relaxed
+#define  atomic64_sub_return_relaxed   atomic64_sub_return
+#define  atomic64_sub_return_acquire   atomic64_sub_return
+#define  atomic64_sub_return_release   atomic64_sub_return
+
+#else /* atomic64_sub_return_relaxed */
+
+#ifndef atomic64_sub_return_acquire
+#define  atomic64_sub_return_acquire(...)                              \
+       __atomic_op_acquire(atomic64_sub_return, __VA_ARGS__)
 #endif
 
+#ifndef atomic64_sub_return_release
+#define  atomic64_sub_return_release(...)                              \
+       __atomic_op_release(atomic64_sub_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_sub_return
+#define  atomic64_sub_return(...)                                      \
+       __atomic_op_fence(atomic64_sub_return, __VA_ARGS__)
+#endif
+#endif /* atomic64_sub_return_relaxed */
+
+/* atomic64_dec_return_relaxed */
+#ifndef atomic64_dec_return_relaxed
+#define  atomic64_dec_return_relaxed   atomic64_dec_return
+#define  atomic64_dec_return_acquire   atomic64_dec_return
+#define  atomic64_dec_return_release   atomic64_dec_return
+
+#else /* atomic64_dec_return_relaxed */
+
+#ifndef atomic64_dec_return_acquire
+#define  atomic64_dec_return_acquire(...)                              \
+       __atomic_op_acquire(atomic64_dec_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_dec_return_release
+#define  atomic64_dec_return_release(...)                              \
+       __atomic_op_release(atomic64_dec_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_dec_return
+#define  atomic64_dec_return(...)                                      \
+       __atomic_op_fence(atomic64_dec_return, __VA_ARGS__)
+#endif
+#endif /* atomic64_dec_return_relaxed */
+
+
+/* atomic64_fetch_add_relaxed */
+#ifndef atomic64_fetch_add_relaxed
+#define atomic64_fetch_add_relaxed     atomic64_fetch_add
+#define atomic64_fetch_add_acquire     atomic64_fetch_add
+#define atomic64_fetch_add_release     atomic64_fetch_add
+
+#else /* atomic64_fetch_add_relaxed */
+
+#ifndef atomic64_fetch_add_acquire
+#define atomic64_fetch_add_acquire(...)                                        \
+       __atomic_op_acquire(atomic64_fetch_add, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_add_release
+#define atomic64_fetch_add_release(...)                                        \
+       __atomic_op_release(atomic64_fetch_add, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_add
+#define atomic64_fetch_add(...)                                                \
+       __atomic_op_fence(atomic64_fetch_add, __VA_ARGS__)
+#endif
+#endif /* atomic64_fetch_add_relaxed */
+
+/* atomic64_fetch_inc_relaxed */
+#ifndef atomic64_fetch_inc_relaxed
+
+#ifndef atomic64_fetch_inc
+#define atomic64_fetch_inc(v)          atomic64_fetch_add(1, (v))
+#define atomic64_fetch_inc_relaxed(v)  atomic64_fetch_add_relaxed(1, (v))
+#define atomic64_fetch_inc_acquire(v)  atomic64_fetch_add_acquire(1, (v))
+#define atomic64_fetch_inc_release(v)  atomic64_fetch_add_release(1, (v))
+#else /* atomic64_fetch_inc */
+#define atomic64_fetch_inc_relaxed     atomic64_fetch_inc
+#define atomic64_fetch_inc_acquire     atomic64_fetch_inc
+#define atomic64_fetch_inc_release     atomic64_fetch_inc
+#endif /* atomic64_fetch_inc */
+
+#else /* atomic64_fetch_inc_relaxed */
+
+#ifndef atomic64_fetch_inc_acquire
+#define atomic64_fetch_inc_acquire(...)                                        \
+       __atomic_op_acquire(atomic64_fetch_inc, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_inc_release
+#define atomic64_fetch_inc_release(...)                                        \
+       __atomic_op_release(atomic64_fetch_inc, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_inc
+#define atomic64_fetch_inc(...)                                                \
+       __atomic_op_fence(atomic64_fetch_inc, __VA_ARGS__)
+#endif
+#endif /* atomic64_fetch_inc_relaxed */
+
+/* atomic64_fetch_sub_relaxed */
+#ifndef atomic64_fetch_sub_relaxed
+#define atomic64_fetch_sub_relaxed     atomic64_fetch_sub
+#define atomic64_fetch_sub_acquire     atomic64_fetch_sub
+#define atomic64_fetch_sub_release     atomic64_fetch_sub
+
+#else /* atomic64_fetch_sub_relaxed */
+
+#ifndef atomic64_fetch_sub_acquire
+#define atomic64_fetch_sub_acquire(...)                                        \
+       __atomic_op_acquire(atomic64_fetch_sub, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_sub_release
+#define atomic64_fetch_sub_release(...)                                        \
+       __atomic_op_release(atomic64_fetch_sub, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_sub
+#define atomic64_fetch_sub(...)                                                \
+       __atomic_op_fence(atomic64_fetch_sub, __VA_ARGS__)
+#endif
+#endif /* atomic64_fetch_sub_relaxed */
+
+/* atomic64_fetch_dec_relaxed */
+#ifndef atomic64_fetch_dec_relaxed
+
+#ifndef atomic64_fetch_dec
+#define atomic64_fetch_dec(v)          atomic64_fetch_sub(1, (v))
+#define atomic64_fetch_dec_relaxed(v)  atomic64_fetch_sub_relaxed(1, (v))
+#define atomic64_fetch_dec_acquire(v)  atomic64_fetch_sub_acquire(1, (v))
+#define atomic64_fetch_dec_release(v)  atomic64_fetch_sub_release(1, (v))
+#else /* atomic64_fetch_dec */
+#define atomic64_fetch_dec_relaxed     atomic64_fetch_dec
+#define atomic64_fetch_dec_acquire     atomic64_fetch_dec
+#define atomic64_fetch_dec_release     atomic64_fetch_dec
+#endif /* atomic64_fetch_dec */
+
+#else /* atomic64_fetch_dec_relaxed */
+
+#ifndef atomic64_fetch_dec_acquire
+#define atomic64_fetch_dec_acquire(...)                                        \
+       __atomic_op_acquire(atomic64_fetch_dec, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_dec_release
+#define atomic64_fetch_dec_release(...)                                        \
+       __atomic_op_release(atomic64_fetch_dec, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_dec
+#define atomic64_fetch_dec(...)                                                \
+       __atomic_op_fence(atomic64_fetch_dec, __VA_ARGS__)
+#endif
+#endif /* atomic64_fetch_dec_relaxed */
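fetch_dec_release() fits the usual reference-count put: RELEASE on the decrement, then an ACQUIRE upgrade before freeing on the 1 -> 0 transition. A hedged sketch; struct obj and obj_free() are illustrative, not part of this patch:

struct obj {
	atomic64_t refs;
	/* ... payload ... */
};

static void obj_free(struct obj *o);	/* illustrative */

static void obj_put(struct obj *o)
{
	/* RELEASE orders this CPU's prior accesses to *o before the decrement. */
	if (atomic64_fetch_dec_release(&o->refs) == 1) {
		/* old value 1 => now zero; upgrade the ctrl dependency to ACQUIRE */
		smp_acquire__after_ctrl_dep();
		obj_free(o);
	}
}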
+
+/* atomic64_fetch_or_relaxed */
+#ifndef atomic64_fetch_or_relaxed
+#define atomic64_fetch_or_relaxed      atomic64_fetch_or
+#define atomic64_fetch_or_acquire      atomic64_fetch_or
+#define atomic64_fetch_or_release      atomic64_fetch_or
+
+#else /* atomic64_fetch_or_relaxed */
+
+#ifndef atomic64_fetch_or_acquire
+#define atomic64_fetch_or_acquire(...)                                 \
+       __atomic_op_acquire(atomic64_fetch_or, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_or_release
+#define atomic64_fetch_or_release(...)                                 \
+       __atomic_op_release(atomic64_fetch_or, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_or
+#define atomic64_fetch_or(...)                                         \
+       __atomic_op_fence(atomic64_fetch_or, __VA_ARGS__)
+#endif
+#endif /* atomic64_fetch_or_relaxed */
+
+/* atomic64_fetch_and_relaxed */
+#ifndef atomic64_fetch_and_relaxed
+#define atomic64_fetch_and_relaxed     atomic64_fetch_and
+#define atomic64_fetch_and_acquire     atomic64_fetch_and
+#define atomic64_fetch_and_release     atomic64_fetch_and
+
+#else /* atomic64_fetch_and_relaxed */
+
+#ifndef atomic64_fetch_and_acquire
+#define atomic64_fetch_and_acquire(...)                                        \
+       __atomic_op_acquire(atomic64_fetch_and, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_and_release
+#define atomic64_fetch_and_release(...)                                        \
+       __atomic_op_release(atomic64_fetch_and, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_and
+#define atomic64_fetch_and(...)                                                \
+       __atomic_op_fence(atomic64_fetch_and, __VA_ARGS__)
+#endif
+#endif /* atomic64_fetch_and_relaxed */
+
+#ifdef atomic64_andnot
+/* atomic64_fetch_andnot_relaxed */
+#ifndef atomic64_fetch_andnot_relaxed
+#define atomic64_fetch_andnot_relaxed  atomic64_fetch_andnot
+#define atomic64_fetch_andnot_acquire  atomic64_fetch_andnot
+#define atomic64_fetch_andnot_release  atomic64_fetch_andnot
+
+#else /* atomic64_fetch_andnot_relaxed */
+
+#ifndef atomic64_fetch_andnot_acquire
+#define atomic64_fetch_andnot_acquire(...)                                     \
+       __atomic_op_acquire(atomic64_fetch_andnot, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_andnot_release
+#define atomic64_fetch_andnot_release(...)                                     \
+       __atomic_op_release(atomic64_fetch_andnot, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_andnot
+#define atomic64_fetch_andnot(...)                                             \
+       __atomic_op_fence(atomic64_fetch_andnot, __VA_ARGS__)
+#endif
+#endif /* atomic64_fetch_andnot_relaxed */
+#endif /* atomic64_andnot */
+
+/* atomic64_fetch_xor_relaxed */
+#ifndef atomic64_fetch_xor_relaxed
+#define atomic64_fetch_xor_relaxed     atomic64_fetch_xor
+#define atomic64_fetch_xor_acquire     atomic64_fetch_xor
+#define atomic64_fetch_xor_release     atomic64_fetch_xor
+
+#else /* atomic64_fetch_xor_relaxed */
+
+#ifndef atomic64_fetch_xor_acquire
+#define atomic64_fetch_xor_acquire(...)                                        \
+       __atomic_op_acquire(atomic64_fetch_xor, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_xor_release
+#define atomic64_fetch_xor_release(...)                                        \
+       __atomic_op_release(atomic64_fetch_xor, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_fetch_xor
+#define atomic64_fetch_xor(...)                                                \
+       __atomic_op_fence(atomic64_fetch_xor, __VA_ARGS__)
+#endif
+#endif /* atomic64_fetch_xor_relaxed */
+
+
+/* atomic64_xchg_relaxed */
+#ifndef atomic64_xchg_relaxed
+#define  atomic64_xchg_relaxed         atomic64_xchg
+#define  atomic64_xchg_acquire         atomic64_xchg
+#define  atomic64_xchg_release         atomic64_xchg
+
+#else /* atomic64_xchg_relaxed */
+
+#ifndef atomic64_xchg_acquire
+#define  atomic64_xchg_acquire(...)                                    \
+       __atomic_op_acquire(atomic64_xchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_xchg_release
+#define  atomic64_xchg_release(...)                                    \
+       __atomic_op_release(atomic64_xchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_xchg
+#define  atomic64_xchg(...)                                            \
+       __atomic_op_fence(atomic64_xchg, __VA_ARGS__)
+#endif
+#endif /* atomic64_xchg_relaxed */
+
+/* atomic64_cmpxchg_relaxed */
+#ifndef atomic64_cmpxchg_relaxed
+#define  atomic64_cmpxchg_relaxed      atomic64_cmpxchg
+#define  atomic64_cmpxchg_acquire      atomic64_cmpxchg
+#define  atomic64_cmpxchg_release      atomic64_cmpxchg
+
+#else /* atomic64_cmpxchg_relaxed */
+
+#ifndef atomic64_cmpxchg_acquire
+#define  atomic64_cmpxchg_acquire(...)                                 \
+       __atomic_op_acquire(atomic64_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_cmpxchg_release
+#define  atomic64_cmpxchg_release(...)                                 \
+       __atomic_op_release(atomic64_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_cmpxchg
+#define  atomic64_cmpxchg(...)                                         \
+       __atomic_op_fence(atomic64_cmpxchg, __VA_ARGS__)
+#endif
+#endif /* atomic64_cmpxchg_relaxed */
+
 #ifndef atomic64_andnot
 static inline void atomic64_andnot(long long i, atomic64_t *v)
 {
        atomic64_and(~i, v);
 }
+
+static inline long long atomic64_fetch_andnot(long long i, atomic64_t *v)
+{
+       return atomic64_fetch_and(~i, v);
+}
+
+static inline long long atomic64_fetch_andnot_relaxed(long long i, atomic64_t *v)
+{
+       return atomic64_fetch_and_relaxed(~i, v);
+}
+
+static inline long long atomic64_fetch_andnot_acquire(long long i, atomic64_t *v)
+{
+       return atomic64_fetch_and_acquire(~i, v);
+}
+
+static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v)
+{
+       return atomic64_fetch_and_release(~i, v);
+}
 #endif
 
 #include <asm-generic/atomic-long.h>
index 793c0829e3a3909dd532c972e513c7b8164a16ed..2e853b679a5da9c4061053b714801d7434728154 100644 (file)
@@ -304,23 +304,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
        __u.__val;                                      \
 })
 
-/**
- * smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering
- * @cond: boolean expression to wait for
- *
- * Equivalent to using smp_load_acquire() on the condition variable but employs
- * the control dependency of the wait to reduce the barrier on many platforms.
- *
- * The control dependency provides a LOAD->STORE order, the additional RMB
- * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order,
- * aka. ACQUIRE.
- */
-#define smp_cond_acquire(cond) do {            \
-       while (!(cond))                         \
-               cpu_relax();                    \
-       smp_rmb(); /* ctrl + rmb := acquire */  \
-} while (0)
-
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
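The removed smp_cond_acquire(cond) is superseded by smp_cond_load_acquire(ptr, cond_expr), which also performs the load and exposes it as VAL inside the condition. The generic fallback looks roughly like this (a sketch; arm64 replaces it with an LDXR+WFE based loop elsewhere in this series):

#define smp_cond_load_acquire(ptr, cond_expr) ({		\
	typeof(ptr) __PTR = (ptr);				\
	typeof(*ptr) VAL;					\
	for (;;) {						\
		VAL = READ_ONCE(*__PTR);			\
		if (cond_expr)					\
			break;					\
		cpu_relax();					\
	}							\
	smp_acquire__after_ctrl_dep();	/* ctrl + rmb := acquire */ \
	VAL;							\
})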
@@ -545,10 +528,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
  * Similar to rcu_dereference(), but for situations where the pointed-to
  * object's lifetime is managed by something other than RCU.  That
  * "something other" might be reference counting or simple immortality.
+ *
+ * The seemingly unused void * variable is there to validate that @p is indeed
+ * a pointer type: all pointer types silently convert to void *.
  */
 #define lockless_dereference(p) \
 ({ \
        typeof(p) _________p1 = READ_ONCE(p); \
+       __maybe_unused const void * const _________p2 = _________p1; \
        smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
        (_________p1); \
 })
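The added void * assignment turns a misuse into a build failure: passing a non-pointer to lockless_dereference() now fails to compile instead of silently "working". An illustrative sketch (struct foo, gp and seq are hypothetical):

struct foo { int x; };
static struct foo *gp;
static unsigned long seq;

static void example(void)
{
	struct foo *p = lockless_dereference(gp);	/* OK: gp is a pointer */
	(void)p;
	/*
	 * lockless_dereference(seq) would now fail to build: an integer
	 * does not silently convert to const void *.
	 */
}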
index 84f542df7ff5cca0932f02b6629f8229244c2d1a..1c7eec09e5eba7ae8c0cc8e82172791f992bb361 100644 (file)
@@ -136,14 +136,12 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
         * used as a pointer.  If the compiler generates a separate fetch
         * when using it as a pointer, __PERCPU_REF_ATOMIC may be set in
         * between contaminating the pointer value, meaning that
-        * ACCESS_ONCE() is required when fetching it.
-        *
-        * Also, we need a data dependency barrier to be paired with
-        * smp_store_release() in __percpu_ref_switch_to_percpu().
-        *
-        * Use lockless deref which contains both.
+        * READ_ONCE() is required when fetching it.
         */
-       percpu_ptr = lockless_dereference(ref->percpu_count_ptr);
+       percpu_ptr = READ_ONCE(ref->percpu_count_ptr);
+
+       /* paired with smp_store_release() in __percpu_ref_switch_to_percpu() */
+       smp_read_barrier_depends();
 
        /*
         * Theoretically, the following could test just ATOMIC; however,
index d37fbb34d06fed4494b3152300d71aba23663f63..dd1d142503404da2775808630bda2021a597229e 100644 (file)
@@ -23,10 +23,11 @@ struct rw_semaphore;
 
 #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
 #include <linux/rwsem-spinlock.h> /* use a generic implementation */
+#define __RWSEM_INIT_COUNT(name)       .count = RWSEM_UNLOCKED_VALUE
 #else
 /* All arch specific implementations share the same struct */
 struct rw_semaphore {
-       long count;
+       atomic_long_t count;
        struct list_head wait_list;
        raw_spinlock_t wait_lock;
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
@@ -54,9 +55,10 @@ extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
 /* In all implementations count != 0 means locked */
 static inline int rwsem_is_locked(struct rw_semaphore *sem)
 {
-       return sem->count != 0;
+       return atomic_long_read(&sem->count) != 0;
 }
 
+#define __RWSEM_INIT_COUNT(name)       .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
 #endif
 
 /* Common initializer macros and functions */
@@ -74,7 +76,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
 #endif
 
 #define __RWSEM_INITIALIZER(name)                              \
-       { .count = RWSEM_UNLOCKED_VALUE,                        \
+       { __RWSEM_INIT_COUNT(name),                             \
          .wait_list = LIST_HEAD_INIT((name).wait_list),        \
          .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \
          __RWSEM_OPT_INIT(name)                                \
index 8b3ac0d718ebd7fd80987f6ede10725358102e1d..0d9848de677d96eb186f71c578dd6031f6c47611 100644 (file)
@@ -6,6 +6,7 @@
 #endif
 
 #include <asm/processor.h>     /* for cpu_relax() */
+#include <asm/barrier.h>
 
 /*
  * include/linux/spinlock_up.h - UP-debug version of spinlocks.
 #ifdef CONFIG_DEBUG_SPINLOCK
 #define arch_spin_is_locked(x)         ((x)->slock == 0)
 
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+       smp_cond_load_acquire(&lock->slock, VAL);
+}
+
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
        lock->slock = 0;
@@ -67,6 +73,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 
 #else /* DEBUG_SPINLOCK */
 #define arch_spin_is_locked(lock)      ((void)(lock), 0)
+#define arch_spin_unlock_wait(lock)    do { barrier(); (void)(lock); } while (0)
 /* for sched/core.c and kernel_lock.c: */
 # define arch_spin_lock(lock)          do { barrier(); (void)(lock); } while (0)
 # define arch_spin_lock_flags(lock, flags)     do { barrier(); (void)(lock); } while (0)
@@ -79,7 +86,4 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 #define arch_read_can_lock(lock)       (((void)(lock), 1))
 #define arch_write_can_lock(lock)      (((void)(lock), 1))
 
-#define arch_spin_unlock_wait(lock) \
-               do { cpu_relax(); } while (arch_spin_is_locked(lock))
-
 #endif /* __LINUX_SPINLOCK_UP_H */
index b3757ea0694be8a993d7e60d6c2fe90c9dbe90dc..ae72b3cddc8d866450314466df89d466ee7f2dd2 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -259,16 +259,6 @@ static void sem_rcu_free(struct rcu_head *head)
        ipc_rcu_free(head);
 }
 
-/*
- * spin_unlock_wait() and !spin_is_locked() are not memory barriers, they
- * are only control barriers.
- * The code must pair with spin_unlock(&sem->lock) or
- * spin_unlock(&sem_perm.lock), thus just the control barrier is insufficient.
- *
- * smp_rmb() is sufficient, as writes cannot pass the control barrier.
- */
-#define ipc_smp_acquire__after_spin_is_unlocked()      smp_rmb()
-
 /*
  * Wait until all currently ongoing simple ops have completed.
  * Caller must own sem_perm.lock.
@@ -292,7 +282,6 @@ static void sem_wait_array(struct sem_array *sma)
                sem = sma->sem_base + i;
                spin_unlock_wait(&sem->lock);
        }
-       ipc_smp_acquire__after_spin_is_unlocked();
 }
 
 /*
@@ -350,7 +339,7 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
                         *      complex_count++;
                         *      spin_unlock(sem_perm.lock);
                         */
-                       ipc_smp_acquire__after_spin_is_unlocked();
+                       smp_acquire__after_ctrl_dep();
 
                        /*
                         * Now repeat the test of complex_count:
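smp_acquire__after_ctrl_dep() is the tree-wide replacement for the local ipc_smp_acquire__after_spin_is_unlocked() removed above. Consistent with the removed comment ("smp_rmb() is sufficient, as writes cannot pass the control barrier"), its generic definition amounts to a one-liner, sketched here:

/* a control dependency gives LOAD->STORE; smp_rmb() adds LOAD->LOAD = ACQUIRE */
#define smp_acquire__after_ctrl_dep()	smp_rmb()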
index 9e6e1356e6bbc2dbb2aeb9ef5b2b02d05961ad3c..0b40791b9e70259b50458b4c67e1d3325a7a4b6b 100644 (file)
@@ -700,10 +700,14 @@ void do_exit(long code)
 
        exit_signals(tsk);  /* sets PF_EXITING */
        /*
-        * tsk->flags are checked in the futex code to protect against
-        * an exiting task cleaning up the robust pi futexes.
+        * Ensure that all new tsk->pi_lock acquisitions observe
+        * PF_EXITING. Serializes against futex.c:attach_to_pi_owner().
         */
        smp_mb();
+       /*
+        * Ensure that we observe the pi_state in exit_mm() ->
+        * mm_release() -> exit_pi_state_list().
+        */
        raw_spin_unlock_wait(&tsk->pi_lock);
 
        if (unlikely(in_atomic())) {
index 4b353e0be1215a081e2cf367d3a3074700e4a685..0dbea887d6258522e7c2c10692f24ced83b51b8c 100644 (file)
@@ -452,7 +452,7 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
        return notifier_from_errno(ret);
 }
 
-struct notifier_block jump_label_module_nb = {
+static struct notifier_block jump_label_module_nb = {
        .notifier_call = jump_label_module_notify,
        .priority = 1, /* higher than tracepoints */
 };
index 81f1a7107c0eb7b947c62658d886f556232d513a..589d763a49b3952322f08c00286675f314a3f76c 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/gfp.h>
 #include <linux/kmemcheck.h>
 #include <linux/random.h>
+#include <linux/jhash.h>
 
 #include <asm/sections.h>
 
@@ -309,10 +310,14 @@ static struct hlist_head chainhash_table[CHAINHASH_SIZE];
  * It's a 64-bit hash, because it's important for the keys to be
  * unique.
  */
-#define iterate_chain_key(key1, key2) \
-       (((key1) << MAX_LOCKDEP_KEYS_BITS) ^ \
-       ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS)) ^ \
-       (key2))
+static inline u64 iterate_chain_key(u64 key, u32 idx)
+{
+       u32 k0 = key, k1 = key >> 32;
+
+       __jhash_mix(idx, k0, k1); /* Macro that modifies arguments! */
+
+       return k0 | (u64)k1 << 32;
+}
 
 void lockdep_off(void)
 {
index d06ae3bb46c5f9f928cf7123464f49e284ab3ecf..57a871ae3c81a98e324b9ba907ef65f5c4f1cb8c 100644 (file)
@@ -29,12 +29,12 @@ extern void debug_mutex_init(struct mutex *lock, const char *name,
 
 static inline void mutex_set_owner(struct mutex *lock)
 {
-       lock->owner = current;
+       WRITE_ONCE(lock->owner, current);
 }
 
 static inline void mutex_clear_owner(struct mutex *lock)
 {
-       lock->owner = NULL;
+       WRITE_ONCE(lock->owner, NULL);
 }
 
 #define spin_lock_mutex(lock, flags)                   \
index a68bae5e852a08f1c0d75587b57c5e31835f937e..6cd6b8e9efd7f73175be41f46d062650da821af8 100644 (file)
                __list_del((waiter)->list.prev, (waiter)->list.next)
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+/*
+ * The mutex owner can be read and written locklessly.
+ * We use WRITE_ONCE() when writing the owner value to
+ * avoid store tearing; otherwise a thread could read a
+ * partially written, incomplete owner value.
+ */
 static inline void mutex_set_owner(struct mutex *lock)
 {
-       lock->owner = current;
+       WRITE_ONCE(lock->owner, current);
 }
 
 static inline void mutex_clear_owner(struct mutex *lock)
 {
-       lock->owner = NULL;
+       WRITE_ONCE(lock->owner, NULL);
 }
 #else
 static inline void mutex_set_owner(struct mutex *lock)
index fec08233866875ca1962d836b4e5d6f88d475092..19248ddf37cea70b7a0e5cf0f2481fa5061bf8d5 100644 (file)
@@ -93,7 +93,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
         * that accesses can't leak upwards out of our subsequent critical
         * section in the case that the lock is currently held for write.
         */
-       cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts) - _QR_BIAS;
+       cnts = atomic_fetch_add_acquire(_QR_BIAS, &lock->cnts);
        rspin_until_writer_unlock(lock, cnts);
 
        /*
index 5fc8c311b8fe59d46decc2c5a049ce6e860a07b8..b2caec7315af5b622a00f8babfd55bb3f27fb315 100644 (file)
@@ -90,7 +90,7 @@ static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
  * therefore increment the cpu number by one.
  */
 
-static inline u32 encode_tail(int cpu, int idx)
+static inline __pure u32 encode_tail(int cpu, int idx)
 {
        u32 tail;
 
@@ -103,7 +103,7 @@ static inline u32 encode_tail(int cpu, int idx)
        return tail;
 }
 
-static inline struct mcs_spinlock *decode_tail(u32 tail)
+static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
 {
        int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
        int idx = (tail &  _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
@@ -267,6 +267,63 @@ static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
 #define queued_spin_lock_slowpath      native_queued_spin_lock_slowpath
 #endif
 
+/*
+ * Various notes on spin_is_locked() and spin_unlock_wait(), which are
+ * 'interesting' functions:
+ *
+ * PROBLEM: some architectures have an interesting issue with atomic ACQUIRE
+ * operations in that the ACQUIRE applies to the LOAD _not_ the STORE (ARM64,
+ * PPC). Also qspinlock has a similar issue per construction: the setting of
+ * the locked byte can be unordered with respect to acquiring the lock proper.
+ *
+ * This gets to be 'interesting' in the following cases, where the /should/s
+ * end up false because of this issue.
+ *
+ *
+ * CASE 1:
+ *
+ * So the spin_is_locked() correctness issue comes from something like:
+ *
+ *   CPU0                              CPU1
+ *
+ *   global_lock();                    local_lock(i)
+ *     spin_lock(&G)                     spin_lock(&L[i])
+ *     for (i)                           if (!spin_is_locked(&G)) {
+ *       spin_unlock_wait(&L[i]);          smp_acquire__after_ctrl_dep();
+ *                                         return;
+ *                                       }
+ *                                       // deal with fail
+ *
+ * Where it is important that CPU1 sees G locked or CPU0 sees L[i] locked such
+ * that there is exclusion between the two critical sections.
+ *
+ * The load from spin_is_locked(&G) /should/ be constrained by the ACQUIRE from
+ * spin_lock(&L[i]), and similarly the load(s) from spin_unlock_wait(&L[i])
+ * /should/ be constrained by the ACQUIRE from spin_lock(&G).
+ *
+ * Similarly, later stuff is constrained by the ACQUIRE from CTRL+RMB.
+ *
+ *
+ * CASE 2:
+ *
+ * For spin_unlock_wait() there is a second correctness issue, namely:
+ *
+ *   CPU0                              CPU1
+ *
+ *   flag = set;
+ *   smp_mb();                         spin_lock(&l)
+ *   spin_unlock_wait(&l);             if (!flag)
+ *                                       // add to lockless list
+ *                                     spin_unlock(&l);
+ *   // iterate lockless list
+ *
+ * Which wants to ensure that CPU1 will stop adding bits to the list and CPU0
+ * will observe the last entry on the list (if spin_unlock_wait() had ACQUIRE
+ * semantics etc..)
+ *
+ * Where flag /should/ be ordered against the locked store of l.
+ */
+
 /*
  * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
  * issuing an _unordered_ store to set _Q_LOCKED_VAL.
@@ -322,7 +379,7 @@ void queued_spin_unlock_wait(struct qspinlock *lock)
                cpu_relax();
 
 done:
-       smp_rmb(); /* CTRL + RMB -> ACQUIRE */
+       smp_acquire__after_ctrl_dep();
 }
 EXPORT_SYMBOL(queued_spin_unlock_wait);
 #endif
@@ -418,7 +475,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
         * sequentiality; this is because not all clear_pending_set_locked()
         * implementations imply full barriers.
         */
-       smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK));
+       smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));
 
        /*
         * take ownership and clear the pending bit.
@@ -455,6 +512,8 @@ queue:
         * pending stuff.
         *
         * p,*,* -> n,*,*
+        *
+        * RELEASE, such that the stores to @node must be complete.
         */
        old = xchg_tail(lock, tail);
        next = NULL;
@@ -465,6 +524,15 @@ queue:
         */
        if (old & _Q_TAIL_MASK) {
                prev = decode_tail(old);
+               /*
+                * The above xchg_tail() is also a load of @lock which generates,
+                * through decode_tail(), a pointer.
+                *
+                * The address dependency matches the RELEASE of xchg_tail()
+                * such that the access to @prev must happen after.
+                */
+               smp_read_barrier_depends();
+
                WRITE_ONCE(prev->next, node);
 
                pv_wait_node(node, prev);
@@ -494,7 +562,7 @@ queue:
         *
         * The PV pv_wait_head_or_lock function, if active, will acquire
         * the lock and return a non-zero value. So we have to skip the
-        * smp_cond_acquire() call. As the next PV queue head hasn't been
+        * smp_cond_load_acquire() call. As the next PV queue head hasn't been
         * designated yet, there is no way for the locked value to become
         * _Q_SLOW_VAL. So both the set_locked() and the
         * atomic_cmpxchg_relaxed() calls will be safe.
@@ -505,7 +573,7 @@ queue:
        if ((val = pv_wait_head_or_lock(lock, node)))
                goto locked;
 
-       smp_cond_acquire(!((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK));
+       val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK));
 
 locked:
        /*
@@ -525,9 +593,9 @@ locked:
                        break;
                }
                /*
-                * The smp_cond_acquire() call above has provided the necessary
-                * acquire semantics required for locking. At most two
-                * iterations of this loop may be ran.
+                * The smp_cond_load_acquire() call above has provided the
+                * acquire semantics required for locking. At most two
+                * iterations of this loop may be run.
                 */
                old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
                if (old == val)
@@ -551,7 +619,7 @@ release:
        /*
         * release the node
         */
-       this_cpu_dec(mcs_nodes[0].count);
+       __this_cpu_dec(mcs_nodes[0].count);
 }
 EXPORT_SYMBOL(queued_spin_lock_slowpath);
 
index 21ede57f68b320594f874e4ff0a1b60974fc67f0..37649e69056cf974e27d0137260f8ff46ad688df 100644 (file)
@@ -112,12 +112,12 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
 #else /* _Q_PENDING_BITS == 8 */
 static __always_inline void set_pending(struct qspinlock *lock)
 {
-       atomic_set_mask(_Q_PENDING_VAL, &lock->val);
+       atomic_or(_Q_PENDING_VAL, &lock->val);
 }
 
 static __always_inline void clear_pending(struct qspinlock *lock)
 {
-       atomic_clear_mask(_Q_PENDING_VAL, &lock->val);
+       atomic_andnot(_Q_PENDING_VAL, &lock->val);
 }
 
 static __always_inline int trylock_clear_pending(struct qspinlock *lock)
index 3e746607abe5230bb9c650957582669c085d9a90..1ec0f48962b33dce424d4d3f491c6a1fd76ba13d 100644 (file)
@@ -1478,7 +1478,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
  */
 int __sched rt_mutex_trylock(struct rt_mutex *lock)
 {
-       if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq()))
+       if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
                return 0;
 
        return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
index 09e30c6225e5373c523a034f42924b9973fcb7a4..447e08de1fabf11b3d2050c5735a0ea331362fc4 100644 (file)
@@ -80,7 +80,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
        debug_check_no_locks_freed((void *)sem, sizeof(*sem));
        lockdep_init_map(&sem->dep_map, name, key, 0);
 #endif
-       sem->count = RWSEM_UNLOCKED_VALUE;
+       atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
        raw_spin_lock_init(&sem->wait_lock);
        INIT_LIST_HEAD(&sem->wait_list);
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
@@ -114,12 +114,16 @@ enum rwsem_wake_type {
  *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
  *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
  * - there must be someone on the queue
- * - the spinlock must be held by the caller
+ * - the wait_lock must be held by the caller
+ * - tasks are marked for wakeup; the caller must later invoke wake_up_q()
+ *   to actually wake up the blocked task(s) and drop the reference count,
+ *   preferably once the wait_lock is released
  * - woken process blocks are discarded from the list after having task zeroed
- * - writers are only woken if downgrading is false
+ * - writers are only marked woken if downgrading is false
  */
 static struct rw_semaphore *
-__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
+__rwsem_mark_wake(struct rw_semaphore *sem,
+                 enum rwsem_wake_type wake_type, struct wake_q_head *wake_q)
 {
        struct rwsem_waiter *waiter;
        struct task_struct *tsk;
@@ -128,13 +132,16 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
 
        waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
        if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
-               if (wake_type == RWSEM_WAKE_ANY)
-                       /* Wake writer at the front of the queue, but do not
-                        * grant it the lock yet as we want other writers
-                        * to be able to steal it.  Readers, on the other hand,
-                        * will block as they will notice the queued writer.
+               if (wake_type == RWSEM_WAKE_ANY) {
+                       /*
+                        * Mark writer at the front of the queue for wakeup.
+                        * Until the task is actually awoken later by
+                        * the caller, other writers are able to steal it.
+                        * Readers, on the other hand, will block as they
+                        * will notice the queued writer.
                         */
-                       wake_up_process(waiter->task);
+                       wake_q_add(wake_q, waiter->task);
+               }
                goto out;
        }
 
@@ -146,15 +153,27 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
        if (wake_type != RWSEM_WAKE_READ_OWNED) {
                adjustment = RWSEM_ACTIVE_READ_BIAS;
  try_reader_grant:
-               oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
+               oldcount = atomic_long_fetch_add(adjustment, &sem->count);
+
                if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
-                       /* A writer stole the lock. Undo our reader grant. */
-                       if (rwsem_atomic_update(-adjustment, sem) &
-                                               RWSEM_ACTIVE_MASK)
+                       /*
+                        * If the count is still less than RWSEM_WAITING_BIAS
+                        * after removing the adjustment, it is assumed that
+                        * a writer has stolen the lock. We have to undo our
+                        * reader grant.
+                        */
+                       if (atomic_long_add_return(-adjustment, &sem->count) <
+                           RWSEM_WAITING_BIAS)
                                goto out;
                        /* Last active locker left. Retry waking readers. */
                        goto try_reader_grant;
                }
+               /*
+                * It is not really necessary to set it to reader-owned here,
+                * but it gives the spinners an early indication that the
+                * readers now have the lock.
+                */
+               rwsem_set_reader_owned(sem);
        }
 
        /* Grant an infinite number of read locks to the readers at the front
@@ -179,7 +198,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
                adjustment -= RWSEM_WAITING_BIAS;
 
        if (adjustment)
-               rwsem_atomic_add(adjustment, sem);
+               atomic_long_add(adjustment, &sem->count);
 
        next = sem->wait_list.next;
        loop = woken;
@@ -187,17 +206,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
                waiter = list_entry(next, struct rwsem_waiter, list);
                next = waiter->list.next;
                tsk = waiter->task;
+
+               wake_q_add(wake_q, tsk);
                /*
-                * Make sure we do not wakeup the next reader before
-                * setting the nil condition to grant the next reader;
-                * otherwise we could miss the wakeup on the other
-                * side and end up sleeping again. See the pairing
-                * in rwsem_down_read_failed().
+                * Ensure that the last operation is setting the reader
+                * waiter to nil such that rwsem_down_read_failed() cannot
+                * race with do_exit() by always holding a reference count
+                * to the task to wake up.
                 */
-               smp_mb();
-               waiter->task = NULL;
-               wake_up_process(tsk);
-               put_task_struct(tsk);
+               smp_store_release(&waiter->task, NULL);
        } while (--loop);
 
        sem->wait_list.next = next;
@@ -216,11 +233,11 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
        long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
        struct rwsem_waiter waiter;
        struct task_struct *tsk = current;
+       WAKE_Q(wake_q);
 
        /* set up my own style of waitqueue */
        waiter.task = tsk;
        waiter.type = RWSEM_WAITING_FOR_READ;
-       get_task_struct(tsk);
 
        raw_spin_lock_irq(&sem->wait_lock);
        if (list_empty(&sem->wait_list))
@@ -228,7 +245,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
        list_add_tail(&waiter.list, &sem->wait_list);
 
        /* we're now waiting on the lock, but no longer actively locking */
-       count = rwsem_atomic_update(adjustment, sem);
+       count = atomic_long_add_return(adjustment, &sem->count);
 
        /* If there are no active locks, wake the front queued process(es).
         *
@@ -238,9 +255,10 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
        if (count == RWSEM_WAITING_BIAS ||
            (count > RWSEM_WAITING_BIAS &&
             adjustment != -RWSEM_ACTIVE_READ_BIAS))
-               sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+               sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
        raw_spin_unlock_irq(&sem->wait_lock);
+       wake_up_q(&wake_q);
 
        /* wait to be given the lock */
        while (true) {
@@ -255,17 +273,29 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 }
 EXPORT_SYMBOL(rwsem_down_read_failed);
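Note the structural change in this file: __rwsem_do_wake() becomes __rwsem_mark_wake(), which only queues tasks on an on-stack wake_q; the wakeups themselves are issued after the wait_lock is dropped. The pattern, sketched with an illustrative helper:

static void rwsem_wake_waiters(struct rw_semaphore *sem)	/* illustrative */
{
	WAKE_Q(wake_q);			/* on-stack wake queue */

	raw_spin_lock_irq(&sem->wait_lock);
	__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);	/* marks only */
	raw_spin_unlock_irq(&sem->wait_lock);

	wake_up_q(&wake_q);	/* actual wakeups, outside the critical section */
}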
 
+/*
+ * This function must be called with the sem->wait_lock held to prevent
+ * race conditions between checking the rwsem wait list and setting the
+ * sem->count accordingly.
+ */
 static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
 {
        /*
-        * Try acquiring the write lock. Check count first in order
-        * to reduce unnecessary expensive cmpxchg() operations.
+        * Avoid trying to acquire the write lock if count isn't RWSEM_WAITING_BIAS.
         */
-       if (count == RWSEM_WAITING_BIAS &&
-           cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS,
-                   RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
-               if (!list_is_singular(&sem->wait_list))
-                       rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+       if (count != RWSEM_WAITING_BIAS)
+               return false;
+
+       /*
+        * Acquire the lock by trying to set it to ACTIVE_WRITE_BIAS. If there
+        * are other tasks on the wait list, we need to add on WAITING_BIAS.
+        */
+       count = list_is_singular(&sem->wait_list) ?
+                       RWSEM_ACTIVE_WRITE_BIAS :
+                       RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS;
+
+       if (atomic_long_cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count)
+                                                       == RWSEM_WAITING_BIAS) {
                rwsem_set_owner(sem);
                return true;
        }
@@ -279,13 +309,13 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
  */
 static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 {
-       long old, count = READ_ONCE(sem->count);
+       long old, count = atomic_long_read(&sem->count);
 
        while (true) {
                if (!(count == 0 || count == RWSEM_WAITING_BIAS))
                        return false;
 
-               old = cmpxchg_acquire(&sem->count, count,
+               old = atomic_long_cmpxchg_acquire(&sem->count, count,
                                      count + RWSEM_ACTIVE_WRITE_BIAS);
                if (old == count) {
                        rwsem_set_owner(sem);
@@ -306,16 +336,11 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
 
        rcu_read_lock();
        owner = READ_ONCE(sem->owner);
-       if (!owner) {
-               long count = READ_ONCE(sem->count);
+       if (!rwsem_owner_is_writer(owner)) {
                /*
-                * If sem->owner is not set, yet we have just recently entered the
-                * slowpath with the lock being active, then there is a possibility
-                * reader(s) may have the lock. To be safe, bail spinning in these
-                * situations.
+                * Don't spin if the rwsem is readers owned.
                 */
-               if (count & RWSEM_ACTIVE_MASK)
-                       ret = false;
+               ret = !rwsem_owner_is_reader(owner);
                goto done;
        }
 
@@ -325,10 +350,15 @@ done:
        return ret;
 }
 
-static noinline
-bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
+/*
+ * Return true only if we can still spin on the owner field of the rwsem.
+ */
+static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
 {
-       long count;
+       struct task_struct *owner = READ_ONCE(sem->owner);
+
+       if (!rwsem_owner_is_writer(owner))
+               goto out;
 
        rcu_read_lock();
        while (sem->owner == owner) {
@@ -349,22 +379,16 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
                cpu_relax_lowlatency();
        }
        rcu_read_unlock();
-
-       if (READ_ONCE(sem->owner))
-               return true; /* new owner, continue spinning */
-
+out:
        /*
-        * When the owner is not set, the lock could be free or
-        * held by readers. Check the counter to verify the
-        * state.
+        * If there is a new owner or the owner is not set, we continue
+        * spinning.
         */
-       count = READ_ONCE(sem->count);
-       return (count == 0 || count == RWSEM_WAITING_BIAS);
+       return !rwsem_owner_is_reader(READ_ONCE(sem->owner));
 }
 
 static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
 {
-       struct task_struct *owner;
        bool taken = false;
 
        preempt_disable();
@@ -376,12 +400,17 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
        if (!osq_lock(&sem->osq))
                goto done;
 
-       while (true) {
-               owner = READ_ONCE(sem->owner);
-               if (owner && !rwsem_spin_on_owner(sem, owner))
-                       break;
-
-               /* wait_lock will be acquired if write_lock is obtained */
+       /*
+        * Optimistically spin on the owner field and attempt to acquire the
+        * lock whenever the owner changes. Spinning will be stopped when:
+        *  1) the owning writer isn't running; or
+        *  2) readers own the lock as we can't determine if they are
+        *     actively running or not.
+        */
+       while (rwsem_spin_on_owner(sem)) {
+               /*
+                * Try to acquire the lock
+                */
                if (rwsem_try_write_lock_unqueued(sem)) {
                        taken = true;
                        break;
@@ -393,7 +422,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
                 * we're an RT task that will live-lock because we won't let
                 * the owner complete.
                 */
-               if (!owner && (need_resched() || rt_task(current)))
+               if (!sem->owner && (need_resched() || rt_task(current)))
                        break;
 
                /*
@@ -440,9 +469,10 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
        bool waiting = true; /* any queued threads before us */
        struct rwsem_waiter waiter;
        struct rw_semaphore *ret = sem;
+       WAKE_Q(wake_q);
 
        /* undo write bias from down_write operation, stop active locking */
-       count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
+       count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count);
 
        /* do optimistic spinning and steal lock if possible */
        if (rwsem_optimistic_spin(sem))
@@ -465,18 +495,29 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 
        /* we're now waiting on the lock, but no longer actively locking */
        if (waiting) {
-               count = READ_ONCE(sem->count);
+               count = atomic_long_read(&sem->count);
 
                /*
                 * If there were already threads queued before us and there are
                 * no active writers, the lock must be read owned; so we try to
                 * wake any read locks that were queued ahead of us.
                 */
-               if (count > RWSEM_WAITING_BIAS)
-                       sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+               if (count > RWSEM_WAITING_BIAS) {
+                       WAKE_Q(wake_q);
+
+                       sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
+                       /*
+                        * The wakeup is normally called _after_ the wait_lock
+                        * is released, but given that we are proactively waking
+                        * readers we can deal with the wake_q overhead as it is
+                        * similar to releasing and taking the wait_lock again
+                        * for attempting rwsem_try_write_lock().
+                        */
+                       wake_up_q(&wake_q);
+               }
 
        } else
-               count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+               count = atomic_long_add_return(RWSEM_WAITING_BIAS, &sem->count);
 
        /* wait until we successfully acquire the lock */
        set_current_state(state);
@@ -492,7 +533,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 
                        schedule();
                        set_current_state(state);
-               } while ((count = sem->count) & RWSEM_ACTIVE_MASK);
+               } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);
 
                raw_spin_lock_irq(&sem->wait_lock);
        }
@@ -507,10 +548,11 @@ out_nolock:
        raw_spin_lock_irq(&sem->wait_lock);
        list_del(&waiter.list);
        if (list_empty(&sem->wait_list))
-               rwsem_atomic_update(-RWSEM_WAITING_BIAS, sem);
+               atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
        else
-               __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+               __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
        raw_spin_unlock_irq(&sem->wait_lock);
+       wake_up_q(&wake_q);
 
        return ERR_PTR(-EINTR);
 }
@@ -537,6 +579,7 @@ __visible
 struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
 {
        unsigned long flags;
+       WAKE_Q(wake_q);
 
        /*
         * If a spinner is present, it is not necessary to do the wakeup.
@@ -573,9 +616,10 @@ locked:
 
        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
-               sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+               sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
        raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+       wake_up_q(&wake_q);
 
        return sem;
 }
@@ -590,14 +634,16 @@ __visible
 struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 {
        unsigned long flags;
+       WAKE_Q(wake_q);
 
        raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
-               sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
+               sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
 
        raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+       wake_up_q(&wake_q);
 
        return sem;
 }
index 2e853ad93a3a0f27ac3ce676ebe637ad8e03199e..45ba475d4be344b76b1b35e6a174d8f8085788ce 100644 (file)
@@ -22,6 +22,7 @@ void __sched down_read(struct rw_semaphore *sem)
        rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
 
        LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
+       rwsem_set_reader_owned(sem);
 }
 
 EXPORT_SYMBOL(down_read);
@@ -33,8 +34,10 @@ int down_read_trylock(struct rw_semaphore *sem)
 {
        int ret = __down_read_trylock(sem);
 
-       if (ret == 1)
+       if (ret == 1) {
                rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
+               rwsem_set_reader_owned(sem);
+       }
        return ret;
 }
 
@@ -124,7 +127,7 @@ void downgrade_write(struct rw_semaphore *sem)
         * lockdep: a downgraded write will live on as a write
         * dependency.
         */
-       rwsem_clear_owner(sem);
+       rwsem_set_reader_owned(sem);
        __downgrade_write(sem);
 }
 
@@ -138,6 +141,7 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
        rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
 
        LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
+       rwsem_set_reader_owned(sem);
 }
 
 EXPORT_SYMBOL(down_read_nested);
index 870ed9a5b4260c3c470924ad61e86ed91cefacaa..a699f4048ba11c76017a32ed0b5bc86b0ce45843 100644 (file)
@@ -1,14 +1,58 @@
+/*
+ * The owner field of the rw_semaphore structure will be set to
+ * RWSEM_READER_OWNED when a reader grabs the lock. A writer will clear
+ * the owner field when it unlocks. A reader, on the other hand, will
+ * not touch the owner field when it unlocks.
+ *
+ * In essence, the owner field now has the following 3 states:
+ *  1) 0
+ *     - lock is free or the owner hasn't set the field yet
+ *  2) RWSEM_READER_OWNED
+ *     - lock is currently or previously owned by readers (lock is free
+ *       or not set by owner yet)
+ *  3) Other non-zero value
+ *     - a writer owns the lock
+ */
+#define RWSEM_READER_OWNED     ((struct task_struct *)1UL)
+
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+/*
+ * All writes to owner are protected by WRITE_ONCE() to make sure that
+ * store tearing can't happen, as optimistic spinners may read and use
+ * the owner value concurrently without holding the lock. Reads from owner,
+ * however, may not need READ_ONCE() as long as the pointer value is only used
+ * for comparison and isn't being dereferenced.
+ */
 static inline void rwsem_set_owner(struct rw_semaphore *sem)
 {
-       sem->owner = current;
+       WRITE_ONCE(sem->owner, current);
 }
 
 static inline void rwsem_clear_owner(struct rw_semaphore *sem)
 {
-       sem->owner = NULL;
+       WRITE_ONCE(sem->owner, NULL);
+}
+
+static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
+{
+       /*
+        * We check the owner value first to make sure that we will only
+        * do a write to the rwsem cacheline when it is really necessary
+        * to minimize cacheline contention.
+        */
+       if (sem->owner != RWSEM_READER_OWNED)
+               WRITE_ONCE(sem->owner, RWSEM_READER_OWNED);
+}
+
+static inline bool rwsem_owner_is_writer(struct task_struct *owner)
+{
+       return owner && owner != RWSEM_READER_OWNED;
 }
 
+static inline bool rwsem_owner_is_reader(struct task_struct *owner)
+{
+       return owner == RWSEM_READER_OWNED;
+}
 #else
 static inline void rwsem_set_owner(struct rw_semaphore *sem)
 {
@@ -17,4 +61,8 @@ static inline void rwsem_set_owner(struct rw_semaphore *sem)
 static inline void rwsem_clear_owner(struct rw_semaphore *sem)
 {
 }
+
+static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
+{
+}
 #endif
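For reference, the three owner states encoded above can be decoded as follows; a hedged illustration, not in-tree code:

static inline const char *rwsem_owner_state(struct task_struct *owner)
{
	if (!owner)
		return "free, or owner not yet set";		/* state 1 */
	if (rwsem_owner_is_reader(owner))
		return "reader-owned (currently or previously)";/* state 2 */
	return "writer-owned";					/* state 3 */
}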
index d400434af6b2f4c6683a3e54c772f6fa5d526f16..6d86ab6ec2c9792a206f8d7236e48b40f7e258b3 100644 (file)
@@ -253,7 +253,6 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
        if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
            (rnp == rnp_root ||
             ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
-           !mutex_is_locked(&rsp->exp_mutex) &&
            mutex_trylock(&rsp->exp_mutex))
                goto fastpath;
 
index 97ee9ac7e97c7743249295fa0c4d2da32740bbf7..af0ef74df23c657563ba567fff72c23936228880 100644 (file)
@@ -1937,7 +1937,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  * chain to provide order. Instead we do:
  *
  *   1) smp_store_release(X->on_cpu, 0)
- *   2) smp_cond_acquire(!X->on_cpu)
+ *   2) smp_cond_load_acquire(&X->on_cpu, !VAL)
  *
  * Example:
  *
@@ -1948,7 +1948,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  *   sched-out X
  *   smp_store_release(X->on_cpu, 0);
  *
- *                    smp_cond_acquire(!X->on_cpu);
+ *                    smp_cond_load_acquire(&X->on_cpu, !VAL);
  *                    X->state = WAKING
  *                    set_task_cpu(X,2)
  *
@@ -1974,7 +1974,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  * This means that any means of doing remote wakeups must order the CPU doing
  * the wakeup against the CPU the task is going to end up running on. This,
  * however, is already required for the regular Program-Order guarantee above,
- * since the waking CPU is the one issueing the ACQUIRE (smp_cond_acquire).
+ * since the waking CPU is the one issuing the ACQUIRE (smp_cond_load_acquire).
  *
  */
 
@@ -2047,7 +2047,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         * This ensures that tasks getting woken will be fully ordered against
         * their previous state and preserve Program Order.
         */
-       smp_cond_acquire(!p->on_cpu);
+       smp_cond_load_acquire(&p->on_cpu, !VAL);
 
        p->sched_contributes_to_load = !!task_contributes_to_load(p);
        p->state = TASK_WAKING;
index 898c0d2f18feb78e73746f0f3546b6f4de375dca..81283592942bf8f9954b65e5364fbcf03646da59 100644 (file)
@@ -1113,7 +1113,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
         * In particular, the load of prev->state in finish_task_switch() must
         * happen before this.
         *
-        * Pairs with the smp_cond_acquire() in try_to_wake_up().
+        * Pairs with the smp_cond_load_acquire() in try_to_wake_up().
         */
        smp_store_release(&prev->on_cpu, 0);
 #endif
index 74165443c240147cd701489298d2b552c8f46adf..36552beed39713526aa384a9cf9f1878630834fb 100644 (file)
@@ -107,7 +107,7 @@ void __init call_function_init(void)
  */
 static __always_inline void csd_lock_wait(struct call_single_data *csd)
 {
-       smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK));
+       smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
 }
 
 static __always_inline void csd_lock(struct call_single_data *csd)
index 53fa971d000d0b7fff1b63be23007057468bf120..6ab4842b00e8bb71db357e5933fc6c3b72669832 100644 (file)
@@ -108,7 +108,6 @@ void task_work_run(void)
                 * fail, but it can play with *work and other entries.
                 */
                raw_spin_unlock_wait(&task->pi_lock);
-               smp_mb();
 
                do {
                        next = work->next;
index 2886ebac656772c8f77fe26f01d95175e0c18142..53c2d5edc826d4dd322e1623c3aa04297b762a2b 100644 (file)
@@ -96,17 +96,41 @@ long long atomic64_##op##_return(long long a, atomic64_t *v)                \
 }                                                                      \
 EXPORT_SYMBOL(atomic64_##op##_return);
 
+#define ATOMIC64_FETCH_OP(op, c_op)                                    \
+long long atomic64_fetch_##op(long long a, atomic64_t *v)              \
+{                                                                      \
+       unsigned long flags;                                            \
+       raw_spinlock_t *lock = lock_addr(v);                            \
+       long long val;                                                  \
+                                                                       \
+       raw_spin_lock_irqsave(lock, flags);                             \
+       val = v->counter;                                               \
+       v->counter c_op a;                                              \
+       raw_spin_unlock_irqrestore(lock, flags);                        \
+       return val;                                                     \
+}                                                                      \
+EXPORT_SYMBOL(atomic64_fetch_##op);
+
 #define ATOMIC64_OPS(op, c_op)                                         \
        ATOMIC64_OP(op, c_op)                                           \
-       ATOMIC64_OP_RETURN(op, c_op)
+       ATOMIC64_OP_RETURN(op, c_op)                                    \
+       ATOMIC64_FETCH_OP(op, c_op)
 
 ATOMIC64_OPS(add, +=)
 ATOMIC64_OPS(sub, -=)
-ATOMIC64_OP(and, &=)
-ATOMIC64_OP(or, |=)
-ATOMIC64_OP(xor, ^=)
 
 #undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, c_op)                                         \
+       ATOMIC64_OP(op, c_op)                                           \
+       ATOMIC64_OP_RETURN(op, c_op)                                    \
+       ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &=)
+ATOMIC64_OPS(or, |=)
+ATOMIC64_OPS(xor, ^=)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
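The new fetch variants return the counter's value from before the operation, whereas the existing _return variants return the value after it. A small usage sketch against the generic implementation above (assuming <linux/atomic.h>):

	static atomic64_t v = ATOMIC64_INIT(5);

	void fetch_vs_return_demo(void)
	{
		long long old = atomic64_fetch_add(3, &v);  /* old == 5,  v == 8  */
		long long new = atomic64_add_return(3, &v); /* new == 11, v == 11 */
	}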
index 1234818143204a754ec6df5ae8073107c5171dc5..dbb369145dda4665773f7f964b95a021b6d9a06d 100644 (file)
@@ -53,11 +53,25 @@ do {                                                                \
        BUG_ON(atomic##bit##_read(&v) != r);                    \
 } while (0)
 
+#define TEST_FETCH(bit, op, c_op, val)                         \
+do {                                                           \
+       atomic##bit##_set(&v, v0);                              \
+       r = v0;                                                 \
+       r c_op val;                                             \
+       BUG_ON(atomic##bit##_##op(val, &v) != v0);              \
+       BUG_ON(atomic##bit##_read(&v) != r);                    \
+} while (0)
+
 #define RETURN_FAMILY_TEST(bit, op, c_op, val)                 \
 do {                                                           \
        FAMILY_TEST(TEST_RETURN, bit, op, c_op, val);           \
 } while (0)
 
+#define FETCH_FAMILY_TEST(bit, op, c_op, val)                  \
+do {                                                           \
+       FAMILY_TEST(TEST_FETCH, bit, op, c_op, val);            \
+} while (0)
+
 #define TEST_ARGS(bit, op, init, ret, expect, args...)         \
 do {                                                           \
        atomic##bit##_set(&v, init);                            \
@@ -114,6 +128,16 @@ static __init void test_atomic(void)
        RETURN_FAMILY_TEST(, sub_return, -=, onestwos);
        RETURN_FAMILY_TEST(, sub_return, -=, -one);
 
+       FETCH_FAMILY_TEST(, fetch_add, +=, onestwos);
+       FETCH_FAMILY_TEST(, fetch_add, +=, -one);
+       FETCH_FAMILY_TEST(, fetch_sub, -=, onestwos);
+       FETCH_FAMILY_TEST(, fetch_sub, -=, -one);
+
+       FETCH_FAMILY_TEST(, fetch_or,  |=, v1);
+       FETCH_FAMILY_TEST(, fetch_and, &=, v1);
+       FETCH_FAMILY_TEST(, fetch_andnot, &= ~, v1);
+       FETCH_FAMILY_TEST(, fetch_xor, ^=, v1);
+
        INC_RETURN_FAMILY_TEST(, v0);
        DEC_RETURN_FAMILY_TEST(, v0);
 
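Expanded by hand for one case, FETCH_FAMILY_TEST(, fetch_add, +=, onestwos) boils down to checks of the following shape (FAMILY_TEST additionally exercises the _acquire, _release and _relaxed variants):

	atomic_set(&v, v0);
	r = v0;
	r += onestwos;
	BUG_ON(atomic_fetch_add(onestwos, &v) != v0); /* old value returned  */
	BUG_ON(atomic_read(&v) != r);                 /* counter was updated */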
@@ -154,6 +178,16 @@ static __init void test_atomic64(void)
        RETURN_FAMILY_TEST(64, sub_return, -=, onestwos);
        RETURN_FAMILY_TEST(64, sub_return, -=, -one);
 
+       FETCH_FAMILY_TEST(64, fetch_add, +=, onestwos);
+       FETCH_FAMILY_TEST(64, fetch_add, +=, -one);
+       FETCH_FAMILY_TEST(64, fetch_sub, -=, onestwos);
+       FETCH_FAMILY_TEST(64, fetch_sub, -=, -one);
+
+       FETCH_FAMILY_TEST(64, fetch_or,  |=, v1);
+       FETCH_FAMILY_TEST(64, fetch_and, &=, v1);
+       FETCH_FAMILY_TEST(64, fetch_andnot, &= ~, v1);
+       FETCH_FAMILY_TEST(64, fetch_xor, ^=, v1);
+
        INIT(v0);
        atomic64_inc(&v);
        r += one;
index 9f530adad10d54a9277a9fbfdee4236055e3f99a..4cbda4bd89266e3d8b5e571aa2023dbf32941e9d 100644 (file)
@@ -83,6 +83,13 @@ void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
        spin_lock(lock);
        while (unlikely(nf_conntrack_locks_all)) {
                spin_unlock(lock);
+
+               /*
+                * Order the 'nf_conntrack_locks_all' load vs. the
+                * spin_unlock_wait() loads below, to ensure
+                * that 'nf_conntrack_locks_all_lock' is indeed held:
+                */
+               smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
                spin_unlock_wait(&nf_conntrack_locks_all_lock);
                spin_lock(lock);
        }
@@ -128,6 +135,14 @@ static void nf_conntrack_all_lock(void)
        spin_lock(&nf_conntrack_locks_all_lock);
        nf_conntrack_locks_all = true;
 
+       /*
+        * Order the above store of 'nf_conntrack_locks_all' against
+        * the spin_unlock_wait() loads below, such that if
+        * nf_conntrack_lock() observes 'nf_conntrack_locks_all'
+        * we must observe nf_conntrack_locks[] held:
+        */
+       smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
+
        for (i = 0; i < CONNTRACK_LOCKS; i++) {
                spin_unlock_wait(&nf_conntrack_locks[i]);
        }
@@ -135,7 +150,13 @@ static void nf_conntrack_all_lock(void)
 
 static void nf_conntrack_all_unlock(void)
 {
-       nf_conntrack_locks_all = false;
+       /*
+        * All prior stores must be complete before we clear
+        * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
+        * might observe the false value but not the entire
+        * critical section:
+        */
+       smp_store_release(&nf_conntrack_locks_all, false);
        spin_unlock(&nf_conntrack_locks_all_lock);
 }
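Taken together, the barriers added above close a store-buffering race between the global locker and the per-bucket lockers: each side stores a flag (or takes a lock) and then loads the other side's state, so each needs a barrier between its own store and load. The shape of the protocol, as a sketch rather than the kernel code:

	/*
	 *  nf_conntrack_all_lock()           nf_conntrack_lock()
	 *  -----------------------           -------------------
	 *  spin_lock(&all_lock);             spin_lock(local);
	 *  locks_all = true;                 if (locks_all) {
	 *  smp_mb();                                 spin_unlock(local);
	 *  for each bucket lock:                     smp_rmb();
	 *      spin_unlock_wait(bucket);             spin_unlock_wait(&all_lock);
	 *                                            spin_lock(local);  (retry)
	 *                                    }
	 */

The smp_store_release() in nf_conntrack_all_unlock() then guarantees, per the comment in the hunk, that a locker observing locks_all == false also observes every store made inside the all-locked critical section.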