#ifndef _ASM_X86_SPINLOCK_H
#define _ASM_X86_SPINLOCK_H

#include <linux/atomic.h>
#include <asm/page.h>
#include <asm/processor.h>
#include <linux/compiler.h>
#include <asm/paravirt.h>
/*
 * Your basic SMP spinlocks, allowing only a single CPU anywhere
 *
 * Simple spin lock operations.  There are two variants, one clears IRQs
 * on the local processor, one does not.
 *
 * These are fair FIFO ticket locks, which are currently limited to 2^16
 * CPUs.
 *
 * (the type definitions are in asm/spinlock_types.h)
 */

#ifdef CONFIG_X86_32
# define LOCK_PTR_REG "a"
# define REG_PTR_MODE "k"
#else
# define LOCK_PTR_REG "D"
# define REG_PTR_MODE "q"
#endif

#if defined(CONFIG_X86_32) && \
        (defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE))
/*
 * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock
 * (PPro errata 66, 92)
 */
static __always_inline void __ticket_unlock_release(struct arch_spinlock *lock)
{
        if (sizeof(lock->tickets.head) == sizeof(u8))
                asm volatile(LOCK_PREFIX "incb %0"
                             : "+m" (lock->tickets.head) : : "memory");
        else
                asm volatile(LOCK_PREFIX "incw %0"
                             : "+m" (lock->tickets.head) : : "memory");
}
#else
static __always_inline void __ticket_unlock_release(struct arch_spinlock *lock)
{
        lock->tickets.head++;
}
#endif

/*
 * Ticket locks are conceptually two parts, one indicating the current head of
 * the queue, and the other indicating the current tail. The lock is acquired
 * by atomically noting the tail and incrementing it by one (thus adding
 * ourselves to the queue and noting our position), then waiting until the head
 * becomes equal to the initial value of the tail.
 *
 * We use an xadd covering *both* parts of the lock, to increment the tail and
 * also load the position of the head, which takes care of memory ordering
 * issues and should be optimal for the uncontended case. Note the tail must be
 * in the high part, because a wide xadd increment of the low part would carry
 * up and contaminate the high part.
 *
 * With fewer than 2^8 possible CPUs, we can use x86's partial registers to
 * save some instructions and make the code more elegant. There really isn't
 * much between them in performance though, especially as locks are out of line.
 */
static __always_inline struct __raw_tickets __ticket_spin_claim(struct arch_spinlock *lock)
{
        register struct __raw_tickets tickets = { .tail = 1 };

        xadd(&lock->tickets, tickets);

        return tickets;
}

static __always_inline void __ticket_spin_lock(struct arch_spinlock *lock)
{
        register struct __raw_tickets inc;

        inc = __ticket_spin_claim(lock);

        for (;;) {
                if (inc.head == inc.tail)
                        goto out;
                cpu_relax();
                inc.head = ACCESS_ONCE(lock->tickets.head);
        }
out:    barrier();              /* make sure nothing creeps before the lock is taken */
}
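
/*
 * Illustration only (not part of this header): the same claim-and-spin
 * sequence can be sketched in portable C11, with atomic_fetch_add() standing
 * in for the xadd() above. The toy_* names and the two separate counters are
 * invented for clarity; the kernel packs head and tail into one word so that
 * a single xadd both takes a ticket and observes the current head.
 *
 *      #include <stdatomic.h>
 *
 *      struct toy_ticket_lock {
 *              atomic_uint next;       // tail: next ticket to hand out
 *              atomic_uint owner;      // head: ticket currently being served
 *      };                              // zero-initialize both fields
 *
 *      static void toy_lock(struct toy_ticket_lock *l)
 *      {
 *              // the "claim" step: atomically take a ticket
 *              unsigned int me = atomic_fetch_add(&l->next, 1);
 *
 *              // spin until our ticket comes up
 *              while (atomic_load(&l->owner) != me)
 *                      ;       // a cpu_relax() equivalent belongs here
 *      }
 *
 *      static void toy_unlock(struct toy_ticket_lock *l)
 *      {
 *              atomic_fetch_add(&l->owner, 1); // serve the next waiter
 *      }
 */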

static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
{
        arch_spinlock_t old, new;

        old.tickets = ACCESS_ONCE(lock->tickets);
        if (old.tickets.head != old.tickets.tail)
                return 0;

        new.head_tail = old.head_tail + (1 << TICKET_SHIFT);

        /* cmpxchg is a full barrier, so nothing can move before it */
        return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
}
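
/*
 * Worked example of the head_tail arithmetic above, assuming the
 * small-NR_CPUS layout where TICKET_SHIFT is 8 (head in the low byte, tail
 * in the high byte): with head == tail == 3 the packed value is 0x0303, and
 * adding (1 << 8) gives 0x0403, i.e. the tail advances to 4 while the head
 * stays at 3, which is exactly what a successful acquisition must publish.
 */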

static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
        barrier();              /* prevent reordering out of locked region */
        __ticket_unlock_release(lock);
        barrier();              /* prevent reordering into locked region */
}

static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
{
        struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);

        return !!(tmp.tail ^ tmp.head);
}

static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
{
        struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);

        return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
}

#ifndef CONFIG_PARAVIRT_SPINLOCKS

static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
        return __ticket_spin_is_locked(lock);
}

static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
        return __ticket_spin_is_contended(lock);
}
#define arch_spin_is_contended  arch_spin_is_contended

static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
        __ticket_spin_lock(lock);
}

static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
{
        return __ticket_spin_trylock(lock);
}

static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
{
        __ticket_spin_unlock(lock);
}

static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
                                                  unsigned long flags)
{
        arch_spin_lock(lock);
}

#endif  /* CONFIG_PARAVIRT_SPINLOCKS */

static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
        while (arch_spin_is_locked(lock))
                cpu_relax();
}

/*
 * Read-write spinlocks, allowing multiple readers
 * but only one writer.
 *
 * NOTE! it is quite common to have readers in interrupts
 * but no interrupt writers. For those circumstances we
 * can "mix" irq-safe locks - any writer needs to get an
 * irq-safe write-lock, but readers can get non-irqsafe
 * read-locks.
 *
 * On x86, we implement read-write locks as a 32-bit counter
 * with the high bit (sign) being the "contended" bit.
 */
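
/*
 * Illustration only: the biased-counter idea sketched with C11 atomics in
 * user space. TOY_RW_BIAS and the toy_* names are invented, and the real
 * RW_LOCK_BIAS value and counter width depend on the kernel configuration,
 * but the arithmetic mirrors arch_read_trylock()/arch_write_trylock() below:
 * readers subtract 1, a writer subtracts the whole bias, and either side
 * undoes its step on failure. The counter starts out equal to the bias.
 *
 *      #include <stdatomic.h>
 *      #include <stdbool.h>
 *
 *      #define TOY_RW_BIAS 0x01000000          // arbitrary illustrative bias
 *
 *      static bool toy_read_trylock(atomic_int *rw)
 *      {
 *              if (atomic_fetch_sub(rw, 1) - 1 >= 0)
 *                      return true;            // still non-negative: no writer
 *              atomic_fetch_add(rw, 1);        // undo, a writer holds the bias
 *              return false;
 *      }
 *
 *      static bool toy_write_trylock(atomic_int *rw)
 *      {
 *              if (atomic_fetch_sub(rw, TOY_RW_BIAS) == TOY_RW_BIAS)
 *                      return true;            // counter was exactly the bias: free
 *              atomic_fetch_add(rw, TOY_RW_BIAS);
 *              return false;
 *      }
 */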

/**
 * read_can_lock - would read_trylock() succeed?
 * @lock: the rwlock in question.
 */
static inline int arch_read_can_lock(arch_rwlock_t *lock)
{
        return lock->lock > 0;
}

/**
 * write_can_lock - would write_trylock() succeed?
 * @lock: the rwlock in question.
 */
static inline int arch_write_can_lock(arch_rwlock_t *lock)
{
        return lock->write == WRITE_LOCK_CMP;
}

static inline void arch_read_lock(arch_rwlock_t *rw)
{
        asm volatile(LOCK_PREFIX READ_LOCK_SIZE(dec) " (%0)\n\t"
                     "jns 1f\n"
                     "call __read_lock_failed\n\t"
                     "1:\n"
                     ::LOCK_PTR_REG (rw) : "memory");
}

static inline void arch_write_lock(arch_rwlock_t *rw)
{
        asm volatile(LOCK_PREFIX WRITE_LOCK_SUB(%1) "(%0)\n\t"
                     "jz 1f\n"
                     "call __write_lock_failed\n\t"
                     "1:\n"
                     ::LOCK_PTR_REG (&rw->write), "i" (RW_LOCK_BIAS)
                     : "memory");
}

static inline int arch_read_trylock(arch_rwlock_t *lock)
{
        READ_LOCK_ATOMIC(t) *count = (READ_LOCK_ATOMIC(t) *)lock;

        if (READ_LOCK_ATOMIC(dec_return)(count) >= 0)
                return 1;
        READ_LOCK_ATOMIC(inc)(count);
        return 0;
}

static inline int arch_write_trylock(arch_rwlock_t *lock)
{
        atomic_t *count = (atomic_t *)&lock->write;

        if (atomic_sub_and_test(WRITE_LOCK_CMP, count))
                return 1;
        atomic_add(WRITE_LOCK_CMP, count);
        return 0;
}

static inline void arch_read_unlock(arch_rwlock_t *rw)
{
        asm volatile(LOCK_PREFIX READ_LOCK_SIZE(inc) " %0"
                     :"+m" (rw->lock) : : "memory");
}

static inline void arch_write_unlock(arch_rwlock_t *rw)
{
        asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0"
                     : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory");
}

#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)

#undef READ_LOCK_SIZE
#undef READ_LOCK_ATOMIC
#undef WRITE_LOCK_ADD
#undef WRITE_LOCK_SUB
#undef WRITE_LOCK_CMP

#define arch_spin_relax(lock)   cpu_relax()
#define arch_read_relax(lock)   cpu_relax()
#define arch_write_relax(lock)  cpu_relax()

/* The {read|write|spin}_lock() on x86 are full memory barriers. */
static inline void smp_mb__after_lock(void) { }
#define ARCH_HAS_SMP_MB_AFTER_LOCK

#endif /* _ASM_X86_SPINLOCK_H */