/* lib/rwsem.c: R/W semaphores: contention handling functions
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from arch/i386/kernel/semaphore.c
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 */
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/export.h>
  12
  13 /*
  14  * Initialize an rwsem:
  15  */
  16 void __init_rwsem(struct rw_semaphore *sem, const char *name,
  17                   struct lock_class_key *key)
  18 {
  19 #ifdef CONFIG_DEBUG_LOCK_ALLOC
  20         /*
  21          * Make sure we are not reinitializing a held semaphore:
  22          */
  23         debug_check_no_locks_freed((void *)sem, sizeof(*sem));
  24         lockdep_init_map(&sem->dep_map, name, key, 0);
  25 #endif
  26         sem->count = RWSEM_UNLOCKED_VALUE;
  27         raw_spin_lock_init(&sem->wait_lock);
  28         INIT_LIST_HEAD(&sem->wait_list);
  29 }
  30
  31 EXPORT_SYMBOL(__init_rwsem);
  32
  33 struct rwsem_waiter {
  34         struct list_head list;
  35         struct task_struct *task;
  36         unsigned int flags;
  37 #define RWSEM_WAITING_FOR_READ  0x00000001
  38 #define RWSEM_WAITING_FOR_WRITE 0x00000002
  39 };
  40
  41 /* Wake types for __rwsem_do_wake().  Note that RWSEM_WAKE_NO_ACTIVE and
  42  * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
  43  * since the rwsem value was observed.
  44  */
  45 #define RWSEM_WAKE_ANY        0 /* Wake whatever's at head of wait list */
  46 #define RWSEM_WAKE_NO_ACTIVE  1 /* rwsem was observed with no active thread */
  47 #define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */
  48
  49 /*
  50  * handle the lock release when processes blocked on it that can now run
  51  * - if we come here from up_xxxx(), then:
  52  *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
  53  *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
  54  * - there must be someone on the queue
  55  * - the spinlock must be held by the caller
  56  * - woken process blocks are discarded from the list after having task zeroed
  57  * - writers are only woken if downgrading is false
  58  */
  59 static struct rw_semaphore *
  60 __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
  61 {
  62         struct rwsem_waiter *waiter;
  63         struct task_struct *tsk;
  64         struct list_head *next;
  65         signed long woken, loop, adjustment;
  66
  67         waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
  68         if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
  69                 goto readers_only;
  70
  71         if (wake_type == RWSEM_WAKE_READ_OWNED)
  72                 /* Another active reader was observed, so wakeup is not
  73                  * likely to succeed. Save the atomic op.
  74                  */
  75                 goto out;
  76
  77         /* Wake up the writing waiter and let the task grab the sem: */
  78         wake_up_process(waiter->task);
  79         goto out;
  80
  81  readers_only:
  82         /* If we come here from up_xxxx(), another thread might have reached
  83          * rwsem_down_failed_common() before we acquired the spinlock and
  84          * woken up a waiter, making it now active.  We prefer to check for
  85          * this first in order to not spend too much time with the spinlock
  86          * held if we're not going to be able to wake up readers in the end.
  87          *
  88          * Note that we do not need to update the rwsem count: any writer
  89          * trying to acquire rwsem will run rwsem_down_write_failed() due
  90          * to the waiting threads and block trying to acquire the spinlock.
  91          *
  92          * We use a dummy atomic update in order to acquire the cache line
  93          * exclusively since we expect to succeed and run the final rwsem
  94          * count adjustment pretty soon.
  95          */
  96         if (wake_type == RWSEM_WAKE_ANY &&
  97             rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
  98                 /* Someone grabbed the sem for write already */
  99                 goto out;
 100
 101         /* Grant an infinite number of read locks to the readers at the front
 102          * of the queue.  Note we increment the 'active part' of the count by
 103          * the number of readers before waking any processes up.
 104          */
 105         woken = 0;
 106         do {
 107                 woken++;
 108
 109                 if (waiter->list.next == &sem->wait_list)
 110                         break;
 111
 112                 waiter = list_entry(waiter->list.next,
 113                                         struct rwsem_waiter, list);
 114
 115         } while (waiter->flags & RWSEM_WAITING_FOR_READ);
 116
 117         adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
 118         if (waiter->flags & RWSEM_WAITING_FOR_READ)
 119                 /* hit end of list above */
 120                 adjustment -= RWSEM_WAITING_BIAS;
 121
 122         rwsem_atomic_add(adjustment, sem);
 123
 124         next = sem->wait_list.next;
 125         for (loop = woken; loop > 0; loop--) {
 126                 waiter = list_entry(next, struct rwsem_waiter, list);
 127                 next = waiter->list.next;
 128                 tsk = waiter->task;
 129                 smp_mb();
 130                 waiter->task = NULL;
 131                 wake_up_process(tsk);
 132                 put_task_struct(tsk);
 133         }
 134
 135         sem->wait_list.next = next;
 136         next->prev = &sem->wait_list;
 137
 138  out:
 139         return sem;
 140 }
 141
 142 /* Try to get write sem, caller holds sem->wait_lock: */
 143 static int try_get_writer_sem(struct rw_semaphore *sem,
 144                                         struct rwsem_waiter *waiter)
 145 {
 146         struct rwsem_waiter *fwaiter;
 147         long oldcount, adjustment;
 148
 149         /* only steal when first waiter is writing */
 150         fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
 151         if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE))
 152                 return 0;
 153
 154         adjustment = RWSEM_ACTIVE_WRITE_BIAS;
 155         /* Only one waiter in the queue: */
 156         if (fwaiter == waiter && waiter->list.next == &sem->wait_list)
 157                 adjustment -= RWSEM_WAITING_BIAS;
 158
 159 try_again_write:
 160         oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
 161         if (!(oldcount & RWSEM_ACTIVE_MASK)) {
 162                 /* No active lock: */
 163                 struct task_struct *tsk = waiter->task;
 164
 165                 list_del(&waiter->list);
 166                 smp_mb();
 167                 put_task_struct(tsk);
 168                 tsk->state = TASK_RUNNING;
 169                 return 1;
 170         }
 171         /* some one grabbed the sem already */
 172         if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
 173                 return 0;
 174         goto try_again_write;
 175 }
 176
 177 /*
 178  * wait for a lock to be granted
 179  */
 180 static struct rw_semaphore __sched *
 181 rwsem_down_failed_common(struct rw_semaphore *sem,
 182                          unsigned int flags, signed long adjustment)
 183 {
 184         struct rwsem_waiter waiter;
 185         struct task_struct *tsk = current;
 186         signed long count;
 187
 188         set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 189
 190         /* set up my own style of waitqueue */
 191         raw_spin_lock_irq(&sem->wait_lock);
 192         waiter.task = tsk;
 193         waiter.flags = flags;
 194         get_task_struct(tsk);
 195
 196         if (list_empty(&sem->wait_list))
 197                 adjustment += RWSEM_WAITING_BIAS;
 198         list_add_tail(&waiter.list, &sem->wait_list);
 199
 200         /* we're now waiting on the lock, but no longer actively locking */
 201         count = rwsem_atomic_update(adjustment, sem);
 202
 203         /* If there are no active locks, wake the front queued process(es) up.
 204          *
 205          * Alternatively, if we're called from a failed down_write(), there
 206          * were already threads queued before us and there are no active
 207          * writers, the lock must be read owned; so we try to wake any read
 208          * locks that were queued ahead of us. */
 209         if (count == RWSEM_WAITING_BIAS)
 210                 sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
 211         else if (count > RWSEM_WAITING_BIAS &&
 212                  adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
 213                 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
 214
 215         raw_spin_unlock_irq(&sem->wait_lock);
 216
 217         /* wait to be given the lock */
 218         for (;;) {
 219                 if (!waiter.task)
 220                         break;
 221
 222                 raw_spin_lock_irq(&sem->wait_lock);
 223                 /* Try to get the writer sem, may steal from the head writer: */
 224                 if (flags == RWSEM_WAITING_FOR_WRITE)
 225                         if (try_get_writer_sem(sem, &waiter)) {
 226                                 raw_spin_unlock_irq(&sem->wait_lock);
 227                                 return sem;
 228                         }
 229                 raw_spin_unlock_irq(&sem->wait_lock);
 230                 schedule();
 231                 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 232         }
 233
 234         tsk->state = TASK_RUNNING;
 235
 236         return sem;
 237 }
 238
 239 /*
 240  * wait for the read lock to be granted
 241  */
 242 struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 243 {
 244         return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
 245                                         -RWSEM_ACTIVE_READ_BIAS);
 246 }
 247
 248 /*
 249  * wait for the write lock to be granted
 250  */
 251 struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 252 {
 253         return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
 254                                         -RWSEM_ACTIVE_WRITE_BIAS);
 255 }
 256
 257 /*
 258  * handle waking up a waiter on the semaphore
 259  * - up_read/up_write has decremented the active part of count if we come here
 260  */
 261 struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
 262 {
 263         unsigned long flags;
 264
 265         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 266
 267         /* do nothing if list empty */
 268         if (!list_empty(&sem->wait_list))
 269                 sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
 270
 271         raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 272
 273         return sem;
 274 }
 275
 276 /*
 277  * downgrade a write lock into a read lock
 278  * - caller incremented waiting part of count and discovered it still negative
 279  * - just wake up any readers at the front of the queue
 280  */
 281 struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 282 {
 283         unsigned long flags;
 284
 285         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 286
 287         /* do nothing if list empty */
 288         if (!list_empty(&sem->wait_list))
 289                 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
 290
 291         raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 292
 293         return sem;
 294 }
 295
 296 EXPORT_SYMBOL(rwsem_down_read_failed);
 297 EXPORT_SYMBOL(rwsem_down_write_failed);
 298 EXPORT_SYMBOL(rwsem_wake);
 299 EXPORT_SYMBOL(rwsem_downgrade_wake);