]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - kernel/sched/wait.c
regmap: rbtree: When adding a reg do a bsearch for target node
[karo-tx-linux.git] / kernel / sched / wait.c
1 /*
2  * Generic waiting primitives.
3  *
4  * (C) 2004 Nadia Yvette Chambers, Oracle
5  */
6 #include <linux/init.h>
7 #include <linux/export.h>
8 #include <linux/sched.h>
9 #include <linux/mm.h>
10 #include <linux/wait.h>
11 #include <linux/hash.h>
12 #include <linux/kthread.h>
13
14 void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
15 {
16         spin_lock_init(&q->lock);
17         lockdep_set_class_and_name(&q->lock, key, name);
18         INIT_LIST_HEAD(&q->task_list);
19 }
20
21 EXPORT_SYMBOL(__init_waitqueue_head);
22
23 void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
24 {
25         unsigned long flags;
26
27         wait->flags &= ~WQ_FLAG_EXCLUSIVE;
28         spin_lock_irqsave(&q->lock, flags);
29         __add_wait_queue(q, wait);
30         spin_unlock_irqrestore(&q->lock, flags);
31 }
32 EXPORT_SYMBOL(add_wait_queue);
33
34 void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
35 {
36         unsigned long flags;
37
38         wait->flags |= WQ_FLAG_EXCLUSIVE;
39         spin_lock_irqsave(&q->lock, flags);
40         __add_wait_queue_tail(q, wait);
41         spin_unlock_irqrestore(&q->lock, flags);
42 }
43 EXPORT_SYMBOL(add_wait_queue_exclusive);
44
45 void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
46 {
47         unsigned long flags;
48
49         spin_lock_irqsave(&q->lock, flags);
50         __remove_wait_queue(q, wait);
51         spin_unlock_irqrestore(&q->lock, flags);
52 }
53 EXPORT_SYMBOL(remove_wait_queue);
54
55
56 /*
57  * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
58  * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
59  * number) then we wake all the non-exclusive tasks and one exclusive task.
60  *
61  * There are circumstances in which we can try to wake a task which has already
62  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
63  * zero in this (rare) case, and we handle it by continuing to scan the queue.
64  */
65 static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
66                         int nr_exclusive, int wake_flags, void *key)
67 {
68         wait_queue_t *curr, *next;
69
70         list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
71                 unsigned flags = curr->flags;
72
73                 if (curr->func(curr, mode, wake_flags, key) &&
74                                 (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
75                         break;
76         }
77 }
78
79 /**
80  * __wake_up - wake up threads blocked on a waitqueue.
81  * @q: the waitqueue
82  * @mode: which threads
83  * @nr_exclusive: how many wake-one or wake-many threads to wake up
84  * @key: is directly passed to the wakeup function
85  *
86  * It may be assumed that this function implies a write memory barrier before
87  * changing the task state if and only if any tasks are woken up.
88  */
89 void __wake_up(wait_queue_head_t *q, unsigned int mode,
90                         int nr_exclusive, void *key)
91 {
92         unsigned long flags;
93
94         spin_lock_irqsave(&q->lock, flags);
95         __wake_up_common(q, mode, nr_exclusive, 0, key);
96         spin_unlock_irqrestore(&q->lock, flags);
97 }
98 EXPORT_SYMBOL(__wake_up);
99
100 /*
101  * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
102  */
103 void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
104 {
105         __wake_up_common(q, mode, nr, 0, NULL);
106 }
107 EXPORT_SYMBOL_GPL(__wake_up_locked);
108
109 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, int nr,
110                           void *key)
111 {
112         __wake_up_common(q, mode, nr, 0, key);
113 }
114 EXPORT_SYMBOL_GPL(__wake_up_locked_key);
115
116 /**
117  * __wake_up_sync_key - wake up threads blocked on a waitqueue.
118  * @q: the waitqueue
119  * @mode: which threads
120  * @nr_exclusive: how many wake-one or wake-many threads to wake up
121  * @key: opaque value to be passed to wakeup targets
122  *
123  * The sync wakeup differs that the waker knows that it will schedule
124  * away soon, so while the target thread will be woken up, it will not
125  * be migrated to another CPU - ie. the two threads are 'synchronized'
126  * with each other. This can prevent needless bouncing between CPUs.
127  *
128  * On UP it can prevent extra preemption.
129  *
130  * It may be assumed that this function implies a write memory barrier before
131  * changing the task state if and only if any tasks are woken up.
132  */
133 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
134                         int nr_exclusive, void *key)
135 {
136         unsigned long flags;
137         int wake_flags = 1; /* XXX WF_SYNC */
138
139         if (unlikely(!q))
140                 return;
141
142         if (unlikely(nr_exclusive != 1))
143                 wake_flags = 0;
144
145         spin_lock_irqsave(&q->lock, flags);
146         __wake_up_common(q, mode, nr_exclusive, wake_flags, key);
147         spin_unlock_irqrestore(&q->lock, flags);
148 }
149 EXPORT_SYMBOL_GPL(__wake_up_sync_key);
150
151 /*
152  * __wake_up_sync - see __wake_up_sync_key()
153  */
154 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
155 {
156         __wake_up_sync_key(q, mode, nr_exclusive, NULL);
157 }
158 EXPORT_SYMBOL_GPL(__wake_up_sync);      /* For internal use only */
159
160 /*
161  * Note: we use "set_current_state()" _after_ the wait-queue add,
162  * because we need a memory barrier there on SMP, so that any
163  * wake-function that tests for the wait-queue being active
164  * will be guaranteed to see waitqueue addition _or_ subsequent
165  * tests in this thread will see the wakeup having taken place.
166  *
167  * The spin_unlock() itself is semi-permeable and only protects
168  * one way (it only protects stuff inside the critical region and
169  * stops them from bleeding out - it would still allow subsequent
170  * loads to move into the critical region).
171  */
172 void
173 prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
174 {
175         unsigned long flags;
176
177         wait->flags &= ~WQ_FLAG_EXCLUSIVE;
178         spin_lock_irqsave(&q->lock, flags);
179         if (list_empty(&wait->task_list))
180                 __add_wait_queue(q, wait);
181         set_current_state(state);
182         spin_unlock_irqrestore(&q->lock, flags);
183 }
184 EXPORT_SYMBOL(prepare_to_wait);
185
186 void
187 prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
188 {
189         unsigned long flags;
190
191         wait->flags |= WQ_FLAG_EXCLUSIVE;
192         spin_lock_irqsave(&q->lock, flags);
193         if (list_empty(&wait->task_list))
194                 __add_wait_queue_tail(q, wait);
195         set_current_state(state);
196         spin_unlock_irqrestore(&q->lock, flags);
197 }
198 EXPORT_SYMBOL(prepare_to_wait_exclusive);
199
200 long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
201 {
202         unsigned long flags;
203
204         if (signal_pending_state(state, current))
205                 return -ERESTARTSYS;
206
207         wait->private = current;
208         wait->func = autoremove_wake_function;
209
210         spin_lock_irqsave(&q->lock, flags);
211         if (list_empty(&wait->task_list)) {
212                 if (wait->flags & WQ_FLAG_EXCLUSIVE)
213                         __add_wait_queue_tail(q, wait);
214                 else
215                         __add_wait_queue(q, wait);
216         }
217         set_current_state(state);
218         spin_unlock_irqrestore(&q->lock, flags);
219
220         return 0;
221 }
222 EXPORT_SYMBOL(prepare_to_wait_event);
223
224 /**
225  * finish_wait - clean up after waiting in a queue
226  * @q: waitqueue waited on
227  * @wait: wait descriptor
228  *
229  * Sets current thread back to running state and removes
230  * the wait descriptor from the given waitqueue if still
231  * queued.
232  */
233 void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
234 {
235         unsigned long flags;
236
237         __set_current_state(TASK_RUNNING);
238         /*
239          * We can check for list emptiness outside the lock
240          * IFF:
241          *  - we use the "careful" check that verifies both
242          *    the next and prev pointers, so that there cannot
243          *    be any half-pending updates in progress on other
244          *    CPU's that we haven't seen yet (and that might
245          *    still change the stack area.
246          * and
247          *  - all other users take the lock (ie we can only
248          *    have _one_ other CPU that looks at or modifies
249          *    the list).
250          */
251         if (!list_empty_careful(&wait->task_list)) {
252                 spin_lock_irqsave(&q->lock, flags);
253                 list_del_init(&wait->task_list);
254                 spin_unlock_irqrestore(&q->lock, flags);
255         }
256 }
257 EXPORT_SYMBOL(finish_wait);
258
259 /**
260  * abort_exclusive_wait - abort exclusive waiting in a queue
261  * @q: waitqueue waited on
262  * @wait: wait descriptor
263  * @mode: runstate of the waiter to be woken
264  * @key: key to identify a wait bit queue or %NULL
265  *
266  * Sets current thread back to running state and removes
267  * the wait descriptor from the given waitqueue if still
268  * queued.
269  *
270  * Wakes up the next waiter if the caller is concurrently
271  * woken up through the queue.
272  *
273  * This prevents waiter starvation where an exclusive waiter
274  * aborts and is woken up concurrently and no one wakes up
275  * the next waiter.
276  */
277 void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
278                         unsigned int mode, void *key)
279 {
280         unsigned long flags;
281
282         __set_current_state(TASK_RUNNING);
283         spin_lock_irqsave(&q->lock, flags);
284         if (!list_empty(&wait->task_list))
285                 list_del_init(&wait->task_list);
286         else if (waitqueue_active(q))
287                 __wake_up_locked_key(q, mode, 1, key);
288         spin_unlock_irqrestore(&q->lock, flags);
289 }
290 EXPORT_SYMBOL(abort_exclusive_wait);
291
292 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
293 {
294         int ret = default_wake_function(wait, mode, sync, key);
295
296         if (ret)
297                 list_del_init(&wait->task_list);
298         return ret;
299 }
300 EXPORT_SYMBOL(autoremove_wake_function);
301
302 static inline bool is_kthread_should_stop(void)
303 {
304         return (current->flags & PF_KTHREAD) && kthread_should_stop();
305 }
306
307 /*
308  * DEFINE_WAIT_FUNC(wait, woken_wake_func);
309  *
310  * add_wait_queue(&wq, &wait);
311  * for (;;) {
312  *     if (condition)
313  *         break;
314  *
315  *     p->state = mode;                         condition = true;
316  *     smp_mb(); // A                           smp_wmb(); // C
317  *     if (!wait->flags & WQ_FLAG_WOKEN)        wait->flags |= WQ_FLAG_WOKEN;
318  *         schedule()                           try_to_wake_up();
319  *     p->state = TASK_RUNNING;             ~~~~~~~~~~~~~~~~~~
320  *     wait->flags &= ~WQ_FLAG_WOKEN;           condition = true;
321  *     smp_mb() // B                            smp_wmb(); // C
322  *                                              wait->flags |= WQ_FLAG_WOKEN;
323  * }
324  * remove_wait_queue(&wq, &wait);
325  *
326  */
327 long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
328 {
329         set_current_state(mode); /* A */
330         /*
331          * The above implies an smp_mb(), which matches with the smp_wmb() from
332          * woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must
333          * also observe all state before the wakeup.
334          */
335         if (!(wait->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
336                 timeout = schedule_timeout(timeout);
337         __set_current_state(TASK_RUNNING);
338
339         /*
340          * The below implies an smp_mb(), it too pairs with the smp_wmb() from
341          * woken_wake_function() such that we must either observe the wait
342          * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
343          * an event.
344          */
345         smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
346
347         return timeout;
348 }
349 EXPORT_SYMBOL(wait_woken);
350
351 int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
352 {
353         /*
354          * Although this function is called under waitqueue lock, LOCK
355          * doesn't imply write barrier and the users expects write
356          * barrier semantics on wakeup functions.  The following
357          * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
358          * and is paired with smp_store_mb() in wait_woken().
359          */
360         smp_wmb(); /* C */
361         wait->flags |= WQ_FLAG_WOKEN;
362
363         return default_wake_function(wait, mode, sync, key);
364 }
365 EXPORT_SYMBOL(woken_wake_function);
366
367 int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
368 {
369         struct wait_bit_key *key = arg;
370         struct wait_bit_queue *wait_bit
371                 = container_of(wait, struct wait_bit_queue, wait);
372
373         if (wait_bit->key.flags != key->flags ||
374                         wait_bit->key.bit_nr != key->bit_nr ||
375                         test_bit(key->bit_nr, key->flags))
376                 return 0;
377         else
378                 return autoremove_wake_function(wait, mode, sync, key);
379 }
380 EXPORT_SYMBOL(wake_bit_function);
381
382 /*
383  * To allow interruptible waiting and asynchronous (i.e. nonblocking)
384  * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
385  * permitted return codes. Nonzero return codes halt waiting and return.
386  */
387 int __sched
388 __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
389               wait_bit_action_f *action, unsigned mode)
390 {
391         int ret = 0;
392
393         do {
394                 prepare_to_wait(wq, &q->wait, mode);
395                 if (test_bit(q->key.bit_nr, q->key.flags))
396                         ret = (*action)(&q->key);
397         } while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
398         finish_wait(wq, &q->wait);
399         return ret;
400 }
401 EXPORT_SYMBOL(__wait_on_bit);
402
403 int __sched out_of_line_wait_on_bit(void *word, int bit,
404                                     wait_bit_action_f *action, unsigned mode)
405 {
406         wait_queue_head_t *wq = bit_waitqueue(word, bit);
407         DEFINE_WAIT_BIT(wait, word, bit);
408
409         return __wait_on_bit(wq, &wait, action, mode);
410 }
411 EXPORT_SYMBOL(out_of_line_wait_on_bit);
412
413 int __sched out_of_line_wait_on_bit_timeout(
414         void *word, int bit, wait_bit_action_f *action,
415         unsigned mode, unsigned long timeout)
416 {
417         wait_queue_head_t *wq = bit_waitqueue(word, bit);
418         DEFINE_WAIT_BIT(wait, word, bit);
419
420         wait.key.timeout = jiffies + timeout;
421         return __wait_on_bit(wq, &wait, action, mode);
422 }
423 EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
424
425 int __sched
426 __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
427                         wait_bit_action_f *action, unsigned mode)
428 {
429         do {
430                 int ret;
431
432                 prepare_to_wait_exclusive(wq, &q->wait, mode);
433                 if (!test_bit(q->key.bit_nr, q->key.flags))
434                         continue;
435                 ret = action(&q->key);
436                 if (!ret)
437                         continue;
438                 abort_exclusive_wait(wq, &q->wait, mode, &q->key);
439                 return ret;
440         } while (test_and_set_bit(q->key.bit_nr, q->key.flags));
441         finish_wait(wq, &q->wait);
442         return 0;
443 }
444 EXPORT_SYMBOL(__wait_on_bit_lock);
445
446 int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
447                                          wait_bit_action_f *action, unsigned mode)
448 {
449         wait_queue_head_t *wq = bit_waitqueue(word, bit);
450         DEFINE_WAIT_BIT(wait, word, bit);
451
452         return __wait_on_bit_lock(wq, &wait, action, mode);
453 }
454 EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
455
456 void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
457 {
458         struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
459         if (waitqueue_active(wq))
460                 __wake_up(wq, TASK_NORMAL, 1, &key);
461 }
462 EXPORT_SYMBOL(__wake_up_bit);
463
464 /**
465  * wake_up_bit - wake up a waiter on a bit
466  * @word: the word being waited on, a kernel virtual address
467  * @bit: the bit of the word being waited on
468  *
469  * There is a standard hashed waitqueue table for generic use. This
470  * is the part of the hashtable's accessor API that wakes up waiters
471  * on a bit. For instance, if one were to have waiters on a bitflag,
472  * one would call wake_up_bit() after clearing the bit.
473  *
474  * In order for this to function properly, as it uses waitqueue_active()
475  * internally, some kind of memory barrier must be done prior to calling
476  * this. Typically, this will be smp_mb__after_atomic(), but in some
477  * cases where bitflags are manipulated non-atomically under a lock, one
478  * may need to use a less regular barrier, such fs/inode.c's smp_mb(),
479  * because spin_unlock() does not guarantee a memory barrier.
480  */
481 void wake_up_bit(void *word, int bit)
482 {
483         __wake_up_bit(bit_waitqueue(word, bit), word, bit);
484 }
485 EXPORT_SYMBOL(wake_up_bit);
486
487 wait_queue_head_t *bit_waitqueue(void *word, int bit)
488 {
489         const int shift = BITS_PER_LONG == 32 ? 5 : 6;
490         const struct zone *zone = page_zone(virt_to_page(word));
491         unsigned long val = (unsigned long)word << shift | bit;
492
493         return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
494 }
495 EXPORT_SYMBOL(bit_waitqueue);
496
497 /*
498  * Manipulate the atomic_t address to produce a better bit waitqueue table hash
499  * index (we're keying off bit -1, but that would produce a horrible hash
500  * value).
501  */
502 static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
503 {
504         if (BITS_PER_LONG == 64) {
505                 unsigned long q = (unsigned long)p;
506                 return bit_waitqueue((void *)(q & ~1), q & 1);
507         }
508         return bit_waitqueue(p, 0);
509 }
510
511 static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync,
512                                   void *arg)
513 {
514         struct wait_bit_key *key = arg;
515         struct wait_bit_queue *wait_bit
516                 = container_of(wait, struct wait_bit_queue, wait);
517         atomic_t *val = key->flags;
518
519         if (wait_bit->key.flags != key->flags ||
520             wait_bit->key.bit_nr != key->bit_nr ||
521             atomic_read(val) != 0)
522                 return 0;
523         return autoremove_wake_function(wait, mode, sync, key);
524 }
525
526 /*
527  * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
528  * the actions of __wait_on_atomic_t() are permitted return codes.  Nonzero
529  * return codes halt waiting and return.
530  */
531 static __sched
532 int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
533                        int (*action)(atomic_t *), unsigned mode)
534 {
535         atomic_t *val;
536         int ret = 0;
537
538         do {
539                 prepare_to_wait(wq, &q->wait, mode);
540                 val = q->key.flags;
541                 if (atomic_read(val) == 0)
542                         break;
543                 ret = (*action)(val);
544         } while (!ret && atomic_read(val) != 0);
545         finish_wait(wq, &q->wait);
546         return ret;
547 }
548
/*
 * Declare a struct wait_bit_queue called @name that waits on the atomic_t
 * @p: the key is built with __WAIT_ATOMIC_T_KEY_INITIALIZER(), and the
 * embedded wait entry belongs to the current task, uses
 * wake_atomic_t_function() as its wake callback and starts out unqueued.
 */
#define DEFINE_WAIT_ATOMIC_T(name, p)					\
	struct wait_bit_queue name = {					\
		.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),		\
		.wait	= {						\
			.private	= current,			\
			.func		= wake_atomic_t_function,	\
			.task_list	=				\
				LIST_HEAD_INIT((name).wait.task_list),	\
		},							\
	}
559
560 __sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
561                                          unsigned mode)
562 {
563         wait_queue_head_t *wq = atomic_t_waitqueue(p);
564         DEFINE_WAIT_ATOMIC_T(wait, p);
565
566         return __wait_on_atomic_t(wq, &wait, action, mode);
567 }
568 EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
569
570 /**
571  * wake_up_atomic_t - Wake up a waiter on a atomic_t
572  * @p: The atomic_t being waited on, a kernel virtual address
573  *
574  * Wake up anyone waiting for the atomic_t to go to zero.
575  *
576  * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
577  * check is done by the waiter's wake function, not the by the waker itself).
578  */
579 void wake_up_atomic_t(atomic_t *p)
580 {
581         __wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
582 }
583 EXPORT_SYMBOL(wake_up_atomic_t);
584
585 __sched int bit_wait(struct wait_bit_key *word)
586 {
587         if (signal_pending_state(current->state, current))
588                 return 1;
589         schedule();
590         return 0;
591 }
592 EXPORT_SYMBOL(bit_wait);
593
594 __sched int bit_wait_io(struct wait_bit_key *word)
595 {
596         if (signal_pending_state(current->state, current))
597                 return 1;
598         io_schedule();
599         return 0;
600 }
601 EXPORT_SYMBOL(bit_wait_io);
602
603 __sched int bit_wait_timeout(struct wait_bit_key *word)
604 {
605         unsigned long now = READ_ONCE(jiffies);
606         if (signal_pending_state(current->state, current))
607                 return 1;
608         if (time_after_eq(now, word->timeout))
609                 return -EAGAIN;
610         schedule_timeout(word->timeout - now);
611         return 0;
612 }
613 EXPORT_SYMBOL_GPL(bit_wait_timeout);
614
615 __sched int bit_wait_io_timeout(struct wait_bit_key *word)
616 {
617         unsigned long now = READ_ONCE(jiffies);
618         if (signal_pending_state(current->state, current))
619                 return 1;
620         if (time_after_eq(now, word->timeout))
621                 return -EAGAIN;
622         io_schedule_timeout(word->timeout - now);
623         return 0;
624 }
625 EXPORT_SYMBOL_GPL(bit_wait_io_timeout);