Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial

[karo-tx-linux.git] / kernel / workqueue.c
diff --git a/kernel/workqueue.c b/kernel/workqueue.c

index 7f01a3eeaf9520e934dfe70f502cd2a3feee8b93..987293d03ebcf0e6bf1c6b81e8a4e68c7965e903 100644 (file)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -16,9 +16,10 @@
   *
   * This is the generic async execution mechanism.  Work items as are
   * executed in process context.  The worker pool is shared and
- * automatically managed.  There is one worker pool for each CPU and
- * one extra for works which are better served by workers which are
- * not bound to any specific CPU.
+ * automatically managed.  There are two worker pools for each CPU (one for
+ * normal work items and the other for high priority ones) and some extra
+ * pools for workqueues which are not bound to any specific CPU - the
+ * number of these backing pools is dynamic.
   *
   * Please read Documentation/workqueue.txt for details.
   */
@@ -2039,8 +2040,11 @@ static bool maybe_destroy_workers(struct worker_pool *pool)
   * multiple times.  Does GFP_KERNEL allocations.
   *
   * Return:
- * spin_lock_irq(pool->lock) which may be released and regrabbed
- * multiple times.  Does GFP_KERNEL allocations.
+ * %false if the pool doesn't need management and the caller can safely
+ * start processing works; %true if the function released pool->lock
+ * and reacquired it to perform some management function, in which case
+ * the conditions that the caller verified while holding the lock before
+ * calling the function might no longer be true.
   */
  static bool manage_workers(struct worker *worker)
  {
@@ -2207,6 +2211,15 @@ __acquires(&pool->lock)
                 dump_stack();
         }
  
+       /*
+        * The following prevents a kworker from hogging CPU on !PREEMPT
+        * kernels, where a requeueing work item waiting for something to
+        * happen could deadlock with stop_machine, as such a work item could
+        * indefinitely requeue itself while all other CPUs are trapped in
+        * stop_machine.
+        */
+       cond_resched();
+
         spin_lock_irq(&pool->lock);
  
         /* clear cpu intensive status */
@@ -2827,6 +2840,19 @@ already_gone:
         return false;
  }
  
+static bool __flush_work(struct work_struct *work)
+{
+       struct wq_barrier barr;
+
+       if (start_flush_work(work, &barr)) {
+               wait_for_completion(&barr.done);
+               destroy_work_on_stack(&barr.work);
+               return true;
+       } else {
+               return false;
+       }
+}
+
  /**
   * flush_work - wait for a work to finish executing the last queueing instance
   * @work: the work to flush
@@ -2840,18 +2866,10 @@ already_gone:
   */
  bool flush_work(struct work_struct *work)
  {
-       struct wq_barrier barr;
-
         lock_map_acquire(&work->lockdep_map);
         lock_map_release(&work->lockdep_map);
  
-       if (start_flush_work(work, &barr)) {
-               wait_for_completion(&barr.done);
-               destroy_work_on_stack(&barr.work);
-               return true;
-       } else {
-               return false;
-       }
+       return __flush_work(work);
  }
  EXPORT_SYMBOL_GPL(flush_work);
  
@@ -3095,25 +3113,26 @@ static struct workqueue_struct *dev_to_wq(struct device *dev)
         return wq_dev->wq;
  }
  
-static ssize_t wq_per_cpu_show(struct device *dev,
-                              struct device_attribute *attr, char *buf)
+static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
+                           char *buf)
  {
         struct workqueue_struct *wq = dev_to_wq(dev);
  
         return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
  }
+static DEVICE_ATTR_RO(per_cpu);
  
-static ssize_t wq_max_active_show(struct device *dev,
-                                 struct device_attribute *attr, char *buf)
+static ssize_t max_active_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
  {
         struct workqueue_struct *wq = dev_to_wq(dev);
  
         return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
  }
  
-static ssize_t wq_max_active_store(struct device *dev,
-                                  struct device_attribute *attr,
-                                  const char *buf, size_t count)
+static ssize_t max_active_store(struct device *dev,
+                               struct device_attribute *attr, const char *buf,
+                               size_t count)
  {
         struct workqueue_struct *wq = dev_to_wq(dev);
         int val;
@@ -3124,12 +3143,14 @@ static ssize_t wq_max_active_store(struct device *dev,
         workqueue_set_max_active(wq, val);
         return count;
  }
+static DEVICE_ATTR_RW(max_active);
  
-static struct device_attribute wq_sysfs_attrs[] = {
-       __ATTR(per_cpu, 0444, wq_per_cpu_show, NULL),
-       __ATTR(max_active, 0644, wq_max_active_show, wq_max_active_store),
-       __ATTR_NULL,
+static struct attribute *wq_sysfs_attrs[] = {
+       &dev_attr_per_cpu.attr,
+       &dev_attr_max_active.attr,
+       NULL,
  };
+ATTRIBUTE_GROUPS(wq_sysfs);
  
  static ssize_t wq_pool_ids_show(struct device *dev,
                                 struct device_attribute *attr, char *buf)
@@ -3279,7 +3300,7 @@ static struct device_attribute wq_sysfs_unbound_attrs[] = {
  
  static struct bus_type wq_subsys = {
         .name                           = "workqueue",
-       .dev_attrs                      = wq_sysfs_attrs,
+       .dev_groups                     = wq_sysfs_groups,
  };
  
  static int __init wq_sysfs_init(void)
@@ -3427,6 +3448,12 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
  {
         to->nice = from->nice;
         cpumask_copy(to->cpumask, from->cpumask);
+       /*
+        * Unlike the hash and equality tests, this function doesn't ignore
+        * ->no_numa as it is used for both pool and wq attrs.  Instead,
+        * get_unbound_pool() explicitly clears ->no_numa after copying.
+        */
+       to->no_numa = from->no_numa;
  }
  
  /* hash value of the content of @attr */
@@ -3598,6 +3625,12 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
         lockdep_set_subclass(&pool->lock, 1);   /* see put_pwq() */
         copy_workqueue_attrs(pool->attrs, attrs);
  
+       /*
+        * no_numa isn't a worker_pool attribute; always clear it.  See
+        * 'struct workqueue_attrs' comments for details.
+        */
+       pool->attrs->no_numa = false;
+
         /* if cpumask is contained inside a NUMA node, we belong to that node */
         if (wq_numa_enabled) {
                 for_each_node(node) {
@@ -4781,7 +4814,14 @@ long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
  
         INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
         schedule_work_on(cpu, &wfc.work);
-       flush_work(&wfc.work);
+
+       /*
+        * The work item is on-stack and can't lead to deadlock through
+        * flushing.  Use __flush_work() to avoid spurious lockdep warnings
+        * when work_on_cpu() calls are nested.
+        */
+       __flush_work(&wfc.work);
+
         return wfc.ret;
  }
  EXPORT_SYMBOL_GPL(work_on_cpu);