Merge tag 'mfd-fixes-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3595403921bd5be10c3e5e591bf04916e654423d..10a8faa1b0d4a5f737bd9008eb8f9a7e817b6ec9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -621,18 +621,21 @@ int get_nohz_timer_target(void)
        int i, cpu = smp_processor_id();
        struct sched_domain *sd;
 
-       if (!idle_cpu(cpu))
+       if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu))
                return cpu;
 
        rcu_read_lock();
        for_each_domain(cpu, sd) {
                for_each_cpu(i, sched_domain_span(sd)) {
-                       if (!idle_cpu(i)) {
+                       if (!idle_cpu(i) && is_housekeeping_cpu(i)) {
                                cpu = i;
                                goto unlock;
                        }
                }
        }
+
+       if (!is_housekeeping_cpu(cpu))
+               cpu = housekeeping_any_cpu();
 unlock:
        rcu_read_unlock();
        return cpu;
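
For reference, the two helpers this hunk relies on live in include/linux/tick.h. Below is a minimal sketch of their intent, assuming CONFIG_NO_HZ_FULL and the housekeeping_mask cpumask of this era; it is not a verbatim copy of the header:

static inline bool is_housekeeping_cpu(int cpu)
{
	/* Housekeeping CPUs are the ones left out of nohz_full=. */
	if (tick_nohz_full_enabled())
		return cpumask_test_cpu(cpu, housekeeping_mask);
	return true;
}

static inline int housekeeping_any_cpu(void)
{
	/* Fallback target: any online CPU that still takes the tick. */
	if (tick_nohz_full_enabled())
		return cpumask_any_and(housekeeping_mask, cpu_online_mask);
	return smp_processor_id();
}

With these checks, get_nohz_timer_target() only hands an unpinned timer to a CPU that is both busy and willing to take ticks, and falls back to any housekeeper otherwise.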
@@ -2514,11 +2517,11 @@ static struct rq *finish_task_switch(struct task_struct *prev)
         * If a task dies, then it sets TASK_DEAD in tsk->state and calls
         * schedule one last time. The schedule call will never return, and
         * the scheduled task must drop that reference.
-        * The test for TASK_DEAD must occur while the runqueue locks are
-        * still held, otherwise prev could be scheduled on another cpu, die
-        * there before we look at prev->state, and then the reference would
-        * be dropped twice.
-        *              Manfred Spraul <manfred@colorfullife.com>
+        *
+        * We must observe prev->state before clearing prev->on_cpu (in
+        * finish_lock_switch), otherwise a concurrent wakeup can get prev
+        * running on another CPU and we could race with its RUNNING -> DEAD
+        * transition, resulting in a double drop.
         */
        prev_state = prev->state;
        vtime_task_switch(prev);
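
The ordering the new comment describes is enforced on the other side, where finish_lock_switch() clears prev->on_cpu. A sketch of the relevant fragment from kernel/sched/sched.h as it plausibly looks with the companion fix (simplified, lockdep annotations omitted):

static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
{
#ifdef CONFIG_SMP
	/*
	 * The RELEASE orders the read of prev->state (done earlier in
	 * finish_task_switch()) before this store; it pairs with the
	 * acquire/control dependency in try_to_wake_up(), so a remote
	 * wakeup cannot run prev until prev->state has been sampled.
	 */
	smp_store_release(&prev->on_cpu, 0);
#endif
	raw_spin_unlock_irq(&rq->lock);
}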
@@ -2666,13 +2669,20 @@ unsigned long nr_running(void)
 
 /*
  * Check if only the current task is running on the cpu.
+ *
+ * Caution: this function does not check that the caller has disabled
+ * preemption, thus the result might have a time-of-check-to-time-of-use
+ * race.  The caller is responsible for using it correctly, for example:
+ *
+ * - from a non-preemptable section (of course)
+ *
+ * - from a thread that is bound to a single CPU
+ *
+ * - in a loop with very short iterations (e.g. a polling loop)
  */
 bool single_task_running(void)
 {
-       if (cpu_rq(smp_processor_id())->nr_running == 1)
-               return true;
-       else
-               return false;
+       return raw_rq()->nr_running == 1;
 }
 EXPORT_SYMBOL(single_task_running);
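
As an illustration of the caution above, the typical safe pattern is a short polling loop that re-evaluates the check every iteration, so a stale answer costs at most one extra spin. A hedged sketch with a hypothetical event_pending() helper (not part of this patch):

static bool poll_for_event(u64 poll_ns)
{
	u64 start = ktime_get_ns();

	do {
		if (event_pending())	/* hypothetical helper */
			return true;
		cpu_relax();
	} while (single_task_running() &&
		 ktime_get_ns() - start < poll_ns);

	return false;
}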
 
@@ -4924,7 +4934,15 @@ void init_idle(struct task_struct *idle, int cpu)
        idle->state = TASK_RUNNING;
        idle->se.exec_start = sched_clock();
 
-       do_set_cpus_allowed(idle, cpumask_of(cpu));
+#ifdef CONFIG_SMP
+       /*
+        * It's possible that init_idle() gets called multiple times on a task,
+        * in that case do_set_cpus_allowed() will not do the right thing.
+        *
+        * And since this is boot we can forgo the serialization.
+        */
+       set_cpus_allowed_common(idle, cpumask_of(cpu));
+#endif
        /*
         * We're having a chicken and egg problem, even though we are
         * holding rq->lock, the cpu isn't yet set to this cpu so the
@@ -4941,7 +4959,7 @@ void init_idle(struct task_struct *idle, int cpu)
 
        rq->curr = rq->idle = idle;
        idle->on_rq = TASK_ON_RQ_QUEUED;
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
        idle->on_cpu = 1;
 #endif
        raw_spin_unlock(&rq->lock);
@@ -4956,7 +4974,7 @@ void init_idle(struct task_struct *idle, int cpu)
        idle->sched_class = &idle_sched_class;
        ftrace_graph_init_idle_task(idle, cpu);
        vtime_init_idle(idle, cpu);
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
        sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
 }
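
set_cpus_allowed_common(), used in the first init_idle() hunk, is introduced earlier in this series as the bare mask update, without the dequeue/requeue work that do_set_cpus_allowed() may trigger. Roughly (a sketch, not a verbatim copy):

void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
{
	/* Plain update of the mask and its weight; callers provide locking. */
	cpumask_copy(&p->cpus_allowed, new_mask);
	p->nr_cpus_allowed = cpumask_weight(new_mask);
}

This is why the comment can note that boot may forgo the serialization.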
@@ -5178,24 +5196,47 @@ static void migrate_tasks(struct rq *dead_rq)
                        break;
 
                /*
-                * Ensure rq->lock covers the entire task selection
-                * until the migration.
+                * pick_next_task assumes pinned rq->lock.
                 */
                lockdep_pin_lock(&rq->lock);
                next = pick_next_task(rq, &fake_task);
                BUG_ON(!next);
                next->sched_class->put_prev_task(rq, next);
 
+               /*
+                * The rule for changing task_struct::cpus_allowed is to hold
+                * both pi_lock and rq->lock, such that holding either one
+                * stabilizes the mask.
+                *
+                * Dropping rq->lock is not quite as disastrous as it usually is
+                * because !cpu_active at this point, which means load-balance
+                * will not interfere. Also, stop-machine.
+                */
+               lockdep_unpin_lock(&rq->lock);
+               raw_spin_unlock(&rq->lock);
+               raw_spin_lock(&next->pi_lock);
+               raw_spin_lock(&rq->lock);
+
+               /*
+                * Since we're inside stop-machine, _nothing_ should have
+                * changed the task; WARN if something did, because in that
+                * case the rq->lock drop above is broken as well.
+                */
+               if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
+                       raw_spin_unlock(&next->pi_lock);
+                       continue;
+               }
+
                /* Find suitable destination for @next, with force if needed. */
                dest_cpu = select_fallback_rq(dead_rq->cpu, next);
 
-               lockdep_unpin_lock(&rq->lock);
                rq = __migrate_task(rq, next, dest_cpu);
                if (rq != dead_rq) {
                        raw_spin_unlock(&rq->lock);
                        rq = dead_rq;
                        raw_spin_lock(&rq->lock);
                }
+               raw_spin_unlock(&next->pi_lock);
        }
 
        rq->stop = stop;
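
To make the locking rule above concrete: the writer side (the set_cpus_allowed_ptr() path) only changes the mask with both locks held, so migrate_tasks() can stabilize it by grabbing pi_lock alongside the rq->lock it re-takes. A simplified, hypothetical sketch of that writer side (not actual core.c code):

static void stable_mask_update(struct task_struct *p, const struct cpumask *new_mask)
{
	unsigned long flags;
	struct rq *rq;

	/* Take p->pi_lock and task_rq(p)->lock together ... */
	rq = task_rq_lock(p, &flags);
	/* ... so readers holding either lock see a stable cpus_allowed. */
	do_set_cpus_allowed(p, new_mask);
	task_rq_unlock(rq, p, &flags);
}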