Merge branch 'linus' into core/rcu, to fix up a semantic conflict

[karo-tx-linux.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 97d276ff1edb1225f0ad894cb66b052be36b2104..f7402f7eb44803a6659b5f2b6337d64624dd8d0a 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2366,8 +2366,15 @@ void wake_up_new_task(struct task_struct *p)
         trace_sched_wakeup_new(p);
         check_preempt_curr(rq, p, WF_FORK);
  #ifdef CONFIG_SMP
-       if (p->sched_class->task_woken)
+       if (p->sched_class->task_woken) {
+               /*
+                * Nothing relies on rq->lock after this, so it's fine to
+                * drop it.
+                */
+               lockdep_unpin_lock(&rq->lock);
                 p->sched_class->task_woken(rq, p);
+               lockdep_pin_lock(&rq->lock);
+       }
  #endif
         task_rq_unlock(rq, p, &flags);
  }
@@ -2517,11 +2524,11 @@ static struct rq *finish_task_switch(struct task_struct *prev)
          * If a task dies, then it sets TASK_DEAD in tsk->state and calls
          * schedule one last time. The schedule call will never return, and
          * the scheduled task must drop that reference.
-        * The test for TASK_DEAD must occur while the runqueue locks are
-        * still held, otherwise prev could be scheduled on another cpu, die
-        * there before we look at prev->state, and then the reference would
-        * be dropped twice.
-        *              Manfred Spraul <manfred@colorfullife.com>
+        *
+        * We must observe prev->state before clearing prev->on_cpu (in
+        * finish_lock_switch), otherwise a concurrent wakeup can get prev
+        * running on another CPU and we could race with its RUNNING -> DEAD
+        * transition, resulting in a double drop.
          */
         prev_state = prev->state;
         vtime_task_switch(prev);
@@ -2669,13 +2676,20 @@ unsigned long nr_running(void)
  
  /*
   * Check if only the current task is running on the cpu.
+ *
+ * Caution: this function does not check that the caller has disabled
+ * preemption, thus the result might have a time-of-check-to-time-of-use
+ * race.  The caller is responsible for using it correctly, for example:
+ *
+ * - from a non-preemptable section (of course)
+ *
+ * - from a thread that is bound to a single CPU
+ *
+ * - in a loop with very short iterations (e.g. a polling loop)
   */
  bool single_task_running(void)
  {
-       if (cpu_rq(smp_processor_id())->nr_running == 1)
-               return true;
-       else
-               return false;
+       return raw_rq()->nr_running == 1;
  }
  EXPORT_SYMBOL(single_task_running);
  
@@ -4015,6 +4029,7 @@ int sched_setscheduler_nocheck(struct task_struct *p, int policy,
  {
         return _sched_setscheduler(p, policy, param, false);
  }
+EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck);
  
  static int
  do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
@@ -4927,7 +4942,15 @@ void init_idle(struct task_struct *idle, int cpu)
         idle->state = TASK_RUNNING;
         idle->se.exec_start = sched_clock();
  
-       do_set_cpus_allowed(idle, cpumask_of(cpu));
+#ifdef CONFIG_SMP
+       /*
+        * It's possible that init_idle() gets called multiple times on a task,
+        * in that case do_set_cpus_allowed() will not do the right thing.
+        *
+        * And since this is boot we can forgo the serialization.
+        */
+       set_cpus_allowed_common(idle, cpumask_of(cpu));
+#endif
         /*
          * We're having a chicken and egg problem, even though we are
          * holding rq->lock, the cpu isn't yet set to this cpu so the
@@ -4944,7 +4967,7 @@ void init_idle(struct task_struct *idle, int cpu)
  
         rq->curr = rq->idle = idle;
         idle->on_rq = TASK_ON_RQ_QUEUED;
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
         idle->on_cpu = 1;
  #endif
         raw_spin_unlock(&rq->lock);
@@ -4959,7 +4982,7 @@ void init_idle(struct task_struct *idle, int cpu)
         idle->sched_class = &idle_sched_class;
         ftrace_graph_init_idle_task(idle, cpu);
         vtime_init_idle(idle, cpu);
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
         sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
  #endif
  }
@@ -7223,9 +7246,6 @@ void __init sched_init_smp(void)
         alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
         alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
  
-       /* nohz_full won't take effect without isolating the cpus. */
-       tick_nohz_full_add_cpus_to(cpu_isolated_map);
-
         sched_init_numa();
  
         /*