Merge tag 'mfd-fixes-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3595403921bd5be10c3e5e591bf04916e654423d..10a8faa1b0d4a5f737bd9008eb8f9a7e817b6ec9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -621,18 +621,21 @@ int get_nohz_timer_target(void)
        int i, cpu = smp_processor_id();
        struct sched_domain *sd;
 
-       if (!idle_cpu(cpu))
+       if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu))
                return cpu;
 
        rcu_read_lock();
        for_each_domain(cpu, sd) {
                for_each_cpu(i, sched_domain_span(sd)) {
-                       if (!idle_cpu(i)) {
+                       if (!idle_cpu(i) && is_housekeeping_cpu(i)) {
                                cpu = i;
                                goto unlock;
                        }
                }
        }
+
+       if (!is_housekeeping_cpu(cpu))
+               cpu = housekeeping_any_cpu();
 unlock:
        rcu_read_unlock();
        return cpu;
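
For reference, the two helpers this hunk relies on live in include/linux/tick.h. Below is a minimal sketch of their intent, assuming CONFIG_NO_HZ_FULL and the housekeeping_mask cpumask of this era; it is not a verbatim copy of the header:

static inline bool is_housekeeping_cpu(int cpu)
{
	/* Housekeeping CPUs are the ones left out of nohz_full=. */
	if (tick_nohz_full_enabled())
		return cpumask_test_cpu(cpu, housekeeping_mask);
	return true;
}

static inline int housekeeping_any_cpu(void)
{
	/* Fallback target: any online CPU that still takes the tick. */
	if (tick_nohz_full_enabled())
		return cpumask_any_and(housekeeping_mask, cpu_online_mask);
	return smp_processor_id();
}

With these checks, get_nohz_timer_target() only hands an unpinned timer to a CPU that is both busy and willing to take ticks, and falls back to any housekeeper otherwise.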
@@ -2514,11 +2517,11 @@ static struct rq *finish_task_switch(struct task_struct *prev)
         * If a task dies, then it sets TASK_DEAD in tsk->state and calls
         * schedule one last time. The schedule call will never return, and
         * the scheduled task must drop that reference.
-        * The test for TASK_DEAD must occur while the runqueue locks are
-        * still held, otherwise prev could be scheduled on another cpu, die
-        * there before we look at prev->state, and then the reference would
-        * be dropped twice.
-        *              Manfred Spraul <manfred@colorfullife.com>
+        *
+        * We must observe prev->state before clearing prev->on_cpu (in
+        * finish_lock_switch), otherwise a concurrent wakeup can get prev
+        * running on another CPU and we could race with its RUNNING -> DEAD
+        * transition, resulting in a double drop.
         */
        prev_state = prev->state;
        vtime_task_switch(prev);
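
The ordering the new comment describes is enforced on the other side, where finish_lock_switch() clears prev->on_cpu. A sketch of the relevant fragment from kernel/sched/sched.h as it plausibly looks with the companion fix (simplified, lockdep annotations omitted):

static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
{
#ifdef CONFIG_SMP
	/*
	 * The RELEASE orders the read of prev->state (done earlier in
	 * finish_task_switch()) before this store; it pairs with the
	 * acquire/control dependency in try_to_wake_up(), so a remote
	 * wakeup cannot run prev until prev->state has been sampled.
	 */
	smp_store_release(&prev->on_cpu, 0);
#endif
	raw_spin_unlock_irq(&rq->lock);
}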
@@ -2666,13 +2669,20 @@ unsigned long nr_running(void)
 
 /*
  * Check if only the current task is running on the cpu.
+ *
+ * Caution: this function does not check that the caller has disabled
+ * preemption, thus the result might have a time-of-check-to-time-of-use
+ * race.  The caller is responsible for using it correctly, for example:
+ *
+ * - from a non-preemptable section (of course)
+ *
+ * - from a thread that is bound to a single CPU
+ *
+ * - in a loop with very short iterations (e.g. a polling loop)
  */
 bool single_task_running(void)
 {
-       if (cpu_rq(smp_processor_id())->nr_running == 1)
-               return true;
-       else
-               return false;
+       return raw_rq()->nr_running == 1;
 }
 EXPORT_SYMBOL(single_task_running);
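
As an illustration of the caution above, the typical safe pattern is a short polling loop that re-evaluates the check every iteration, so a stale answer costs at most one extra spin. A hedged sketch with a hypothetical event_pending() helper (not part of this patch):

static bool poll_for_event(u64 poll_ns)
{
	u64 start = ktime_get_ns();

	do {
		if (event_pending())	/* hypothetical helper */
			return true;
		cpu_relax();
	} while (single_task_running() &&
		 ktime_get_ns() - start < poll_ns);

	return false;
}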
 
@@ -4924,7 +4934,15 @@ void init_idle(struct task_struct *idle, int cpu)
        idle->state = TASK_RUNNING;
        idle->se.exec_start = sched_clock();
 
-       do_set_cpus_allowed(idle, cpumask_of(cpu));
+#ifdef CONFIG_SMP
+       /*
+        * It's possible that init_idle() gets called multiple times on a task,
+        * in that case do_set_cpus_allowed() will not do the right thing.
+        *
+        * And since this is boot we can forgo the serialization.
+        */
+       set_cpus_allowed_common(idle, cpumask_of(cpu));
+#endif
        /*
         * We're having a chicken and egg problem, even though we are
         * holding rq->lock, the cpu isn't yet set to this cpu so the
@@ -4941,7 +4959,7 @@ void init_idle(struct task_struct *idle, int cpu)
 
        rq->curr = rq->idle = idle;
        idle->on_rq = TASK_ON_RQ_QUEUED;
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
        idle->on_cpu = 1;
 #endif
        raw_spin_unlock(&rq->lock);
@@ -4956,7 +4974,7 @@ void init_idle(struct task_struct *idle, int cpu)
        idle->sched_class = &idle_sched_class;
        ftrace_graph_init_idle_task(idle, cpu);
        vtime_init_idle(idle, cpu);
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
        sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
 }
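
set_cpus_allowed_common(), used in the first init_idle() hunk, is introduced earlier in this series as the bare mask update, without the dequeue/requeue work that do_set_cpus_allowed() may trigger. Roughly (a sketch, not a verbatim copy):

void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
{
	/* Plain update of the mask and its weight; callers provide locking. */
	cpumask_copy(&p->cpus_allowed, new_mask);
	p->nr_cpus_allowed = cpumask_weight(new_mask);
}

This is why the comment can note that boot may forgo the serialization.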
@@ -5178,24 +5196,47 @@ static void migrate_tasks(struct rq *dead_rq)
                        break;
 
                /*
-                * Ensure rq->lock covers the entire task selection
-                * until the migration.
+                * pick_next_task assumes pinned rq->lock.
                 */
                lockdep_pin_lock(&rq->lock);
                next = pick_next_task(rq, &fake_task);
                BUG_ON(!next);
                next->sched_class->put_prev_task(rq, next);
 
+               /*
+                * The rule for changing task_struct::cpus_allowed is to hold
+                * both pi_lock and rq->lock, such that holding either one
+                * stabilizes the mask.
+                *
+                * Dropping rq->lock is not quite as disastrous as it usually is
+                * because !cpu_active at this point, which means load-balance
+                * will not interfere. Also, stop-machine.
+                */
+               lockdep_unpin_lock(&rq->lock);
+               raw_spin_unlock(&rq->lock);
+               raw_spin_lock(&next->pi_lock);
+               raw_spin_lock(&rq->lock);
+
+               /*
+                * Since we're inside stop-machine, _nothing_ should have
+                * changed the task; WARN if something did, because in that
+                * case the rq->lock drop above is broken as well.
+                */
+               if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
+                       raw_spin_unlock(&next->pi_lock);
+                       continue;
+               }
+
                /* Find suitable destination for @next, with force if needed. */
                dest_cpu = select_fallback_rq(dead_rq->cpu, next);
 
-               lockdep_unpin_lock(&rq->lock);
                rq = __migrate_task(rq, next, dest_cpu);
                if (rq != dead_rq) {
                        raw_spin_unlock(&rq->lock);
                        rq = dead_rq;
                        raw_spin_lock(&rq->lock);
                }
+               raw_spin_unlock(&next->pi_lock);
        }
 
        rq->stop = stop;
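
To make the locking rule above concrete: the writer side (the set_cpus_allowed_ptr() path) only changes the mask with both locks held, so migrate_tasks() can stabilize it by grabbing pi_lock alongside the rq->lock it re-takes. A simplified, hypothetical sketch of that writer side (not actual core.c code):

static void stable_mask_update(struct task_struct *p, const struct cpumask *new_mask)
{
	unsigned long flags;
	struct rq *rq;

	/* Take p->pi_lock and task_rq(p)->lock together ... */
	rq = task_rq_lock(p, &flags);
	/* ... so readers holding either lock see a stable cpus_allowed. */
	do_set_cpus_allowed(p, new_mask);
	task_rq_unlock(rq, p, &flags);
}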