Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5e73c79fadd001d0bc898824549df6d5c89350b0..a585c7b2ccf0c8897419ac1cd3b259d433bf0b87 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1151,15 +1151,45 @@ static int migration_cpu_stop(void *data)
        return 0;
 }
 
-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+/*
+ * sched_class::set_cpus_allowed must do the below, but is not required to
+ * actually call this function.
+ */
+void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
 {
-       if (p->sched_class->set_cpus_allowed)
-               p->sched_class->set_cpus_allowed(p, new_mask);
-
        cpumask_copy(&p->cpus_allowed, new_mask);
        p->nr_cpus_allowed = cpumask_weight(new_mask);
 }
 
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+       struct rq *rq = task_rq(p);
+       bool queued, running;
+
+       lockdep_assert_held(&p->pi_lock);
+
+       queued = task_on_rq_queued(p);
+       running = task_current(rq, p);
+
+       if (queued) {
+               /*
+                * Because __kthread_bind() calls this on blocked tasks without
+                * holding rq->lock.
+                */
+               lockdep_assert_held(&rq->lock);
+               dequeue_task(rq, p, 0);
+       }
+       if (running)
+               put_prev_task(rq, p);
+
+       p->sched_class->set_cpus_allowed(p, new_mask);
+
+       if (running)
+               p->sched_class->set_curr_task(rq);
+       if (queued)
+               enqueue_task(rq, p, 0);
+}
+
 /*
  * Change a given task's CPU affinity. Migrate the thread to a
  * proper CPU and schedule it away if the CPU it's executing on
@@ -1169,7 +1199,8 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  * task must not exit() & deallocate itself prematurely. The
  * call is not atomic; no spinlocks may be held.
  */
-int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+static int __set_cpus_allowed_ptr(struct task_struct *p,
+                                 const struct cpumask *new_mask, bool check)
 {
        unsigned long flags;
        struct rq *rq;
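
The comment above set_cpus_allowed_common() in the first hunk spells out the new
contract: every sched_class::set_cpus_allowed implementation must perform the
cpumask copy and the nr_cpus_allowed update, but it does not have to call the
helper by name. A minimal sketch of the two ways a class can satisfy that, with
a hypothetical function name that is not part of this diff:

	/*
	 * Option 1: a class with no affinity state of its own points its
	 * method table straight at the helper:
	 *
	 *	.set_cpus_allowed	= set_cpus_allowed_common,
	 *
	 * Option 2 (hypothetical class): a class that keeps per-CPU state
	 * wraps the helper and still meets the contract.
	 */
	static void set_cpus_allowed_example(struct task_struct *p,
					     const struct cpumask *new_mask)
	{
		/* class-specific bookkeeping (bandwidth, pushable lists, ...) */

		set_cpus_allowed_common(p, new_mask);	/* the mandatory part */
	}
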
@@ -1178,6 +1209,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 
        rq = task_rq_lock(p, &flags);
 
+       /*
+        * Must re-check here, to close a race against __kthread_bind(),
+        * sched_setaffinity() is not guaranteed to observe the flag.
+        */
+       if (check && (p->flags & PF_NO_SETAFFINITY)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
        if (cpumask_equal(&p->cpus_allowed, new_mask))
                goto out;
 
@@ -1214,6 +1254,11 @@ out:
 
        return ret;
 }
+
+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+{
+       return __set_cpus_allowed_ptr(p, new_mask, false);
+}
 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
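
The 'check' argument is what ties these hunks together: set_cpus_allowed_ptr()
keeps its old behaviour for in-kernel callers (check=false, so kernel code may
still rebind a PF_NO_SETAFFINITY kthread), while sched_setaffinity() further
down passes check=true and re-tests the flag under both p->pi_lock and
rq->lock. The other half of the race lives in kthread.c and is not part of this
file; roughly, and from memory rather than from this diff, it looks like this:

	static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
	{
		unsigned long flags;

		if (!wait_task_inactive(p, state)) {
			WARN_ON(1);
			return;
		}

		/*
		 * Safe because the task is inactive: update the mask and set
		 * PF_NO_SETAFFINITY under ->pi_lock.  The re-check that
		 * __set_cpus_allowed_ptr() now does under the same locks is
		 * what closes the window against a concurrent
		 * sched_setaffinity().
		 */
		raw_spin_lock_irqsave(&p->pi_lock, flags);
		do_set_cpus_allowed(p, cpumask_of(cpu));
		p->flags |= PF_NO_SETAFFINITY;
		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
	}
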
@@ -1595,6 +1640,15 @@ static void update_avg(u64 *avg, u64 sample)
        s64 diff = sample - *avg;
        *avg += diff >> 3;
 }
+
+#else
+
+static inline int __set_cpus_allowed_ptr(struct task_struct *p,
+                                        const struct cpumask *new_mask, bool check)
+{
+       return set_cpus_allowed_ptr(p, new_mask);
+}
+
 #endif /* CONFIG_SMP */
 
 static void
@@ -1654,9 +1708,9 @@ static void
 ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 {
        check_preempt_curr(rq, p, wake_flags);
-       trace_sched_wakeup(p, true);
-
        p->state = TASK_RUNNING;
+       trace_sched_wakeup(p);
+
 #ifdef CONFIG_SMP
        if (p->sched_class->task_woken) {
                /*
@@ -1874,6 +1928,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
        if (!(p->state & state))
                goto out;
 
+       trace_sched_waking(p);
+
        success = 1; /* we're going to change ->state */
        cpu = task_cpu(p);
 
@@ -1949,6 +2005,8 @@ static void try_to_wake_up_local(struct task_struct *p)
        if (!(p->state & TASK_NORMAL))
                goto out;
 
+       trace_sched_waking(p);
+
        if (!task_on_rq_queued(p))
                ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 
@@ -2016,9 +2074,6 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
        p->se.prev_sum_exec_runtime     = 0;
        p->se.nr_migrations             = 0;
        p->se.vruntime                  = 0;
-#ifdef CONFIG_SMP
-       p->se.avg.decay_count           = 0;
-#endif
        INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_SCHEDSTATS
@@ -2303,11 +2358,11 @@ void wake_up_new_task(struct task_struct *p)
 #endif
 
        /* Initialize new task's runnable average */
-       init_task_runnable_average(p);
+       init_entity_runnable_average(&p->se);
        rq = __task_rq_lock(p);
        activate_task(rq, p, 0);
        p->on_rq = TASK_ON_RQ_QUEUED;
-       trace_sched_wakeup_new(p, true);
+       trace_sched_wakeup_new(p);
        check_preempt_curr(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
        if (p->sched_class->task_woken)
@@ -2469,7 +2524,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
         */
        prev_state = prev->state;
        vtime_task_switch(prev);
-       finish_arch_switch(prev);
        perf_event_task_sched_in(prev, current);
        finish_lock_switch(rq, prev);
        finish_arch_post_lock_switch();
@@ -4340,7 +4394,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
        }
 #endif
 again:
-       retval = set_cpus_allowed_ptr(p, new_mask);
+       retval = __set_cpus_allowed_ptr(p, new_mask, true);
 
        if (!retval) {
                cpuset_cpus_allowed(p, cpus_allowed);
@@ -4492,7 +4546,7 @@ SYSCALL_DEFINE0(sched_yield)
 
 int __sched _cond_resched(void)
 {
-       if (should_resched()) {
+       if (should_resched(0)) {
                preempt_schedule_common();
                return 1;
        }
@@ -4510,7 +4564,7 @@ EXPORT_SYMBOL(_cond_resched);
  */
 int __cond_resched_lock(spinlock_t *lock)
 {
-       int resched = should_resched();
+       int resched = should_resched(PREEMPT_LOCK_OFFSET);
        int ret = 0;
 
        lockdep_assert_held(lock);
@@ -4532,7 +4586,7 @@ int __sched __cond_resched_softirq(void)
 {
        BUG_ON(!in_softirq());
 
-       if (should_resched()) {
+       if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
                local_bh_enable();
                preempt_schedule_common();
                local_bh_disable();
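
All three cond_resched variants now tell should_resched() which preempt_count
offset they expect to be present: 0 for plain _cond_resched(),
PREEMPT_LOCK_OFFSET for a held spinlock, SOFTIRQ_DISABLE_OFFSET for the
local_bh_disable() section. Rescheduling is only attempted when the count
consists of exactly that offset, i.e. when dropping the lock or re-enabling
softirqs really will make the task preemptible. The generic helper (the
asm-generic/preempt.h flavour, quoted from memory, not part of this file) comes
down to roughly:

	static __always_inline bool should_resched(int preempt_offset)
	{
		/* resched only if the caller's own offset is all that is left */
		return unlikely(preempt_count() == preempt_offset &&
				tif_need_resched());
	}
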
@@ -4865,7 +4919,8 @@ void init_idle(struct task_struct *idle, int cpu)
        struct rq *rq = cpu_rq(cpu);
        unsigned long flags;
 
-       raw_spin_lock_irqsave(&rq->lock, flags);
+       raw_spin_lock_irqsave(&idle->pi_lock, flags);
+       raw_spin_lock(&rq->lock);
 
        __sched_fork(0, idle);
        idle->state = TASK_RUNNING;
@@ -4891,7 +4946,8 @@ void init_idle(struct task_struct *idle, int cpu)
 #if defined(CONFIG_SMP)
        idle->on_cpu = 1;
 #endif
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
+       raw_spin_unlock(&rq->lock);
+       raw_spin_unlock_irqrestore(&idle->pi_lock, flags);
 
        /* Set the preempt count _outside_ the spinlocks! */
        init_idle_preempt_count(idle, cpu);
@@ -5311,8 +5367,7 @@ static void register_sched_domain_sysctl(void)
 /* may be called multiple times per register */
 static void unregister_sched_domain_sysctl(void)
 {
-       if (sd_sysctl_header)
-               unregister_sysctl_table(sd_sysctl_header);
+       unregister_sysctl_table(sd_sysctl_header);
        sd_sysctl_header = NULL;
        if (sd_ctl_dir[0].child)
                sd_free_ctl_entry(&sd_ctl_dir[0].child);
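
Dropping the NULL test here relies on unregister_sysctl_table() already being a
no-op for a NULL header, so the open-coded check in the scheduler was
redundant. Abridged, from memory, the fs/proc/proc_sysctl.c entry point this
now leans on starts like:

	void unregister_sysctl_table(struct ctl_table_header *header)
	{
		if (header == NULL)
			return;

		/* ... tear down the registered table and any sub-headers ... */
	}
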
@@ -6445,8 +6500,10 @@ static void init_numa_topology_type(void)
 
        n = sched_max_numa_distance;
 
-       if (n <= 1)
+       if (sched_domains_numa_levels <= 1) {
                sched_numa_topology_type = NUMA_DIRECT;
+               return;
+       }
 
        for_each_online_node(a) {
                for_each_online_node(b) {