sched: Replace normalize_task() with __sched_setscheduler()
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 123673291ffbb160734ed889b934d557611a1cf1..b610ef9e522f03b7d0ff4ccbce22aa8710e205d6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
 
-void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
-{
-       unsigned long delta;
-       ktime_t soft, hard, now;
-
-       for (;;) {
-               if (hrtimer_active(period_timer))
-                       break;
-
-               now = hrtimer_cb_get_time(period_timer);
-               hrtimer_forward(period_timer, now, period);
-
-               soft = hrtimer_get_softexpires(period_timer);
-               hard = hrtimer_get_expires(period_timer);
-               delta = ktime_to_ns(ktime_sub(hard, soft));
-               __hrtimer_start_range_ns(period_timer, soft, delta,
-                                        HRTIMER_MODE_ABS_PINNED, 0);
-       }
-}
-
 DEFINE_MUTEX(sched_domains_mutex);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
@@ -355,12 +335,11 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 
 #ifdef CONFIG_SMP
 
-static int __hrtick_restart(struct rq *rq)
+static void __hrtick_restart(struct rq *rq)
 {
        struct hrtimer *timer = &rq->hrtick_timer;
-       ktime_t time = hrtimer_get_softexpires(timer);
 
-       return __hrtimer_start_range_ns(timer, time, 0, HRTIMER_MODE_ABS_PINNED, 0);
+       hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
 }
 
 /*
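
__hrtick_restart() now simply re-arms the timer on the expiry it already carries, instead of reading the soft expiry back out and open-coding __hrtimer_start_range_ns(). For context (not part of this patch), hrtimer_start_expires() from include/linux/hrtimer.h looks roughly like the sketch below; treat it as an approximation of that helper as of this kernel series:

/*
 * Sketch of hrtimer_start_expires(): re-arm a timer on the soft/hard
 * expiry range it already stores, so callers need not pass it back in.
 */
static inline void hrtimer_start_expires(struct hrtimer *timer,
					 enum hrtimer_mode mode)
{
	u64 delta;
	ktime_t soft, hard;

	soft = hrtimer_get_softexpires(timer);
	hard = hrtimer_get_expires(timer);
	delta = ktime_to_ns(ktime_sub(hard, soft));
	hrtimer_start_range_ns(timer, soft, delta, mode);
}
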
@@ -440,8 +419,8 @@ void hrtick_start(struct rq *rq, u64 delay)
         * doesn't make sense. Rely on vruntime for fairness.
         */
        delay = max_t(u64, delay, 10000LL);
-       __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
-                       HRTIMER_MODE_REL_PINNED, 0);
+       hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
+                     HRTIMER_MODE_REL_PINNED);
 }
 
 static inline void init_hrtick(void)
@@ -511,7 +490,7 @@ static bool set_nr_and_not_polling(struct task_struct *p)
 static bool set_nr_if_polling(struct task_struct *p)
 {
        struct thread_info *ti = task_thread_info(p);
-       typeof(ti->flags) old, val = ACCESS_ONCE(ti->flags);
+       typeof(ti->flags) old, val = READ_ONCE(ti->flags);
 
        for (;;) {
                if (!(val & _TIF_POLLING_NRFLAG))
@@ -541,6 +520,52 @@ static bool set_nr_if_polling(struct task_struct *p)
 #endif
 #endif
 
+void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+{
+       struct wake_q_node *node = &task->wake_q;
+
+       /*
+        * Atomically grab the task, if ->wake_q is !nil already it means
+        * it's already queued (either by us or someone else) and will get the
+        * wakeup due to that.
+        *
+        * This cmpxchg() implies a full barrier, which pairs with the write
+        * barrier implied by the wakeup in wake_up_q().
+        */
+       if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
+               return;
+
+       get_task_struct(task);
+
+       /*
+        * The head is context local, there can be no concurrency.
+        */
+       *head->lastp = node;
+       head->lastp = &node->next;
+}
+
+void wake_up_q(struct wake_q_head *head)
+{
+       struct wake_q_node *node = head->first;
+
+       while (node != WAKE_Q_TAIL) {
+               struct task_struct *task;
+
+               task = container_of(node, struct task_struct, wake_q);
+               BUG_ON(!task);
+               /* task can safely be re-inserted now */
+               node = node->next;
+               task->wake_q.next = NULL;
+
+               /*
+                * wake_up_process() implies a wmb() to pair with the queueing
+                * in wake_q_add() so as not to miss wakeups.
+                */
+               wake_up_process(task);
+               put_task_struct(task);
+       }
+}
+
 /*
  * resched_curr - mark rq's current task 'to be rescheduled now'.
  *
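
The wake_q_add()/wake_up_q() pair added above implements lockless wakeup queueing: wakeups are collected on a caller-local list while a lock is held and are only issued after the lock has been dropped, so wake_up_process() never runs under the caller's spinlock. A minimal usage sketch follows; it assumes the WAKE_Q() on-stack initializer that accompanies these helpers in include/linux/sched.h, and the my_waiter/my_release_all names are made up for illustration:

struct my_waiter {
	struct list_head	list;
	struct task_struct	*task;
};

static void my_release_all(spinlock_t *lock, struct list_head *waiters)
{
	WAKE_Q(wake_q);			/* on-stack wake queue head */
	struct my_waiter *w, *tmp;

	spin_lock(lock);
	list_for_each_entry_safe(w, tmp, waiters, list) {
		list_del(&w->list);
		wake_q_add(&wake_q, w->task);	/* takes a task reference */
	}
	spin_unlock(lock);

	/* wakeups happen here, lock-free; the task references are dropped */
	wake_up_q(&wake_q);
}
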
@@ -2252,23 +2277,35 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 #ifdef CONFIG_SMP
 
 /* rq->lock is NOT held, but preemption is disabled */
-static inline void post_schedule(struct rq *rq)
+static void __balance_callback(struct rq *rq)
 {
-       if (rq->post_schedule) {
-               unsigned long flags;
+       struct callback_head *head, *next;
+       void (*func)(struct rq *rq);
+       unsigned long flags;
 
-               raw_spin_lock_irqsave(&rq->lock, flags);
-               if (rq->curr->sched_class->post_schedule)
-                       rq->curr->sched_class->post_schedule(rq);
-               raw_spin_unlock_irqrestore(&rq->lock, flags);
+       raw_spin_lock_irqsave(&rq->lock, flags);
+       head = rq->balance_callback;
+       rq->balance_callback = NULL;
+       while (head) {
+               func = (void (*)(struct rq *))head->func;
+               next = head->next;
+               head->next = NULL;
+               head = next;
 
-               rq->post_schedule = 0;
+               func(rq);
        }
+       raw_spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static inline void balance_callback(struct rq *rq)
+{
+       if (unlikely(rq->balance_callback))
+               __balance_callback(rq);
 }
 
 #else
 
-static inline void post_schedule(struct rq *rq)
+static inline void balance_callback(struct rq *rq)
 {
 }
 
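
The rq->post_schedule flag is replaced by rq->balance_callback, a singly-linked list of callback_head entries that __balance_callback() above drains with rq->lock held. Scheduling classes queue work onto that list through a small helper; the sketch below approximates the queue_balance_callback() helper that the same series adds to kernel/sched/sched.h (shown for context, not part of this hunk):

static inline void
queue_balance_callback(struct rq *rq,
		       struct callback_head *head,
		       void (*func)(struct rq *rq))
{
	lockdep_assert_held(&rq->lock);

	/* already queued on this rq, nothing to do */
	if (unlikely(head->next))
		return;

	head->func = (void (*)(struct callback_head *))func;
	head->next = rq->balance_callback;
	rq->balance_callback = head;
}

Because the queued callbacks only run from balance_callback() once rq->lock has been retaken, a class such as RT or deadline can queue its push/pull work without dropping the lock itself.
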
@@ -2286,7 +2323,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
        /* finish_task_switch() drops rq->lock and enables preemption */
        preempt_disable();
        rq = finish_task_switch(prev);
-       post_schedule(rq);
+       balance_callback(rq);
        preempt_enable();
 
        if (current->set_child_tid)
@@ -2397,9 +2434,9 @@ unsigned long nr_iowait_cpu(int cpu)
 
 void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
 {
-       struct rq *this = this_rq();
-       *nr_waiters = atomic_read(&this->nr_iowait);
-       *load = this->cpu_load[0];
+       struct rq *rq = this_rq();
+       *nr_waiters = atomic_read(&rq->nr_iowait);
+       *load = rq->load.weight;
 }
 
 #ifdef CONFIG_SMP
@@ -2497,6 +2534,7 @@ void scheduler_tick(void)
        update_rq_clock(rq);
        curr->sched_class->task_tick(rq, curr, 0);
        update_cpu_load_active(rq);
+       calc_global_load_tick(rq);
        raw_spin_unlock(&rq->lock);
 
        perf_event_task_tick();
@@ -2525,7 +2563,7 @@ void scheduler_tick(void)
 u64 scheduler_tick_max_deferment(void)
 {
        struct rq *rq = this_rq();
-       unsigned long next, now = ACCESS_ONCE(jiffies);
+       unsigned long next, now = READ_ONCE(jiffies);
 
        next = rq->last_sched_tick + HZ;
 
@@ -2726,9 +2764,7 @@ again:
  *          - return from syscall or exception to user-space
  *          - return from interrupt-handler to user-space
  *
- * WARNING: all callers must re-check need_resched() afterward and reschedule
- * accordingly in case an event triggered the need for rescheduling (such as
- * an interrupt waking up a task) while preemption was disabled in __schedule().
+ * WARNING: must be called with preemption disabled!
  */
 static void __sched __schedule(void)
 {
@@ -2737,7 +2773,6 @@ static void __sched __schedule(void)
        struct rq *rq;
        int cpu;
 
-       preempt_disable();
        cpu = smp_processor_id();
        rq = cpu_rq(cpu);
        rcu_note_context_switch();
@@ -2800,9 +2835,7 @@ static void __sched __schedule(void)
        } else
                raw_spin_unlock_irq(&rq->lock);
 
-       post_schedule(rq);
-
-       sched_preempt_enable_no_resched();
+       balance_callback(rq);
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
@@ -2823,7 +2856,9 @@ asmlinkage __visible void __sched schedule(void)
 
        sched_submit_work(tsk);
        do {
+               preempt_disable();
                __schedule();
+               sched_preempt_enable_no_resched();
        } while (need_resched());
 }
 EXPORT_SYMBOL(schedule);
@@ -2862,15 +2897,14 @@ void __sched schedule_preempt_disabled(void)
 static void __sched notrace preempt_schedule_common(void)
 {
        do {
-               __preempt_count_add(PREEMPT_ACTIVE);
+               preempt_active_enter();
                __schedule();
-               __preempt_count_sub(PREEMPT_ACTIVE);
+               preempt_active_exit();
 
                /*
                 * Check again in case we missed a preemption opportunity
                 * between schedule and now.
                 */
-               barrier();
        } while (need_resched());
 }
 
@@ -2894,9 +2928,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
 NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
 
-#ifdef CONFIG_CONTEXT_TRACKING
 /**
- * preempt_schedule_context - preempt_schedule called by tracing
+ * preempt_schedule_notrace - preempt_schedule called by tracing
  *
  * The tracing infrastructure uses preempt_enable_notrace to prevent
  * recursion and tracing preempt enabling caused by the tracing
@@ -2909,7 +2942,7 @@ EXPORT_SYMBOL(preempt_schedule);
  * instead of preempt_schedule() to exit user context if needed before
  * calling the scheduler.
  */
-asmlinkage __visible void __sched notrace preempt_schedule_context(void)
+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 {
        enum ctx_state prev_ctx;
 
@@ -2917,7 +2950,13 @@ asmlinkage __visible void __sched notrace preempt_schedule_context(void)
                return;
 
        do {
-               __preempt_count_add(PREEMPT_ACTIVE);
+               /*
+                * Use raw __preempt_count() ops that don't call functions.
+                * We can't call functions before preemption is disabled,
+                * since disabled preemption is what prevents preemption
+                * tracing from recursing into this path.
+                */
+               __preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET);
+               barrier();
                /*
                 * Needs preempt disabled in case user_exit() is traced
                 * and the tracer calls preempt_enable_notrace() causing
@@ -2927,12 +2966,11 @@ asmlinkage __visible void __sched notrace preempt_schedule_context(void)
                __schedule();
                exception_exit(prev_ctx);
 
-               __preempt_count_sub(PREEMPT_ACTIVE);
                barrier();
+               __preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET);
        } while (need_resched());
 }
-EXPORT_SYMBOL_GPL(preempt_schedule_context);
-#endif /* CONFIG_CONTEXT_TRACKING */
+EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
 
 #endif /* CONFIG_PREEMPT */
 
@@ -2952,17 +2990,11 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
        prev_state = exception_enter();
 
        do {
-               __preempt_count_add(PREEMPT_ACTIVE);
+               preempt_active_enter();
                local_irq_enable();
                __schedule();
                local_irq_disable();
-               __preempt_count_sub(PREEMPT_ACTIVE);
-
-               /*
-                * Check again in case we missed a preemption opportunity
-                * between schedule and now.
-                */
-               barrier();
+               preempt_active_exit();
        } while (need_resched());
 
        exception_exit(prev_state);
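
The open-coded PREEMPT_ACTIVE add/sub pairs around __schedule() are folded into preempt_active_enter()/preempt_active_exit(). As a rough sketch of the definitions this series adds to include/linux/preempt.h (an assumption about that header, not shown in this diff), they expand to:

/* Enter PREEMPT_ACTIVE and disable preemption in a single count update. */
#define preempt_active_enter() \
do { \
	preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \
	barrier(); \
} while (0)

/* Leave PREEMPT_ACTIVE; the barrier() replaces the one that was open-coded. */
#define preempt_active_exit() \
do { \
	barrier(); \
	preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \
} while (0)

Adding PREEMPT_DISABLE_OFFSET keeps preemption counted as disabled across __schedule(), and the barrier()s order the count update against the surrounding code, which is why the explicit barrier() after the old __preempt_count_sub() could be dropped.
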
@@ -3406,7 +3438,7 @@ static bool dl_param_changed(struct task_struct *p,
 
 static int __sched_setscheduler(struct task_struct *p,
                                const struct sched_attr *attr,
-                               bool user)
+                               bool user, bool pi)
 {
        int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
                      MAX_RT_PRIO - 1 - attr->sched_priority;
@@ -3592,18 +3624,20 @@ change:
        p->sched_reset_on_fork = reset_on_fork;
        oldprio = p->prio;
 
-       /*
-        * Take priority boosted tasks into account. If the new
-        * effective priority is unchanged, we just store the new
-        * normal parameters and do not touch the scheduler class and
-        * the runqueue. This will be done when the task deboost
-        * itself.
-        */
-       new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
-       if (new_effective_prio == oldprio) {
-               __setscheduler_params(p, attr);
-               task_rq_unlock(rq, p, &flags);
-               return 0;
+       if (pi) {
+               /*
+                * Take priority boosted tasks into account. If the new
+                * effective priority is unchanged, we just store the new
+                * normal parameters and do not touch the scheduler class and
+                * the runqueue. This will be done when the task deboosts
+                * itself.
+                */
+               new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+               if (new_effective_prio == oldprio) {
+                       __setscheduler_params(p, attr);
+                       task_rq_unlock(rq, p, &flags);
+                       return 0;
+               }
        }
 
        queued = task_on_rq_queued(p);
@@ -3614,7 +3648,7 @@ change:
                put_prev_task(rq, p);
 
        prev_class = p->sched_class;
-       __setscheduler(rq, p, attr, true);
+       __setscheduler(rq, p, attr, pi);
 
        if (running)
                p->sched_class->set_curr_task(rq);
@@ -3629,7 +3663,8 @@ change:
        check_class_changed(rq, p, prev_class, oldprio);
        task_rq_unlock(rq, p, &flags);
 
-       rt_mutex_adjust_pi(p);
+       if (pi)
+               rt_mutex_adjust_pi(p);
 
        return 0;
 }
@@ -3650,7 +3685,7 @@ static int _sched_setscheduler(struct task_struct *p, int policy,
                attr.sched_policy = policy;
        }
 
-       return __sched_setscheduler(p, &attr, check);
+       return __sched_setscheduler(p, &attr, check, true);
 }
 /**
  * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
@@ -3671,7 +3706,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
 
 int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
 {
-       return __sched_setscheduler(p, attr, true);
+       return __sched_setscheduler(p, attr, true, true);
 }
 EXPORT_SYMBOL_GPL(sched_setattr);
 
@@ -5314,7 +5349,7 @@ static struct notifier_block migration_notifier = {
        .priority = CPU_PRI_MIGRATION,
 };
 
-static void __cpuinit set_cpu_rq_start_time(void)
+static void set_cpu_rq_start_time(void)
 {
        int cpu = smp_processor_id();
        struct rq *rq = cpu_rq(cpu);
@@ -7199,7 +7234,7 @@ void __init sched_init(void)
                rq->sd = NULL;
                rq->rd = NULL;
                rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
-               rq->post_schedule = 0;
+               rq->balance_callback = NULL;
                rq->active_balance = 0;
                rq->next_balance = jiffies;
                rq->push_cpu = 0;
@@ -7329,32 +7364,12 @@ EXPORT_SYMBOL(___might_sleep);
 #endif
 
 #ifdef CONFIG_MAGIC_SYSRQ
-static void normalize_task(struct rq *rq, struct task_struct *p)
+void normalize_rt_tasks(void)
 {
-       const struct sched_class *prev_class = p->sched_class;
+       struct task_struct *g, *p;
        struct sched_attr attr = {
                .sched_policy = SCHED_NORMAL,
        };
-       int old_prio = p->prio;
-       int queued;
-
-       queued = task_on_rq_queued(p);
-       if (queued)
-               dequeue_task(rq, p, 0);
-       __setscheduler(rq, p, &attr, false);
-       if (queued) {
-               enqueue_task(rq, p, 0);
-               resched_curr(rq);
-       }
-
-       check_class_changed(rq, p, prev_class, old_prio);
-}
-
-void normalize_rt_tasks(void)
-{
-       struct task_struct *g, *p;
-       unsigned long flags;
-       struct rq *rq;
 
        read_lock(&tasklist_lock);
        for_each_process_thread(g, p) {
@@ -7381,9 +7396,7 @@ void normalize_rt_tasks(void)
                        continue;
                }
 
-               rq = task_rq_lock(p, &flags);
-               normalize_task(rq, p);
-               task_rq_unlock(rq, p, &flags);
+               __sched_setscheduler(p, &attr, false, false);
        }
        read_unlock(&tasklist_lock);
 }
@@ -7734,11 +7747,11 @@ static long sched_group_rt_runtime(struct task_group *tg)
        return rt_runtime_us;
 }
 
-static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
+static int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
 {
        u64 rt_runtime, rt_period;
 
-       rt_period = (u64)rt_period_us * NSEC_PER_USEC;
+       rt_period = rt_period_us * NSEC_PER_USEC;
        rt_runtime = tg->rt_bandwidth.rt_runtime;
 
        return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
@@ -8105,10 +8118,8 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 
        __refill_cfs_bandwidth_runtime(cfs_b);
        /* restart the period timer (if active) to handle new period expiry */
-       if (runtime_enabled && cfs_b->timer_active) {
-               /* force a reprogram */
-               __start_cfs_bandwidth(cfs_b, true);
-       }
+       if (runtime_enabled)
+               start_cfs_bandwidth(cfs_b);
        raw_spin_unlock_irq(&cfs_b->lock);
 
        for_each_online_cpu(i) {