X-Git-Url: https://git.kernelconcepts.de/?a=blobdiff_plain;f=kernel%2Fsched%2Fcore.c;h=b610ef9e522f03b7d0ff4ccbce22aa8710e205d6;hb=dbc7f069b93a249340e974d6e8f55656280d8701;hp=123673291ffbb160734ed889b934d557611a1cf1;hpb=8ba64dc33830fbcd57d59fddc2ca1c24a6a394c4;p=karo-tx-linux.git diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 123673291ffb..b610ef9e522f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -90,26 +90,6 @@ #define CREATE_TRACE_POINTS #include -void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) -{ - unsigned long delta; - ktime_t soft, hard, now; - - for (;;) { - if (hrtimer_active(period_timer)) - break; - - now = hrtimer_cb_get_time(period_timer); - hrtimer_forward(period_timer, now, period); - - soft = hrtimer_get_softexpires(period_timer); - hard = hrtimer_get_expires(period_timer); - delta = ktime_to_ns(ktime_sub(hard, soft)); - __hrtimer_start_range_ns(period_timer, soft, delta, - HRTIMER_MODE_ABS_PINNED, 0); - } -} - DEFINE_MUTEX(sched_domains_mutex); DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); @@ -355,12 +335,11 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) #ifdef CONFIG_SMP -static int __hrtick_restart(struct rq *rq) +static void __hrtick_restart(struct rq *rq) { struct hrtimer *timer = &rq->hrtick_timer; - ktime_t time = hrtimer_get_softexpires(timer); - return __hrtimer_start_range_ns(timer, time, 0, HRTIMER_MODE_ABS_PINNED, 0); + hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); } /* @@ -440,8 +419,8 @@ void hrtick_start(struct rq *rq, u64 delay) * doesn't make sense. Rely on vruntime for fairness. */ delay = max_t(u64, delay, 10000LL); - __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0, - HRTIMER_MODE_REL_PINNED, 0); + hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), + HRTIMER_MODE_REL_PINNED); } static inline void init_hrtick(void) @@ -511,7 +490,7 @@ static bool set_nr_and_not_polling(struct task_struct *p) static bool set_nr_if_polling(struct task_struct *p) { struct thread_info *ti = task_thread_info(p); - typeof(ti->flags) old, val = ACCESS_ONCE(ti->flags); + typeof(ti->flags) old, val = READ_ONCE(ti->flags); for (;;) { if (!(val & _TIF_POLLING_NRFLAG)) @@ -541,6 +520,52 @@ static bool set_nr_if_polling(struct task_struct *p) #endif #endif +void wake_q_add(struct wake_q_head *head, struct task_struct *task) +{ + struct wake_q_node *node = &task->wake_q; + + /* + * Atomically grab the task, if ->wake_q is !nil already it means + * its already queued (either by us or someone else) and will get the + * wakeup due to that. + * + * This cmpxchg() implies a full barrier, which pairs with the write + * barrier implied by the wakeup in wake_up_list(). + */ + if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL)) + return; + + get_task_struct(task); + + /* + * The head is context local, there can be no concurrency. + */ + *head->lastp = node; + head->lastp = &node->next; +} + +void wake_up_q(struct wake_q_head *head) +{ + struct wake_q_node *node = head->first; + + while (node != WAKE_Q_TAIL) { + struct task_struct *task; + + task = container_of(node, struct task_struct, wake_q); + BUG_ON(!task); + /* task can safely be re-inserted now */ + node = node->next; + task->wake_q.next = NULL; + + /* + * wake_up_process() implies a wmb() to pair with the queueing + * in wake_q_add() so as not to miss wakeups. + */ + wake_up_process(task); + put_task_struct(task); + } +} + /* * resched_curr - mark rq's current task 'to be rescheduled now'. * @@ -2252,23 +2277,35 @@ static struct rq *finish_task_switch(struct task_struct *prev) #ifdef CONFIG_SMP /* rq->lock is NOT held, but preemption is disabled */ -static inline void post_schedule(struct rq *rq) +static void __balance_callback(struct rq *rq) { - if (rq->post_schedule) { - unsigned long flags; + struct callback_head *head, *next; + void (*func)(struct rq *rq); + unsigned long flags; - raw_spin_lock_irqsave(&rq->lock, flags); - if (rq->curr->sched_class->post_schedule) - rq->curr->sched_class->post_schedule(rq); - raw_spin_unlock_irqrestore(&rq->lock, flags); + raw_spin_lock_irqsave(&rq->lock, flags); + head = rq->balance_callback; + rq->balance_callback = NULL; + while (head) { + func = (void (*)(struct rq *))head->func; + next = head->next; + head->next = NULL; + head = next; - rq->post_schedule = 0; + func(rq); } + raw_spin_unlock_irqrestore(&rq->lock, flags); +} + +static inline void balance_callback(struct rq *rq) +{ + if (unlikely(rq->balance_callback)) + __balance_callback(rq); } #else -static inline void post_schedule(struct rq *rq) +static inline void balance_callback(struct rq *rq) { } @@ -2286,7 +2323,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) /* finish_task_switch() drops rq->lock and enables preemtion */ preempt_disable(); rq = finish_task_switch(prev); - post_schedule(rq); + balance_callback(rq); preempt_enable(); if (current->set_child_tid) @@ -2397,9 +2434,9 @@ unsigned long nr_iowait_cpu(int cpu) void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) { - struct rq *this = this_rq(); - *nr_waiters = atomic_read(&this->nr_iowait); - *load = this->cpu_load[0]; + struct rq *rq = this_rq(); + *nr_waiters = atomic_read(&rq->nr_iowait); + *load = rq->load.weight; } #ifdef CONFIG_SMP @@ -2497,6 +2534,7 @@ void scheduler_tick(void) update_rq_clock(rq); curr->sched_class->task_tick(rq, curr, 0); update_cpu_load_active(rq); + calc_global_load_tick(rq); raw_spin_unlock(&rq->lock); perf_event_task_tick(); @@ -2525,7 +2563,7 @@ void scheduler_tick(void) u64 scheduler_tick_max_deferment(void) { struct rq *rq = this_rq(); - unsigned long next, now = ACCESS_ONCE(jiffies); + unsigned long next, now = READ_ONCE(jiffies); next = rq->last_sched_tick + HZ; @@ -2726,9 +2764,7 @@ again: * - return from syscall or exception to user-space * - return from interrupt-handler to user-space * - * WARNING: all callers must re-check need_resched() afterward and reschedule - * accordingly in case an event triggered the need for rescheduling (such as - * an interrupt waking up a task) while preemption was disabled in __schedule(). + * WARNING: must be called with preemption disabled! */ static void __sched __schedule(void) { @@ -2737,7 +2773,6 @@ static void __sched __schedule(void) struct rq *rq; int cpu; - preempt_disable(); cpu = smp_processor_id(); rq = cpu_rq(cpu); rcu_note_context_switch(); @@ -2800,9 +2835,7 @@ static void __sched __schedule(void) } else raw_spin_unlock_irq(&rq->lock); - post_schedule(rq); - - sched_preempt_enable_no_resched(); + balance_callback(rq); } static inline void sched_submit_work(struct task_struct *tsk) @@ -2823,7 +2856,9 @@ asmlinkage __visible void __sched schedule(void) sched_submit_work(tsk); do { + preempt_disable(); __schedule(); + sched_preempt_enable_no_resched(); } while (need_resched()); } EXPORT_SYMBOL(schedule); @@ -2862,15 +2897,14 @@ void __sched schedule_preempt_disabled(void) static void __sched notrace preempt_schedule_common(void) { do { - __preempt_count_add(PREEMPT_ACTIVE); + preempt_active_enter(); __schedule(); - __preempt_count_sub(PREEMPT_ACTIVE); + preempt_active_exit(); /* * Check again in case we missed a preemption opportunity * between schedule and now. */ - barrier(); } while (need_resched()); } @@ -2894,9 +2928,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) NOKPROBE_SYMBOL(preempt_schedule); EXPORT_SYMBOL(preempt_schedule); -#ifdef CONFIG_CONTEXT_TRACKING /** - * preempt_schedule_context - preempt_schedule called by tracing + * preempt_schedule_notrace - preempt_schedule called by tracing * * The tracing infrastructure uses preempt_enable_notrace to prevent * recursion and tracing preempt enabling caused by the tracing @@ -2909,7 +2942,7 @@ EXPORT_SYMBOL(preempt_schedule); * instead of preempt_schedule() to exit user context if needed before * calling the scheduler. */ -asmlinkage __visible void __sched notrace preempt_schedule_context(void) +asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) { enum ctx_state prev_ctx; @@ -2917,7 +2950,13 @@ asmlinkage __visible void __sched notrace preempt_schedule_context(void) return; do { - __preempt_count_add(PREEMPT_ACTIVE); + /* + * Use raw __prempt_count() ops that don't call function. + * We can't call functions before disabling preemption which + * disarm preemption tracing recursions. + */ + __preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); + barrier(); /* * Needs preempt disabled in case user_exit() is traced * and the tracer calls preempt_enable_notrace() causing @@ -2927,12 +2966,11 @@ asmlinkage __visible void __sched notrace preempt_schedule_context(void) __schedule(); exception_exit(prev_ctx); - __preempt_count_sub(PREEMPT_ACTIVE); barrier(); + __preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); } while (need_resched()); } -EXPORT_SYMBOL_GPL(preempt_schedule_context); -#endif /* CONFIG_CONTEXT_TRACKING */ +EXPORT_SYMBOL_GPL(preempt_schedule_notrace); #endif /* CONFIG_PREEMPT */ @@ -2952,17 +2990,11 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) prev_state = exception_enter(); do { - __preempt_count_add(PREEMPT_ACTIVE); + preempt_active_enter(); local_irq_enable(); __schedule(); local_irq_disable(); - __preempt_count_sub(PREEMPT_ACTIVE); - - /* - * Check again in case we missed a preemption opportunity - * between schedule and now. - */ - barrier(); + preempt_active_exit(); } while (need_resched()); exception_exit(prev_state); @@ -3406,7 +3438,7 @@ static bool dl_param_changed(struct task_struct *p, static int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, - bool user) + bool user, bool pi) { int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : MAX_RT_PRIO - 1 - attr->sched_priority; @@ -3592,18 +3624,20 @@ change: p->sched_reset_on_fork = reset_on_fork; oldprio = p->prio; - /* - * Take priority boosted tasks into account. If the new - * effective priority is unchanged, we just store the new - * normal parameters and do not touch the scheduler class and - * the runqueue. This will be done when the task deboost - * itself. - */ - new_effective_prio = rt_mutex_get_effective_prio(p, newprio); - if (new_effective_prio == oldprio) { - __setscheduler_params(p, attr); - task_rq_unlock(rq, p, &flags); - return 0; + if (pi) { + /* + * Take priority boosted tasks into account. If the new + * effective priority is unchanged, we just store the new + * normal parameters and do not touch the scheduler class and + * the runqueue. This will be done when the task deboost + * itself. + */ + new_effective_prio = rt_mutex_get_effective_prio(p, newprio); + if (new_effective_prio == oldprio) { + __setscheduler_params(p, attr); + task_rq_unlock(rq, p, &flags); + return 0; + } } queued = task_on_rq_queued(p); @@ -3614,7 +3648,7 @@ change: put_prev_task(rq, p); prev_class = p->sched_class; - __setscheduler(rq, p, attr, true); + __setscheduler(rq, p, attr, pi); if (running) p->sched_class->set_curr_task(rq); @@ -3629,7 +3663,8 @@ change: check_class_changed(rq, p, prev_class, oldprio); task_rq_unlock(rq, p, &flags); - rt_mutex_adjust_pi(p); + if (pi) + rt_mutex_adjust_pi(p); return 0; } @@ -3650,7 +3685,7 @@ static int _sched_setscheduler(struct task_struct *p, int policy, attr.sched_policy = policy; } - return __sched_setscheduler(p, &attr, check); + return __sched_setscheduler(p, &attr, check, true); } /** * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. @@ -3671,7 +3706,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler); int sched_setattr(struct task_struct *p, const struct sched_attr *attr) { - return __sched_setscheduler(p, attr, true); + return __sched_setscheduler(p, attr, true, true); } EXPORT_SYMBOL_GPL(sched_setattr); @@ -5314,7 +5349,7 @@ static struct notifier_block migration_notifier = { .priority = CPU_PRI_MIGRATION, }; -static void __cpuinit set_cpu_rq_start_time(void) +static void set_cpu_rq_start_time(void) { int cpu = smp_processor_id(); struct rq *rq = cpu_rq(cpu); @@ -7199,7 +7234,7 @@ void __init sched_init(void) rq->sd = NULL; rq->rd = NULL; rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE; - rq->post_schedule = 0; + rq->balance_callback = NULL; rq->active_balance = 0; rq->next_balance = jiffies; rq->push_cpu = 0; @@ -7329,32 +7364,12 @@ EXPORT_SYMBOL(___might_sleep); #endif #ifdef CONFIG_MAGIC_SYSRQ -static void normalize_task(struct rq *rq, struct task_struct *p) +void normalize_rt_tasks(void) { - const struct sched_class *prev_class = p->sched_class; + struct task_struct *g, *p; struct sched_attr attr = { .sched_policy = SCHED_NORMAL, }; - int old_prio = p->prio; - int queued; - - queued = task_on_rq_queued(p); - if (queued) - dequeue_task(rq, p, 0); - __setscheduler(rq, p, &attr, false); - if (queued) { - enqueue_task(rq, p, 0); - resched_curr(rq); - } - - check_class_changed(rq, p, prev_class, old_prio); -} - -void normalize_rt_tasks(void) -{ - struct task_struct *g, *p; - unsigned long flags; - struct rq *rq; read_lock(&tasklist_lock); for_each_process_thread(g, p) { @@ -7381,9 +7396,7 @@ void normalize_rt_tasks(void) continue; } - rq = task_rq_lock(p, &flags); - normalize_task(rq, p); - task_rq_unlock(rq, p, &flags); + __sched_setscheduler(p, &attr, false, false); } read_unlock(&tasklist_lock); } @@ -7734,11 +7747,11 @@ static long sched_group_rt_runtime(struct task_group *tg) return rt_runtime_us; } -static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) +static int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us) { u64 rt_runtime, rt_period; - rt_period = (u64)rt_period_us * NSEC_PER_USEC; + rt_period = rt_period_us * NSEC_PER_USEC; rt_runtime = tg->rt_bandwidth.rt_runtime; return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); @@ -8105,10 +8118,8 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) __refill_cfs_bandwidth_runtime(cfs_b); /* restart the period timer (if active) to handle new period expiry */ - if (runtime_enabled && cfs_b->timer_active) { - /* force a reprogram */ - __start_cfs_bandwidth(cfs_b, true); - } + if (runtime_enabled) + start_cfs_bandwidth(cfs_b); raw_spin_unlock_irq(&cfs_b->lock); for_each_online_cpu(i) {