#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
-void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
-{
- unsigned long delta;
- ktime_t soft, hard, now;
-
- for (;;) {
- if (hrtimer_active(period_timer))
- break;
-
- now = hrtimer_cb_get_time(period_timer);
- hrtimer_forward(period_timer, now, period);
-
- soft = hrtimer_get_softexpires(period_timer);
- hard = hrtimer_get_expires(period_timer);
- delta = ktime_to_ns(ktime_sub(hard, soft));
- __hrtimer_start_range_ns(period_timer, soft, delta,
- HRTIMER_MODE_ABS_PINNED, 0);
- }
-}
-
DEFINE_MUTEX(sched_domains_mutex);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
#ifdef CONFIG_SMP
-static int __hrtick_restart(struct rq *rq)
+static void __hrtick_restart(struct rq *rq)
{
struct hrtimer *timer = &rq->hrtick_timer;
- ktime_t time = hrtimer_get_softexpires(timer);
- return __hrtimer_start_range_ns(timer, time, 0, HRTIMER_MODE_ABS_PINNED, 0);
+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
}
/*
* doesn't make sense. Rely on vruntime for fairness.
*/
delay = max_t(u64, delay, 10000LL);
- __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
- HRTIMER_MODE_REL_PINNED, 0);
+ hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
+ HRTIMER_MODE_REL_PINNED);
}
static inline void init_hrtick(void)
static bool set_nr_if_polling(struct task_struct *p)
{
struct thread_info *ti = task_thread_info(p);
- typeof(ti->flags) old, val = ACCESS_ONCE(ti->flags);
+ typeof(ti->flags) old, val = READ_ONCE(ti->flags);
for (;;) {
if (!(val & _TIF_POLLING_NRFLAG))
#endif
#endif
+void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+{
+ struct wake_q_node *node = &task->wake_q;
+
+ /*
+ * Atomically grab the task, if ->wake_q is !nil already it means
+ * its already queued (either by us or someone else) and will get the
+ * wakeup due to that.
+ *
+ * This cmpxchg() implies a full barrier, which pairs with the write
+ * barrier implied by the wakeup in wake_up_list().
+ */
+ if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
+ return;
+
+ get_task_struct(task);
+
+ /*
+ * The head is context local, there can be no concurrency.
+ */
+ *head->lastp = node;
+ head->lastp = &node->next;
+}
+
+void wake_up_q(struct wake_q_head *head)
+{
+ struct wake_q_node *node = head->first;
+
+ while (node != WAKE_Q_TAIL) {
+ struct task_struct *task;
+
+ task = container_of(node, struct task_struct, wake_q);
+ BUG_ON(!task);
+ /* task can safely be re-inserted now */
+ node = node->next;
+ task->wake_q.next = NULL;
+
+ /*
+ * wake_up_process() implies a wmb() to pair with the queueing
+ * in wake_q_add() so as not to miss wakeups.
+ */
+ wake_up_process(task);
+ put_task_struct(task);
+ }
+}
+
/*
* resched_curr - mark rq's current task 'to be rescheduled now'.
*
#ifdef CONFIG_SMP
/* rq->lock is NOT held, but preemption is disabled */
-static inline void post_schedule(struct rq *rq)
+static void __balance_callback(struct rq *rq)
{
- if (rq->post_schedule) {
- unsigned long flags;
+ struct callback_head *head, *next;
+ void (*func)(struct rq *rq);
+ unsigned long flags;
- raw_spin_lock_irqsave(&rq->lock, flags);
- if (rq->curr->sched_class->post_schedule)
- rq->curr->sched_class->post_schedule(rq);
- raw_spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ head = rq->balance_callback;
+ rq->balance_callback = NULL;
+ while (head) {
+ func = (void (*)(struct rq *))head->func;
+ next = head->next;
+ head->next = NULL;
+ head = next;
- rq->post_schedule = 0;
+ func(rq);
}
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static inline void balance_callback(struct rq *rq)
+{
+ if (unlikely(rq->balance_callback))
+ __balance_callback(rq);
}
#else
-static inline void post_schedule(struct rq *rq)
+static inline void balance_callback(struct rq *rq)
{
}
/* finish_task_switch() drops rq->lock and enables preemtion */
preempt_disable();
rq = finish_task_switch(prev);
- post_schedule(rq);
+ balance_callback(rq);
preempt_enable();
if (current->set_child_tid)
void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
{
- struct rq *this = this_rq();
- *nr_waiters = atomic_read(&this->nr_iowait);
- *load = this->cpu_load[0];
+ struct rq *rq = this_rq();
+ *nr_waiters = atomic_read(&rq->nr_iowait);
+ *load = rq->load.weight;
}
#ifdef CONFIG_SMP
update_rq_clock(rq);
curr->sched_class->task_tick(rq, curr, 0);
update_cpu_load_active(rq);
+ calc_global_load_tick(rq);
raw_spin_unlock(&rq->lock);
perf_event_task_tick();
u64 scheduler_tick_max_deferment(void)
{
struct rq *rq = this_rq();
- unsigned long next, now = ACCESS_ONCE(jiffies);
+ unsigned long next, now = READ_ONCE(jiffies);
next = rq->last_sched_tick + HZ;
* - return from syscall or exception to user-space
* - return from interrupt-handler to user-space
*
- * WARNING: all callers must re-check need_resched() afterward and reschedule
- * accordingly in case an event triggered the need for rescheduling (such as
- * an interrupt waking up a task) while preemption was disabled in __schedule().
+ * WARNING: must be called with preemption disabled!
*/
static void __sched __schedule(void)
{
struct rq *rq;
int cpu;
- preempt_disable();
cpu = smp_processor_id();
rq = cpu_rq(cpu);
rcu_note_context_switch();
} else
raw_spin_unlock_irq(&rq->lock);
- post_schedule(rq);
-
- sched_preempt_enable_no_resched();
+ balance_callback(rq);
}
static inline void sched_submit_work(struct task_struct *tsk)
sched_submit_work(tsk);
do {
+ preempt_disable();
__schedule();
+ sched_preempt_enable_no_resched();
} while (need_resched());
}
EXPORT_SYMBOL(schedule);
static void __sched notrace preempt_schedule_common(void)
{
do {
- __preempt_count_add(PREEMPT_ACTIVE);
+ preempt_active_enter();
__schedule();
- __preempt_count_sub(PREEMPT_ACTIVE);
+ preempt_active_exit();
/*
* Check again in case we missed a preemption opportunity
* between schedule and now.
*/
- barrier();
} while (need_resched());
}
NOKPROBE_SYMBOL(preempt_schedule);
EXPORT_SYMBOL(preempt_schedule);
-#ifdef CONFIG_CONTEXT_TRACKING
/**
- * preempt_schedule_context - preempt_schedule called by tracing
+ * preempt_schedule_notrace - preempt_schedule called by tracing
*
* The tracing infrastructure uses preempt_enable_notrace to prevent
* recursion and tracing preempt enabling caused by the tracing
* instead of preempt_schedule() to exit user context if needed before
* calling the scheduler.
*/
-asmlinkage __visible void __sched notrace preempt_schedule_context(void)
+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
{
enum ctx_state prev_ctx;
return;
do {
- __preempt_count_add(PREEMPT_ACTIVE);
+ /*
+ * Use raw __prempt_count() ops that don't call function.
+ * We can't call functions before disabling preemption which
+ * disarm preemption tracing recursions.
+ */
+ __preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET);
+ barrier();
/*
* Needs preempt disabled in case user_exit() is traced
* and the tracer calls preempt_enable_notrace() causing
__schedule();
exception_exit(prev_ctx);
- __preempt_count_sub(PREEMPT_ACTIVE);
barrier();
+ __preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET);
} while (need_resched());
}
-EXPORT_SYMBOL_GPL(preempt_schedule_context);
-#endif /* CONFIG_CONTEXT_TRACKING */
+EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
#endif /* CONFIG_PREEMPT */
prev_state = exception_enter();
do {
- __preempt_count_add(PREEMPT_ACTIVE);
+ preempt_active_enter();
local_irq_enable();
__schedule();
local_irq_disable();
- __preempt_count_sub(PREEMPT_ACTIVE);
-
- /*
- * Check again in case we missed a preemption opportunity
- * between schedule and now.
- */
- barrier();
+ preempt_active_exit();
} while (need_resched());
exception_exit(prev_state);
static int __sched_setscheduler(struct task_struct *p,
const struct sched_attr *attr,
- bool user)
+ bool user, bool pi)
{
int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
MAX_RT_PRIO - 1 - attr->sched_priority;
p->sched_reset_on_fork = reset_on_fork;
oldprio = p->prio;
- /*
- * Take priority boosted tasks into account. If the new
- * effective priority is unchanged, we just store the new
- * normal parameters and do not touch the scheduler class and
- * the runqueue. This will be done when the task deboost
- * itself.
- */
- new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
- if (new_effective_prio == oldprio) {
- __setscheduler_params(p, attr);
- task_rq_unlock(rq, p, &flags);
- return 0;
+ if (pi) {
+ /*
+ * Take priority boosted tasks into account. If the new
+ * effective priority is unchanged, we just store the new
+ * normal parameters and do not touch the scheduler class and
+ * the runqueue. This will be done when the task deboost
+ * itself.
+ */
+ new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+ if (new_effective_prio == oldprio) {
+ __setscheduler_params(p, attr);
+ task_rq_unlock(rq, p, &flags);
+ return 0;
+ }
}
queued = task_on_rq_queued(p);
put_prev_task(rq, p);
prev_class = p->sched_class;
- __setscheduler(rq, p, attr, true);
+ __setscheduler(rq, p, attr, pi);
if (running)
p->sched_class->set_curr_task(rq);
check_class_changed(rq, p, prev_class, oldprio);
task_rq_unlock(rq, p, &flags);
- rt_mutex_adjust_pi(p);
+ if (pi)
+ rt_mutex_adjust_pi(p);
return 0;
}
attr.sched_policy = policy;
}
- return __sched_setscheduler(p, &attr, check);
+ return __sched_setscheduler(p, &attr, check, true);
}
/**
* sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
{
- return __sched_setscheduler(p, attr, true);
+ return __sched_setscheduler(p, attr, true, true);
}
EXPORT_SYMBOL_GPL(sched_setattr);
long ret;
current->in_iowait = 1;
- if (old_iowait)
- blk_schedule_flush_plug(current);
- else
- blk_flush_plug(current);
+ blk_schedule_flush_plug(current);
delayacct_blkio_start();
rq = raw_rq();
.priority = CPU_PRI_MIGRATION,
};
-static void __cpuinit set_cpu_rq_start_time(void)
+static void set_cpu_rq_start_time(void)
{
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
rq->sd = NULL;
rq->rd = NULL;
rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
- rq->post_schedule = 0;
+ rq->balance_callback = NULL;
rq->active_balance = 0;
rq->next_balance = jiffies;
rq->push_cpu = 0;
#endif
#ifdef CONFIG_MAGIC_SYSRQ
-static void normalize_task(struct rq *rq, struct task_struct *p)
+void normalize_rt_tasks(void)
{
- const struct sched_class *prev_class = p->sched_class;
+ struct task_struct *g, *p;
struct sched_attr attr = {
.sched_policy = SCHED_NORMAL,
};
- int old_prio = p->prio;
- int queued;
-
- queued = task_on_rq_queued(p);
- if (queued)
- dequeue_task(rq, p, 0);
- __setscheduler(rq, p, &attr, false);
- if (queued) {
- enqueue_task(rq, p, 0);
- resched_curr(rq);
- }
-
- check_class_changed(rq, p, prev_class, old_prio);
-}
-
-void normalize_rt_tasks(void)
-{
- struct task_struct *g, *p;
- unsigned long flags;
- struct rq *rq;
read_lock(&tasklist_lock);
for_each_process_thread(g, p) {
continue;
}
- rq = task_rq_lock(p, &flags);
- normalize_task(rq, p);
- task_rq_unlock(rq, p, &flags);
+ __sched_setscheduler(p, &attr, false, false);
}
read_unlock(&tasklist_lock);
}
return rt_runtime_us;
}
-static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
+static int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
{
u64 rt_runtime, rt_period;
- rt_period = (u64)rt_period_us * NSEC_PER_USEC;
+ rt_period = rt_period_us * NSEC_PER_USEC;
rt_runtime = tg->rt_bandwidth.rt_runtime;
return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
__refill_cfs_bandwidth_runtime(cfs_b);
/* restart the period timer (if active) to handle new period expiry */
- if (runtime_enabled && cfs_b->timer_active) {
- /* force a reprogram */
- __start_cfs_bandwidth(cfs_b, true);
- }
+ if (runtime_enabled)
+ start_cfs_bandwidth(cfs_b);
raw_spin_unlock_irq(&cfs_b->lock);
for_each_online_cpu(i) {