git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge branch 'sched/urgent' into sched/core, to pick up fixes before applying new...
author Ingo Molnar <mingo@kernel.org>
Tue, 6 Oct 2015 15:05:36 +0000 (17:05 +0200)
committer Ingo Molnar <mingo@kernel.org>
Tue, 6 Oct 2015 15:05:36 +0000 (17:05 +0200)
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/linux/sched.h
include/linux/sched/deadline.h
kernel/cpu.c
kernel/locking/rtmutex.c
kernel/sched/core.c
kernel/sched/cpudeadline.c
kernel/sched/cpudeadline.h
kernel/sched/fair.c
kernel/sched/features.h
kernel/sched/rt.c
kernel/sched/sched.h

index b7b9501b41af4eab6a096601c3baf85c85854807..d086cf0ca2c78d0e6c66501d669f1340fa8527f6 100644 (file)
@@ -1139,8 +1139,6 @@ struct sched_domain_topology_level {
 #endif
 };
 
-extern struct sched_domain_topology_level *sched_domain_topology;
-
 extern void set_sched_topology(struct sched_domain_topology_level *tl);
 extern void wake_up_if_idle(int cpu);
 
@@ -1189,10 +1187,10 @@ struct load_weight {
 
 /*
  * The load_avg/util_avg accumulate an infinite geometric series.
- * 1) load_avg factors the amount of time that a sched_entity is
- * runnable on a rq into its weight. For cfs_rq, it is the aggregated
- * such weights of all runnable and blocked sched_entities.
- * 2) util_avg factors frequency scaling into the amount of time
+ * 1) load_avg factors frequency scaling and the amount of time that a
+ * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the
+ * aggregated such weights of all runnable and blocked sched_entities.
+ * 2) util_avg factors frequency and CPU scaling into the amount of time
  * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE].
  * For cfs_rq, it is the aggregated such times of all runnable and
  * blocked sched_entities.
index 9d303b8847df28e4851746515fd9ee388e1cf8e1..9089a2ae913ddf4d12f10a7bcff209bbd1894400 100644 (file)
@@ -21,4 +21,9 @@ static inline int dl_task(struct task_struct *p)
        return dl_prio(p->prio);
 }
 
+static inline bool dl_time_before(u64 a, u64 b)
+{
+       return (s64)(a - b) < 0;
+}
+
 #endif /* _SCHED_DEADLINE_H */
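
The helper moves here from kernel/sched/cpudeadline.c (removed below) so that rtmutex.c can share it, and becomes bool. The signed-difference idiom stays correct when the u64 clock wraps around, unlike a plain '<'. A standalone sketch of that property (hypothetical userspace harness, not kernel code; assumes the usual two's-complement conversion):

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    typedef uint64_t u64;
    typedef int64_t s64;

    static inline bool dl_time_before(u64 a, u64 b)
    {
            return (s64)(a - b) < 0;
    }

    int main(void)
    {
            u64 before_wrap = (u64)-100;  /* deadline shortly before the counter wraps */
            u64 after_wrap = 50;          /* deadline shortly after the wrap */

            /* A plain comparison misorders deadlines across the wrap point;
             * the signed difference of the two u64 values does not. */
            printf("plain <        : %d\n", before_wrap < after_wrap);                /* 0 */
            printf("dl_time_before : %d\n", dl_time_before(before_wrap, after_wrap)); /* 1 */
            return 0;
    }
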
index 82cf9dff4295eaa82305fe04a43cecb0c8a4c27e..050c63472f03f7e357eb20ac4ef4e815a0155eae 100644 (file)
@@ -304,8 +304,8 @@ static inline void check_for_tasks(int dead_cpu)
 {
        struct task_struct *g, *p;
 
-       read_lock_irq(&tasklist_lock);
-       do_each_thread(g, p) {
+       read_lock(&tasklist_lock);
+       for_each_process_thread(g, p) {
                if (!p->on_rq)
                        continue;
                /*
@@ -320,8 +320,8 @@ static inline void check_for_tasks(int dead_cpu)
 
                pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
                        p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
-       } while_each_thread(g, p);
-       read_unlock_irq(&tasklist_lock);
+       }
+       read_unlock(&tasklist_lock);
 }
 
 struct take_cpu_down_param {
index 7781d801212fa32082593cf6bbb5dd44077cb78e..35e9bfcc6ad940d90e9790544106b87912a05195 100644 (file)
@@ -158,7 +158,8 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left,
         * then right waiter has a dl_prio() too.
         */
        if (dl_prio(left->prio))
-               return (left->task->dl.deadline < right->task->dl.deadline);
+               return dl_time_before(left->task->dl.deadline,
+                                     right->task->dl.deadline);
 
        return 0;
 }
index 10a8faa1b0d4a5f737bd9008eb8f9a7e817b6ec9..88a425443ff4afeb678610b4870fd5ccb7b181ad 100644 (file)
@@ -817,7 +817,7 @@ static void set_load_weight(struct task_struct *p)
        /*
         * SCHED_IDLE tasks get minimal weight:
         */
-       if (p->policy == SCHED_IDLE) {
+       if (idle_policy(p->policy)) {
                load->weight = scale_load(WEIGHT_IDLEPRIO);
                load->inv_weight = WMULT_IDLEPRIO;
                return;
@@ -2114,23 +2114,17 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 #endif /* CONFIG_NUMA_BALANCING */
 }
 
+DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
+
 #ifdef CONFIG_NUMA_BALANCING
-#ifdef CONFIG_SCHED_DEBUG
+
 void set_numabalancing_state(bool enabled)
 {
        if (enabled)
-               sched_feat_set("NUMA");
+               static_branch_enable(&sched_numa_balancing);
        else
-               sched_feat_set("NO_NUMA");
+               static_branch_disable(&sched_numa_balancing);
 }
-#else
-__read_mostly bool numabalancing_enabled;
-
-void set_numabalancing_state(bool enabled)
-{
-       numabalancing_enabled = enabled;
-}
-#endif /* CONFIG_SCHED_DEBUG */
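
The rewrite collapses the CONFIG_SCHED_DEBUG and non-debug variants into one implementation built on a static key, which compiles to a patchable jump label rather than a memory load on the hot paths. A rough userspace model of the enable/disable/query surface (assumption: a plain bool stands in for the self-patching branch, which is the whole point of the real primitives):

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for DEFINE_STATIC_KEY_FALSE(sched_numa_balancing): starts off. */
    static bool sched_numa_balancing;

    /* Stand-ins for the kernel primitives; the real ones rewrite the branch
     * instruction at every call site instead of flipping a variable. */
    static void static_branch_enable(bool *key)  { *key = true; }
    static void static_branch_disable(bool *key) { *key = false; }
    static bool static_branch_likely(bool *key)  { return *key; }

    void set_numabalancing_state(bool enabled)
    {
            if (enabled)
                    static_branch_enable(&sched_numa_balancing);
            else
                    static_branch_disable(&sched_numa_balancing);
    }

    int main(void)
    {
            set_numabalancing_state(true);
            printf("%d\n", static_branch_likely(&sched_numa_balancing)); /* 1 */
            return 0;
    }
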
 
 #ifdef CONFIG_PROC_SYSCTL
 int sysctl_numa_balancing(struct ctl_table *table, int write,
@@ -2138,7 +2132,7 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
 {
        struct ctl_table t;
        int err;
-       int state = numabalancing_enabled;
+       int state = static_branch_likely(&sched_numa_balancing);
 
        if (write && !capable(CAP_SYS_ADMIN))
                return -EPERM;
@@ -2349,6 +2343,8 @@ void wake_up_new_task(struct task_struct *p)
        struct rq *rq;
 
        raw_spin_lock_irqsave(&p->pi_lock, flags);
+       /* Initialize new task's runnable average */
+       init_entity_runnable_average(&p->se);
 #ifdef CONFIG_SMP
        /*
         * Fork balancing, do it here and not earlier because:
@@ -2358,8 +2354,6 @@ void wake_up_new_task(struct task_struct *p)
        set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
 #endif
 
-       /* Initialize new task's runnable average */
-       init_entity_runnable_average(&p->se);
        rq = __task_rq_lock(p);
        activate_task(rq, p, 0);
        p->on_rq = TASK_ON_RQ_QUEUED;
@@ -3746,10 +3740,7 @@ recheck:
        } else {
                reset_on_fork = !!(attr->sched_flags & SCHED_FLAG_RESET_ON_FORK);
 
-               if (policy != SCHED_DEADLINE &&
-                               policy != SCHED_FIFO && policy != SCHED_RR &&
-                               policy != SCHED_NORMAL && policy != SCHED_BATCH &&
-                               policy != SCHED_IDLE)
+               if (!valid_policy(policy))
                        return -EINVAL;
        }
 
@@ -3805,7 +3796,7 @@ recheck:
                 * Treat SCHED_IDLE as nice 20. Only allow a switch to
                 * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
                 */
-               if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) {
+               if (idle_policy(p->policy) && !idle_policy(policy)) {
                        if (!can_nice(p, task_nice(p)))
                                return -EPERM;
                }
@@ -6469,7 +6460,8 @@ static struct sched_domain_topology_level default_topology[] = {
        { NULL, },
 };
 
-struct sched_domain_topology_level *sched_domain_topology = default_topology;
+static struct sched_domain_topology_level *sched_domain_topology =
+       default_topology;
 
 #define for_each_sd_topology(tl)                       \
        for (tl = sched_domain_topology; tl->mask; tl++)
@@ -7736,7 +7728,7 @@ void sched_move_task(struct task_struct *tsk)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
        if (tsk->sched_class->task_move_group)
-               tsk->sched_class->task_move_group(tsk, queued);
+               tsk->sched_class->task_move_group(tsk);
        else
 #endif
                set_task_rq(tsk, task_cpu(tsk));
@@ -8208,14 +8200,6 @@ static void cpu_cgroup_exit(struct cgroup_subsys_state *css,
                            struct cgroup_subsys_state *old_css,
                            struct task_struct *task)
 {
-       /*
-        * cgroup_exit() is called in the copy_process() failure path.
-        * Ignore this case since the task hasn't ran yet, this avoids
-        * trying to poke a half freed task state from generic code.
-        */
-       if (!(task->flags & PF_EXITING))
-               return;
-
        sched_move_task(task);
 }
 
index c6acb07466bb82b1143af4aba1da5e483f628e4f..5a75b08cfd8576d830adf9fc9df52d807c052be9 100644 (file)
@@ -31,11 +31,6 @@ static inline int right_child(int i)
        return (i << 1) + 2;
 }
 
-static inline int dl_time_before(u64 a, u64 b)
-{
-       return (s64)(a - b) < 0;
-}
-
 static void cpudl_exchange(struct cpudl *cp, int a, int b)
 {
        int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu;
index 1a0a6ef2fbe1be030e32895571a1d267e26579e4..fcbdf83fed7e31afc4ee916f94d272d03117a2c2 100644 (file)
@@ -2,6 +2,7 @@
 #define _LINUX_CPUDL_H
 
 #include <linux/sched.h>
+#include <linux/sched/deadline.h>
 
 #define IDX_INVALID     -1
 
index 6e2e3483b1ecff588e76103e0b7b7d673f16d2a5..4df37a48f4995c1c69ea324a3f333038017e80c6 100644 (file)
@@ -661,11 +661,12 @@ static unsigned long task_h_load(struct task_struct *p);
 
 /*
  * We choose a half-life close to 1 scheduling period.
- * Note: The tables below are dependent on this value.
+ * Note: The tables runnable_avg_yN_inv and runnable_avg_yN_sum are
+ * dependent on this value.
  */
 #define LOAD_AVG_PERIOD 32
 #define LOAD_AVG_MAX 47742 /* maximum possible load avg */
-#define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_MAX_AVG */
+#define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_AVG_MAX */
 
 /* Give a new sched_entity initial runnable values so that its load is weighted heavily during its infant period */
 void init_entity_runnable_average(struct sched_entity *se)
@@ -682,7 +683,7 @@ void init_entity_runnable_average(struct sched_entity *se)
        sa->load_avg = scale_load_down(se->load.weight);
        sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
        sa->util_avg = scale_load_down(SCHED_LOAD_SCALE);
-       sa->util_sum = LOAD_AVG_MAX;
+       sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
        /* when this task is enqueued, it will contribute to its cfs_rq's load_avg */
 }
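
LOAD_AVG_MAX is the limit of the decaying sum that __update_load_avg() accrues: each full period (1024 time units, roughly a millisecond) scales accrued history by y, with y^LOAD_AVG_PERIOD = 1/2, and that limit is also why the corrected initialization above sets util_sum to util_avg * LOAD_AVG_MAX rather than a bare LOAD_AVG_MAX. A floating-point sketch of the limit (the kernel's integer lookup tables truncate, hence its slightly lower constants 47742 and 345):

    #include <stdio.h>
    #include <math.h>

    int main(void)
    {
            /* LOAD_AVG_PERIOD = 32: each full period multiplies accrued
             * history by y, where y^32 = 1/2. */
            double y = pow(0.5, 1.0 / 32.0);

            /* A continuously runnable entity adds 1024 per period, so the
             * series 1024 * (1 + y + y^2 + ...) converges to 1024 / (1 - y).
             * Prints ~47789; integer truncation in the kernel's tables puts
             * the reachable maximum at LOAD_AVG_MAX = 47742, reached after
             * LOAD_AVG_MAX_N = 345 full periods. */
            printf("limit ~= %.0f\n", 1024.0 / (1.0 - y));
            return 0;
    }
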
 
@@ -2069,7 +2070,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
        int local = !!(flags & TNF_FAULT_LOCAL);
        int priv;
 
-       if (!numabalancing_enabled)
+       if (!static_branch_likely(&sched_numa_balancing))
                return;
 
        /* for example, ksmd faulting in a user's mm */
@@ -2157,7 +2158,7 @@ void task_numa_work(struct callback_head *work)
        struct vm_area_struct *vma;
        unsigned long start, end;
        unsigned long nr_pte_updates = 0;
-       long pages;
+       long pages, virtpages;
 
        WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work));
 
@@ -2203,9 +2204,11 @@ void task_numa_work(struct callback_head *work)
        start = mm->numa_scan_offset;
        pages = sysctl_numa_balancing_scan_size;
        pages <<= 20 - PAGE_SHIFT; /* MB in pages */
+       virtpages = pages * 8;     /* Scan up to this much virtual space */
        if (!pages)
                return;
 
+
        down_read(&mm->mmap_sem);
        vma = find_vma(mm, start);
        if (!vma) {
@@ -2240,18 +2243,22 @@ void task_numa_work(struct callback_head *work)
                        start = max(start, vma->vm_start);
                        end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE);
                        end = min(end, vma->vm_end);
-                       nr_pte_updates += change_prot_numa(vma, start, end);
+                       nr_pte_updates = change_prot_numa(vma, start, end);
 
                        /*
-                        * Scan sysctl_numa_balancing_scan_size but ensure that
-                        * at least one PTE is updated so that unused virtual
-                        * address space is quickly skipped.
+                        * Try to scan sysctl_numa_balancing_scan_size worth of
+                        * hpages that have at least one present PTE that
+                        * is not already pte-numa. If the VMA contains
+                        * areas that are unused or already full of prot_numa
+                        * PTEs, scan up to virtpages, to skip through those
+                        * areas faster.
                         */
                        if (nr_pte_updates)
                                pages -= (end - start) >> PAGE_SHIFT;
+                       virtpages -= (end - start) >> PAGE_SHIFT;
 
                        start = end;
-                       if (pages <= 0)
+                       if (pages <= 0 || virtpages <= 0)
                                goto out;
 
                        cond_resched();
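
The new virtpages budget bounds the walk itself: a VMA full of unused or already-marked space no longer lets the scanner churn indefinitely looking for PTEs to update. A quick arithmetic sketch of the two budgets (assuming the default 256 MB scan size and 4 KB pages):

    #include <stdio.h>

    int main(void)
    {
            const int PAGE_SHIFT = 12;   /* 4 KB pages (assumption) */
            long scan_size_mb = 256;     /* sysctl_numa_balancing_scan_size default */

            long pages = scan_size_mb << (20 - PAGE_SHIFT);  /* MB -> pages */
            long virtpages = pages * 8;  /* virtual-address-space budget */

            /* 65536 pages of useful PTE updates, or at most 524288 pages of
             * virtual address space walked, whichever is exhausted first. */
            printf("pages=%ld virtpages=%ld\n", pages, virtpages);
            return 0;
    }
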
@@ -2515,6 +2522,12 @@ static u32 __compute_runnable_contrib(u64 n)
        return contrib + runnable_avg_yN_sum[n];
 }
 
+#if (SCHED_LOAD_SHIFT - SCHED_LOAD_RESOLUTION) != 10 || SCHED_CAPACITY_SHIFT != 10
+#error "load tracking assumes 2^10 as unit"
+#endif
+
+#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
+
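
cap_scale() is plain fixed-point arithmetic: capacities are expressed as fractions of SCHED_CAPACITY_SCALE = 1024, hence the build-time assertion above that both shifts equal 10, and scaling a value is one multiply plus a 10-bit shift. A minimal sketch:

    #include <stdio.h>

    #define SCHED_CAPACITY_SHIFT 10
    #define SCHED_CAPACITY_SCALE (1UL << SCHED_CAPACITY_SHIFT)

    #define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)

    int main(void)
    {
            unsigned long delta = 1000;  /* a time delta (illustrative units) */

            /* At full capacity (1024/1024) the delta passes through unchanged;
             * at half capacity only half of it is accrued. */
            printf("%lu\n", cap_scale(delta, SCHED_CAPACITY_SCALE));     /* 1000 */
            printf("%lu\n", cap_scale(delta, SCHED_CAPACITY_SCALE / 2)); /* 500 */
            return 0;
    }
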
 /*
  * We can represent the historical contribution to runnable average as the
  * coefficients of a geometric series.  To do this we sub-divide our runnable
@@ -2547,10 +2560,10 @@ static __always_inline int
 __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
                  unsigned long weight, int running, struct cfs_rq *cfs_rq)
 {
-       u64 delta, periods;
+       u64 delta, scaled_delta, periods;
        u32 contrib;
-       int delta_w, decayed = 0;
-       unsigned long scale_freq = arch_scale_freq_capacity(NULL, cpu);
+       unsigned int delta_w, scaled_delta_w, decayed = 0;
+       unsigned long scale_freq, scale_cpu;
 
        delta = now - sa->last_update_time;
        /*
@@ -2571,6 +2584,9 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
                return 0;
        sa->last_update_time = now;
 
+       scale_freq = arch_scale_freq_capacity(NULL, cpu);
+       scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+
        /* delta_w is the amount already accumulated against our next period */
        delta_w = sa->period_contrib;
        if (delta + delta_w >= 1024) {
@@ -2585,13 +2601,16 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
                 * period and accrue it.
                 */
                delta_w = 1024 - delta_w;
+               scaled_delta_w = cap_scale(delta_w, scale_freq);
                if (weight) {
-                       sa->load_sum += weight * delta_w;
-                       if (cfs_rq)
-                               cfs_rq->runnable_load_sum += weight * delta_w;
+                       sa->load_sum += weight * scaled_delta_w;
+                       if (cfs_rq) {
+                               cfs_rq->runnable_load_sum +=
+                                               weight * scaled_delta_w;
+                       }
                }
                if (running)
-                       sa->util_sum += delta_w * scale_freq >> SCHED_CAPACITY_SHIFT;
+                       sa->util_sum += scaled_delta_w * scale_cpu;
 
                delta -= delta_w;
 
@@ -2608,23 +2627,25 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 
                /* Efficiently calculate \sum (1..n_period) 1024*y^i */
                contrib = __compute_runnable_contrib(periods);
+               contrib = cap_scale(contrib, scale_freq);
                if (weight) {
                        sa->load_sum += weight * contrib;
                        if (cfs_rq)
                                cfs_rq->runnable_load_sum += weight * contrib;
                }
                if (running)
-                       sa->util_sum += contrib * scale_freq >> SCHED_CAPACITY_SHIFT;
+                       sa->util_sum += contrib * scale_cpu;
        }
 
        /* Remainder of delta accrued against u_0` */
+       scaled_delta = cap_scale(delta, scale_freq);
        if (weight) {
-               sa->load_sum += weight * delta;
+               sa->load_sum += weight * scaled_delta;
                if (cfs_rq)
-                       cfs_rq->runnable_load_sum += weight * delta;
+                       cfs_rq->runnable_load_sum += weight * scaled_delta;
        }
        if (running)
-               sa->util_sum += delta * scale_freq >> SCHED_CAPACITY_SHIFT;
+               sa->util_sum += scaled_delta * scale_cpu;
 
        sa->period_contrib += delta;
 
@@ -2634,7 +2655,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
                        cfs_rq->runnable_load_avg =
                                div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
                }
-               sa->util_avg = (sa->util_sum << SCHED_LOAD_SHIFT) / LOAD_AVG_MAX;
+               sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
        }
 
        return decayed;
@@ -2664,8 +2685,8 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
 /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
 static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 {
-       int decayed;
        struct sched_avg *sa = &cfs_rq->avg;
+       int decayed;
 
        if (atomic_long_read(&cfs_rq->removed_load_avg)) {
                long r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
@@ -2676,8 +2697,7 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
        if (atomic_long_read(&cfs_rq->removed_util_avg)) {
                long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
                sa->util_avg = max_t(long, sa->util_avg - r, 0);
-               sa->util_sum = max_t(s32, sa->util_sum -
-                       ((r * LOAD_AVG_MAX) >> SCHED_LOAD_SHIFT), 0);
+               sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
        }
 
        decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
@@ -2695,33 +2715,70 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 static inline void update_load_avg(struct sched_entity *se, int update_tg)
 {
        struct cfs_rq *cfs_rq = cfs_rq_of(se);
-       int cpu = cpu_of(rq_of(cfs_rq));
        u64 now = cfs_rq_clock_task(cfs_rq);
+       int cpu = cpu_of(rq_of(cfs_rq));
 
        /*
         * Track task load average for carrying it to the new CPU after it is migrated,
         * and track group sched_entity load average for task_h_load calculation in migration
         */
        __update_load_avg(now, cpu, &se->avg,
-               se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se, NULL);
+                         se->on_rq * scale_load_down(se->load.weight),
+                         cfs_rq->curr == se, NULL);
 
        if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
                update_tg_load_avg(cfs_rq, 0);
 }
 
+static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+       if (!sched_feat(ATTACH_AGE_LOAD))
+               goto skip_aging;
+
+       /*
+        * If we got migrated (either between CPUs or between cgroups) we'll
+        * have aged the average right before clearing @last_update_time.
+        */
+       if (se->avg.last_update_time) {
+               __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
+                                 &se->avg, 0, 0, NULL);
+
+               /*
+                * XXX: we could have just aged the entire load away if we've been
+                * absent from the fair class for too long.
+                */
+       }
+
+skip_aging:
+       se->avg.last_update_time = cfs_rq->avg.last_update_time;
+       cfs_rq->avg.load_avg += se->avg.load_avg;
+       cfs_rq->avg.load_sum += se->avg.load_sum;
+       cfs_rq->avg.util_avg += se->avg.util_avg;
+       cfs_rq->avg.util_sum += se->avg.util_sum;
+}
+
+static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+       __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
+                         &se->avg, se->on_rq * scale_load_down(se->load.weight),
+                         cfs_rq->curr == se, NULL);
+
+       cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
+       cfs_rq->avg.load_sum = max_t(s64,  cfs_rq->avg.load_sum - se->avg.load_sum, 0);
+       cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
+       cfs_rq->avg.util_sum = max_t(s32,  cfs_rq->avg.util_sum - se->avg.util_sum, 0);
+}
+
 /* Add the load generated by se into cfs_rq's load average */
 static inline void
 enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
        struct sched_avg *sa = &se->avg;
        u64 now = cfs_rq_clock_task(cfs_rq);
-       int migrated = 0, decayed;
+       int migrated, decayed;
 
-       if (sa->last_update_time == 0) {
-               sa->last_update_time = now;
-               migrated = 1;
-       }
-       else {
+       migrated = !sa->last_update_time;
+       if (!migrated) {
                __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
                        se->on_rq * scale_load_down(se->load.weight),
                        cfs_rq->curr == se, NULL);
@@ -2732,12 +2789,8 @@ enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
        cfs_rq->runnable_load_avg += sa->load_avg;
        cfs_rq->runnable_load_sum += sa->load_sum;
 
-       if (migrated) {
-               cfs_rq->avg.load_avg += sa->load_avg;
-               cfs_rq->avg.load_sum += sa->load_sum;
-               cfs_rq->avg.util_avg += sa->util_avg;
-               cfs_rq->avg.util_sum += sa->util_sum;
-       }
+       if (migrated)
+               attach_entity_load_avg(cfs_rq, se);
 
        if (decayed || migrated)
                update_tg_load_avg(cfs_rq, 0);
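
detach_entity_load_avg() clamps every subtraction at zero because the entity's contribution and the cfs_rq aggregate can decay slightly out of step, and with unsigned arithmetic an underflow would read back as an enormous load. A toy model of the clamped attach/detach pairing (much simplified, hypothetical field set):

    #include <stdio.h>

    struct avg { long load_avg; long util_avg; };

    static long clamp_sub(long a, long b)
    {
            return a - b > 0 ? a - b : 0;   /* the max_t(..., 0) pattern */
    }

    static void attach(struct avg *rq, const struct avg *se)
    {
            rq->load_avg += se->load_avg;
            rq->util_avg += se->util_avg;
    }

    static void detach(struct avg *rq, const struct avg *se)
    {
            rq->load_avg = clamp_sub(rq->load_avg, se->load_avg);
            rq->util_avg = clamp_sub(rq->util_avg, se->util_avg);
    }

    int main(void)
    {
            struct avg rq = { 100, 80 }, se = { 120, 30 };

            /* The aggregate decayed further than the entity since attach:
             * clamp at zero instead of going negative. */
            detach(&rq, &se);
            printf("load=%ld util=%ld\n", rq.load_avg, rq.util_avg); /* 0 50 */
            attach(&rq, &se);
            printf("load=%ld util=%ld\n", rq.load_avg, rq.util_avg); /* 120 80 */
            return 0;
    }
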
@@ -2752,7 +2805,7 @@ dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
        cfs_rq->runnable_load_avg =
                max_t(long, cfs_rq->runnable_load_avg - se->avg.load_avg, 0);
        cfs_rq->runnable_load_sum =
-               max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
+               max_t(s64,  cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
 }
 
 /*
@@ -2820,6 +2873,11 @@ static inline void
 dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 static inline void remove_entity_load_avg(struct sched_entity *se) {}
 
+static inline void
+attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
+static inline void
+detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
+
 static inline int idle_balance(struct rq *rq)
 {
        return 0;
@@ -4816,32 +4874,39 @@ next:
 done:
        return target;
 }
+
 /*
- * get_cpu_usage returns the amount of capacity of a CPU that is used by CFS
+ * cpu_util returns the amount of capacity of a CPU that is used by CFS
  * tasks. The unit of the return value must be the one of capacity so we can
- * compare the usage with the capacity of the CPU that is available for CFS
- * task (ie cpu_capacity).
- * cfs.avg.util_avg is the sum of running time of runnable tasks on a
- * CPU. It represents the amount of utilization of a CPU in the range
- * [0..SCHED_LOAD_SCALE].  The usage of a CPU can't be higher than the full
- * capacity of the CPU because it's about the running time on this CPU.
- * Nevertheless, cfs.avg.util_avg can be higher than SCHED_LOAD_SCALE
- * because of unfortunate rounding in util_avg or just
- * after migrating tasks until the average stabilizes with the new running
- * time. So we need to check that the usage stays into the range
- * [0..cpu_capacity_orig] and cap if necessary.
- * Without capping the usage, a group could be seen as overloaded (CPU0 usage
- * at 121% + CPU1 usage at 80%) whereas CPU1 has 20% of available capacity
+ * compare the utilization with the capacity of the CPU that is available for
+ * CFS task (ie cpu_capacity).
+ *
+ * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
+ * recent utilization of currently non-runnable tasks on a CPU. It represents
+ * the amount of utilization of a CPU in the range [0..capacity_orig] where
+ * capacity_orig is the cpu_capacity available at the highest frequency
+ * (arch_scale_freq_capacity()).
+ * The utilization of a CPU converges towards a sum equal to or less than the
+ * current capacity (capacity_curr <= capacity_orig) of the CPU because it is
+ * the running time on this CPU scaled by capacity_curr.
+ *
+ * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
+ * higher than capacity_orig because of unfortunate rounding in
+ * cfs.avg.util_avg or just after migrating tasks and new task wakeups until
+ * the average stabilizes with the new running time. We need to check that the
+ * utilization stays within the range of [0..capacity_orig] and cap it if
+ * necessary. Without utilization capping, a group could be seen as overloaded
+ * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
+ * available capacity. We allow utilization to overshoot capacity_curr (but not
+ * capacity_orig) as it is useful for predicting the capacity required after
+ * task migrations (scheduler-driven DVFS).
  */
-static int get_cpu_usage(int cpu)
+static int cpu_util(int cpu)
 {
-       unsigned long usage = cpu_rq(cpu)->cfs.avg.util_avg;
+       unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
        unsigned long capacity = capacity_orig_of(cpu);
 
-       if (usage >= SCHED_LOAD_SCALE)
-               return capacity;
-
-       return (usage * capacity) >> SCHED_LOAD_SHIFT;
+       return (util >= capacity) ? capacity : util;
 }
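
With util_avg now accumulated directly in capacity units, the old '(usage * capacity) >> SCHED_LOAD_SHIFT' rescaling reduces to a simple cap. Worked numbers for the overload case described in the comment (sketch, assuming capacity_orig = 1024):

    #include <stdio.h>

    static unsigned long cpu_util(unsigned long util_avg, unsigned long capacity_orig)
    {
            return util_avg >= capacity_orig ? capacity_orig : util_avg;
    }

    int main(void)
    {
            /* "CPU0 utilization at 121%": a transient overshoot after task
             * migrations, so it is capped at capacity_orig. */
            printf("%lu\n", cpu_util(1239, 1024)); /* 1024, not 1239 */
            /* "CPU1 utilization at 80%": reported as-is. */
            printf("%lu\n", cpu_util(819, 1024));  /* 819 */
            return 0;
    }
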
 
 /*
@@ -5524,10 +5589,10 @@ static int migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
        unsigned long src_faults, dst_faults;
        int src_nid, dst_nid;
 
-       if (!p->numa_faults || !(env->sd->flags & SD_NUMA))
+       if (!static_branch_likely(&sched_numa_balancing))
                return -1;
 
-       if (!sched_feat(NUMA))
+       if (!p->numa_faults || !(env->sd->flags & SD_NUMA))
                return -1;
 
        src_nid = cpu_to_node(env->src_cpu);
@@ -5933,7 +5998,7 @@ struct sg_lb_stats {
        unsigned long sum_weighted_load; /* Weighted load of group's tasks */
        unsigned long load_per_task;
        unsigned long group_capacity;
-       unsigned long group_usage; /* Total usage of the group */
+       unsigned long group_util; /* Total utilization of the group */
        unsigned int sum_nr_running; /* Nr tasks running in the group */
        unsigned int idle_cpus;
        unsigned int group_weight;
@@ -6009,19 +6074,6 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
        return load_idx;
 }
 
-static unsigned long default_scale_cpu_capacity(struct sched_domain *sd, int cpu)
-{
-       if ((sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
-               return sd->smt_gain / sd->span_weight;
-
-       return SCHED_CAPACITY_SCALE;
-}
-
-unsigned long __weak arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
-{
-       return default_scale_cpu_capacity(sd, cpu);
-}
-
 static unsigned long scale_rt_capacity(int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
@@ -6051,16 +6103,9 @@ static unsigned long scale_rt_capacity(int cpu)
 
 static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 {
-       unsigned long capacity = SCHED_CAPACITY_SCALE;
+       unsigned long capacity = arch_scale_cpu_capacity(sd, cpu);
        struct sched_group *sdg = sd->groups;
 
-       if (sched_feat(ARCH_CAPACITY))
-               capacity *= arch_scale_cpu_capacity(sd, cpu);
-       else
-               capacity *= default_scale_cpu_capacity(sd, cpu);
-
-       capacity >>= SCHED_CAPACITY_SHIFT;
-
        cpu_rq(cpu)->cpu_capacity_orig = capacity;
 
        capacity *= scale_rt_capacity(cpu);
@@ -6186,8 +6231,8 @@ static inline int sg_imbalanced(struct sched_group *group)
  * group_has_capacity returns true if the group has spare capacity that could
  * be used by some tasks.
  * We consider that a group has spare capacity if the number of tasks is
- * smaller than the number of CPUs or if the usage is lower than the available
- * capacity for CFS tasks.
+ * smaller than the number of CPUs or if the utilization is lower than the
+ * available capacity for CFS tasks.
  * For the latter, we use a threshold to stabilize the state, to take into
  * account the variance of the tasks' load and to return true if the available
  * capacity is meaningful for the load balancer.
@@ -6201,7 +6246,7 @@ group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs)
                return true;
 
        if ((sgs->group_capacity * 100) >
-                       (sgs->group_usage * env->sd->imbalance_pct))
+                       (sgs->group_util * env->sd->imbalance_pct))
                return true;
 
        return false;
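
The rename to group_util leaves the spare-capacity test itself untouched: the group counts as having room when its capacity exceeds its utilization by the domain's imbalance_pct margin. A numeric sketch (assuming imbalance_pct = 125, a common default):

    #include <stdio.h>
    #include <stdbool.h>

    static bool has_spare_capacity(unsigned long capacity, unsigned long util,
                                   unsigned int imbalance_pct)
    {
            /* Capacity must beat utilization by the imbalance margin. */
            return capacity * 100 > util * imbalance_pct;
    }

    int main(void)
    {
            /* Two-CPU group with capacity 2048: spare room at utilization
             * 1600, but not at 1700 (the 25% margin is gone). */
            printf("%d\n", has_spare_capacity(2048, 1600, 125)); /* 1 */
            printf("%d\n", has_spare_capacity(2048, 1700, 125)); /* 0 */
            return 0;
    }
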
@@ -6222,15 +6267,15 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
                return false;
 
        if ((sgs->group_capacity * 100) <
-                       (sgs->group_usage * env->sd->imbalance_pct))
+                       (sgs->group_util * env->sd->imbalance_pct))
                return true;
 
        return false;
 }
 
-static enum group_type group_classify(struct lb_env *env,
-               struct sched_group *group,
-               struct sg_lb_stats *sgs)
+static inline enum
+group_type group_classify(struct sched_group *group,
+                         struct sg_lb_stats *sgs)
 {
        if (sgs->group_no_capacity)
                return group_overloaded;
@@ -6270,7 +6315,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                        load = source_load(i, load_idx);
 
                sgs->group_load += load;
-               sgs->group_usage += get_cpu_usage(i);
+               sgs->group_util += cpu_util(i);
                sgs->sum_nr_running += rq->cfs.h_nr_running;
 
                if (rq->nr_running > 1)
@@ -6295,7 +6340,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
        sgs->group_weight = group->group_weight;
 
        sgs->group_no_capacity = group_is_overloaded(env, sgs);
-       sgs->group_type = group_classify(env, group, sgs);
+       sgs->group_type = group_classify(group, sgs);
 }
 
 /**
@@ -6429,7 +6474,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
                    group_has_capacity(env, &sds->local_stat) &&
                    (sgs->sum_nr_running > 1)) {
                        sgs->group_no_capacity = 1;
-                       sgs->group_type = group_overloaded;
+                       sgs->group_type = group_classify(sg, sgs);
                }
 
                if (update_sd_pick_busiest(env, sds, sg, sgs)) {
@@ -7609,8 +7654,22 @@ out:
         * When the CPU is attached to a null domain, for example, it will not
         * be updated.
         */
-       if (likely(update_next_balance))
+       if (likely(update_next_balance)) {
                rq->next_balance = next_balance;
+
+#ifdef CONFIG_NO_HZ_COMMON
+               /*
+                * If this CPU has been elected to perform the nohz idle
+                * balance, the other idle CPUs have already rebalanced via
+                * nohz_idle_balance() and nohz.next_balance has been
+                * updated accordingly. This CPU has now run the idle load
+                * balance for itself, so nohz.next_balance needs to be
+                * updated accordingly.
+                */
+               if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
+                       nohz.next_balance = rq->next_balance;
+#endif
+       }
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -7623,6 +7682,9 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
        int this_cpu = this_rq->cpu;
        struct rq *rq;
        int balance_cpu;
+       /* Earliest time when we have to do rebalance again */
+       unsigned long next_balance = jiffies + 60*HZ;
+       int update_next_balance = 0;
 
        if (idle != CPU_IDLE ||
            !test_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu)))
@@ -7654,10 +7716,19 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
                        rebalance_domains(rq, CPU_IDLE);
                }
 
-               if (time_after(this_rq->next_balance, rq->next_balance))
-                       this_rq->next_balance = rq->next_balance;
+               if (time_after(next_balance, rq->next_balance)) {
+                       next_balance = rq->next_balance;
+                       update_next_balance = 1;
+               }
        }
-       nohz.next_balance = this_rq->next_balance;
+
+       /*
+        * next_balance will be updated only when there is a need.
+        * When the CPU is attached to a null domain, for example, it will not
+        * be updated.
+        */
+       if (likely(update_next_balance))
+               nohz.next_balance = next_balance;
 end:
        clear_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu));
 }
@@ -7810,7 +7881,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
                entity_tick(cfs_rq, se, queued);
        }
 
-       if (numabalancing_enabled)
+       if (static_branch_unlikely(&sched_numa_balancing))
                task_tick_numa(rq, curr);
 }
 
@@ -7886,21 +7957,39 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
                check_preempt_curr(rq, p, 0);
 }
 
-static void switched_from_fair(struct rq *rq, struct task_struct *p)
+static inline bool vruntime_normalized(struct task_struct *p)
 {
        struct sched_entity *se = &p->se;
-       struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
        /*
-        * Ensure the task's vruntime is normalized, so that when it's
-        * switched back to the fair class the enqueue_entity(.flags=0) will
-        * do the right thing.
+        * In both the TASK_ON_RQ_QUEUED and TASK_ON_RQ_MIGRATING cases,
+        * the dequeue_entity(.flags=0) will already have normalized the
+        * vruntime.
+        */
+       if (p->on_rq)
+               return true;
+
+       /*
+        * When !on_rq, vruntime of the task has usually NOT been normalized.
+        * But there are some cases where it has already been normalized:
         *
-        * If it's queued, then the dequeue_entity(.flags=0) will already
-        * have normalized the vruntime, if it's !queued, then only when
-        * the task is sleeping will it still have non-normalized vruntime.
+        * - A forked child which is waiting to be woken up by
+        *   wake_up_new_task().
+        * - A task which has been woken up by try_to_wake_up() and is
+        *   waiting to actually be woken up by sched_ttwu_pending().
         */
-       if (!task_on_rq_queued(p) && p->state != TASK_RUNNING) {
+       if (!se->sum_exec_runtime || p->state == TASK_WAKING)
+               return true;
+
+       return false;
+}
+
+static void detach_task_cfs_rq(struct task_struct *p)
+{
+       struct sched_entity *se = &p->se;
+       struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+       if (!vruntime_normalized(p)) {
                /*
                 * Fix up our vruntime so that the current sleep doesn't
                 * cause 'unlimited' sleep bonus.
@@ -7909,28 +7998,14 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
                se->vruntime -= cfs_rq->min_vruntime;
        }
 
-#ifdef CONFIG_SMP
        /* Catch up with the cfs_rq and remove our load when we leave */
-       __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq), &se->avg,
-               se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se, NULL);
-
-       cfs_rq->avg.load_avg =
-               max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
-       cfs_rq->avg.load_sum =
-               max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
-       cfs_rq->avg.util_avg =
-               max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
-       cfs_rq->avg.util_sum =
-               max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
-#endif
+       detach_entity_load_avg(cfs_rq, se);
 }
 
-/*
- * We switched to the sched_fair class.
- */
-static void switched_to_fair(struct rq *rq, struct task_struct *p)
+static void attach_task_cfs_rq(struct task_struct *p)
 {
        struct sched_entity *se = &p->se;
+       struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
        /*
@@ -7940,31 +8015,33 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p)
        se->depth = se->parent ? se->parent->depth + 1 : 0;
 #endif
 
-       if (!task_on_rq_queued(p)) {
+       /* Synchronize task with its cfs_rq */
+       attach_entity_load_avg(cfs_rq, se);
+
+       if (!vruntime_normalized(p))
+               se->vruntime += cfs_rq->min_vruntime;
+}
+
+static void switched_from_fair(struct rq *rq, struct task_struct *p)
+{
+       detach_task_cfs_rq(p);
+}
+
+static void switched_to_fair(struct rq *rq, struct task_struct *p)
+{
+       attach_task_cfs_rq(p);
 
+       if (task_on_rq_queued(p)) {
                /*
-                * Ensure the task has a non-normalized vruntime when it is switched
-                * back to the fair class with !queued, so that enqueue_entity() at
-                * wake-up time will do the right thing.
-                *
-                * If it's queued, then the enqueue_entity(.flags=0) makes the task
-                * has non-normalized vruntime, if it's !queued, then it still has
-                * normalized vruntime.
+                * We were most likely switched from sched_rt, so
+                * kick off the schedule if running, otherwise just see
+                * if we can still preempt the current task.
                 */
-               if (p->state != TASK_RUNNING)
-                       se->vruntime += cfs_rq_of(se)->min_vruntime;
-               return;
+               if (rq->curr == p)
+                       resched_curr(rq);
+               else
+                       check_preempt_curr(rq, p, 0);
        }
-
-       /*
-        * We were most likely switched from sched_rt, so
-        * kick off the schedule if running, otherwise just see
-        * if we can still preempt the current task.
-        */
-       if (rq->curr == p)
-               resched_curr(rq);
-       else
-               check_preempt_curr(rq, p, 0);
 }
 
 /* Account for a task changing its policy or group.
@@ -7999,56 +8076,16 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void task_move_group_fair(struct task_struct *p, int queued)
+static void task_move_group_fair(struct task_struct *p)
 {
-       struct sched_entity *se = &p->se;
-       struct cfs_rq *cfs_rq;
-
-       /*
-        * If the task was not on the rq at the time of this cgroup movement
-        * it must have been asleep, sleeping tasks keep their ->vruntime
-        * absolute on their old rq until wakeup (needed for the fair sleeper
-        * bonus in place_entity()).
-        *
-        * If it was on the rq, we've just 'preempted' it, which does convert
-        * ->vruntime to a relative base.
-        *
-        * Make sure both cases convert their relative position when migrating
-        * to another cgroup's rq. This does somewhat interfere with the
-        * fair sleeper stuff for the first placement, but who cares.
-        */
-       /*
-        * When !queued, vruntime of the task has usually NOT been normalized.
-        * But there are some cases where it has already been normalized:
-        *
-        * - Moving a forked child which is waiting for being woken up by
-        *   wake_up_new_task().
-        * - Moving a task which has been woken up by try_to_wake_up() and
-        *   waiting for actually being woken up by sched_ttwu_pending().
-        *
-        * To prevent boost or penalty in the new cfs_rq caused by delta
-        * min_vruntime between the two cfs_rqs, we skip vruntime adjustment.
-        */
-       if (!queued && (!se->sum_exec_runtime || p->state == TASK_WAKING))
-               queued = 1;
-
-       if (!queued)
-               se->vruntime -= cfs_rq_of(se)->min_vruntime;
+       detach_task_cfs_rq(p);
        set_task_rq(p, task_cpu(p));
-       se->depth = se->parent ? se->parent->depth + 1 : 0;
-       if (!queued) {
-               cfs_rq = cfs_rq_of(se);
-               se->vruntime += cfs_rq->min_vruntime;
 
 #ifdef CONFIG_SMP
-               /* Virtually synchronize task with its new cfs_rq */
-               p->se.avg.last_update_time = cfs_rq->avg.last_update_time;
-               cfs_rq->avg.load_avg += p->se.avg.load_avg;
-               cfs_rq->avg.load_sum += p->se.avg.load_sum;
-               cfs_rq->avg.util_avg += p->se.avg.util_avg;
-               cfs_rq->avg.util_sum += p->se.avg.util_sum;
+       /* Signal that se's cfs_rq has changed -- the task has migrated */
+       p->se.avg.last_update_time = 0;
 #endif
-       }
+       attach_task_cfs_rq(p);
 }
 
 void free_fair_sched_group(struct task_group *tg)
index 83a50e7ca53315695f779e2c5ac7177c423b646d..69631fa46c2f84fecd3e15599cba0e5935c1148e 100644 (file)
@@ -36,11 +36,6 @@ SCHED_FEAT(CACHE_HOT_BUDDY, true)
  */
 SCHED_FEAT(WAKEUP_PREEMPTION, true)
 
-/*
- * Use arch dependent cpu capacity functions
- */
-SCHED_FEAT(ARCH_CAPACITY, true)
-
 SCHED_FEAT(HRTICK, false)
 SCHED_FEAT(DOUBLE_TICK, false)
 SCHED_FEAT(LB_BIAS, true)
@@ -72,19 +67,5 @@ SCHED_FEAT(RT_PUSH_IPI, true)
 SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
+SCHED_FEAT(ATTACH_AGE_LOAD, true)
 
-/*
- * Apply the automatic NUMA scheduling policy. Enabled automatically
- * at runtime if running on a NUMA machine. Can be controlled via
- * numa_balancing=
- */
-#ifdef CONFIG_NUMA_BALANCING
-
-/*
- * NUMA will favor moving tasks towards nodes where a higher number of
- * hinting faults are recorded during active load balancing. It will
- * resist moving tasks towards nodes where a lower number of hinting
- * faults have been recorded.
- */
-SCHED_FEAT(NUMA,       true)
-#endif
index d2ea59364a1c8f7e66d72e96c7d170088424eec0..e3cc16312046689fd04db8748304210c0d9fc8df 100644 (file)
@@ -635,11 +635,11 @@ bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
 /*
  * We ran out of runtime, see if we can borrow some from our neighbours.
  */
-static int do_balance_runtime(struct rt_rq *rt_rq)
+static void do_balance_runtime(struct rt_rq *rt_rq)
 {
        struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
        struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
-       int i, weight, more = 0;
+       int i, weight;
        u64 rt_period;
 
        weight = cpumask_weight(rd->span);
@@ -673,7 +673,6 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
                                diff = rt_period - rt_rq->rt_runtime;
                        iter->rt_runtime -= diff;
                        rt_rq->rt_runtime += diff;
-                       more = 1;
                        if (rt_rq->rt_runtime == rt_period) {
                                raw_spin_unlock(&iter->rt_runtime_lock);
                                break;
@@ -683,8 +682,6 @@ next:
                raw_spin_unlock(&iter->rt_runtime_lock);
        }
        raw_spin_unlock(&rt_b->rt_runtime_lock);
-
-       return more;
 }
 
 /*
@@ -796,26 +793,19 @@ static void __enable_runtime(struct rq *rq)
        }
 }
 
-static int balance_runtime(struct rt_rq *rt_rq)
+static void balance_runtime(struct rt_rq *rt_rq)
 {
-       int more = 0;
-
        if (!sched_feat(RT_RUNTIME_SHARE))
-               return more;
+               return;
 
        if (rt_rq->rt_time > rt_rq->rt_runtime) {
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
-               more = do_balance_runtime(rt_rq);
+               do_balance_runtime(rt_rq);
                raw_spin_lock(&rt_rq->rt_runtime_lock);
        }
-
-       return more;
 }
 #else /* !CONFIG_SMP */
-static inline int balance_runtime(struct rt_rq *rt_rq)
-{
-       return 0;
-}
+static inline void balance_runtime(struct rt_rq *rt_rq) {}
 #endif /* CONFIG_SMP */
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
index 6d2a119c7ad9f63338ffb0e9e92efcbc269c2141..046242feff3a35552de27c3d575a06ee3f2a1df6 100644 (file)
@@ -84,6 +84,10 @@ static inline void update_cpu_load_active(struct rq *this_rq) { }
  */
 #define RUNTIME_INF    ((u64)~0ULL)
 
+static inline int idle_policy(int policy)
+{
+       return policy == SCHED_IDLE;
+}
 static inline int fair_policy(int policy)
 {
        return policy == SCHED_NORMAL || policy == SCHED_BATCH;
@@ -98,6 +102,11 @@ static inline int dl_policy(int policy)
 {
        return policy == SCHED_DEADLINE;
 }
+static inline bool valid_policy(int policy)
+{
+       return idle_policy(policy) || fair_policy(policy) ||
+               rt_policy(policy) || dl_policy(policy);
+}
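
valid_policy() just folds the per-class predicates together, replacing the open-coded six-way comparison removed from __sched_setscheduler() above. A standalone check (sketch; policy constants copied from the UAPI values):

    #include <stdio.h>
    #include <stdbool.h>

    /* UAPI scheduling policy values */
    #define SCHED_NORMAL    0
    #define SCHED_FIFO      1
    #define SCHED_RR        2
    #define SCHED_BATCH     3
    #define SCHED_IDLE      5
    #define SCHED_DEADLINE  6

    static bool idle_policy(int p) { return p == SCHED_IDLE; }
    static bool fair_policy(int p) { return p == SCHED_NORMAL || p == SCHED_BATCH; }
    static bool rt_policy(int p)   { return p == SCHED_FIFO || p == SCHED_RR; }
    static bool dl_policy(int p)   { return p == SCHED_DEADLINE; }

    static bool valid_policy(int p)
    {
            return idle_policy(p) || fair_policy(p) ||
                    rt_policy(p) || dl_policy(p);
    }

    int main(void)
    {
            /* 4 is the reserved, unimplemented slot between BATCH and IDLE. */
            printf("%d %d\n", valid_policy(SCHED_RR), valid_policy(4)); /* 1 0 */
            return 0;
    }
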
 
 static inline int task_has_rt_policy(struct task_struct *p)
 {
@@ -109,11 +118,6 @@ static inline int task_has_dl_policy(struct task_struct *p)
        return dl_policy(p->policy);
 }
 
-static inline bool dl_time_before(u64 a, u64 b)
-{
-       return (s64)(a - b) < 0;
-}
-
 /*
  * Tells if entity @a should preempt entity @b.
  */
@@ -1003,17 +1007,7 @@ extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
 #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
 #endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
 
-#ifdef CONFIG_NUMA_BALANCING
-#define sched_feat_numa(x) sched_feat(x)
-#ifdef CONFIG_SCHED_DEBUG
-#define numabalancing_enabled sched_feat_numa(NUMA)
-#else
-extern bool numabalancing_enabled;
-#endif /* CONFIG_SCHED_DEBUG */
-#else
-#define sched_feat_numa(x) (0)
-#define numabalancing_enabled (0)
-#endif /* CONFIG_NUMA_BALANCING */
+extern struct static_key_false sched_numa_balancing;
 
 static inline u64 global_rt_period(void)
 {
@@ -1227,7 +1221,7 @@ struct sched_class {
        void (*update_curr) (struct rq *rq);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-       void (*task_move_group) (struct task_struct *p, int on_rq);
+       void (*task_move_group) (struct task_struct *p);
 #endif
 };
 
@@ -1405,6 +1399,17 @@ unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
 }
 #endif
 
+#ifndef arch_scale_cpu_capacity
+static __always_inline
+unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
+{
+       if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
+               return sd->smt_gain / sd->span_weight;
+
+       return SCHED_CAPACITY_SCALE;
+}
+#endif
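
This default, moved here from the ARCH_CAPACITY-gated default_scale_cpu_capacity() in fair.c, splits smt_gain across the hardware threads of an SMT core; architectures override the hook for asymmetric systems. Worked numbers (assuming the scheduler's default smt_gain of 1178 and two siblings):

    #include <stdio.h>

    #define SCHED_CAPACITY_SCALE 1024

    /* Model of the default: an SMT sibling gets a share of smt_gain,
     * any other CPU reports full scale. */
    static unsigned long scale_cpu(int smt, unsigned int smt_gain,
                                   unsigned int span_weight)
    {
            if (smt && span_weight > 1)
                    return smt_gain / span_weight;
            return SCHED_CAPACITY_SCALE;
    }

    int main(void)
    {
            printf("%lu\n", scale_cpu(1, 1178, 2)); /* 589 per SMT sibling */
            printf("%lu\n", scale_cpu(0, 1178, 1)); /* 1024 */
            return 0;
    }
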
+
 static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
 {
        rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));