Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next-2.6

author Linus Torvalds <torvalds@linux-foundation.org>

Mon, 23 May 2011 05:06:24 +0000 (22:06 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 23 May 2011 05:06:24 +0000 (22:06 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 23 May 2011 05:06:24 +0000 (22:06 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 23 May 2011 05:06:24 +0000 (22:06 -0700)
diff --combined init/Kconfig

index ffcdad793f14d3f9acc3d0ffc0883ff6fad2a3a3,56240e724d9a31dc4ef43647afcf354b8a70f108..c8b172efaa65bf9264ecae76422159214f943b95
--- 1/init/Kconfig
--- 2/init/Kconfig
+++ b/init/Kconfig
@@@ -485,7 -485,7 +485,7 @@@ config TREE_RCU_TRAC
   
   config RCU_BOOST
         bool "Enable RCU priority boosting"
- -      depends on RT_MUTEXES && TINY_PREEMPT_RCU
+ +      depends on RT_MUTEXES && PREEMPT_RCU
         default n
         help
           This option boosts the priority of preempted RCU readers that
@@@ -827,11 -827,6 +827,6 @@@ config SCHED_AUTOGROU
           desktop applications.  Task group autogeneration is currently based
           upon task session.
   
- config SCHED_TTWU_QUEUE
-       bool
-       depends on !SPARC32
-       default y
- 
   config MM_OWNER
         bool
   
@@@ -908,6 -903,7 +903,6 @@@ endi
   
   config CC_OPTIMIZE_FOR_SIZE
         bool "Optimize for size"
- -      default y
         help
           Enabling this option will pass "-Os" instead of "-O2" to gcc
           resulting in a smaller kernel.
@@@ -928,6 -924,14 +923,6 @@@ menuconfig EXPER
             environments which can tolerate a "non-standard" kernel.
             Only use this if you really know what you are doing.
   
- -config EMBEDDED
- -      bool "Embedded system"
- -      select EXPERT
- -      help
- -        This option should be enabled if compiling the kernel for
- -        an embedded system so certain expert options are available
- -        for configuration.
- -
   config UID16
         bool "Enable 16-bit UID system calls" if EXPERT
         depends on ARM || BLACKFIN || CRIS || FRV || H8300 || X86_32 || M68K || (S390 && !64BIT) || SUPERH || SPARC32 || (SPARC64 && COMPAT) || UML || (X86_64 && IA32_EMULATION)
@@@ -1100,14 -1104,6 +1095,14 @@@ config AI
             by some high performance threaded applications. Disabling
             this option saves about 7k.
   
+ +config EMBEDDED
+ +      bool "Embedded system"
+ +      select EXPERT
+ +      help
+ +        This option should be enabled if compiling the kernel for
+ +        an embedded system so certain expert options are available
+ +        for configuration.
+ +
   config HAVE_PERF_EVENTS
         bool
         help
diff --combined kernel/sched.c

index c62acf45d3b9c0c4954090454de59bd08e2eb180,c4b3410d68d39628c40b3d77eaa1cda37ae08c01..0516af4150855b5d8e775c980a10ef7e0fe9d6fd
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -231,7 -231,7 +231,7 @@@ static void destroy_rt_bandwidth(struc
   #endif
   
   /*
- - * sched_domains_mutex serializes calls to arch_init_sched_domains,
+ + * sched_domains_mutex serializes calls to init_sched_domains,
    * detach_destroy_domains and partition_sched_domains.
    */
   static DEFINE_MUTEX(sched_domains_mutex);
@@@ -328,9 -328,7 +328,9 @@@ struct cfs_rq 
          */
         struct sched_entity *curr, *next, *last, *skip;
   
+ +#ifdef        CONFIG_SCHED_DEBUG
         unsigned int nr_spread_over;
+ +#endif
   
   #ifdef CONFIG_FAIR_GROUP_SCHED
         struct rq *rq;  /* cpu runqueue to which this cfs_rq is attached */
@@@ -422,7 -420,6 +422,7 @@@ struct rt_rq 
    */
   struct root_domain {
         atomic_t refcount;
+ +      struct rcu_head rcu;
         cpumask_var_t span;
         cpumask_var_t online;
   
@@@ -466,7 -463,7 +466,7 @@@ struct rq 
         u64 nohz_stamp;
         unsigned char nohz_balance_kick;
   #endif
- -      unsigned int skip_clock_update;
+ +      int skip_clock_update;
   
         /* capture load from *all* tasks on this cpu: */
         struct load_weight load;
@@@ -581,7 -578,7 +581,7 @@@ static inline int cpu_of(struct rq *rq
   
   #define rcu_dereference_check_sched_domain(p) \
         rcu_dereference_check((p), \
- -                            rcu_read_lock_sched_held() || \
+ +                            rcu_read_lock_held() || \
                               lockdep_is_held(&sched_domains_mutex))
   
   /*
@@@ -652,7 -649,7 +652,7 @@@ static void update_rq_clock(struct rq *
   {
         s64 delta;
   
- -      if (rq->skip_clock_update)
+ +      if (rq->skip_clock_update > 0)
                 return;
   
         delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
@@@ -1210,17 -1207,11 +1210,17 @@@ int get_nohz_timer_target(void
         int i;
         struct sched_domain *sd;
   
+ +      rcu_read_lock();
         for_each_domain(cpu, sd) {
- -              for_each_cpu(i, sched_domain_span(sd))
- -                      if (!idle_cpu(i))
- -                              return i;
+ +              for_each_cpu(i, sched_domain_span(sd)) {
+ +                      if (!idle_cpu(i)) {
+ +                              cpu = i;
+ +                              goto unlock;
+ +                      }
+ +              }
         }
+ +unlock:
+ +      rcu_read_unlock();
         return cpu;
   }
   /*
@@@ -1330,15 -1321,15 +1330,15 @@@ calc_delta_mine(unsigned long delta_exe
   {
         u64 tmp;
   
+ +      tmp = (u64)delta_exec * weight;
+ +
         if (!lw->inv_weight) {
                 if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST))
                         lw->inv_weight = 1;
                 else
- -                      lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)
- -                              / (lw->weight+1);
+ +                      lw->inv_weight = WMULT_CONST / lw->weight;
         }
   
- -      tmp = (u64)delta_exec * weight;
         /*
          * Check whether we'd overflow the 64-bit multiplication:
          */
@@@ -2207,6 -2198,21 +2207,6 @@@ struct migration_arg 
   
   static int migration_cpu_stop(void *data);
   
- -/*
- - * The task's runqueue lock must be held.
- - * Returns true if you have to wait for migration thread.
- - */
- -static bool need_migrate_task(struct task_struct *p)
- -{
- -      /*
- -       * If the task is not on a runqueue (and not running), then
- -       * the next wake-up will properly place the task.
- -       */
- -      bool running = p->on_rq || p->on_cpu;
- -      smp_rmb(); /* finish_lock_switch() */
- -      return running;
- -}
- -
   /*
    * wait_task_inactive - wait for a thread to unschedule.
    *
@@@ -2423,14 -2429,12 +2423,14 @@@ ttwu_stat(struct task_struct *p, int cp
                 struct sched_domain *sd;
   
                 schedstat_inc(p, se.statistics.nr_wakeups_remote);
+ +              rcu_read_lock();
                 for_each_domain(this_cpu, sd) {
                         if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
                                 schedstat_inc(sd, ttwu_wake_remote);
                                 break;
                         }
                 }
+ +              rcu_read_unlock();
         }
   #endif /* CONFIG_SMP */
   
@@@ -2564,7 -2568,7 +2564,7 @@@ static void ttwu_queue(struct task_stru
   {
         struct rq *rq = cpu_rq(cpu);
   
- #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE)
+ #if defined(CONFIG_SMP)
         if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
                 ttwu_queue_remote(p, cpu);
                 return;
@@@ -2741,7 -2745,7 +2741,7 @@@ static void __sched_fork(struct task_st
   /*
    * fork()/clone()-time setup:
    */
- -void sched_fork(struct task_struct *p, int clone_flags)
+ +void sched_fork(struct task_struct *p)
   {
         unsigned long flags;
         int cpu = get_cpu();
@@@ -2823,7 -2827,7 +2823,7 @@@
    * that must be done for every newly created context, then puts the task
    * on the runqueue and wakes it.
    */
- -void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
+ +void wake_up_new_task(struct task_struct *p)
   {
         unsigned long flags;
         struct rq *rq;
@@@ -4004,6 -4008,9 +4004,6 @@@ void thread_group_times(struct task_str
   /*
    * This function gets called by the timer code, with HZ frequency.
    * We call it with interrupts disabled.
- - *
- - * It also gets called by the fork code, when changing the parent's
- - * timeslices.
    */
   void scheduler_tick(void)
   {
@@@ -4123,11 -4130,17 +4123,11 @@@ static inline void schedule_debug(struc
         profile_hit(SCHED_PROFILING, __builtin_return_address(0));
   
         schedstat_inc(this_rq(), sched_count);
- -#ifdef CONFIG_SCHEDSTATS
- -      if (unlikely(prev->lock_depth >= 0)) {
- -              schedstat_inc(this_rq(), rq_sched_info.bkl_count);
- -              schedstat_inc(prev, sched_info.bkl_count);
- -      }
- -#endif
   }
   
   static void put_prev_task(struct rq *rq, struct task_struct *prev)
   {
- -      if (prev->on_rq)
+ +      if (prev->on_rq || rq->skip_clock_update < 0)
                 update_rq_clock(rq);
         prev->sched_class->put_prev_task(rq, prev);
   }
@@@ -5848,8 -5861,11 +5848,8 @@@ void __cpuinit init_idle(struct task_st
         raw_spin_unlock_irqrestore(&rq->lock, flags);
   
         /* Set the preempt count _outside_ the spinlocks! */
- -#if defined(CONFIG_PREEMPT)
- -      task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
- -#else
         task_thread_info(idle)->preempt_count = 0;
- -#endif
+ +
         /*
          * The idle tasks have their own, simple scheduling class:
          */
@@@ -5946,15 -5962,13 +5946,15 @@@ int set_cpus_allowed_ptr(struct task_st
   
         rq = task_rq_lock(p, &flags);
   
+ +      if (cpumask_equal(&p->cpus_allowed, new_mask))
+ +              goto out;
+ +
         if (!cpumask_intersects(new_mask, cpu_active_mask)) {
                 ret = -EINVAL;
                 goto out;
         }
   
- -      if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
- -                   !cpumask_equal(&p->cpus_allowed, new_mask))) {
+ +      if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) {
                 ret = -EINVAL;
                 goto out;
         }
@@@ -5971,7 -5985,7 +5971,7 @@@
                 goto out;
   
         dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
- -      if (need_migrate_task(p)) {
+ +      if (p->on_rq) {
                 struct migration_arg arg = { p, dest_cpu };
                 /* Need help from migration thread: drop lock and wait. */
                 task_rq_unlock(rq, p, &flags);
@@@ -6451,8 -6465,6 +6451,8 @@@ early_initcall(migration_init)
   
   #ifdef CONFIG_SMP
   
+ +static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
+ +
   #ifdef CONFIG_SCHED_DEBUG
   
   static __read_mostly int sched_domain_debug_enabled;
@@@ -6548,6 -6560,7 +6548,6 @@@ static int sched_domain_debug_one(struc
   
   static void sched_domain_debug(struct sched_domain *sd, int cpu)
   {
- -      cpumask_var_t groupmask;
         int level = 0;
   
         if (!sched_domain_debug_enabled)
@@@ -6560,14 -6573,20 +6560,14 @@@
   
         printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
   
- -      if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) {
- -              printk(KERN_DEBUG "Cannot load-balance (out of memory)\n");
- -              return;
- -      }
- -
         for (;;) {
- -              if (sched_domain_debug_one(sd, cpu, level, groupmask))
+ +              if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask))
                         break;
                 level++;
                 sd = sd->parent;
                 if (!sd)
                         break;
         }
- -      free_cpumask_var(groupmask);
   }
   #else /* !CONFIG_SCHED_DEBUG */
   # define sched_domain_debug(sd, cpu) do { } while (0)
@@@ -6624,11 -6643,12 +6624,11 @@@ sd_parent_degenerate(struct sched_domai
         return 1;
   }
   
- -static void free_rootdomain(struct root_domain *rd)
+ +static void free_rootdomain(struct rcu_head *rcu)
   {
- -      synchronize_sched();
+ +      struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
   
         cpupri_cleanup(&rd->cpupri);
- -
         free_cpumask_var(rd->rto_mask);
         free_cpumask_var(rd->online);
         free_cpumask_var(rd->span);
@@@ -6669,7 -6689,7 +6669,7 @@@ static void rq_attach_root(struct rq *r
         raw_spin_unlock_irqrestore(&rq->lock, flags);
   
         if (old_rd)
- -              free_rootdomain(old_rd);
+ +              call_rcu_sched(&old_rd->rcu, free_rootdomain);
   }
   
   static int init_rootdomain(struct root_domain *rd)
@@@ -6720,25 -6740,6 +6720,25 @@@ static struct root_domain *alloc_rootdo
         return rd;
   }
   
+ +static void free_sched_domain(struct rcu_head *rcu)
+ +{
+ +      struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
+ +      if (atomic_dec_and_test(&sd->groups->ref))
+ +              kfree(sd->groups);
+ +      kfree(sd);
+ +}
+ +
+ +static void destroy_sched_domain(struct sched_domain *sd, int cpu)
+ +{
+ +      call_rcu(&sd->rcu, free_sched_domain);
+ +}
+ +
+ +static void destroy_sched_domains(struct sched_domain *sd, int cpu)
+ +{
+ +      for (; sd; sd = sd->parent)
+ +              destroy_sched_domain(sd, cpu);
+ +}
+ +
   /*
    * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
    * hold the hotplug lock.
@@@ -6749,6 -6750,9 +6749,6 @@@ cpu_attach_domain(struct sched_domain *
         struct rq *rq = cpu_rq(cpu);
         struct sched_domain *tmp;
   
- -      for (tmp = sd; tmp; tmp = tmp->parent)
- -              tmp->span_weight = cpumask_weight(sched_domain_span(tmp));
- -
         /* Remove the sched domains which do not contribute to scheduling. */
         for (tmp = sd; tmp; ) {
                 struct sched_domain *parent = tmp->parent;
@@@ -6759,15 -6763,12 +6759,15 @@@
                         tmp->parent = parent->parent;
                         if (parent->parent)
                                 parent->parent->child = tmp;
+ +                      destroy_sched_domain(parent, cpu);
                 } else
                         tmp = tmp->parent;
         }
   
         if (sd && sd_degenerate(sd)) {
+ +              tmp = sd;
                 sd = sd->parent;
+ +              destroy_sched_domain(tmp, cpu);
                 if (sd)
                         sd->child = NULL;
         }
@@@ -6775,9 -6776,7 +6775,9 @@@
         sched_domain_debug(sd, cpu);
   
         rq_attach_root(rq, rd);
+ +      tmp = rq->sd;
         rcu_assign_pointer(rq->sd, sd);
+ +      destroy_sched_domains(tmp, cpu);
   }
   
   /* cpus with isolated domains */
@@@ -6793,6 -6792,56 +6793,6 @@@ static int __init isolated_cpu_setup(ch
   
   __setup("isolcpus=", isolated_cpu_setup);
   
- -/*
- - * init_sched_build_groups takes the cpumask we wish to span, and a pointer
- - * to a function which identifies what group(along with sched group) a CPU
- - * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
- - * (due to the fact that we keep track of groups covered with a struct cpumask).
- - *
- - * init_sched_build_groups will build a circular linked list of the groups
- - * covered by the given span, and will set each group's ->cpumask correctly,
- - * and ->cpu_power to 0.
- - */
- -static void
- -init_sched_build_groups(const struct cpumask *span,
- -                      const struct cpumask *cpu_map,
- -                      int (*group_fn)(int cpu, const struct cpumask *cpu_map,
- -                                      struct sched_group **sg,
- -                                      struct cpumask *tmpmask),
- -                      struct cpumask *covered, struct cpumask *tmpmask)
- -{
- -      struct sched_group *first = NULL, *last = NULL;
- -      int i;
- -
- -      cpumask_clear(covered);
- -
- -      for_each_cpu(i, span) {
- -              struct sched_group *sg;
- -              int group = group_fn(i, cpu_map, &sg, tmpmask);
- -              int j;
- -
- -              if (cpumask_test_cpu(i, covered))
- -                      continue;
- -
- -              cpumask_clear(sched_group_cpus(sg));
- -              sg->cpu_power = 0;
- -
- -              for_each_cpu(j, span) {
- -                      if (group_fn(j, cpu_map, NULL, tmpmask) != group)
- -                              continue;
- -
- -                      cpumask_set_cpu(j, covered);
- -                      cpumask_set_cpu(j, sched_group_cpus(sg));
- -              }
- -              if (!first)
- -                      first = sg;
- -              if (last)
- -                      last->next = sg;
- -              last = sg;
- -      }
- -      last->next = first;
- -}
- -
   #define SD_NODES_PER_DOMAIN 16
   
   #ifdef CONFIG_NUMA
@@@ -6809,7 -6858,7 +6809,7 @@@
    */
   static int find_next_best_node(int node, nodemask_t *used_nodes)
   {
- -      int i, n, val, min_val, best_node = 0;
+ +      int i, n, val, min_val, best_node = -1;
   
         min_val = INT_MAX;
   
@@@ -6833,8 -6882,7 +6833,8 @@@
                 }
         }
   
- -      node_set(best_node, *used_nodes);
+ +      if (best_node != -1)
+ +              node_set(best_node, *used_nodes);
         return best_node;
   }
   
@@@ -6860,130 -6908,315 +6860,130 @@@ static void sched_domain_node_span(int 
   
         for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
                 int next_node = find_next_best_node(node, &used_nodes);
- -
+ +              if (next_node < 0)
+ +                      break;
                 cpumask_or(span, span, cpumask_of_node(next_node));
         }
   }
+ +
+ +static const struct cpumask *cpu_node_mask(int cpu)
+ +{
+ +      lockdep_assert_held(&sched_domains_mutex);
+ +
+ +      sched_domain_node_span(cpu_to_node(cpu), sched_domains_tmpmask);
+ +
+ +      return sched_domains_tmpmask;
+ +}
+ +
+ +static const struct cpumask *cpu_allnodes_mask(int cpu)
+ +{
+ +      return cpu_possible_mask;
+ +}
   #endif /* CONFIG_NUMA */
   
- -int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
+ +static const struct cpumask *cpu_cpu_mask(int cpu)
+ +{
+ +      return cpumask_of_node(cpu_to_node(cpu));
+ +}
   
- -/*
- - * The cpus mask in sched_group and sched_domain hangs off the end.
- - *
- - * ( See the the comments in include/linux/sched.h:struct sched_group
- - *   and struct sched_domain. )
- - */
- -struct static_sched_group {
- -      struct sched_group sg;
- -      DECLARE_BITMAP(cpus, CONFIG_NR_CPUS);
- -};
+ +int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
   
- -struct static_sched_domain {
- -      struct sched_domain sd;
- -      DECLARE_BITMAP(span, CONFIG_NR_CPUS);
+ +struct sd_data {
+ +      struct sched_domain **__percpu sd;
+ +      struct sched_group **__percpu sg;
   };
   
   struct s_data {
- -#ifdef CONFIG_NUMA
- -      int                     sd_allnodes;
- -      cpumask_var_t           domainspan;
- -      cpumask_var_t           covered;
- -      cpumask_var_t           notcovered;
- -#endif
- -      cpumask_var_t           nodemask;
- -      cpumask_var_t           this_sibling_map;
- -      cpumask_var_t           this_core_map;
- -      cpumask_var_t           this_book_map;
- -      cpumask_var_t           send_covered;
- -      cpumask_var_t           tmpmask;
- -      struct sched_group      **sched_group_nodes;
+ +      struct sched_domain ** __percpu sd;
         struct root_domain      *rd;
   };
   
   enum s_alloc {
- -      sa_sched_groups = 0,
         sa_rootdomain,
- -      sa_tmpmask,
- -      sa_send_covered,
- -      sa_this_book_map,
- -      sa_this_core_map,
- -      sa_this_sibling_map,
- -      sa_nodemask,
- -      sa_sched_group_nodes,
- -#ifdef CONFIG_NUMA
- -      sa_notcovered,
- -      sa_covered,
- -      sa_domainspan,
- -#endif
+ +      sa_sd,
+ +      sa_sd_storage,
         sa_none,
   };
   
- -/*
- - * SMT sched-domains:
- - */
- -#ifdef CONFIG_SCHED_SMT
- -static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
- -static DEFINE_PER_CPU(struct static_sched_group, sched_groups);
- -
- -static int
- -cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
- -               struct sched_group **sg, struct cpumask *unused)
- -{
- -      if (sg)
- -              *sg = &per_cpu(sched_groups, cpu).sg;
- -      return cpu;
- -}
- -#endif /* CONFIG_SCHED_SMT */
+ +struct sched_domain_topology_level;
   
- -/*
- - * multi-core sched-domains:
- - */
- -#ifdef CONFIG_SCHED_MC
- -static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
- -static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
+ +typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
+ +typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
   
- -static int
- -cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
- -                struct sched_group **sg, struct cpumask *mask)
- -{
- -      int group;
- -#ifdef CONFIG_SCHED_SMT
- -      cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
- -      group = cpumask_first(mask);
- -#else
- -      group = cpu;
- -#endif
- -      if (sg)
- -              *sg = &per_cpu(sched_group_core, group).sg;
- -      return group;
- -}
- -#endif /* CONFIG_SCHED_MC */
+ +struct sched_domain_topology_level {
+ +      sched_domain_init_f init;
+ +      sched_domain_mask_f mask;
+ +      struct sd_data      data;
+ +};
   
   /*
- - * book sched-domains:
+ + * Assumes the sched_domain tree is fully constructed
    */
- -#ifdef CONFIG_SCHED_BOOK
- -static DEFINE_PER_CPU(struct static_sched_domain, book_domains);
- -static DEFINE_PER_CPU(struct static_sched_group, sched_group_book);
- -
- -static int
- -cpu_to_book_group(int cpu, const struct cpumask *cpu_map,
- -                struct sched_group **sg, struct cpumask *mask)
+ +static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
   {
- -      int group = cpu;
- -#ifdef CONFIG_SCHED_MC
- -      cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
- -      group = cpumask_first(mask);
- -#elif defined(CONFIG_SCHED_SMT)
- -      cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
- -      group = cpumask_first(mask);
- -#endif
- -      if (sg)
- -              *sg = &per_cpu(sched_group_book, group).sg;
- -      return group;
- -}
- -#endif /* CONFIG_SCHED_BOOK */
+ +      struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
+ +      struct sched_domain *child = sd->child;
   
- -static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
- -static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
+ +      if (child)
+ +              cpu = cpumask_first(sched_domain_span(child));
   
- -static int
- -cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
- -                struct sched_group **sg, struct cpumask *mask)
- -{
- -      int group;
- -#ifdef CONFIG_SCHED_BOOK
- -      cpumask_and(mask, cpu_book_mask(cpu), cpu_map);
- -      group = cpumask_first(mask);
- -#elif defined(CONFIG_SCHED_MC)
- -      cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
- -      group = cpumask_first(mask);
- -#elif defined(CONFIG_SCHED_SMT)
- -      cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
- -      group = cpumask_first(mask);
- -#else
- -      group = cpu;
- -#endif
         if (sg)
- -              *sg = &per_cpu(sched_group_phys, group).sg;
- -      return group;
+ +              *sg = *per_cpu_ptr(sdd->sg, cpu);
+ +
+ +      return cpu;
   }
   
- -#ifdef CONFIG_NUMA
   /*
- - * The init_sched_build_groups can't handle what we want to do with node
- - * groups, so roll our own. Now each node has its own list of groups which
- - * gets dynamically allocated.
+ + * build_sched_groups takes the cpumask we wish to span, and a pointer
+ + * to a function which identifies what group(along with sched group) a CPU
+ + * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
+ + * (due to the fact that we keep track of groups covered with a struct cpumask).
+ + *
+ + * build_sched_groups will build a circular linked list of the groups
+ + * covered by the given span, and will set each group's ->cpumask correctly,
+ + * and ->cpu_power to 0.
    */
- -static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
- -static struct sched_group ***sched_group_nodes_bycpu;
- -
- -static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
- -static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
- -
- -static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
- -                               struct sched_group **sg,
- -                               struct cpumask *nodemask)
- -{
- -      int group;
- -
- -      cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
- -      group = cpumask_first(nodemask);
- -
- -      if (sg)
- -              *sg = &per_cpu(sched_group_allnodes, group).sg;
- -      return group;
- -}
- -
- -static void init_numa_sched_groups_power(struct sched_group *group_head)
- -{
- -      struct sched_group *sg = group_head;
- -      int j;
- -
- -      if (!sg)
- -              return;
- -      do {
- -              for_each_cpu(j, sched_group_cpus(sg)) {
- -                      struct sched_domain *sd;
- -
- -                      sd = &per_cpu(phys_domains, j).sd;
- -                      if (j != group_first_cpu(sd->groups)) {
- -                              /*
- -                               * Only add "power" once for each
- -                               * physical package.
- -                               */
- -                              continue;
- -                      }
- -
- -                      sg->cpu_power += sd->groups->cpu_power;
- -              }
- -              sg = sg->next;
- -      } while (sg != group_head);
- -}
- -
- -static int build_numa_sched_groups(struct s_data *d,
- -                                 const struct cpumask *cpu_map, int num)
+ +static void
+ +build_sched_groups(struct sched_domain *sd)
   {
- -      struct sched_domain *sd;
- -      struct sched_group *sg, *prev;
- -      int n, j;
- -
- -      cpumask_clear(d->covered);
- -      cpumask_and(d->nodemask, cpumask_of_node(num), cpu_map);
- -      if (cpumask_empty(d->nodemask)) {
- -              d->sched_group_nodes[num] = NULL;
- -              goto out;
- -      }
- -
- -      sched_domain_node_span(num, d->domainspan);
- -      cpumask_and(d->domainspan, d->domainspan, cpu_map);
- -
- -      sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
- -                        GFP_KERNEL, num);
- -      if (!sg) {
- -              printk(KERN_WARNING "Can not alloc domain group for node %d\n",
- -                     num);
- -              return -ENOMEM;
- -      }
- -      d->sched_group_nodes[num] = sg;
- -
- -      for_each_cpu(j, d->nodemask) {
- -              sd = &per_cpu(node_domains, j).sd;
- -              sd->groups = sg;
- -      }
+ +      struct sched_group *first = NULL, *last = NULL;
+ +      struct sd_data *sdd = sd->private;
+ +      const struct cpumask *span = sched_domain_span(sd);
+ +      struct cpumask *covered;
+ +      int i;
   
- -      sg->cpu_power = 0;
- -      cpumask_copy(sched_group_cpus(sg), d->nodemask);
- -      sg->next = sg;
- -      cpumask_or(d->covered, d->covered, d->nodemask);
+ +      lockdep_assert_held(&sched_domains_mutex);
+ +      covered = sched_domains_tmpmask;
   
- -      prev = sg;
- -      for (j = 0; j < nr_node_ids; j++) {
- -              n = (num + j) % nr_node_ids;
- -              cpumask_complement(d->notcovered, d->covered);
- -              cpumask_and(d->tmpmask, d->notcovered, cpu_map);
- -              cpumask_and(d->tmpmask, d->tmpmask, d->domainspan);
- -              if (cpumask_empty(d->tmpmask))
- -                      break;
- -              cpumask_and(d->tmpmask, d->tmpmask, cpumask_of_node(n));
- -              if (cpumask_empty(d->tmpmask))
- -                      continue;
- -              sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
- -                                GFP_KERNEL, num);
- -              if (!sg) {
- -                      printk(KERN_WARNING
- -                             "Can not alloc domain group for node %d\n", j);
- -                      return -ENOMEM;
- -              }
- -              sg->cpu_power = 0;
- -              cpumask_copy(sched_group_cpus(sg), d->tmpmask);
- -              sg->next = prev->next;
- -              cpumask_or(d->covered, d->covered, d->tmpmask);
- -              prev->next = sg;
- -              prev = sg;
- -      }
- -out:
- -      return 0;
- -}
- -#endif /* CONFIG_NUMA */
- -
- -#ifdef CONFIG_NUMA
- -/* Free memory allocated for various sched_group structures */
- -static void free_sched_groups(const struct cpumask *cpu_map,
- -                            struct cpumask *nodemask)
- -{
- -      int cpu, i;
+ +      cpumask_clear(covered);
   
- -      for_each_cpu(cpu, cpu_map) {
- -              struct sched_group **sched_group_nodes
- -                      = sched_group_nodes_bycpu[cpu];
+ +      for_each_cpu(i, span) {
+ +              struct sched_group *sg;
+ +              int group = get_group(i, sdd, &sg);
+ +              int j;
   
- -              if (!sched_group_nodes)
+ +              if (cpumask_test_cpu(i, covered))
                         continue;
   
- -              for (i = 0; i < nr_node_ids; i++) {
- -                      struct sched_group *oldsg, *sg = sched_group_nodes[i];
+ +              cpumask_clear(sched_group_cpus(sg));
+ +              sg->cpu_power = 0;
   
- -                      cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
- -                      if (cpumask_empty(nodemask))
+ +              for_each_cpu(j, span) {
+ +                      if (get_group(j, sdd, NULL) != group)
                                 continue;
   
- -                      if (sg == NULL)
- -                              continue;
- -                      sg = sg->next;
- -next_sg:
- -                      oldsg = sg;
- -                      sg = sg->next;
- -                      kfree(oldsg);
- -                      if (oldsg != sched_group_nodes[i])
- -                              goto next_sg;
+ +                      cpumask_set_cpu(j, covered);
+ +                      cpumask_set_cpu(j, sched_group_cpus(sg));
                 }
- -              kfree(sched_group_nodes);
- -              sched_group_nodes_bycpu[cpu] = NULL;
+ +
+ +              if (!first)
+ +                      first = sg;
+ +              if (last)
+ +                      last->next = sg;
+ +              last = sg;
         }
+ +      last->next = first;
   }
- -#else /* !CONFIG_NUMA */
- -static void free_sched_groups(const struct cpumask *cpu_map,
- -                            struct cpumask *nodemask)
- -{
- -}
- -#endif /* CONFIG_NUMA */
   
   /*
    * Initialize sched groups cpu_power.
@@@ -6997,6 -7230,11 +6997,6 @@@
    */
   static void init_sched_groups_power(int cpu, struct sched_domain *sd)
   {
- -      struct sched_domain *child;
- -      struct sched_group *group;
- -      long power;
- -      int weight;
- -
         WARN_ON(!sd || !sd->groups);
   
         if (cpu != group_first_cpu(sd->groups))
@@@ -7004,7 -7242,36 +7004,7 @@@
   
         sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
   
- -      child = sd->child;
- -
- -      sd->groups->cpu_power = 0;
- -
- -      if (!child) {
- -              power = SCHED_LOAD_SCALE;
- -              weight = cpumask_weight(sched_domain_span(sd));
- -              /*
- -               * SMT siblings share the power of a single core.
- -               * Usually multiple threads get a better yield out of
- -               * that one core than a single thread would have,
- -               * reflect that in sd->smt_gain.
- -               */
- -              if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
- -                      power *= sd->smt_gain;
- -                      power /= weight;
- -                      power >>= SCHED_LOAD_SHIFT;
- -              }
- -              sd->groups->cpu_power += power;
- -              return;
- -      }
- -
- -      /*
- -       * Add cpu_power of each child group to this groups cpu_power.
- -       */
- -      group = child->groups;
- -      do {
- -              sd->groups->cpu_power += group->cpu_power;
- -              group = group->next;
- -      } while (group != child->groups);
+ +      update_group_power(sd, cpu);
   }
   
   /*
@@@ -7018,15 -7285,15 +7018,15 @@@
   # define SD_INIT_NAME(sd, type)               do { } while (0)
   #endif
   
- -#define       SD_INIT(sd, type)       sd_init_##type(sd)
- -
- -#define SD_INIT_FUNC(type)    \
- -static noinline void sd_init_##type(struct sched_domain *sd)  \
- -{                                                             \
- -      memset(sd, 0, sizeof(*sd));                             \
- -      *sd = SD_##type##_INIT;                                 \
- -      sd->level = SD_LV_##type;                               \
- -      SD_INIT_NAME(sd, type);                                 \
+ +#define SD_INIT_FUNC(type)                                            \
+ +static noinline struct sched_domain *                                 \
+ +sd_init_##type(struct sched_domain_topology_level *tl, int cpu)       \
+ +{                                                                     \
+ +      struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);       \
+ +      *sd = SD_##type##_INIT;                                         \
+ +      SD_INIT_NAME(sd, type);                                         \
+ +      sd->private = &tl->data;                                        \
+ +      return sd;                                                      \
   }
   
   SD_INIT_FUNC(CPU)
@@@ -7045,14 -7312,13 +7045,14 @@@
   #endif
   
   static int default_relax_domain_level = -1;
+ +int sched_domain_level_max;
   
   static int __init setup_relax_domain_level(char *str)
   {
         unsigned long val;
   
         val = simple_strtoul(str, NULL, 0);
- -      if (val < SD_LV_MAX)
+ +      if (val < sched_domain_level_max)
                 default_relax_domain_level = val;
   
         return 1;
@@@ -7080,20 -7346,37 +7080,20 @@@ static void set_domain_attribute(struc
         }
   }
   
+ +static void __sdt_free(const struct cpumask *cpu_map);
+ +static int __sdt_alloc(const struct cpumask *cpu_map);
+ +
   static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
                                  const struct cpumask *cpu_map)
   {
         switch (what) {
- -      case sa_sched_groups:
- -              free_sched_groups(cpu_map, d->tmpmask); /* fall through */
- -              d->sched_group_nodes = NULL;
         case sa_rootdomain:
- -              free_rootdomain(d->rd); /* fall through */
- -      case sa_tmpmask:
- -              free_cpumask_var(d->tmpmask); /* fall through */
- -      case sa_send_covered:
- -              free_cpumask_var(d->send_covered); /* fall through */
- -      case sa_this_book_map:
- -              free_cpumask_var(d->this_book_map); /* fall through */
- -      case sa_this_core_map:
- -              free_cpumask_var(d->this_core_map); /* fall through */
- -      case sa_this_sibling_map:
- -              free_cpumask_var(d->this_sibling_map); /* fall through */
- -      case sa_nodemask:
- -              free_cpumask_var(d->nodemask); /* fall through */
- -      case sa_sched_group_nodes:
- -#ifdef CONFIG_NUMA
- -              kfree(d->sched_group_nodes); /* fall through */
- -      case sa_notcovered:
- -              free_cpumask_var(d->notcovered); /* fall through */
- -      case sa_covered:
- -              free_cpumask_var(d->covered); /* fall through */
- -      case sa_domainspan:
- -              free_cpumask_var(d->domainspan); /* fall through */
- -#endif
+ +              if (!atomic_read(&d->rd->refcount))
+ +                      free_rootdomain(&d->rd->rcu); /* fall through */
+ +      case sa_sd:
+ +              free_percpu(d->sd); /* fall through */
+ +      case sa_sd_storage:
+ +              __sdt_free(cpu_map); /* fall through */
         case sa_none:
                 break;
         }
@@@ -7102,212 -7385,308 +7102,212 @@@
   static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
                                                    const struct cpumask *cpu_map)
   {
- -#ifdef CONFIG_NUMA
- -      if (!alloc_cpumask_var(&d->domainspan, GFP_KERNEL))
- -              return sa_none;
- -      if (!alloc_cpumask_var(&d->covered, GFP_KERNEL))
- -              return sa_domainspan;
- -      if (!alloc_cpumask_var(&d->notcovered, GFP_KERNEL))
- -              return sa_covered;
- -      /* Allocate the per-node list of sched groups */
- -      d->sched_group_nodes = kcalloc(nr_node_ids,
- -                                    sizeof(struct sched_group *), GFP_KERNEL);
- -      if (!d->sched_group_nodes) {
- -              printk(KERN_WARNING "Can not alloc sched group node list\n");
- -              return sa_notcovered;
- -      }
- -      sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes;
- -#endif
- -      if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL))
- -              return sa_sched_group_nodes;
- -      if (!alloc_cpumask_var(&d->this_sibling_map, GFP_KERNEL))
- -              return sa_nodemask;
- -      if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL))
- -              return sa_this_sibling_map;
- -      if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL))
- -              return sa_this_core_map;
- -      if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
- -              return sa_this_book_map;
- -      if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
- -              return sa_send_covered;
+ +      memset(d, 0, sizeof(*d));
+ +
+ +      if (__sdt_alloc(cpu_map))
+ +              return sa_sd_storage;
+ +      d->sd = alloc_percpu(struct sched_domain *);
+ +      if (!d->sd)
+ +              return sa_sd_storage;
         d->rd = alloc_rootdomain();
- -      if (!d->rd) {
- -              printk(KERN_WARNING "Cannot alloc root domain\n");
- -              return sa_tmpmask;
- -      }
+ +      if (!d->rd)
+ +              return sa_sd;
         return sa_rootdomain;
   }
   
- -static struct sched_domain *__build_numa_sched_domains(struct s_data *d,
- -      const struct cpumask *cpu_map, struct sched_domain_attr *attr, int i)
+ +/*
+ + * NULL the sd_data elements we've used to build the sched_domain and
+ + * sched_group structure so that the subsequent __free_domain_allocs()
+ + * will not free the data we're using.
+ + */
+ +static void claim_allocations(int cpu, struct sched_domain *sd)
   {
- -      struct sched_domain *sd = NULL;
- -#ifdef CONFIG_NUMA
- -      struct sched_domain *parent;
- -
- -      d->sd_allnodes = 0;
- -      if (cpumask_weight(cpu_map) >
- -          SD_NODES_PER_DOMAIN * cpumask_weight(d->nodemask)) {
- -              sd = &per_cpu(allnodes_domains, i).sd;
- -              SD_INIT(sd, ALLNODES);
- -              set_domain_attribute(sd, attr);
- -              cpumask_copy(sched_domain_span(sd), cpu_map);
- -              cpu_to_allnodes_group(i, cpu_map, &sd->groups, d->tmpmask);
- -              d->sd_allnodes = 1;
- -      }
- -      parent = sd;
- -
- -      sd = &per_cpu(node_domains, i).sd;
- -      SD_INIT(sd, NODE);
- -      set_domain_attribute(sd, attr);
- -      sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
- -      sd->parent = parent;
- -      if (parent)
- -              parent->child = sd;
- -      cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map);
- -#endif
- -      return sd;
- -}
+ +      struct sd_data *sdd = sd->private;
+ +      struct sched_group *sg = sd->groups;
   
- -static struct sched_domain *__build_cpu_sched_domain(struct s_data *d,
- -      const struct cpumask *cpu_map, struct sched_domain_attr *attr,
- -      struct sched_domain *parent, int i)
- -{
- -      struct sched_domain *sd;
- -      sd = &per_cpu(phys_domains, i).sd;
- -      SD_INIT(sd, CPU);
- -      set_domain_attribute(sd, attr);
- -      cpumask_copy(sched_domain_span(sd), d->nodemask);
- -      sd->parent = parent;
- -      if (parent)
- -              parent->child = sd;
- -      cpu_to_phys_group(i, cpu_map, &sd->groups, d->tmpmask);
- -      return sd;
- -}
+ +      WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
+ +      *per_cpu_ptr(sdd->sd, cpu) = NULL;
   
- -static struct sched_domain *__build_book_sched_domain(struct s_data *d,
- -      const struct cpumask *cpu_map, struct sched_domain_attr *attr,
- -      struct sched_domain *parent, int i)
- -{
- -      struct sched_domain *sd = parent;
- -#ifdef CONFIG_SCHED_BOOK
- -      sd = &per_cpu(book_domains, i).sd;
- -      SD_INIT(sd, BOOK);
- -      set_domain_attribute(sd, attr);
- -      cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i));
- -      sd->parent = parent;
- -      parent->child = sd;
- -      cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask);
- -#endif
- -      return sd;
+ +      if (cpu == cpumask_first(sched_group_cpus(sg))) {
+ +              WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
+ +              *per_cpu_ptr(sdd->sg, cpu) = NULL;
+ +      }
   }
   
- -static struct sched_domain *__build_mc_sched_domain(struct s_data *d,
- -      const struct cpumask *cpu_map, struct sched_domain_attr *attr,
- -      struct sched_domain *parent, int i)
+ +#ifdef CONFIG_SCHED_SMT
+ +static const struct cpumask *cpu_smt_mask(int cpu)
   {
- -      struct sched_domain *sd = parent;
- -#ifdef CONFIG_SCHED_MC
- -      sd = &per_cpu(core_domains, i).sd;
- -      SD_INIT(sd, MC);
- -      set_domain_attribute(sd, attr);
- -      cpumask_and(sched_domain_span(sd), cpu_map, cpu_coregroup_mask(i));
- -      sd->parent = parent;
- -      parent->child = sd;
- -      cpu_to_core_group(i, cpu_map, &sd->groups, d->tmpmask);
- -#endif
- -      return sd;
+ +      return topology_thread_cpumask(cpu);
   }
- -
- -static struct sched_domain *__build_smt_sched_domain(struct s_data *d,
- -      const struct cpumask *cpu_map, struct sched_domain_attr *attr,
- -      struct sched_domain *parent, int i)
- -{
- -      struct sched_domain *sd = parent;
- -#ifdef CONFIG_SCHED_SMT
- -      sd = &per_cpu(cpu_domains, i).sd;
- -      SD_INIT(sd, SIBLING);
- -      set_domain_attribute(sd, attr);
- -      cpumask_and(sched_domain_span(sd), cpu_map, topology_thread_cpumask(i));
- -      sd->parent = parent;
- -      parent->child = sd;
- -      cpu_to_cpu_group(i, cpu_map, &sd->groups, d->tmpmask);
   #endif
- -      return sd;
- -}
   
- -static void build_sched_groups(struct s_data *d, enum sched_domain_level l,
- -                             const struct cpumask *cpu_map, int cpu)
- -{
- -      switch (l) {
+ +/*
+ + * Topology list, bottom-up.
+ + */
+ +static struct sched_domain_topology_level default_topology[] = {
   #ifdef CONFIG_SCHED_SMT
- -      case SD_LV_SIBLING: /* set up CPU (sibling) groups */
- -              cpumask_and(d->this_sibling_map, cpu_map,
- -                          topology_thread_cpumask(cpu));
- -              if (cpu == cpumask_first(d->this_sibling_map))
- -                      init_sched_build_groups(d->this_sibling_map, cpu_map,
- -                                              &cpu_to_cpu_group,
- -                                              d->send_covered, d->tmpmask);
- -              break;
+ +      { sd_init_SIBLING, cpu_smt_mask, },
   #endif
   #ifdef CONFIG_SCHED_MC
- -      case SD_LV_MC: /* set up multi-core groups */
- -              cpumask_and(d->this_core_map, cpu_map, cpu_coregroup_mask(cpu));
- -              if (cpu == cpumask_first(d->this_core_map))
- -                      init_sched_build_groups(d->this_core_map, cpu_map,
- -                                              &cpu_to_core_group,
- -                                              d->send_covered, d->tmpmask);
- -              break;
+ +      { sd_init_MC, cpu_coregroup_mask, },
   #endif
   #ifdef CONFIG_SCHED_BOOK
- -      case SD_LV_BOOK: /* set up book groups */
- -              cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu));
- -              if (cpu == cpumask_first(d->this_book_map))
- -                      init_sched_build_groups(d->this_book_map, cpu_map,
- -                                              &cpu_to_book_group,
- -                                              d->send_covered, d->tmpmask);
- -              break;
+ +      { sd_init_BOOK, cpu_book_mask, },
   #endif
- -      case SD_LV_CPU: /* set up physical groups */
- -              cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map);
- -              if (!cpumask_empty(d->nodemask))
- -                      init_sched_build_groups(d->nodemask, cpu_map,
- -                                              &cpu_to_phys_group,
- -                                              d->send_covered, d->tmpmask);
- -              break;
+ +      { sd_init_CPU, cpu_cpu_mask, },
   #ifdef CONFIG_NUMA
- -      case SD_LV_ALLNODES:
- -              init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group,
- -                                      d->send_covered, d->tmpmask);
- -              break;
+ +      { sd_init_NODE, cpu_node_mask, },
+ +      { sd_init_ALLNODES, cpu_allnodes_mask, },
   #endif
- -      default:
- -              break;
+ +      { NULL, },
+ +};
+ +
+ +static struct sched_domain_topology_level *sched_domain_topology = default_topology;
+ +
+ +static int __sdt_alloc(const struct cpumask *cpu_map)
+ +{
+ +      struct sched_domain_topology_level *tl;
+ +      int j;
+ +
+ +      for (tl = sched_domain_topology; tl->init; tl++) {
+ +              struct sd_data *sdd = &tl->data;
+ +
+ +              sdd->sd = alloc_percpu(struct sched_domain *);
+ +              if (!sdd->sd)
+ +                      return -ENOMEM;
+ +
+ +              sdd->sg = alloc_percpu(struct sched_group *);
+ +              if (!sdd->sg)
+ +                      return -ENOMEM;
+ +
+ +              for_each_cpu(j, cpu_map) {
+ +                      struct sched_domain *sd;
+ +                      struct sched_group *sg;
+ +
+ +                      sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
+ +                                      GFP_KERNEL, cpu_to_node(j));
+ +                      if (!sd)
+ +                              return -ENOMEM;
+ +
+ +                      *per_cpu_ptr(sdd->sd, j) = sd;
+ +
+ +                      sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+ +                                      GFP_KERNEL, cpu_to_node(j));
+ +                      if (!sg)
+ +                              return -ENOMEM;
+ +
+ +                      *per_cpu_ptr(sdd->sg, j) = sg;
+ +              }
+ +      }
+ +
+ +      return 0;
+ +}
+ +
+ +static void __sdt_free(const struct cpumask *cpu_map)
+ +{
+ +      struct sched_domain_topology_level *tl;
+ +      int j;
+ +
+ +      for (tl = sched_domain_topology; tl->init; tl++) {
+ +              struct sd_data *sdd = &tl->data;
+ +
+ +              for_each_cpu(j, cpu_map) {
+ +                      kfree(*per_cpu_ptr(sdd->sd, j));
+ +                      kfree(*per_cpu_ptr(sdd->sg, j));
+ +              }
+ +              free_percpu(sdd->sd);
+ +              free_percpu(sdd->sg);
+ +      }
+ +}
+ +
+ +struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
+ +              struct s_data *d, const struct cpumask *cpu_map,
+ +              struct sched_domain_attr *attr, struct sched_domain *child,
+ +              int cpu)
+ +{
+ +      struct sched_domain *sd = tl->init(tl, cpu);
+ +      if (!sd)
+ +              return child;
+ +
+ +      set_domain_attribute(sd, attr);
+ +      cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
+ +      if (child) {
+ +              sd->level = child->level + 1;
+ +              sched_domain_level_max = max(sched_domain_level_max, sd->level);
+ +              child->parent = sd;
         }
+ +      sd->child = child;
+ +
+ +      return sd;
   }
   
   /*
    * Build sched domains for a given set of cpus and attach the sched domains
    * to the individual cpus
    */
- -static int __build_sched_domains(const struct cpumask *cpu_map,
- -                               struct sched_domain_attr *attr)
+ +static int build_sched_domains(const struct cpumask *cpu_map,
+ +                             struct sched_domain_attr *attr)
   {
         enum s_alloc alloc_state = sa_none;
- -      struct s_data d;
         struct sched_domain *sd;
- -      int i;
- -#ifdef CONFIG_NUMA
- -      d.sd_allnodes = 0;
- -#endif
+ +      struct s_data d;
+ +      int i, ret = -ENOMEM;
   
         alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
         if (alloc_state != sa_rootdomain)
                 goto error;
- -      alloc_state = sa_sched_groups;
   
- -      /*
- -       * Set up domains for cpus specified by the cpu_map.
- -       */
+ +      /* Set up domains for cpus specified by the cpu_map. */
         for_each_cpu(i, cpu_map) {
- -              cpumask_and(d.nodemask, cpumask_of_node(cpu_to_node(i)),
- -                          cpu_map);
- -
- -              sd = __build_numa_sched_domains(&d, cpu_map, attr, i);
- -              sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i);
- -              sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i);
- -              sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i);
- -              sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i);
- -      }
- -
- -      for_each_cpu(i, cpu_map) {
- -              build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i);
- -              build_sched_groups(&d, SD_LV_BOOK, cpu_map, i);
- -              build_sched_groups(&d, SD_LV_MC, cpu_map, i);
- -      }
- -
- -      /* Set up physical groups */
- -      for (i = 0; i < nr_node_ids; i++)
- -              build_sched_groups(&d, SD_LV_CPU, cpu_map, i);
+ +              struct sched_domain_topology_level *tl;
   
- -#ifdef CONFIG_NUMA
- -      /* Set up node groups */
- -      if (d.sd_allnodes)
- -              build_sched_groups(&d, SD_LV_ALLNODES, cpu_map, 0);
+ +              sd = NULL;
+ +              for (tl = sched_domain_topology; tl->init; tl++)
+ +                      sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
   
- -      for (i = 0; i < nr_node_ids; i++)
- -              if (build_numa_sched_groups(&d, cpu_map, i))
- -                      goto error;
- -#endif
+ +              while (sd->child)
+ +                      sd = sd->child;
   
- -      /* Calculate CPU power for physical packages and nodes */
- -#ifdef CONFIG_SCHED_SMT
- -      for_each_cpu(i, cpu_map) {
- -              sd = &per_cpu(cpu_domains, i).sd;
- -              init_sched_groups_power(i, sd);
- -      }
- -#endif
- -#ifdef CONFIG_SCHED_MC
- -      for_each_cpu(i, cpu_map) {
- -              sd = &per_cpu(core_domains, i).sd;
- -              init_sched_groups_power(i, sd);
+ +              *per_cpu_ptr(d.sd, i) = sd;
         }
- -#endif
- -#ifdef CONFIG_SCHED_BOOK
- -      for_each_cpu(i, cpu_map) {
- -              sd = &per_cpu(book_domains, i).sd;
- -              init_sched_groups_power(i, sd);
- -      }
- -#endif
   
+ +      /* Build the groups for the domains */
         for_each_cpu(i, cpu_map) {
- -              sd = &per_cpu(phys_domains, i).sd;
- -              init_sched_groups_power(i, sd);
- -      }
+ +              for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+ +                      sd->span_weight = cpumask_weight(sched_domain_span(sd));
+ +                      get_group(i, sd->private, &sd->groups);
+ +                      atomic_inc(&sd->groups->ref);
   
- -#ifdef CONFIG_NUMA
- -      for (i = 0; i < nr_node_ids; i++)
- -              init_numa_sched_groups_power(d.sched_group_nodes[i]);
+ +                      if (i != cpumask_first(sched_domain_span(sd)))
+ +                              continue;
   
- -      if (d.sd_allnodes) {
- -              struct sched_group *sg;
+ +                      build_sched_groups(sd);
+ +              }
+ +      }
+ +
+ +      /* Calculate CPU power for physical packages and nodes */
+ +      for (i = nr_cpumask_bits-1; i >= 0; i--) {
+ +              if (!cpumask_test_cpu(i, cpu_map))
+ +                      continue;
   
- -              cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg,
- -                                                              d.tmpmask);
- -              init_numa_sched_groups_power(sg);
+ +              for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+ +                      claim_allocations(i, sd);
+ +                      init_sched_groups_power(i, sd);
+ +              }
         }
- -#endif
   
         /* Attach the domains */
+ +      rcu_read_lock();
         for_each_cpu(i, cpu_map) {
- -#ifdef CONFIG_SCHED_SMT
- -              sd = &per_cpu(cpu_domains, i).sd;
- -#elif defined(CONFIG_SCHED_MC)
- -              sd = &per_cpu(core_domains, i).sd;
- -#elif defined(CONFIG_SCHED_BOOK)
- -              sd = &per_cpu(book_domains, i).sd;
- -#else
- -              sd = &per_cpu(phys_domains, i).sd;
- -#endif
+ +              sd = *per_cpu_ptr(d.sd, i);
                 cpu_attach_domain(sd, d.rd, i);
         }
+ +      rcu_read_unlock();
   
- -      d.sched_group_nodes = NULL; /* don't free this we still need it */
- -      __free_domain_allocs(&d, sa_tmpmask, cpu_map);
- -      return 0;
- -
+ +      ret = 0;
   error:
         __free_domain_allocs(&d, alloc_state, cpu_map);
- -      return -ENOMEM;
- -}
- -
- -static int build_sched_domains(const struct cpumask *cpu_map)
- -{
- -      return __build_sched_domains(cpu_map, NULL);
+ +      return ret;
   }
   
   static cpumask_var_t *doms_cur;       /* current sched domains */
@@@ -7362,7 -7741,7 +7362,7 @@@ void free_sched_domains(cpumask_var_t d
    * For now this just excludes isolated cpus, but could be used to
    * exclude other special cases in the future.
    */
- -static int arch_init_sched_domains(const struct cpumask *cpu_map)
+ +static int init_sched_domains(const struct cpumask *cpu_map)
   {
         int err;
   
@@@ -7373,24 -7752,32 +7373,24 @@@
                 doms_cur = &fallback_doms;
         cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
         dattr_cur = NULL;
- -      err = build_sched_domains(doms_cur[0]);
+ +      err = build_sched_domains(doms_cur[0], NULL);
         register_sched_domain_sysctl();
   
         return err;
   }
   
- -static void arch_destroy_sched_domains(const struct cpumask *cpu_map,
- -                                     struct cpumask *tmpmask)
- -{
- -      free_sched_groups(cpu_map, tmpmask);
- -}
- -
   /*
    * Detach sched domains from a group of cpus specified in cpu_map
    * These cpus will now be attached to the NULL domain
    */
   static void detach_destroy_domains(const struct cpumask *cpu_map)
   {
- -      /* Save because hotplug lock held. */
- -      static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS);
         int i;
   
+ +      rcu_read_lock();
         for_each_cpu(i, cpu_map)
                 cpu_attach_domain(NULL, &def_root_domain, i);
- -      synchronize_sched();
- -      arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask));
+ +      rcu_read_unlock();
   }
   
   /* handle null as "default" */
@@@ -7479,7 -7866,8 +7479,7 @@@ match1
                                 goto match2;
                 }
                 /* no match - add a new doms_new */
- -              __build_sched_domains(doms_new[i],
- -                                      dattr_new ? dattr_new + i : NULL);
+ +              build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL);
   match2:
                 ;
         }
@@@ -7498,7 -7886,7 +7498,7 @@@
   }
   
   #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
- -static void arch_reinit_sched_domains(void)
+ +static void reinit_sched_domains(void)
   {
         get_online_cpus();
   
@@@ -7531,7 -7919,7 +7531,7 @@@ static ssize_t sched_power_savings_stor
         else
                 sched_mc_power_savings = level;
   
- -      arch_reinit_sched_domains();
+ +      reinit_sched_domains();
   
         return count;
   }
@@@ -7650,9 -8038,14 +7650,9 @@@ void __init sched_init_smp(void
         alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
         alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
   
- -#if defined(CONFIG_NUMA)
- -      sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
- -                                                              GFP_KERNEL);
- -      BUG_ON(sched_group_nodes_bycpu == NULL);
- -#endif
         get_online_cpus();
         mutex_lock(&sched_domains_mutex);
- -      arch_init_sched_domains(cpu_active_mask);
+ +      init_sched_domains(cpu_active_mask);
         cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
         if (cpumask_empty(non_isolated_cpus))
                 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
@@@ -7959,7 -8352,6 +7959,7 @@@ void __init sched_init(void
         /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
         zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
   #ifdef CONFIG_SMP
+ +      zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
   #ifdef CONFIG_NO_HZ
         zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
         alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT);
@@@ -8232,6 -8624,7 +8232,6 @@@ int alloc_rt_sched_group(struct task_gr
   {
         struct rt_rq *rt_rq;
         struct sched_rt_entity *rt_se;
- -      struct rq *rq;
         int i;
   
         tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
@@@ -8245,6 -8638,8 +8245,6 @@@
                         ktime_to_ns(def_rt_bandwidth.rt_period), 0);
   
         for_each_possible_cpu(i) {
- -              rq = cpu_rq(i);
- -
                 rt_rq = kzalloc_node(sizeof(struct rt_rq),
                                      GFP_KERNEL, cpu_to_node(i));
                 if (!rt_rq)
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 23 May 2011 05:06:24 +0000 (22:06 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 23 May 2011 05:06:24 +0000 (22:06 -0700)
		1	2
init/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history