diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 7946c4231169ff81ed2c22cd538cdf289e8930f2..4c7638b91fa56ea2dde92d50a818a3d8d3b7d1da 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
 #include <linux/slab.h>
 #include <linux/perf_event.h>
 #include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
 #include "../perf_event.h"
 
+MODULE_LICENSE("GPL");
+
 #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)                \
 static ssize_t __cstate_##_var##_show(struct kobject *kobj,    \
                                struct kobj_attribute *attr,    \
@@ -106,22 +109,27 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
                                       struct device_attribute *attr,
                                       char *buf);
 
+/* Model -> events mapping */
+struct cstate_model {
+       unsigned long           core_events;
+       unsigned long           pkg_events;
+       unsigned long           quirks;
+};
+
+/* Quirk flags */
+#define SLM_PKG_C6_USE_C7_MSR  (1UL << 0)
+
 struct perf_cstate_msr {
        u64     msr;
        struct  perf_pmu_events_attr *attr;
-       bool    (*test)(int idx);
 };
 
 
 /* cstate_core PMU */
-
 static struct pmu cstate_core_pmu;
 static bool has_cstate_core;
 
-enum perf_cstate_core_id {
-       /*
-        * cstate_core events
-        */
+enum perf_cstate_core_events {
        PERF_CSTATE_CORE_C1_RES = 0,
        PERF_CSTATE_CORE_C3_RES,
        PERF_CSTATE_CORE_C6_RES,
@@ -130,69 +138,16 @@ enum perf_cstate_core_id {
        PERF_CSTATE_CORE_EVENT_MAX,
 };
 
-bool test_core(int idx)
-{
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-           boot_cpu_data.x86 != 6)
-               return false;
-
-       switch (boot_cpu_data.x86_model) {
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-               if (idx == PERF_CSTATE_CORE_C3_RES ||
-                   idx == PERF_CSTATE_CORE_C6_RES)
-                       return true;
-               break;
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 69: /* 22nm Haswell ULT */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-               if (idx == PERF_CSTATE_CORE_C3_RES ||
-                   idx == PERF_CSTATE_CORE_C6_RES ||
-                   idx == PERF_CSTATE_CORE_C7_RES)
-                       return true;
-               break;
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-       case 76: /* 14nm Atom "Airmont"                   */
-               if (idx == PERF_CSTATE_CORE_C1_RES ||
-                   idx == PERF_CSTATE_CORE_C6_RES)
-                       return true;
-               break;
-       }
-
-       return false;
-}
-
 PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
 PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
 PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
 PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
 
 static struct perf_cstate_msr core_msr[] = {
-       [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,          &evattr_cstate_core_c1, test_core, },
-       [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,    &evattr_cstate_core_c3, test_core, },
-       [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,    &evattr_cstate_core_c6, test_core, },
-       [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,    &evattr_cstate_core_c7, test_core, },
+       [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,          &evattr_cstate_core_c1 },
+       [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,    &evattr_cstate_core_c3 },
+       [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,    &evattr_cstate_core_c6 },
+       [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,    &evattr_cstate_core_c7 },
 };
 
 static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
@@ -234,18 +189,11 @@ static const struct attribute_group *core_attr_groups[] = {
        NULL,
 };
 
-/* cstate_core PMU end */
-
-
 /* cstate_pkg PMU */
-
 static struct pmu cstate_pkg_pmu;
 static bool has_cstate_pkg;
 
-enum perf_cstate_pkg_id {
-       /*
-        * cstate_pkg events
-        */
+enum perf_cstate_pkg_events {
        PERF_CSTATE_PKG_C2_RES = 0,
        PERF_CSTATE_PKG_C3_RES,
        PERF_CSTATE_PKG_C6_RES,
@@ -257,69 +205,6 @@ enum perf_cstate_pkg_id {
        PERF_CSTATE_PKG_EVENT_MAX,
 };
 
-bool test_pkg(int idx)
-{
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-           boot_cpu_data.x86 != 6)
-               return false;
-
-       switch (boot_cpu_data.x86_model) {
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-               if (idx == PERF_CSTATE_CORE_C3_RES ||
-                   idx == PERF_CSTATE_CORE_C6_RES ||
-                   idx == PERF_CSTATE_CORE_C7_RES)
-                       return true;
-               break;
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-               if (idx == PERF_CSTATE_PKG_C2_RES ||
-                   idx == PERF_CSTATE_PKG_C3_RES ||
-                   idx == PERF_CSTATE_PKG_C6_RES ||
-                   idx == PERF_CSTATE_PKG_C7_RES)
-                       return true;
-               break;
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-       case 76: /* 14nm Atom "Airmont"                   */
-               if (idx == PERF_CSTATE_CORE_C6_RES)
-                       return true;
-               break;
-       case 69: /* 22nm Haswell ULT */
-               if (idx == PERF_CSTATE_PKG_C2_RES ||
-                   idx == PERF_CSTATE_PKG_C3_RES ||
-                   idx == PERF_CSTATE_PKG_C6_RES ||
-                   idx == PERF_CSTATE_PKG_C7_RES ||
-                   idx == PERF_CSTATE_PKG_C8_RES ||
-                   idx == PERF_CSTATE_PKG_C9_RES ||
-                   idx == PERF_CSTATE_PKG_C10_RES)
-                       return true;
-               break;
-       }
-
-       return false;
-}
-
 PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
 PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
 PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
@@ -329,13 +214,13 @@ PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
 PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
 
 static struct perf_cstate_msr pkg_msr[] = {
-       [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,      &evattr_cstate_pkg_c2,  test_pkg, },
-       [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,      &evattr_cstate_pkg_c3,  test_pkg, },
-       [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,      &evattr_cstate_pkg_c6,  test_pkg, },
-       [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,      &evattr_cstate_pkg_c7,  test_pkg, },
-       [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,      &evattr_cstate_pkg_c8,  test_pkg, },
-       [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,      &evattr_cstate_pkg_c9,  test_pkg, },
-       [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,    &evattr_cstate_pkg_c10, test_pkg, },
+       [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,      &evattr_cstate_pkg_c2 },
+       [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,      &evattr_cstate_pkg_c3 },
+       [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,      &evattr_cstate_pkg_c6 },
+       [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,      &evattr_cstate_pkg_c7 },
+       [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,      &evattr_cstate_pkg_c8 },
+       [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,      &evattr_cstate_pkg_c9 },
+       [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,    &evattr_cstate_pkg_c10 },
 };
 
 static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
@@ -366,8 +251,6 @@ static const struct attribute_group *pkg_attr_groups[] = {
        NULL,
 };
 
-/* cstate_pkg PMU end*/
-
 static ssize_t cstate_get_attr_cpumask(struct device *dev,
                                       struct device_attribute *attr,
                                       char *buf)
@@ -385,7 +268,7 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
 static int cstate_pmu_event_init(struct perf_event *event)
 {
        u64 cfg = event->attr.config;
-       int ret = 0;
+       int cpu;
 
        if (event->attr.type != event->pmu->type)
                return -ENOENT;
@@ -400,26 +283,36 @@ static int cstate_pmu_event_init(struct perf_event *event)
            event->attr.sample_period) /* no sampling */
                return -EINVAL;
 
+       if (event->cpu < 0)
+               return -EINVAL;
+
        if (event->pmu == &cstate_core_pmu) {
                if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
                        return -EINVAL;
                if (!core_msr[cfg].attr)
                        return -EINVAL;
                event->hw.event_base = core_msr[cfg].msr;
+               cpu = cpumask_any_and(&cstate_core_cpu_mask,
+                                     topology_sibling_cpumask(event->cpu));
        } else if (event->pmu == &cstate_pkg_pmu) {
                if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
                        return -EINVAL;
                if (!pkg_msr[cfg].attr)
                        return -EINVAL;
                event->hw.event_base = pkg_msr[cfg].msr;
-       } else
+               cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
+                                     topology_core_cpumask(event->cpu));
+       } else {
                return -ENOENT;
+       }
+
+       if (cpu >= nr_cpu_ids)
+               return -ENODEV;
 
-       /* must be done before validate_group */
+       event->cpu = cpu;
        event->hw.config = cfg;
        event->hw.idx = -1;
-
-       return ret;
+       return 0;
 }
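
[Editor's note: the init path above pins the event to the designated reader for its domain: the core PMU resolves event->cpu through topology_sibling_cpumask(), the package PMU through topology_core_cpumask(). A minimal userspace sketch of opening one of these counting events follows; the read_pmu_type() helper and hard-coded paths are illustrative, not part of this patch, and config 0x02 is c6-residency per the event attributes above. Needs root or CAP_PERFMON.]

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>

	/* Read the dynamically assigned PMU type from sysfs (helper, hypothetical). */
	static int read_pmu_type(const char *pmu)
	{
		char path[128];
		FILE *f;
		int type = -1;

		snprintf(path, sizeof(path),
			 "/sys/bus/event_source/devices/%s/type", pmu);
		f = fopen(path, "r");
		if (!f)
			return -1;
		if (fscanf(f, "%d", &type) != 1)
			type = -1;
		fclose(f);
		return type;
	}

	int main(void)
	{
		struct perf_event_attr attr;
		uint64_t count;
		int fd, type = read_pmu_type("cstate_core");

		if (type < 0)
			return 1;

		memset(&attr, 0, sizeof(attr));
		attr.type = type;
		attr.size = sizeof(attr);
		attr.config = 0x02;	/* c6-residency, per the event attrs above */

		/* cpu must be >= 0; the kernel redirects to the designated reader. */
		fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
			     -1 /* group */, 0 /* flags */);
		if (fd < 0)
			return 1;

		sleep(1);
		if (read(fd, &count, sizeof(count)) == sizeof(count))
			printf("core C6 residency: %llu\n", (unsigned long long)count);
		close(fd);
		return 0;
	}

[Once the module is loaded, the same event should also be reachable from the perf tool, e.g. perf stat -e cstate_core/c6-residency/ -a sleep 1.]
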
 
 static inline u64 cstate_pmu_read_counter(struct perf_event *event)
@@ -469,172 +362,91 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode)
        return 0;
 }
 
+/*
+ * Check if the exiting cpu is the designated reader. If so, migrate the
+ * events to a valid target if one is available.
+ */
 static void cstate_cpu_exit(int cpu)
 {
-       int i, id, target;
+       unsigned int target;
 
-       /* cpu exit for cstate core */
-       if (has_cstate_core) {
-               id = topology_core_id(cpu);
-               target = -1;
-
-               for_each_online_cpu(i) {
-                       if (i == cpu)
-                               continue;
-                       if (id == topology_core_id(i)) {
-                               target = i;
-                               break;
-                       }
-               }
-               if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
+       if (has_cstate_core &&
+           cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
+
+               target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+               /* Migrate events if there is a valid target */
+               if (target < nr_cpu_ids) {
                        cpumask_set_cpu(target, &cstate_core_cpu_mask);
-               WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
-               if (target >= 0)
                        perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+               }
        }
 
-       /* cpu exit for cstate pkg */
-       if (has_cstate_pkg) {
-               id = topology_physical_package_id(cpu);
-               target = -1;
-
-               for_each_online_cpu(i) {
-                       if (i == cpu)
-                               continue;
-                       if (id == topology_physical_package_id(i)) {
-                               target = i;
-                               break;
-                       }
-               }
-               if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
+       if (has_cstate_pkg &&
+           cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
+
+               target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+               /* Migrate events if there is a valid target */
+               if (target < nr_cpu_ids) {
                        cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
-               WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
-               if (target >= 0)
                        perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+               }
        }
 }
 
 static void cstate_cpu_init(int cpu)
 {
-       int i, id;
+       unsigned int target;
 
-       /* cpu init for cstate core */
-       if (has_cstate_core) {
-               id = topology_core_id(cpu);
-               for_each_cpu(i, &cstate_core_cpu_mask) {
-                       if (id == topology_core_id(i))
-                               break;
-               }
-               if (i >= nr_cpu_ids)
-                       cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
-       }
+       /*
+        * If this is the first online thread of that core, set it in
+        * the core cpu mask as the designated reader.
+        */
+       target = cpumask_any_and(&cstate_core_cpu_mask,
+                                topology_sibling_cpumask(cpu));
 
-       /* cpu init for cstate pkg */
-       if (has_cstate_pkg) {
-               id = topology_physical_package_id(cpu);
-               for_each_cpu(i, &cstate_pkg_cpu_mask) {
-                       if (id == topology_physical_package_id(i))
-                               break;
-               }
-               if (i >= nr_cpu_ids)
-                       cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
-       }
+       if (has_cstate_core && target >= nr_cpu_ids)
+               cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+
+       /*
+        * If this is the first online thread of that package, set it
+        * in the package cpu mask as the designated reader.
+        */
+       target = cpumask_any_and(&cstate_pkg_cpu_mask,
+                                topology_core_cpumask(cpu));
+       if (has_cstate_pkg && target >= nr_cpu_ids)
+               cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
 }
 
 static int cstate_cpu_notifier(struct notifier_block *self,
-                                 unsigned long action, void *hcpu)
+                              unsigned long action, void *hcpu)
 {
        unsigned int cpu = (long)hcpu;
 
        switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               break;
        case CPU_STARTING:
                cstate_cpu_init(cpu);
                break;
-       case CPU_UP_CANCELED:
-       case CPU_DYING:
-               break;
-       case CPU_ONLINE:
-       case CPU_DEAD:
-               break;
        case CPU_DOWN_PREPARE:
                cstate_cpu_exit(cpu);
                break;
        default:
                break;
        }
-
        return NOTIFY_OK;
 }
 
-/*
- * Probe the cstate events and insert the available one into sysfs attrs
- * Return false if there is no available events.
- */
-static bool cstate_probe_msr(struct perf_cstate_msr *msr,
-                            struct attribute   **events_attrs,
-                            int max_event_nr)
-{
-       int i, j = 0;
-       u64 val;
-
-       /* Probe the cstate events. */
-       for (i = 0; i < max_event_nr; i++) {
-               if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
-                       msr[i].attr = NULL;
-       }
-
-       /* List remaining events in the sysfs attrs. */
-       for (i = 0; i < max_event_nr; i++) {
-               if (msr[i].attr)
-                       events_attrs[j++] = &msr[i].attr->attr.attr;
-       }
-       events_attrs[j] = NULL;
-
-       return (j > 0) ? true : false;
-}
-
-static int __init cstate_init(void)
-{
-       /* SLM has different MSR for PKG C6 */
-       switch (boot_cpu_data.x86_model) {
-       case 55:
-       case 76:
-       case 77:
-               pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
-       }
-
-       if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
-               has_cstate_core = true;
-
-       if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
-               has_cstate_pkg = true;
-
-       return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
-}
-
-static void __init cstate_cpumask_init(void)
-{
-       int cpu;
-
-       cpu_notifier_register_begin();
-
-       for_each_online_cpu(cpu)
-               cstate_cpu_init(cpu);
-
-       __perf_cpu_notifier(cstate_cpu_notifier);
-
-       cpu_notifier_register_done();
-}
+static struct notifier_block cstate_cpu_nb = {
+       .notifier_call  = cstate_cpu_notifier,
+       .priority       = CPU_PRI_PERF + 1,
+};
 
 static struct pmu cstate_core_pmu = {
        .attr_groups    = core_attr_groups,
        .name           = "cstate_core",
        .task_ctx_nr    = perf_invalid_context,
        .event_init     = cstate_pmu_event_init,
-       .add            = cstate_pmu_event_add, /* must have */
-       .del            = cstate_pmu_event_del, /* must have */
+       .add            = cstate_pmu_event_add,
+       .del            = cstate_pmu_event_del,
        .start          = cstate_pmu_event_start,
        .stop           = cstate_pmu_event_stop,
        .read           = cstate_pmu_event_update,
@@ -646,49 +458,203 @@ static struct pmu cstate_pkg_pmu = {
        .name           = "cstate_pkg",
        .task_ctx_nr    = perf_invalid_context,
        .event_init     = cstate_pmu_event_init,
-       .add            = cstate_pmu_event_add, /* must have */
-       .del            = cstate_pmu_event_del, /* must have */
+       .add            = cstate_pmu_event_add,
+       .del            = cstate_pmu_event_del,
        .start          = cstate_pmu_event_start,
        .stop           = cstate_pmu_event_stop,
        .read           = cstate_pmu_event_update,
        .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
 };
 
-static void __init cstate_pmus_register(void)
+static const struct cstate_model nhm_cstates __initconst = {
+       .core_events            = BIT(PERF_CSTATE_CORE_C3_RES) |
+                                 BIT(PERF_CSTATE_CORE_C6_RES),
+
+       .pkg_events             = BIT(PERF_CSTATE_PKG_C3_RES) |
+                                 BIT(PERF_CSTATE_PKG_C6_RES) |
+                                 BIT(PERF_CSTATE_PKG_C7_RES),
+};
+
+static const struct cstate_model snb_cstates __initconst = {
+       .core_events            = BIT(PERF_CSTATE_CORE_C3_RES) |
+                                 BIT(PERF_CSTATE_CORE_C6_RES) |
+                                 BIT(PERF_CSTATE_CORE_C7_RES),
+
+       .pkg_events             = BIT(PERF_CSTATE_PKG_C2_RES) |
+                                 BIT(PERF_CSTATE_PKG_C3_RES) |
+                                 BIT(PERF_CSTATE_PKG_C6_RES) |
+                                 BIT(PERF_CSTATE_PKG_C7_RES),
+};
+
+static const struct cstate_model hswult_cstates __initconst = {
+       .core_events            = BIT(PERF_CSTATE_CORE_C3_RES) |
+                                 BIT(PERF_CSTATE_CORE_C6_RES) |
+                                 BIT(PERF_CSTATE_CORE_C7_RES),
+
+       .pkg_events             = BIT(PERF_CSTATE_PKG_C2_RES) |
+                                 BIT(PERF_CSTATE_PKG_C3_RES) |
+                                 BIT(PERF_CSTATE_PKG_C6_RES) |
+                                 BIT(PERF_CSTATE_PKG_C7_RES) |
+                                 BIT(PERF_CSTATE_PKG_C8_RES) |
+                                 BIT(PERF_CSTATE_PKG_C9_RES) |
+                                 BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
+static const struct cstate_model slm_cstates __initconst = {
+       .core_events            = BIT(PERF_CSTATE_CORE_C1_RES) |
+                                 BIT(PERF_CSTATE_CORE_C6_RES),
+
+       .pkg_events             = BIT(PERF_CSTATE_PKG_C6_RES),
+       .quirks                 = SLM_PKG_C6_USE_C7_MSR,
+};
+
+#define X86_CSTATES_MODEL(model, states)                               \
+       { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
+
+static const struct x86_cpu_id intel_cstates_match[] __initconst = {
+       X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM,    nhm_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates),
+
+       X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE,    nhm_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates),
+
+       X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE,   snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates),
+
+       X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE,   snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates),
+
+       X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X,    snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates),
+
+       X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
+
+       X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT,     slm_cstates),
+
+       X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE,   snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E,   snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X,      snb_cstates),
+
+       X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE,  snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
+       { },
+};
+MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
+
+/*
+ * Probe the cstate events and insert the available ones into sysfs attrs.
+ * Return false if there are no available events.
+ */
+static bool __init cstate_probe_msr(const unsigned long evmsk, int max,
+                                   struct perf_cstate_msr *msr,
+                                   struct attribute **attrs)
 {
-       int err;
+       bool found = false;
+       unsigned int bit;
+       u64 val;
+
+       for (bit = 0; bit < max; bit++) {
+               if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) {
+                       *attrs++ = &msr[bit].attr->attr.attr;
+                       found = true;
+               } else {
+                       msr[bit].attr = NULL;
+               }
+       }
+       *attrs = NULL;
+
+       return found;
+}
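
[Editor's note: worked through on the Nehalem mask defined below, evmsk = BIT(PERF_CSTATE_CORE_C3_RES) | BIT(PERF_CSTATE_CORE_C6_RES): if rdmsrl_safe() faults on the C3 MSR, core_msr[PERF_CSTATE_CORE_C3_RES].attr is cleared, the attrs array ends up { c6-residency, NULL }, and the function still returns true, so the core PMU is registered with the reduced event list and event_init() rejects the missing event via its NULL attr.]
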
+
+static int __init cstate_probe(const struct cstate_model *cm)
+{
+       /* SLM has a different MSR for PKG C6 */
+       if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
+               pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
+
+       has_cstate_core = cstate_probe_msr(cm->core_events,
+                                          PERF_CSTATE_CORE_EVENT_MAX,
+                                          core_msr, core_events_attrs);
+
+       has_cstate_pkg = cstate_probe_msr(cm->pkg_events,
+                                         PERF_CSTATE_PKG_EVENT_MAX,
+                                         pkg_msr, pkg_events_attrs);
+
+       return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
+}
+
+static inline void cstate_cleanup(void)
+{
+       if (has_cstate_core)
+               perf_pmu_unregister(&cstate_core_pmu);
+
+       if (has_cstate_pkg)
+               perf_pmu_unregister(&cstate_pkg_pmu);
+}
+
+static int __init cstate_init(void)
+{
+       int cpu, err;
+
+       cpu_notifier_register_begin();
+       for_each_online_cpu(cpu)
+               cstate_cpu_init(cpu);
 
        if (has_cstate_core) {
                err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
-               if (WARN_ON(err))
-                       pr_info("Failed to register PMU %s error %d\n",
-                               cstate_core_pmu.name, err);
+               if (err) {
+                       has_cstate_core = false;
+                       pr_info("Failed to register cstate core pmu\n");
+                       goto out;
+               }
        }
 
        if (has_cstate_pkg) {
                err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
-               if (WARN_ON(err))
-                       pr_info("Failed to register PMU %s error %d\n",
-                               cstate_pkg_pmu.name, err);
+               if (err) {
+                       has_cstate_pkg = false;
+                       pr_info("Failed to register cstate pkg pmu\n");
+                       cstate_cleanup();
+                       goto out;
+               }
        }
+       __register_cpu_notifier(&cstate_cpu_nb);
+out:
+       cpu_notifier_register_done();
+       return err;
 }
 
 static int __init cstate_pmu_init(void)
 {
+       const struct x86_cpu_id *id;
        int err;
 
-       if (cpu_has_hypervisor)
+       if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+               return -ENODEV;
+
+       id = x86_match_cpu(intel_cstates_match);
+       if (!id)
                return -ENODEV;
 
-       err = cstate_init();
+       err = cstate_probe((const struct cstate_model *) id->driver_data);
        if (err)
                return err;
 
-       cstate_cpumask_init();
-
-       cstate_pmus_register();
-
-       return 0;
+       return cstate_init();
 }
+module_init(cstate_pmu_init);
 
-device_initcall(cstate_pmu_init);
+static void __exit cstate_pmu_exit(void)
+{
+       cpu_notifier_register_begin();
+       __unregister_cpu_notifier(&cstate_cpu_nb);
+       cstate_cleanup();
+       cpu_notifier_register_done();
+}
+module_exit(cstate_pmu_exit);
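
[Editor's note: the counters this PMU exposes are plain residency MSRs; cstate_pmu_read_counter() is just an rdmsrl() of hw.event_base. A quick way to sanity-check the perf counts is to read the MSR directly through the msr driver. The sketch below assumes CONFIG_X86_MSR (/dev/cpu/N/msr) and the SDM-documented address 0x3fd for MSR_CORE_C6_RESIDENCY; neither is defined by this patch. Needs root.]

	#include <stdio.h>
	#include <stdint.h>
	#include <fcntl.h>
	#include <unistd.h>

	#define MSR_CORE_C6_RESIDENCY	0x3fd	/* per the Intel SDM; assumption */

	int main(void)
	{
		uint64_t val;
		int fd = open("/dev/cpu/0/msr", O_RDONLY);

		if (fd < 0)
			return 1;
		/* The msr driver reads the MSR at the given file offset. */
		if (pread(fd, &val, sizeof(val), MSR_CORE_C6_RESIDENCY) == sizeof(val))
			printf("raw core C6 residency: %llu\n",
			       (unsigned long long)val);
		close(fd);
		return 0;
	}

[Note that the raw value and a perf count differ by the baseline snapshotted at event start: cstate_pmu_event_start() records prev_count and the update path accumulates deltas on the designated reader.]
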