]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - arch/x86/events/intel/cstate.c
Merge branch 'kbuild' of git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild
[karo-tx-linux.git] / arch / x86 / events / intel / cstate.c
1 /*
2  * perf_event_intel_cstate.c: support cstate residency counters
3  *
4  * Copyright (C) 2015, Intel Corp.
5  * Author: Kan Liang (kan.liang@intel.com)
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Library General Public
9  * License as published by the Free Software Foundation; either
10  * version 2 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Library General Public License for more details.
16  *
17  */
18
19 /*
20  * This file exports cstate related free running (read-only) counters
21  * for perf. These counters may be used simultaneously by other tools,
22  * such as turbostat. However, it still makes sense to implement them
23  * in perf, because we can conveniently collect them together with
24  * other events, and use them from tools without special MSR
25  * access code.
26  *
27  * The events only support system-wide mode counting. There is no
28  * sampling support because it is not supported by the hardware.
29  *
30  * According to counters' scope and category, two PMUs are registered
31  * with the perf_event core subsystem.
32  *  - 'cstate_core': The counter is available for each physical core.
33  *    The counters include CORE_C*_RESIDENCY.
34  *  - 'cstate_pkg': The counter is available for each physical package.
35  *    The counters include PKG_C*_RESIDENCY.
36  *
37  * All of these counters are specified in the Intel® 64 and IA-32
38  * Architectures Software Developer's Manual Vol3b.
39  *
40  * Model specific counters:
41  *      MSR_CORE_C1_RES: CORE C1 Residency Counter
42  *                       perf code: 0x00
43  *                       Available model: SLM,AMT
44  *                       Scope: Core (each processor core has a MSR)
45  *      MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
46  *                             perf code: 0x01
47  *                             Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
48  *                             Scope: Core
49  *      MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
50  *                             perf code: 0x02
51  *                             Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
52  *                             Scope: Core
53  *      MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
54  *                             perf code: 0x03
55  *                             Available model: SNB,IVB,HSW,BDW,SKL
56  *                             Scope: Core
57  *      MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
58  *                             perf code: 0x00
59  *                             Available model: SNB,IVB,HSW,BDW,SKL
60  *                             Scope: Package (physical package)
61  *      MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
62  *                             perf code: 0x01
63  *                             Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
64  *                             Scope: Package (physical package)
65  *      MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
66  *                             perf code: 0x02
67  *                             Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
68  *                             Scope: Package (physical package)
69  *      MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
70  *                             perf code: 0x03
71  *                             Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
72  *                             Scope: Package (physical package)
73  *      MSR_PKG_C8_RESIDENCY:  Package C8 Residency Counter.
74  *                             perf code: 0x04
75  *                             Available model: HSW ULT only
76  *                             Scope: Package (physical package)
77  *      MSR_PKG_C9_RESIDENCY:  Package C9 Residency Counter.
78  *                             perf code: 0x05
79  *                             Available model: HSW ULT only
80  *                             Scope: Package (physical package)
81  *      MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
82  *                             perf code: 0x06
83  *                             Available model: HSW ULT only
84  *                             Scope: Package (physical package)
85  *
86  */
87
88 #include <linux/module.h>
89 #include <linux/slab.h>
90 #include <linux/perf_event.h>
91 #include <asm/cpu_device_id.h>
92 #include "../perf_event.h"
93
94 MODULE_LICENSE("GPL");
95
/*
 * Define a read-only (0444) kobj_attribute called format_attr_<_var>,
 * exposed under the sysfs name <_name>. Its show routine prints the string
 * literal <_format> followed by a newline into the supplied page buffer.
 * The BUILD_BUG_ON rejects, at compile time, a format string whose size
 * reaches PAGE_SIZE.
 */
#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)         \
static ssize_t __cstate_##_var##_show(struct kobject *kobj,     \
                                struct kobj_attribute *attr,    \
                                char *page)                     \
{                                                               \
        BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);             \
        return sprintf(page, _format "\n");                     \
}                                                               \
static struct kobj_attribute format_attr_##_var =               \
        __ATTR(_name, 0444, __cstate_##_var##_show, NULL)
106
/*
 * Forward declaration: the "cpumask" device attribute below references this
 * show routine before its definition further down in the file.
 */
static ssize_t cstate_get_attr_cpumask(struct device *dev,
                                       struct device_attribute *attr,
                                       char *buf);
110
/*
 * Model -> events mapping.
 *
 * core_events and pkg_events are bitmasks of the events a CPU model is
 * expected to provide; bit N corresponds to value N of the
 * perf_cstate_core_events / perf_cstate_pkg_events enums. quirks is a
 * bitmask of the quirk flags defined below.
 */
struct cstate_model {
        unsigned long           core_events;
        unsigned long           pkg_events;
        unsigned long           quirks;
};

/* Quirk flags */
/* The PKG C6 event is read from MSR_PKG_C7_RESIDENCY (see cstate_probe()). */
#define SLM_PKG_C6_USE_C7_MSR   (1UL << 0)
120
/*
 * One residency counter: the MSR it is read from and the sysfs event
 * attribute that advertises it. cstate_probe_msr() sets attr to NULL for
 * counters that are not available, and cstate_pmu_event_init() refuses
 * events whose attr is NULL.
 */
struct perf_cstate_msr {
        u64     msr;
        struct  perf_pmu_events_attr *attr;
};
125
126
/* cstate_core PMU */
/* Forward declaration; the initialized definition follows the callbacks. */
static struct pmu cstate_core_pmu;
/* True once cstate_probe() found at least one usable core counter. */
static bool has_cstate_core;

/*
 * Core-scope events. The enum value is both the index into core_msr[] and
 * the event code user space passes in attr.config.
 */
enum perf_cstate_core_events {
        PERF_CSTATE_CORE_C1_RES = 0,
        PERF_CSTATE_CORE_C3_RES,
        PERF_CSTATE_CORE_C6_RES,
        PERF_CSTATE_CORE_C7_RES,

        /* Number of core events; not an event itself. */
        PERF_CSTATE_CORE_EVENT_MAX,
};
139
/*
 * sysfs event descriptions for the core PMU. The "event=" codes must match
 * the perf_cstate_core_events enum values.
 */
PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");

/* Core event -> { MSR, sysfs attribute }, indexed by perf_cstate_core_events. */
static struct perf_cstate_msr core_msr[] = {
        [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,          &evattr_cstate_core_c1 },
        [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,    &evattr_cstate_core_c3 },
        [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,    &evattr_cstate_core_c6 },
        [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,    &evattr_cstate_core_c7 },
};
151
/*
 * Filled in by cstate_probe_msr() with the attributes of the available
 * core events; sized for every event plus the NULL terminator.
 */
static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
        NULL,
};

/* "events" sysfs directory of the core PMU. */
static struct attribute_group core_events_attr_group = {
        .name = "events",
        .attrs = core_events_attrs,
};

/* "format/event" sysfs file: the event code occupies all 64 config bits. */
DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
static struct attribute *core_format_attrs[] = {
        &format_attr_core_event.attr,
        NULL,
};

/* "format" sysfs directory of the core PMU. */
static struct attribute_group core_format_attr_group = {
        .name = "format",
        .attrs = core_format_attrs,
};
171
/* Designated reader CPUs of the core PMU (maintained by the hotplug code). */
static cpumask_t cstate_core_cpu_mask;
/* Read-only "cpumask" attribute; the show routine serves both PMUs. */
static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);

static struct attribute *cstate_cpumask_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

/* Unnamed group: no .name is set; it is shared by both PMUs' group lists. */
static struct attribute_group cpumask_attr_group = {
        .attrs = cstate_cpumask_attrs,
};

/* All sysfs attribute groups of the core PMU. */
static const struct attribute_group *core_attr_groups[] = {
        &core_events_attr_group,
        &core_format_attr_group,
        &cpumask_attr_group,
        NULL,
};
190
/* cstate_pkg PMU */
/* Forward declaration; the initialized definition follows the callbacks. */
static struct pmu cstate_pkg_pmu;
/* True once cstate_probe() found at least one usable package counter. */
static bool has_cstate_pkg;

/*
 * Package-scope events. The enum value is both the index into pkg_msr[]
 * and the event code user space passes in attr.config.
 */
enum perf_cstate_pkg_events {
        PERF_CSTATE_PKG_C2_RES = 0,
        PERF_CSTATE_PKG_C3_RES,
        PERF_CSTATE_PKG_C6_RES,
        PERF_CSTATE_PKG_C7_RES,
        PERF_CSTATE_PKG_C8_RES,
        PERF_CSTATE_PKG_C9_RES,
        PERF_CSTATE_PKG_C10_RES,

        /* Number of package events; not an event itself. */
        PERF_CSTATE_PKG_EVENT_MAX,
};
206
/*
 * sysfs event descriptions for the package PMU. The "event=" codes must
 * match the perf_cstate_pkg_events enum values.
 */
PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");

/*
 * Package event -> { MSR, sysfs attribute }, indexed by
 * perf_cstate_pkg_events. Not const: cstate_probe() may redirect the C6
 * entry to another MSR and cstate_probe_msr() clears attr for missing ones.
 */
static struct perf_cstate_msr pkg_msr[] = {
        [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,      &evattr_cstate_pkg_c2 },
        [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,      &evattr_cstate_pkg_c3 },
        [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,      &evattr_cstate_pkg_c6 },
        [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,      &evattr_cstate_pkg_c7 },
        [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,      &evattr_cstate_pkg_c8 },
        [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,      &evattr_cstate_pkg_c9 },
        [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,    &evattr_cstate_pkg_c10 },
};
224
/*
 * Filled in by cstate_probe_msr() with the attributes of the available
 * package events; sized for every event plus the NULL terminator.
 */
static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
        NULL,
};

/* "events" sysfs directory of the package PMU. */
static struct attribute_group pkg_events_attr_group = {
        .name = "events",
        .attrs = pkg_events_attrs,
};

/* "format/event" sysfs file: the event code occupies all 64 config bits. */
DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
static struct attribute *pkg_format_attrs[] = {
        &format_attr_pkg_event.attr,
        NULL,
};
/* "format" sysfs directory of the package PMU. */
static struct attribute_group pkg_format_attr_group = {
        .name = "format",
        .attrs = pkg_format_attrs,
};
243
/* Designated reader CPUs of the package PMU (maintained by the hotplug code). */
static cpumask_t cstate_pkg_cpu_mask;

/* All sysfs attribute groups of the package PMU. */
static const struct attribute_group *pkg_attr_groups[] = {
        &pkg_events_attr_group,
        &pkg_format_attr_group,
        &cpumask_attr_group,
        NULL,
};
252
253 static ssize_t cstate_get_attr_cpumask(struct device *dev,
254                                        struct device_attribute *attr,
255                                        char *buf)
256 {
257         struct pmu *pmu = dev_get_drvdata(dev);
258
259         if (pmu == &cstate_core_pmu)
260                 return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
261         else if (pmu == &cstate_pkg_pmu)
262                 return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
263         else
264                 return 0;
265 }
266
267 static int cstate_pmu_event_init(struct perf_event *event)
268 {
269         u64 cfg = event->attr.config;
270         int cpu;
271
272         if (event->attr.type != event->pmu->type)
273                 return -ENOENT;
274
275         /* unsupported modes and filters */
276         if (event->attr.exclude_user   ||
277             event->attr.exclude_kernel ||
278             event->attr.exclude_hv     ||
279             event->attr.exclude_idle   ||
280             event->attr.exclude_host   ||
281             event->attr.exclude_guest  ||
282             event->attr.sample_period) /* no sampling */
283                 return -EINVAL;
284
285         if (event->cpu < 0)
286                 return -EINVAL;
287
288         if (event->pmu == &cstate_core_pmu) {
289                 if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
290                         return -EINVAL;
291                 if (!core_msr[cfg].attr)
292                         return -EINVAL;
293                 event->hw.event_base = core_msr[cfg].msr;
294                 cpu = cpumask_any_and(&cstate_core_cpu_mask,
295                                       topology_sibling_cpumask(event->cpu));
296         } else if (event->pmu == &cstate_pkg_pmu) {
297                 if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
298                         return -EINVAL;
299                 if (!pkg_msr[cfg].attr)
300                         return -EINVAL;
301                 event->hw.event_base = pkg_msr[cfg].msr;
302                 cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
303                                       topology_core_cpumask(event->cpu));
304         } else {
305                 return -ENOENT;
306         }
307
308         if (cpu >= nr_cpu_ids)
309                 return -ENODEV;
310
311         event->cpu = cpu;
312         event->hw.config = cfg;
313         event->hw.idx = -1;
314         return 0;
315 }
316
/*
 * Read the current raw value of the event's residency counter from the MSR
 * stored in event->hw.event_base by cstate_pmu_event_init().
 */
static inline u64 cstate_pmu_read_counter(struct perf_event *event)
{
        u64 val;

        rdmsrl(event->hw.event_base, val);
        return val;
}
324
325 static void cstate_pmu_event_update(struct perf_event *event)
326 {
327         struct hw_perf_event *hwc = &event->hw;
328         u64 prev_raw_count, new_raw_count;
329
330 again:
331         prev_raw_count = local64_read(&hwc->prev_count);
332         new_raw_count = cstate_pmu_read_counter(event);
333
334         if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
335                             new_raw_count) != prev_raw_count)
336                 goto again;
337
338         local64_add(new_raw_count - prev_raw_count, &event->count);
339 }
340
/*
 * Start counting: snapshot the current raw counter value as the baseline
 * for later updates. @mode is not used.
 */
static void cstate_pmu_event_start(struct perf_event *event, int mode)
{
        local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
}
345
/*
 * Stop counting: account the progress made since the last snapshot. The
 * counter itself is never written here, and @mode is not used.
 */
static void cstate_pmu_event_stop(struct perf_event *event, int mode)
{
        cstate_pmu_event_update(event);
}
350
/*
 * Remove the event from the PMU: stop it, which folds in the final count.
 * The caller's @mode is ignored; PERF_EF_UPDATE is always passed on.
 */
static void cstate_pmu_event_del(struct perf_event *event, int mode)
{
        cstate_pmu_event_stop(event, PERF_EF_UPDATE);
}
355
356 static int cstate_pmu_event_add(struct perf_event *event, int mode)
357 {
358         if (mode & PERF_EF_START)
359                 cstate_pmu_event_start(event, mode);
360
361         return 0;
362 }
363
364 /*
365  * Check if exiting cpu is the designated reader. If so migrate the
366  * events when there is a valid target available
367  */
368 static void cstate_cpu_exit(int cpu)
369 {
370         unsigned int target;
371
372         if (has_cstate_core &&
373             cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
374
375                 target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
376                 /* Migrate events if there is a valid target */
377                 if (target < nr_cpu_ids) {
378                         cpumask_set_cpu(target, &cstate_core_cpu_mask);
379                         perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
380                 }
381         }
382
383         if (has_cstate_pkg &&
384             cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
385
386                 target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
387                 /* Migrate events if there is a valid target */
388                 if (target < nr_cpu_ids) {
389                         cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
390                         perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
391                 }
392         }
393 }
394
395 static void cstate_cpu_init(int cpu)
396 {
397         unsigned int target;
398
399         /*
400          * If this is the first online thread of that core, set it in
401          * the core cpu mask as the designated reader.
402          */
403         target = cpumask_any_and(&cstate_core_cpu_mask,
404                                  topology_sibling_cpumask(cpu));
405
406         if (has_cstate_core && target >= nr_cpu_ids)
407                 cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
408
409         /*
410          * If this is the first online thread of that package, set it
411          * in the package cpu mask as the designated reader.
412          */
413         target = cpumask_any_and(&cstate_pkg_cpu_mask,
414                                  topology_core_cpumask(cpu));
415         if (has_cstate_pkg && target >= nr_cpu_ids)
416                 cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
417 }
418
419 static int cstate_cpu_notifier(struct notifier_block *self,
420                                unsigned long action, void *hcpu)
421 {
422         unsigned int cpu = (long)hcpu;
423
424         switch (action & ~CPU_TASKS_FROZEN) {
425         case CPU_STARTING:
426                 cstate_cpu_init(cpu);
427                 break;
428         case CPU_DOWN_PREPARE:
429                 cstate_cpu_exit(cpu);
430                 break;
431         default:
432                 break;
433         }
434         return NOTIFY_OK;
435 }
436
/*
 * Hotplug notifier block registered by cstate_init().
 * NOTE(review): priority CPU_PRI_PERF + 1 presumably orders this callback
 * relative to the perf core's own hotplug notifier - confirm.
 */
static struct notifier_block cstate_cpu_nb = {
        .notifier_call  = cstate_cpu_notifier,
        .priority       = CPU_PRI_PERF + 1,
};
441
/*
 * Core-scope PMU ("cstate_core"). It uses the same callbacks as the package
 * PMU; the two differ only in name and sysfs attribute groups.
 * NOTE(review): perf_invalid_context presumably restricts the PMU to
 * CPU-bound (non per-task) events - confirm against the perf core.
 */
static struct pmu cstate_core_pmu = {
        .attr_groups    = core_attr_groups,
        .name           = "cstate_core",
        .task_ctx_nr    = perf_invalid_context,
        .event_init     = cstate_pmu_event_init,
        .add            = cstate_pmu_event_add,
        .del            = cstate_pmu_event_del,
        .start          = cstate_pmu_event_start,
        .stop           = cstate_pmu_event_stop,
        .read           = cstate_pmu_event_update,
        .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
};

/* Package-scope PMU ("cstate_pkg"); same callbacks as the core PMU. */
static struct pmu cstate_pkg_pmu = {
        .attr_groups    = pkg_attr_groups,
        .name           = "cstate_pkg",
        .task_ctx_nr    = perf_invalid_context,
        .event_init     = cstate_pmu_event_init,
        .add            = cstate_pmu_event_add,
        .del            = cstate_pmu_event_del,
        .start          = cstate_pmu_event_start,
        .stop           = cstate_pmu_event_stop,
        .read           = cstate_pmu_event_update,
        .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
};
467
/*
 * Per-model event sets. Each descriptor lists the events a model is
 * expected to have; cstate_probe_msr() still verifies every MSR before the
 * event is exposed. The descriptors are __initconst and only used at init.
 */

/* Used for the Nehalem and Westmere entries of the match table. */
static const struct cstate_model nhm_cstates __initconst = {
        .core_events            = BIT(PERF_CSTATE_CORE_C3_RES) |
                                  BIT(PERF_CSTATE_CORE_C6_RES),

        .pkg_events             = BIT(PERF_CSTATE_PKG_C3_RES) |
                                  BIT(PERF_CSTATE_PKG_C6_RES) |
                                  BIT(PERF_CSTATE_PKG_C7_RES),
};

/*
 * Used for the SandyBridge, IvyBridge, Haswell (except ULT), Broadwell and
 * Skylake entries of the match table.
 */
static const struct cstate_model snb_cstates __initconst = {
        .core_events            = BIT(PERF_CSTATE_CORE_C3_RES) |
                                  BIT(PERF_CSTATE_CORE_C6_RES) |
                                  BIT(PERF_CSTATE_CORE_C7_RES),

        .pkg_events             = BIT(PERF_CSTATE_PKG_C2_RES) |
                                  BIT(PERF_CSTATE_PKG_C3_RES) |
                                  BIT(PERF_CSTATE_PKG_C6_RES) |
                                  BIT(PERF_CSTATE_PKG_C7_RES),
};

/* Haswell ULT: the snb set plus package C8, C9 and C10. */
static const struct cstate_model hswult_cstates __initconst = {
        .core_events            = BIT(PERF_CSTATE_CORE_C3_RES) |
                                  BIT(PERF_CSTATE_CORE_C6_RES) |
                                  BIT(PERF_CSTATE_CORE_C7_RES),

        .pkg_events             = BIT(PERF_CSTATE_PKG_C2_RES) |
                                  BIT(PERF_CSTATE_PKG_C3_RES) |
                                  BIT(PERF_CSTATE_PKG_C6_RES) |
                                  BIT(PERF_CSTATE_PKG_C7_RES) |
                                  BIT(PERF_CSTATE_PKG_C8_RES) |
                                  BIT(PERF_CSTATE_PKG_C9_RES) |
                                  BIT(PERF_CSTATE_PKG_C10_RES),
};

/*
 * Used for the Atom (Silvermont, Avoton/Rangely, Airmont) entries; the
 * quirk makes the package C6 event read MSR_PKG_C7_RESIDENCY.
 */
static const struct cstate_model slm_cstates __initconst = {
        .core_events            = BIT(PERF_CSTATE_CORE_C1_RES) |
                                  BIT(PERF_CSTATE_CORE_C6_RES),

        .pkg_events             = BIT(PERF_CSTATE_PKG_C6_RES),
        .quirks                 = SLM_PKG_C6_USE_C7_MSR,
};
509
/*
 * Build one x86_cpu_id entry: Intel, family 6, the given model, any
 * feature. The address of the model descriptor travels in the last field
 * as an unsigned long and is cast back in cstate_pmu_init().
 */
#define X86_CSTATES_MODEL(model, states)                                \
        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }

/* Supported CPU models; terminated by an empty entry. */
static const struct x86_cpu_id intel_cstates_match[] __initconst = {
        X86_CSTATES_MODEL(30, nhm_cstates),    /* 45nm Nehalem              */
        X86_CSTATES_MODEL(26, nhm_cstates),    /* 45nm Nehalem-EP           */
        X86_CSTATES_MODEL(46, nhm_cstates),    /* 45nm Nehalem-EX           */

        X86_CSTATES_MODEL(37, nhm_cstates),    /* 32nm Westmere             */
        X86_CSTATES_MODEL(44, nhm_cstates),    /* 32nm Westmere-EP          */
        X86_CSTATES_MODEL(47, nhm_cstates),    /* 32nm Westmere-EX          */

        X86_CSTATES_MODEL(42, snb_cstates),    /* 32nm SandyBridge          */
        X86_CSTATES_MODEL(45, snb_cstates),    /* 32nm SandyBridge-E/EN/EP  */

        X86_CSTATES_MODEL(58, snb_cstates),    /* 22nm IvyBridge            */
        X86_CSTATES_MODEL(62, snb_cstates),    /* 22nm IvyBridge-EP/EX      */

        X86_CSTATES_MODEL(60, snb_cstates),    /* 22nm Haswell Core         */
        X86_CSTATES_MODEL(63, snb_cstates),    /* 22nm Haswell Server       */
        X86_CSTATES_MODEL(70, snb_cstates),    /* 22nm Haswell + GT3e       */

        X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT          */

        X86_CSTATES_MODEL(55, slm_cstates),    /* 22nm Atom Silvermont      */
        X86_CSTATES_MODEL(77, slm_cstates),    /* 22nm Atom Avoton/Rangely  */
        X86_CSTATES_MODEL(76, slm_cstates),    /* 22nm Atom Airmont         */

        X86_CSTATES_MODEL(61, snb_cstates),    /* 14nm Broadwell Core-M     */
        X86_CSTATES_MODEL(86, snb_cstates),    /* 14nm Broadwell Xeon D     */
        X86_CSTATES_MODEL(71, snb_cstates),    /* 14nm Broadwell + GT3e     */
        X86_CSTATES_MODEL(79, snb_cstates),    /* 14nm Broadwell Server     */

        X86_CSTATES_MODEL(78, snb_cstates),    /* 14nm Skylake Mobile       */
        X86_CSTATES_MODEL(94, snb_cstates),    /* 14nm Skylake Desktop      */
        { },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
548
549 /*
550  * Probe the cstate events and insert the available one into sysfs attrs
551  * Return false if there are no available events.
552  */
553 static bool __init cstate_probe_msr(const unsigned long evmsk, int max,
554                                    struct perf_cstate_msr *msr,
555                                    struct attribute **attrs)
556 {
557         bool found = false;
558         unsigned int bit;
559         u64 val;
560
561         for (bit = 0; bit < max; bit++) {
562                 if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) {
563                         *attrs++ = &msr[bit].attr->attr.attr;
564                         found = true;
565                 } else {
566                         msr[bit].attr = NULL;
567                 }
568         }
569         *attrs = NULL;
570
571         return found;
572 }
573
574 static int __init cstate_probe(const struct cstate_model *cm)
575 {
576         /* SLM has different MSR for PKG C6 */
577         if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
578                 pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
579
580         has_cstate_core = cstate_probe_msr(cm->core_events,
581                                            PERF_CSTATE_CORE_EVENT_MAX,
582                                            core_msr, core_events_attrs);
583
584         has_cstate_pkg = cstate_probe_msr(cm->pkg_events,
585                                           PERF_CSTATE_PKG_EVENT_MAX,
586                                           pkg_msr, pkg_events_attrs);
587
588         return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
589 }
590
/*
 * Unregister whichever PMUs are flagged as present. cstate_init() clears
 * the flag of a PMU whose registration failed, so such a PMU is skipped.
 */
static inline void cstate_cleanup(void)
{
        if (has_cstate_core)
                perf_pmu_unregister(&cstate_core_pmu);

        if (has_cstate_pkg)
                perf_pmu_unregister(&cstate_pkg_pmu);
}
599
600 static int __init cstate_init(void)
601 {
602         int cpu, err;
603
604         cpu_notifier_register_begin();
605         for_each_online_cpu(cpu)
606                 cstate_cpu_init(cpu);
607
608         if (has_cstate_core) {
609                 err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
610                 if (err) {
611                         has_cstate_core = false;
612                         pr_info("Failed to register cstate core pmu\n");
613                         goto out;
614                 }
615         }
616
617         if (has_cstate_pkg) {
618                 err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
619                 if (err) {
620                         has_cstate_pkg = false;
621                         pr_info("Failed to register cstate pkg pmu\n");
622                         cstate_cleanup();
623                         goto out;
624                 }
625         }
626         __register_cpu_notifier(&cstate_cpu_nb);
627 out:
628         cpu_notifier_register_done();
629         return err;
630 }
631
632 static int __init cstate_pmu_init(void)
633 {
634         const struct x86_cpu_id *id;
635         int err;
636
637         if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
638                 return -ENODEV;
639
640         id = x86_match_cpu(intel_cstates_match);
641         if (!id)
642                 return -ENODEV;
643
644         err = cstate_probe((const struct cstate_model *) id->driver_data);
645         if (err)
646                 return err;
647
648         return cstate_init();
649 }
650 module_init(cstate_pmu_init);
651
/*
 * Module exit: remove the hotplug notifier and unregister the PMUs, all
 * inside one cpu_notifier_register_begin()/done() section.
 */
static void __exit cstate_pmu_exit(void)
{
        cpu_notifier_register_begin();
        __unregister_cpu_notifier(&cstate_cpu_nb);
        cstate_cleanup();
        cpu_notifier_register_done();
}
module_exit(cstate_pmu_exit);