]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - tools/power/x86/turbostat/turbostat.c
tools/power turbostat: fix decoding for GLM, DNV, SKX turbo-ratio limits
[karo-tx-linux.git] / tools / power / x86 / turbostat / turbostat.c
1 /*
2  * turbostat -- show CPU frequency and C-state residency
3  * on modern Intel turbo-capable processors.
4  *
5  * Copyright (c) 2013 Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #define _GNU_SOURCE
23 #include MSRHEADER
24 #include INTEL_FAMILY_HEADER
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <err.h>
28 #include <unistd.h>
29 #include <sys/types.h>
30 #include <sys/wait.h>
31 #include <sys/stat.h>
32 #include <sys/resource.h>
33 #include <fcntl.h>
34 #include <signal.h>
35 #include <sys/time.h>
36 #include <stdlib.h>
37 #include <getopt.h>
38 #include <dirent.h>
39 #include <string.h>
40 #include <ctype.h>
41 #include <sched.h>
42 #include <time.h>
43 #include <cpuid.h>
44 #include <linux/capability.h>
45 #include <errno.h>
46
/*
 * File-scope configuration flags and run-time state shared by the
 * functions in this file.  Only variables whose use is visible nearby
 * are annotated; the rest are set and consumed elsewhere.
 */
47 char *proc_stat = "/proc/stat";
48 FILE *outf;
49 int *fd_percpu;	/* per-CPU cache of open MSR descriptors; a 0 entry means "not opened yet" (see get_msr_fd) */
50 struct timespec interval_ts = {5, 0};	/* initial value: 5 s, 0 ns */
51 unsigned int debug;
52 unsigned int quiet;
53 unsigned int rapl_joules;	/* when set, print_header emits the *_J columns instead of *Watt */
54 unsigned int summary_only;
55 unsigned int dump_only;
56 unsigned int do_snb_cstates;
57 unsigned int do_knl_cstates;	/* when set, the CPU%c3 column is suppressed */
58 unsigned int do_skl_residency;	/* when set, Totl%C0/Any%C0/GFX%C0/CPUGFX% columns are printed */
59 unsigned int do_slm_cstates;	/* when set, the CPU%c3 column is suppressed */
60 unsigned int use_c1_residency_msr;
61 unsigned int has_aperf;
62 unsigned int has_epb;
63 unsigned int do_irtl_snb;
64 unsigned int do_irtl_hsw;
65 unsigned int units = 1000000;   /* MHz etc */
66 unsigned int genuine_intel;
67 unsigned int has_invariant_tsc;
68 unsigned int do_nhm_platform_info;
69 unsigned int no_MSR_MISC_PWR_MGMT;
70 unsigned int aperf_mperf_multiplier = 1;
71 double bclk;
72 double base_hz;	/* used for Bzy_MHz in format_counters when has_base_hz is set */
73 unsigned int has_base_hz;
74 double tsc_tweak = 1.0;	/* scale factor applied to the TSC delta in format_counters */
75 unsigned int show_pkg_only;	/* format_counters prints only rows flagged CPU_IS_FIRST_CORE_IN_PACKAGE */
76 unsigned int show_core_only;	/* format_counters prints only rows flagged CPU_IS_FIRST_THREAD_IN_CORE */
77 char *output_buffer, *outp;	/* outp is the append cursor: every sprintf(outp, ...) advances it by the bytes written */
78 unsigned int do_rapl;
79 unsigned int do_dts;
80 unsigned int do_ptm;
81 unsigned long long  gfx_cur_rc6_ms;
82 unsigned int gfx_cur_mhz;
83 unsigned int tcc_activation_temp;
84 unsigned int tcc_activation_temp_override;
85 double rapl_power_units, rapl_time_units;
86 double rapl_dram_energy_units, rapl_energy_units;
87 double rapl_joule_counter_range;
88 unsigned int do_core_perf_limit_reasons;
89 unsigned int do_gfx_perf_limit_reasons;
90 unsigned int do_ring_perf_limit_reasons;
91 unsigned int crystal_hz;
92 unsigned long long tsc_hz;
93 int base_cpu;
94 double discover_bclk(unsigned int family, unsigned int model);
95 unsigned int has_hwp;   /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
96                         /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
97 unsigned int has_hwp_notify;            /* IA32_HWP_INTERRUPT */
98 unsigned int has_hwp_activity_window;   /* IA32_HWP_REQUEST[bits 41:32] */
99 unsigned int has_hwp_epp;               /* IA32_HWP_REQUEST[bits 31:24] */
100 unsigned int has_hwp_pkg;               /* IA32_HWP_REQUEST_PKG */
101 unsigned int has_misc_feature_control;
102
/*
 * RAPL capability bit flags.  The comment under each flag lists the
 * address and name of the MSR(s) that the capability covers.
 * NOTE(review): presumably these bits are OR-ed into do_rapl -- confirm
 * against the code that probes RAPL support.
 */
103 #define RAPL_PKG                (1 << 0)
104                                         /* 0x610 MSR_PKG_POWER_LIMIT */
105                                         /* 0x611 MSR_PKG_ENERGY_STATUS */
106 #define RAPL_PKG_PERF_STATUS    (1 << 1)
107                                         /* 0x613 MSR_PKG_PERF_STATUS */
108 #define RAPL_PKG_POWER_INFO     (1 << 2)
109                                         /* 0x614 MSR_PKG_POWER_INFO */
110
111 #define RAPL_DRAM               (1 << 3)
112                                         /* 0x618 MSR_DRAM_POWER_LIMIT */
113                                         /* 0x619 MSR_DRAM_ENERGY_STATUS */
114 #define RAPL_DRAM_PERF_STATUS   (1 << 4)
115                                         /* 0x61b MSR_DRAM_PERF_STATUS */
116 #define RAPL_DRAM_POWER_INFO    (1 << 5)
117                                         /* 0x61c MSR_DRAM_POWER_INFO */
118
119 #define RAPL_CORES_POWER_LIMIT  (1 << 6)
120                                         /* 0x638 MSR_PP0_POWER_LIMIT */
121 #define RAPL_CORE_POLICY        (1 << 7)
122                                         /* 0x63a MSR_PP0_POLICY */
123
124 #define RAPL_GFX                (1 << 8)
125                                         /* 0x640 MSR_PP1_POWER_LIMIT */
126                                         /* 0x641 MSR_PP1_ENERGY_STATUS */
127                                         /* 0x642 MSR_PP1_POLICY */
128
129 #define RAPL_CORES_ENERGY_STATUS        (1 << 9)
130                                         /* 0x639 MSR_PP0_ENERGY_STATUS */
/* Convenience mask: both PP0 (cores) capabilities together */
131 #define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
/* NOTE(review): presumably the fallback TCC activation temperature in degrees C -- confirm at the use site */
132 #define TJMAX_DEFAULT   100
133
/* Larger of two values.  The winning argument is evaluated twice, so do not pass expressions with side effects. */
134 #define MAX(a, b) ((a) > (b) ? (a) : (b))
135
136 /*
137  * buffer size used by sscanf() for added column names
138  * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
139  */
140 #define NAME_BYTES 20
141
142 int backwards_count;
143 char *progname;
144
/*
 * CPU masks and their byte sizes, as taken by the CPU_*_S() macros:
 * cpu_present_set is tested by cpu_is_not_present(),
 * cpu_affinity_set is scratch space for cpu_migrate().
 */
145 cpu_set_t *cpu_present_set, *cpu_affinity_set;
146 size_t cpu_present_setsize, cpu_affinity_setsize;
/* Capacity of the counter[] arrays in thread_data, core_data and pkg_data */
147 #define MAX_ADDED_COUNTERS 16
148
/*
 * Counters kept per logical CPU (thread).
 * thread_even / thread_odd are the bases of two arrays of these, indexed
 * with GET_THREAD().
 * NOTE(review): presumably the two arrays hold alternating snapshots --
 * confirm against the sampling loop.
 */
149 struct thread_data {
150         unsigned long long tsc;	/* denominator of the percentage columns (after tsc_tweak) */
151         unsigned long long aperf;	/* numerator of Avg_MHz and Bzy_MHz */
152         unsigned long long mperf;	/* numerator of Busy%, denominator of Bzy_MHz */
153         unsigned long long c1;	/* printed as CPU%c1 relative to tsc */
154         unsigned int irq_count;	/* printed in the IRQ column */
155         unsigned int smi_count;	/* printed in the SMI column */
156         unsigned int cpu_id;	/* checked against cpu_present_set by for_all_cpus() */
157         unsigned int flags;	/* CPU_IS_FIRST_* bits below */
158 #define CPU_IS_FIRST_THREAD_IN_CORE     0x2	/* this row also prints the per-core columns */
159 #define CPU_IS_FIRST_CORE_IN_PACKAGE    0x4	/* this row also prints the per-package columns */
160         unsigned long long counter[MAX_ADDED_COUNTERS];	/* user-added counters, same order as the sys.tp list */
161 } *thread_even, *thread_odd;
162
/*
 * Counters kept per core.  core_even / core_odd are the bases of two
 * arrays of these, indexed with GET_CORE().
 */
163 struct core_data {
164         unsigned long long c3;	/* printed as CPU%c3 relative to the TSC delta */
165         unsigned long long c6;	/* printed as CPU%c6 */
166         unsigned long long c7;	/* printed as CPU%c7 */
167         unsigned long long mc6_us;      /* duplicate as per-core for now, even though per module */
168         unsigned int core_temp_c;	/* printed in the CoreTmp column */
169         unsigned int core_id;
170         unsigned long long counter[MAX_ADDED_COUNTERS];	/* user-added counters, same order as the sys.cp list */
171 } *core_even, *core_odd;
172
/*
 * Counters kept per package.  package_even / package_odd are the bases
 * of two arrays of these, indexed with GET_PKG().
 */
173 struct pkg_data {
174         unsigned long long pc2;	/* pc2..pc10: printed as Pkg%pcN relative to the TSC delta */
175         unsigned long long pc3;
176         unsigned long long pc6;
177         unsigned long long pc7;
178         unsigned long long pc8;
179         unsigned long long pc9;
180         unsigned long long pc10;
181         unsigned long long pkg_wtd_core_c0;	/* printed as Totl%C0 when do_skl_residency is set */
182         unsigned long long pkg_any_core_c0;	/* printed as Any%C0 */
183         unsigned long long pkg_any_gfxe_c0;	/* printed as GFX%C0 */
184         unsigned long long pkg_both_core_gfxe_c0;	/* printed as CPUGFX% */
185         long long gfx_rc6_ms;	/* signed: -1 marks a GFX counter reset and is printed as "**.**" */
186         unsigned int gfx_mhz;	/* printed in the GFXMHz column */
187         unsigned int package_id;
188         unsigned int energy_pkg;        /* MSR_PKG_ENERGY_STATUS */
189         unsigned int energy_dram;       /* MSR_DRAM_ENERGY_STATUS */
190         unsigned int energy_cores;      /* MSR_PP0_ENERGY_STATUS */
191         unsigned int energy_gfx;        /* MSR_PP1_ENERGY_STATUS */
192         unsigned int rapl_pkg_perf_status;      /* MSR_PKG_PERF_STATUS */
193         unsigned int rapl_dram_perf_status;     /* MSR_DRAM_PERF_STATUS */
194         unsigned int pkg_temp_c;	/* printed in the PkgTmp column */
195         unsigned long long counter[MAX_ADDED_COUNTERS];	/* user-added counters, same order as the sys.pp list */
196 } *package_even, *package_odd;
197
/*
 * Each of these expands to the three base pointers of one counter set in
 * (thread, core, package) order, matching the trailing parameters of
 * for_all_cpus().
 */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Address one element of the flat per-thread / per-core / per-package
 * arrays.  Layout is package-major, then core, then thread, with the
 * strides taken from topo.num_cores_per_pkg and topo.num_threads_per_core.
 *
 * Every macro argument is parenthesized so that an expression argument
 * (e.g. "i | 2" or "cond ? a : b") is evaluated as a unit.
 */
#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
	((thread_base) + (pkg_no) * topo.num_cores_per_pkg * \
		topo.num_threads_per_core + \
		(core_no) * topo.num_threads_per_core + (thread_no))
#define GET_CORE(core_base, core_no, pkg_no) \
	((core_base) + (pkg_no) * topo.num_cores_per_pkg + (core_no))
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
208
/* Granularity at which a counter is kept: per logical CPU, per core or per package */
209 enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
210 enum counter_type {COUNTER_CYCLES, COUNTER_SECONDS};
/*
 * How a counter value is printed by format_counters():
 * FORMAT_RAW as hex, FORMAT_DELTA as a decimal number,
 * FORMAT_PERCENT as a percentage of the TSC delta.
 */
211 enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
212
/*
 * Description of one counter column.  Used both for the built-in table
 * bic[] and for the singly linked lists of user-added counters hanging
 * off struct sys_counters.
 */
213 struct msr_counter {
214         unsigned int msr_num;	/* MSR address; printed as "msr0x%x" by dump_counters() */
215         char name[NAME_BYTES];	/* column title */
216         unsigned int width;	/* 32 or 64: selects the hex width used for FORMAT_RAW */
217         enum counter_type type;
218         enum counter_format format;
219         struct msr_counter *next;	/* next counter in the list, NULL at the end */
220         unsigned int flags;
221 #define FLAGS_HIDE      (1 << 0)
222 #define FLAGS_SHOW      (1 << 1)
223 };
224
/*
 * User-added counters, one linked list per scope.
 * NOTE(review): the added_*_counters fields presumably hold the length
 * of the matching list -- confirm where counters are added.
 */
225 struct sys_counters {
226         unsigned int added_thread_counters;
227         unsigned int added_core_counters;
228         unsigned int added_package_counters;
229         struct msr_counter *tp;	/* per-thread list, walked alongside thread_data.counter[] */
230         struct msr_counter *cp;	/* per-core list, walked alongside core_data.counter[] */
231         struct msr_counter *pp;	/* per-package list, walked alongside pkg_data.counter[] */
232 } sys;
233
/*
 * One thread/core/package triple for the summary row; format_counters()
 * recognizes &average.threads and prints "-" in the topology columns.
 */
234 struct system_summary {
235         struct thread_data threads;
236         struct core_data cores;
237         struct pkg_data packages;
238 } average;
239
240
/* System topology; the num_* fields drive for_all_cpus() and the GET_*() index macros */
241 struct topo_params {
242         int num_packages;
243         int num_cpus;
244         int num_cores;
245         int max_cpu_num;
246         int num_cores_per_pkg;
247         int num_threads_per_core;
248 } topo;
249
/* tv_delta is the measurement interval that format_counters() divides rates by */
250 struct timeval tv_even, tv_odd, tv_delta;
251
252 int *irq_column_2_cpu;  /* /proc/interrupts column numbers */
253 int *irqs_per_cpu;              /* indexed by cpu_num */
254
255 void setup_all_buffers(void);
256
257 int cpu_is_not_present(int cpu)
258 {
259         return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
260 }
261 /*
262  * run func(thread, core, package) in topology order
263  * skip non-present cpus
264  */
265
266 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
267         struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
268 {
269         int retval, pkg_no, core_no, thread_no;
270
271         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
272                 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
273                         for (thread_no = 0; thread_no <
274                                 topo.num_threads_per_core; ++thread_no) {
275                                 struct thread_data *t;
276                                 struct core_data *c;
277                                 struct pkg_data *p;
278
279                                 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
280
281                                 if (cpu_is_not_present(t->cpu_id))
282                                         continue;
283
284                                 c = GET_CORE(core_base, core_no, pkg_no);
285                                 p = GET_PKG(pkg_base, pkg_no);
286
287                                 retval = func(t, c, p);
288                                 if (retval)
289                                         return retval;
290                         }
291                 }
292         }
293         return 0;
294 }
295
296 int cpu_migrate(int cpu)
297 {
298         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
299         CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
300         if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
301                 return -1;
302         else
303                 return 0;
304 }
305 int get_msr_fd(int cpu)
306 {
307         char pathname[32];
308         int fd;
309
310         fd = fd_percpu[cpu];
311
312         if (fd)
313                 return fd;
314
315         sprintf(pathname, "/dev/cpu/%d/msr", cpu);
316         fd = open(pathname, O_RDONLY);
317         if (fd < 0)
318                 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
319
320         fd_percpu[cpu] = fd;
321
322         return fd;
323 }
324
/*
 * get_msr
 * Read the 64-bit MSR at the given offset on the given cpu into *msr.
 * Always returns 0; a failed or short read terminates the program via err().
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	int fd = get_msr_fd(cpu);
	ssize_t nread = pread(fd, msr, sizeof(*msr), offset);

	if (nread != sizeof(*msr))
		err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

	return 0;
}
336
337 /*
338  * Each string in this array is compared in --show and --hide cmdline.
339  * Thus, strings that are proper sub-sets must follow their more specific peers.
340  */
/*
 * Table of built-in counter (column) names.
 * The position of an entry is its bit number: bic_lookup() returns
 * (1ULL << i) for bic[i], so the order here must stay in step with the
 * BIC_* bit definitions.  Only the name field is initialized for most
 * entries; the remaining fields are zero.
 */
341 struct msr_counter bic[] = {
342         { 0x0, "Package" },
343         { 0x0, "Avg_MHz" },
344         { 0x0, "Bzy_MHz" },
345         { 0x0, "TSC_MHz" },
346         { 0x0, "IRQ" },
347         { 0x0, "SMI", 32, 0, FORMAT_DELTA, NULL},
348         { 0x0, "Busy%" },
349         { 0x0, "CPU%c1" },
350         { 0x0, "CPU%c3" },
351         { 0x0, "CPU%c6" },
352         { 0x0, "CPU%c7" },
353         { 0x0, "ThreadC" },
354         { 0x0, "CoreTmp" },
355         { 0x0, "CoreCnt" },
356         { 0x0, "PkgTmp" },
357         { 0x0, "GFX%rc6" },
358         { 0x0, "GFXMHz" },
359         { 0x0, "Pkg%pc2" },
360         { 0x0, "Pkg%pc3" },
361         { 0x0, "Pkg%pc6" },
362         { 0x0, "Pkg%pc7" },
363         { 0x0, "Pkg%pc8" },
364         { 0x0, "Pkg%pc9" },
365         { 0x0, "Pkg%pc10" },
366         { 0x0, "PkgWatt" },
367         { 0x0, "CorWatt" },
368         { 0x0, "GFXWatt" },
369         { 0x0, "PkgCnt" },
370         { 0x0, "RAMWatt" },
371         { 0x0, "PKG_%" },
372         { 0x0, "RAM_%" },
373         { 0x0, "Pkg_J" },
374         { 0x0, "Cor_J" },
375         { 0x0, "GFX_J" },
376         { 0x0, "RAM_J" },
377         { 0x0, "Core" },
378         { 0x0, "CPU" },
379         { 0x0, "Mod%c6" },
380 };
381
/* Number of entries in bic[] */
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))

/*
 * One bit per built-in counter.  Bit N names the counter stored at
 * bic[N], so this list and the bic[] table must be kept in the same order.
 */
#define BIC_Package	(1ULL << 0)
#define BIC_Avg_MHz	(1ULL << 1)
#define BIC_Bzy_MHz	(1ULL << 2)
#define BIC_TSC_MHz	(1ULL << 3)
#define BIC_IRQ		(1ULL << 4)
#define BIC_SMI		(1ULL << 5)
#define BIC_Busy	(1ULL << 6)
#define BIC_CPU_c1	(1ULL << 7)
#define BIC_CPU_c3	(1ULL << 8)
#define BIC_CPU_c6	(1ULL << 9)
#define BIC_CPU_c7	(1ULL << 10)
#define BIC_ThreadC	(1ULL << 11)
#define BIC_CoreTmp	(1ULL << 12)
#define BIC_CoreCnt	(1ULL << 13)
#define BIC_PkgTmp	(1ULL << 14)
#define BIC_GFX_rc6	(1ULL << 15)
#define BIC_GFXMHz	(1ULL << 16)
#define BIC_Pkgpc2	(1ULL << 17)
#define BIC_Pkgpc3	(1ULL << 18)
#define BIC_Pkgpc6	(1ULL << 19)
#define BIC_Pkgpc7	(1ULL << 20)
#define BIC_Pkgpc8	(1ULL << 21)
#define BIC_Pkgpc9	(1ULL << 22)
#define BIC_Pkgpc10	(1ULL << 23)
#define BIC_PkgWatt	(1ULL << 24)
#define BIC_CorWatt	(1ULL << 25)
#define BIC_GFXWatt	(1ULL << 26)
#define BIC_PkgCnt	(1ULL << 27)
#define BIC_RAMWatt	(1ULL << 28)
#define BIC_PKG__	(1ULL << 29)
#define BIC_RAM__	(1ULL << 30)
#define BIC_Pkg_J	(1ULL << 31)
#define BIC_Cor_J	(1ULL << 32)
#define BIC_GFX_J	(1ULL << 33)
#define BIC_RAM_J	(1ULL << 34)
#define BIC_Core	(1ULL << 35)
#define BIC_CPU		(1ULL << 36)
#define BIC_Mod_c6	(1ULL << 37)

/* Counters allowed to be shown; starts with every bit set */
unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL;
/* Counters marked as available via BIC_PRESENT(); starts empty */
unsigned long long bic_present;

/*
 * DO_BIC:          non-zero when any of the given counters is both enabled
 *                  and present.
 * BIC_PRESENT:     mark the given counter(s) as present.
 * BIC_NOT_PRESENT: mark the given counter(s) as not present.
 *
 * The argument is parenthesized so that a mask such as (BIC_A | BIC_B)
 * is applied as a whole; without that, ~A | B would clear only A and
 * "x & A | B" would always be true.
 */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))
428
429 /*
430  * bic_lookup
431  * for all the strings in comma separate name_list,
432  * set the approprate bit in return value.
433  */
434 unsigned long long bic_lookup(char *name_list)
435 {
436         int i;
437         unsigned long long retval = 0;
438
439         while (name_list) {
440                 char *comma;
441
442                 comma = strchr(name_list, ',');
443
444                 if (comma)
445                         *comma = '\0';
446
447                 for (i = 0; i < MAX_BIC; ++i) {
448                         if (!strcmp(name_list, bic[i].name)) {
449                                 retval |= (1ULL << i);
450                                 break;
451                         }
452                 }
453                 if (i == MAX_BIC) {
454                         fprintf(stderr, "Invalid counter name: %s\n", name_list);
455                         exit(-1);
456                 }
457
458                 name_list = comma;
459                 if (name_list)
460                         name_list++;
461
462         }
463         return retval;
464 }
465
466 void print_header(void)
467 {
468         struct msr_counter *mp;
469
470         if (DO_BIC(BIC_Package))
471                 outp += sprintf(outp, "\tPackage");
472         if (DO_BIC(BIC_Core))
473                 outp += sprintf(outp, "\tCore");
474         if (DO_BIC(BIC_CPU))
475                 outp += sprintf(outp, "\tCPU");
476         if (DO_BIC(BIC_Avg_MHz))
477                 outp += sprintf(outp, "\tAvg_MHz");
478         if (DO_BIC(BIC_Busy))
479                 outp += sprintf(outp, "\tBusy%%");
480         if (DO_BIC(BIC_Bzy_MHz))
481                 outp += sprintf(outp, "\tBzy_MHz");
482         if (DO_BIC(BIC_TSC_MHz))
483                 outp += sprintf(outp, "\tTSC_MHz");
484
485         if (DO_BIC(BIC_IRQ))
486                 outp += sprintf(outp, "\tIRQ");
487         if (DO_BIC(BIC_SMI))
488                 outp += sprintf(outp, "\tSMI");
489
490         if (DO_BIC(BIC_CPU_c1))
491                 outp += sprintf(outp, "\tCPU%%c1");
492
493         for (mp = sys.tp; mp; mp = mp->next) {
494                 if (mp->format == FORMAT_RAW) {
495                         if (mp->width == 64)
496                                 outp += sprintf(outp, "\t%18.18s", mp->name);
497                         else
498                                 outp += sprintf(outp, "\t%10.10s", mp->name);
499                 } else {
500                         outp += sprintf(outp, "\t%-7.7s", mp->name);
501                 }
502         }
503
504         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
505                 outp += sprintf(outp, "\tCPU%%c3");
506         if (DO_BIC(BIC_CPU_c6))
507                 outp += sprintf(outp, "\tCPU%%c6");
508         if (DO_BIC(BIC_CPU_c7))
509                 outp += sprintf(outp, "\tCPU%%c7");
510
511         if (DO_BIC(BIC_Mod_c6))
512                 outp += sprintf(outp, "\tMod%%c6");
513
514         if (DO_BIC(BIC_CoreTmp))
515                 outp += sprintf(outp, "\tCoreTmp");
516
517         for (mp = sys.cp; mp; mp = mp->next) {
518                 if (mp->format == FORMAT_RAW) {
519                         if (mp->width == 64)
520                                 outp += sprintf(outp, "\t%18.18s", mp->name);
521                         else
522                                 outp += sprintf(outp, "\t%10.10s", mp->name);
523                 } else {
524                         outp += sprintf(outp, "\t%-7.7s", mp->name);
525                 }
526         }
527
528         if (DO_BIC(BIC_PkgTmp))
529                 outp += sprintf(outp, "\tPkgTmp");
530
531         if (DO_BIC(BIC_GFX_rc6))
532                 outp += sprintf(outp, "\tGFX%%rc6");
533
534         if (DO_BIC(BIC_GFXMHz))
535                 outp += sprintf(outp, "\tGFXMHz");
536
537         if (do_skl_residency) {
538                 outp += sprintf(outp, "\tTotl%%C0");
539                 outp += sprintf(outp, "\tAny%%C0");
540                 outp += sprintf(outp, "\tGFX%%C0");
541                 outp += sprintf(outp, "\tCPUGFX%%");
542         }
543
544         if (DO_BIC(BIC_Pkgpc2))
545                 outp += sprintf(outp, "\tPkg%%pc2");
546         if (DO_BIC(BIC_Pkgpc3))
547                 outp += sprintf(outp, "\tPkg%%pc3");
548         if (DO_BIC(BIC_Pkgpc6))
549                 outp += sprintf(outp, "\tPkg%%pc6");
550         if (DO_BIC(BIC_Pkgpc7))
551                 outp += sprintf(outp, "\tPkg%%pc7");
552         if (DO_BIC(BIC_Pkgpc8))
553                 outp += sprintf(outp, "\tPkg%%pc8");
554         if (DO_BIC(BIC_Pkgpc9))
555                 outp += sprintf(outp, "\tPkg%%pc9");
556         if (DO_BIC(BIC_Pkgpc10))
557                 outp += sprintf(outp, "\tPk%%pc10");
558
559         if (do_rapl && !rapl_joules) {
560                 if (DO_BIC(BIC_PkgWatt))
561                         outp += sprintf(outp, "\tPkgWatt");
562                 if (DO_BIC(BIC_CorWatt))
563                         outp += sprintf(outp, "\tCorWatt");
564                 if (DO_BIC(BIC_GFXWatt))
565                         outp += sprintf(outp, "\tGFXWatt");
566                 if (DO_BIC(BIC_RAMWatt))
567                         outp += sprintf(outp, "\tRAMWatt");
568                 if (DO_BIC(BIC_PKG__))
569                         outp += sprintf(outp, "\tPKG_%%");
570                 if (DO_BIC(BIC_RAM__))
571                         outp += sprintf(outp, "\tRAM_%%");
572         } else if (do_rapl && rapl_joules) {
573                 if (DO_BIC(BIC_Pkg_J))
574                         outp += sprintf(outp, "\tPkg_J");
575                 if (DO_BIC(BIC_Cor_J))
576                         outp += sprintf(outp, "\tCor_J");
577                 if (DO_BIC(BIC_GFX_J))
578                         outp += sprintf(outp, "\tGFX_J");
579                 if (DO_BIC(BIC_RAM_J))
580                         outp += sprintf(outp, "\tRAM_J");
581                 if (DO_BIC(BIC_PKG__))
582                         outp += sprintf(outp, "\tPKG_%%");
583                 if (DO_BIC(BIC_RAM__))
584                         outp += sprintf(outp, "\tRAM_%%");
585         }
586         for (mp = sys.pp; mp; mp = mp->next) {
587                 if (mp->format == FORMAT_RAW) {
588                         if (mp->width == 64)
589                                 outp += sprintf(outp, "\t%18.18s", mp->name);
590                         else
591                                 outp += sprintf(outp, "\t%10.10s", mp->name);
592                 } else {
593                         outp += sprintf(outp, "\t%-7.7s", mp->name);
594                 }
595         }
596
597         outp += sprintf(outp, "\n");
598 }
599
600 int dump_counters(struct thread_data *t, struct core_data *c,
601         struct pkg_data *p)
602 {
603         int i;
604         struct msr_counter *mp;
605
606         outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
607
608         if (t) {
609                 outp += sprintf(outp, "CPU: %d flags 0x%x\n",
610                         t->cpu_id, t->flags);
611                 outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
612                 outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
613                 outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
614                 outp += sprintf(outp, "c1: %016llX\n", t->c1);
615
616                 if (DO_BIC(BIC_IRQ))
617                         outp += sprintf(outp, "IRQ: %08X\n", t->irq_count);
618                 if (DO_BIC(BIC_SMI))
619                         outp += sprintf(outp, "SMI: %08X\n", t->smi_count);
620
621                 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
622                         outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
623                                 i, mp->msr_num, t->counter[i]);
624                 }
625         }
626
627         if (c) {
628                 outp += sprintf(outp, "core: %d\n", c->core_id);
629                 outp += sprintf(outp, "c3: %016llX\n", c->c3);
630                 outp += sprintf(outp, "c6: %016llX\n", c->c6);
631                 outp += sprintf(outp, "c7: %016llX\n", c->c7);
632                 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
633
634                 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
635                         outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
636                                 i, mp->msr_num, c->counter[i]);
637                 }
638                 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
639         }
640
641         if (p) {
642                 outp += sprintf(outp, "package: %d\n", p->package_id);
643
644                 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
645                 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
646                 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
647                 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
648
649                 outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
650                 if (DO_BIC(BIC_Pkgpc3))
651                         outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
652                 if (DO_BIC(BIC_Pkgpc6))
653                         outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
654                 if (DO_BIC(BIC_Pkgpc7))
655                         outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
656                 outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
657                 outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
658                 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
659                 outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
660                 outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
661                 outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
662                 outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
663                 outp += sprintf(outp, "Throttle PKG: %0X\n",
664                         p->rapl_pkg_perf_status);
665                 outp += sprintf(outp, "Throttle RAM: %0X\n",
666                         p->rapl_dram_perf_status);
667                 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
668
669                 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
670                         outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
671                                 i, mp->msr_num, p->counter[i]);
672                 }
673         }
674
675         outp += sprintf(outp, "\n");
676
677         return 0;
678 }
679
680 /*
681  * column formatting convention & formats
682  */
683 int format_counters(struct thread_data *t, struct core_data *c,
684         struct pkg_data *p)
685 {
686         double interval_float, tsc;
687         char *fmt8;
688         int i;
689         struct msr_counter *mp;
690
691          /* if showing only 1st thread in core and this isn't one, bail out */
692         if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
693                 return 0;
694
695          /* if showing only 1st thread in pkg and this isn't one, bail out */
696         if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
697                 return 0;
698
699         interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
700
701         tsc = t->tsc * tsc_tweak;
702
703         /* topo columns, print blanks on 1st (average) line */
704         if (t == &average.threads) {
705                 if (DO_BIC(BIC_Package))
706                         outp += sprintf(outp, "\t-");
707                 if (DO_BIC(BIC_Core))
708                         outp += sprintf(outp, "\t-");
709                 if (DO_BIC(BIC_CPU))
710                         outp += sprintf(outp, "\t-");
711         } else {
712                 if (DO_BIC(BIC_Package)) {
713                         if (p)
714                                 outp += sprintf(outp, "\t%d", p->package_id);
715                         else
716                                 outp += sprintf(outp, "\t-");
717                 }
718                 if (DO_BIC(BIC_Core)) {
719                         if (c)
720                                 outp += sprintf(outp, "\t%d", c->core_id);
721                         else
722                                 outp += sprintf(outp, "\t-");
723                 }
724                 if (DO_BIC(BIC_CPU))
725                         outp += sprintf(outp, "\t%d", t->cpu_id);
726         }
727
728         if (DO_BIC(BIC_Avg_MHz))
729                 outp += sprintf(outp, "\t%.0f",
730                         1.0 / units * t->aperf / interval_float);
731
732         if (DO_BIC(BIC_Busy))
733                 outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/tsc);
734
735         if (DO_BIC(BIC_Bzy_MHz)) {
736                 if (has_base_hz)
737                         outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf);
738                 else
739                         outp += sprintf(outp, "\t%.0f",
740                                 tsc / units * t->aperf / t->mperf / interval_float);
741         }
742
743         if (DO_BIC(BIC_TSC_MHz))
744                 outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float);
745
746         /* IRQ */
747         if (DO_BIC(BIC_IRQ))
748                 outp += sprintf(outp, "\t%d", t->irq_count);
749
750         /* SMI */
751         if (DO_BIC(BIC_SMI))
752                 outp += sprintf(outp, "\t%d", t->smi_count);
753
754         /* C1 */
755         if (DO_BIC(BIC_CPU_c1))
756                 outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/tsc);
757
758         /* Added counters */
759         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
760                 if (mp->format == FORMAT_RAW) {
761                         if (mp->width == 32)
762                                 outp += sprintf(outp, "\t0x%08lx", (unsigned long) t->counter[i]);
763                         else
764                                 outp += sprintf(outp, "\t0x%016llx", t->counter[i]);
765                 } else if (mp->format == FORMAT_DELTA) {
766                         outp += sprintf(outp, "\t%lld", t->counter[i]);
767                 } else if (mp->format == FORMAT_PERCENT) {
768                         outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/tsc);
769                 }
770         }
771
772         /* print per-core data only for 1st thread in core */
773         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
774                 goto done;
775
776         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
777                 outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/tsc);
778         if (DO_BIC(BIC_CPU_c6))
779                 outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/tsc);
780         if (DO_BIC(BIC_CPU_c7))
781                 outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/tsc);
782
783         /* Mod%c6 */
784         if (DO_BIC(BIC_Mod_c6))
785                 outp += sprintf(outp, "\t%.2f", 100.0 * c->mc6_us / tsc);
786
787         if (DO_BIC(BIC_CoreTmp))
788                 outp += sprintf(outp, "\t%d", c->core_temp_c);
789
790         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
791                 if (mp->format == FORMAT_RAW) {
792                         if (mp->width == 32)
793                                 outp += sprintf(outp, "\t0x%08lx", (unsigned long) c->counter[i]);
794                         else
795                                 outp += sprintf(outp, "\t0x%016llx", c->counter[i]);
796                 } else if (mp->format == FORMAT_DELTA) {
797                         outp += sprintf(outp, "\t%lld", c->counter[i]);
798                 } else if (mp->format == FORMAT_PERCENT) {
799                         outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/tsc);
800                 }
801         }
802
803         /* print per-package data only for 1st core in package */
804         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
805                 goto done;
806
807         /* PkgTmp */
808         if (DO_BIC(BIC_PkgTmp))
809                 outp += sprintf(outp, "\t%d", p->pkg_temp_c);
810
811         /* GFXrc6 */
812         if (DO_BIC(BIC_GFX_rc6)) {
813                 if (p->gfx_rc6_ms == -1) {      /* detect GFX counter reset */
814                         outp += sprintf(outp, "\t**.**");
815                 } else {
816                         outp += sprintf(outp, "\t%.2f",
817                                 p->gfx_rc6_ms / 10.0 / interval_float);
818                 }
819         }
820
821         /* GFXMHz */
822         if (DO_BIC(BIC_GFXMHz))
823                 outp += sprintf(outp, "\t%d", p->gfx_mhz);
824
825         /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
826         if (do_skl_residency) {
827                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/tsc);
828                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/tsc);
829                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/tsc);
830                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/tsc);
831         }
832
833         if (DO_BIC(BIC_Pkgpc2))
834                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/tsc);
835         if (DO_BIC(BIC_Pkgpc3))
836                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/tsc);
837         if (DO_BIC(BIC_Pkgpc6))
838                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/tsc);
839         if (DO_BIC(BIC_Pkgpc7))
840                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/tsc);
841         if (DO_BIC(BIC_Pkgpc8))
842                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/tsc);
843         if (DO_BIC(BIC_Pkgpc9))
844                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/tsc);
845         if (DO_BIC(BIC_Pkgpc10))
846                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/tsc);
847
848         /*
849          * If measurement interval exceeds minimum RAPL Joule Counter range,
850          * indicate that results are suspect by printing "**" in fraction place.
851          */
852         if (interval_float < rapl_joule_counter_range)
853                 fmt8 = "\t%.2f";
854         else
855                 fmt8 = "%6.0f**";
856
857         if (DO_BIC(BIC_PkgWatt))
858                 outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float);
859         if (DO_BIC(BIC_CorWatt))
860                 outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float);
861         if (DO_BIC(BIC_GFXWatt))
862                 outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
863         if (DO_BIC(BIC_RAMWatt))
864                 outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float);
865         if (DO_BIC(BIC_Pkg_J))
866                 outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units);
867         if (DO_BIC(BIC_Cor_J))
868                 outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units);
869         if (DO_BIC(BIC_GFX_J))
870                 outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units);
871         if (DO_BIC(BIC_RAM_J))
872                 outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units);
873         if (DO_BIC(BIC_PKG__))
874                 outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
875         if (DO_BIC(BIC_RAM__))
876                 outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
877
878         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
879                 if (mp->format == FORMAT_RAW) {
880                         if (mp->width == 32)
881                                 outp += sprintf(outp, "\t0x%08lx", (unsigned long) p->counter[i]);
882                         else
883                                 outp += sprintf(outp, "\t0x%016llx", p->counter[i]);
884                 } else if (mp->format == FORMAT_DELTA) {
885                         outp += sprintf(outp, "\t%lld", p->counter[i]);
886                 } else if (mp->format == FORMAT_PERCENT) {
887                         outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/tsc);
888                 }
889         }
890
891 done:
892         outp += sprintf(outp, "\n");
893
894         return 0;
895 }
896
897 void flush_output_stdout(void)
898 {
899         FILE *filep;
900
901         if (outf == stderr)
902                 filep = stdout;
903         else
904                 filep = outf;
905
906         fputs(output_buffer, filep);
907         fflush(filep);
908
909         outp = output_buffer;
910 }
911 void flush_output_stderr(void)
912 {
913         fputs(output_buffer, outf);
914         fflush(outf);
915         outp = output_buffer;
916 }
917 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
918 {
919         static int printed;
920
921         if (!printed || !summary_only)
922                 print_header();
923
924         if (topo.num_cpus > 1)
925                 format_counters(&average.threads, &average.cores,
926                         &average.packages);
927
928         printed = 1;
929
930         if (summary_only)
931                 return;
932
933         for_all_cpus(format_counters, t, c, p);
934 }
935
/*
 * DELTA_WRAP32(new, old): old = new - old, for a 32-bit counter that
 * may have wrapped once between the two samples.
 *
 * - An unchanged counter (new == old) yields 0, regardless of the width
 *   of the operands.
 * - If new < old the counter is assumed to have wrapped, and 2^32 is
 *   added to compensate.
 *
 * Wrapped in do { } while (0) so it acts as a single statement and is
 * safe inside an unbraced if/else; invoke it with a trailing semicolon.
 * Note that each argument is evaluated more than once.
 */
#define DELTA_WRAP32(new, old)						\
	do {								\
		if ((new) >= (old))					\
			(old) = (new) - (old);				\
		else							\
			(old) = 0x100000000ULL + (new) - (old);		\
	} while (0)
942
943 int
944 delta_package(struct pkg_data *new, struct pkg_data *old)
945 {
946         int i;
947         struct msr_counter *mp;
948
949         if (do_skl_residency) {
950                 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
951                 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
952                 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
953                 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
954         }
955         old->pc2 = new->pc2 - old->pc2;
956         if (DO_BIC(BIC_Pkgpc3))
957                 old->pc3 = new->pc3 - old->pc3;
958         if (DO_BIC(BIC_Pkgpc6))
959                 old->pc6 = new->pc6 - old->pc6;
960         if (DO_BIC(BIC_Pkgpc7))
961                 old->pc7 = new->pc7 - old->pc7;
962         old->pc8 = new->pc8 - old->pc8;
963         old->pc9 = new->pc9 - old->pc9;
964         old->pc10 = new->pc10 - old->pc10;
965         old->pkg_temp_c = new->pkg_temp_c;
966
967         /* flag an error when rc6 counter resets/wraps */
968         if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
969                 old->gfx_rc6_ms = -1;
970         else
971                 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
972
973         old->gfx_mhz = new->gfx_mhz;
974
975         DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
976         DELTA_WRAP32(new->energy_cores, old->energy_cores);
977         DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
978         DELTA_WRAP32(new->energy_dram, old->energy_dram);
979         DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
980         DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
981
982         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
983                 if (mp->format == FORMAT_RAW)
984                         old->counter[i] = new->counter[i];
985                 else
986                         old->counter[i] = new->counter[i] - old->counter[i];
987         }
988
989         return 0;
990 }
991
992 void
993 delta_core(struct core_data *new, struct core_data *old)
994 {
995         int i;
996         struct msr_counter *mp;
997
998         old->c3 = new->c3 - old->c3;
999         old->c6 = new->c6 - old->c6;
1000         old->c7 = new->c7 - old->c7;
1001         old->core_temp_c = new->core_temp_c;
1002         old->mc6_us = new->mc6_us - old->mc6_us;
1003
1004         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1005                 if (mp->format == FORMAT_RAW)
1006                         old->counter[i] = new->counter[i];
1007                 else
1008                         old->counter[i] = new->counter[i] - old->counter[i];
1009         }
1010 }
1011
1012 /*
1013  * old = new - old
1014  */
1015 int
1016 delta_thread(struct thread_data *new, struct thread_data *old,
1017         struct core_data *core_delta)
1018 {
1019         int i;
1020         struct msr_counter *mp;
1021
1022         old->tsc = new->tsc - old->tsc;
1023
1024         /* check for TSC < 1 Mcycles over interval */
1025         if (old->tsc < (1000 * 1000))
1026                 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1027                      "You can disable all c-states by booting with \"idle=poll\"\n"
1028                      "or just the deep ones with \"processor.max_cstate=1\"");
1029
1030         old->c1 = new->c1 - old->c1;
1031
1032         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1033                 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1034                         old->aperf = new->aperf - old->aperf;
1035                         old->mperf = new->mperf - old->mperf;
1036                 } else {
1037                         return -1;
1038                 }
1039         }
1040
1041
1042         if (use_c1_residency_msr) {
1043                 /*
1044                  * Some models have a dedicated C1 residency MSR,
1045                  * which should be more accurate than the derivation below.
1046                  */
1047         } else {
1048                 /*
1049                  * As counter collection is not atomic,
1050                  * it is possible for mperf's non-halted cycles + idle states
1051                  * to exceed TSC's all cycles: show c1 = 0% in that case.
1052                  */
1053                 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
1054                         old->c1 = 0;
1055                 else {
1056                         /* normal case, derive c1 */
1057                         old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1058                                 - core_delta->c6 - core_delta->c7;
1059                 }
1060         }
1061
1062         if (old->mperf == 0) {
1063                 if (debug > 1)
1064                         fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1065                 old->mperf = 1; /* divide by 0 protection */
1066         }
1067
1068         if (DO_BIC(BIC_IRQ))
1069                 old->irq_count = new->irq_count - old->irq_count;
1070
1071         if (DO_BIC(BIC_SMI))
1072                 old->smi_count = new->smi_count - old->smi_count;
1073
1074         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1075                 if (mp->format == FORMAT_RAW)
1076                         old->counter[i] = new->counter[i];
1077                 else
1078                         old->counter[i] = new->counter[i] - old->counter[i];
1079         }
1080         return 0;
1081 }
1082
1083 int delta_cpu(struct thread_data *t, struct core_data *c,
1084         struct pkg_data *p, struct thread_data *t2,
1085         struct core_data *c2, struct pkg_data *p2)
1086 {
1087         int retval = 0;
1088
1089         /* calculate core delta only for 1st thread in core */
1090         if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1091                 delta_core(c, c2);
1092
1093         /* always calculate thread delta */
1094         retval = delta_thread(t, t2, c2);       /* c2 is core delta */
1095         if (retval)
1096                 return retval;
1097
1098         /* calculate package delta only for 1st core in package */
1099         if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1100                 retval = delta_package(p, p2);
1101
1102         return retval;
1103 }
1104
1105 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1106 {
1107         int i;
1108         struct msr_counter  *mp;
1109
1110         t->tsc = 0;
1111         t->aperf = 0;
1112         t->mperf = 0;
1113         t->c1 = 0;
1114
1115         t->irq_count = 0;
1116         t->smi_count = 0;
1117
1118         /* tells format_counters to dump all fields from this set */
1119         t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1120
1121         c->c3 = 0;
1122         c->c6 = 0;
1123         c->c7 = 0;
1124         c->mc6_us = 0;
1125         c->core_temp_c = 0;
1126
1127         p->pkg_wtd_core_c0 = 0;
1128         p->pkg_any_core_c0 = 0;
1129         p->pkg_any_gfxe_c0 = 0;
1130         p->pkg_both_core_gfxe_c0 = 0;
1131
1132         p->pc2 = 0;
1133         if (DO_BIC(BIC_Pkgpc3))
1134                 p->pc3 = 0;
1135         if (DO_BIC(BIC_Pkgpc6))
1136                 p->pc6 = 0;
1137         if (DO_BIC(BIC_Pkgpc7))
1138                 p->pc7 = 0;
1139         p->pc8 = 0;
1140         p->pc9 = 0;
1141         p->pc10 = 0;
1142
1143         p->energy_pkg = 0;
1144         p->energy_dram = 0;
1145         p->energy_cores = 0;
1146         p->energy_gfx = 0;
1147         p->rapl_pkg_perf_status = 0;
1148         p->rapl_dram_perf_status = 0;
1149         p->pkg_temp_c = 0;
1150
1151         p->gfx_rc6_ms = 0;
1152         p->gfx_mhz = 0;
1153         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1154                 t->counter[i] = 0;
1155
1156         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1157                 c->counter[i] = 0;
1158
1159         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1160                 p->counter[i] = 0;
1161 }
1162 int sum_counters(struct thread_data *t, struct core_data *c,
1163         struct pkg_data *p)
1164 {
1165         int i;
1166         struct msr_counter *mp;
1167
1168         average.threads.tsc += t->tsc;
1169         average.threads.aperf += t->aperf;
1170         average.threads.mperf += t->mperf;
1171         average.threads.c1 += t->c1;
1172
1173         average.threads.irq_count += t->irq_count;
1174         average.threads.smi_count += t->smi_count;
1175
1176         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1177                 if (mp->format == FORMAT_RAW)
1178                         continue;
1179                 average.threads.counter[i] += t->counter[i];
1180         }
1181
1182         /* sum per-core values only for 1st thread in core */
1183         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1184                 return 0;
1185
1186         average.cores.c3 += c->c3;
1187         average.cores.c6 += c->c6;
1188         average.cores.c7 += c->c7;
1189         average.cores.mc6_us += c->mc6_us;
1190
1191         average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1192
1193         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1194                 if (mp->format == FORMAT_RAW)
1195                         continue;
1196                 average.cores.counter[i] += c->counter[i];
1197         }
1198
1199         /* sum per-pkg values only for 1st core in pkg */
1200         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1201                 return 0;
1202
1203         if (do_skl_residency) {
1204                 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1205                 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1206                 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1207                 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1208         }
1209
1210         average.packages.pc2 += p->pc2;
1211         if (DO_BIC(BIC_Pkgpc3))
1212                 average.packages.pc3 += p->pc3;
1213         if (DO_BIC(BIC_Pkgpc6))
1214                 average.packages.pc6 += p->pc6;
1215         if (DO_BIC(BIC_Pkgpc7))
1216                 average.packages.pc7 += p->pc7;
1217         average.packages.pc8 += p->pc8;
1218         average.packages.pc9 += p->pc9;
1219         average.packages.pc10 += p->pc10;
1220
1221         average.packages.energy_pkg += p->energy_pkg;
1222         average.packages.energy_dram += p->energy_dram;
1223         average.packages.energy_cores += p->energy_cores;
1224         average.packages.energy_gfx += p->energy_gfx;
1225
1226         average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1227         average.packages.gfx_mhz = p->gfx_mhz;
1228
1229         average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1230
1231         average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1232         average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1233
1234         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1235                 if (mp->format == FORMAT_RAW)
1236                         continue;
1237                 average.packages.counter[i] += p->counter[i];
1238         }
1239         return 0;
1240 }
1241 /*
1242  * sum the counters for all cpus in the system
1243  * compute the weighted average
1244  */
1245 void compute_average(struct thread_data *t, struct core_data *c,
1246         struct pkg_data *p)
1247 {
1248         int i;
1249         struct msr_counter *mp;
1250
1251         clear_counters(&average.threads, &average.cores, &average.packages);
1252
1253         for_all_cpus(sum_counters, t, c, p);
1254
1255         average.threads.tsc /= topo.num_cpus;
1256         average.threads.aperf /= topo.num_cpus;
1257         average.threads.mperf /= topo.num_cpus;
1258         average.threads.c1 /= topo.num_cpus;
1259
1260         average.cores.c3 /= topo.num_cores;
1261         average.cores.c6 /= topo.num_cores;
1262         average.cores.c7 /= topo.num_cores;
1263         average.cores.mc6_us /= topo.num_cores;
1264
1265         if (do_skl_residency) {
1266                 average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1267                 average.packages.pkg_any_core_c0 /= topo.num_packages;
1268                 average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1269                 average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1270         }
1271
1272         average.packages.pc2 /= topo.num_packages;
1273         if (DO_BIC(BIC_Pkgpc3))
1274                 average.packages.pc3 /= topo.num_packages;
1275         if (DO_BIC(BIC_Pkgpc6))
1276                 average.packages.pc6 /= topo.num_packages;
1277         if (DO_BIC(BIC_Pkgpc7))
1278                 average.packages.pc7 /= topo.num_packages;
1279
1280         average.packages.pc8 /= topo.num_packages;
1281         average.packages.pc9 /= topo.num_packages;
1282         average.packages.pc10 /= topo.num_packages;
1283
1284         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1285                 if (mp->format == FORMAT_RAW)
1286                         continue;
1287                 average.threads.counter[i] /= topo.num_cpus;
1288         }
1289         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1290                 if (mp->format == FORMAT_RAW)
1291                         continue;
1292                 average.cores.counter[i] /= topo.num_cores;
1293         }
1294         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1295                 if (mp->format == FORMAT_RAW)
1296                         continue;
1297                 average.packages.counter[i] /= topo.num_packages;
1298         }
1299 }
1300
/*
 * Execute the x86 RDTSC instruction and return its EDX:EAX result
 * combined into a single 64-bit value (EDX in the upper half).
 */
static unsigned long long rdtsc(void)
{
	unsigned int lo32, hi32;

	__asm__ __volatile__("rdtsc" : "=a" (lo32), "=d" (hi32));

	return (((unsigned long long)hi32) << 32) | lo32;
}
1309
1310 /*
1311  * get_counters(...)
1312  * migrate to cpu
1313  * acquire and record local counters for that cpu
1314  */
1315 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1316 {
1317         int cpu = t->cpu_id;
1318         unsigned long long msr;
1319         int aperf_mperf_retry_count = 0;
1320         struct msr_counter *mp;
1321         int i;
1322
1323         if (cpu_migrate(cpu)) {
1324                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1325                 return -1;
1326         }
1327
1328 retry:
1329         t->tsc = rdtsc();       /* we are running on local CPU of interest */
1330
1331         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1332                 unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
1333
1334                 /*
1335                  * The TSC, APERF and MPERF must be read together for
1336                  * APERF/MPERF and MPERF/TSC to give accurate results.
1337                  *
1338                  * Unfortunately, APERF and MPERF are read by
1339                  * individual system call, so delays may occur
1340                  * between them.  If the time to read them
1341                  * varies by a large amount, we re-read them.
1342                  */
1343
1344                 /*
1345                  * This initial dummy APERF read has been seen to
1346                  * reduce jitter in the subsequent reads.
1347                  */
1348
1349                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1350                         return -3;
1351
1352                 t->tsc = rdtsc();       /* re-read close to APERF */
1353
1354                 tsc_before = t->tsc;
1355
1356                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1357                         return -3;
1358
1359                 tsc_between = rdtsc();
1360
1361                 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1362                         return -4;
1363
1364                 tsc_after = rdtsc();
1365
1366                 aperf_time = tsc_between - tsc_before;
1367                 mperf_time = tsc_after - tsc_between;
1368
1369                 /*
1370                  * If the system call latency to read APERF and MPERF
1371                  * differ by more than 2x, then try again.
1372                  */
1373                 if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
1374                         aperf_mperf_retry_count++;
1375                         if (aperf_mperf_retry_count < 5)
1376                                 goto retry;
1377                         else
1378                                 warnx("cpu%d jitter %lld %lld",
1379                                         cpu, aperf_time, mperf_time);
1380                 }
1381                 aperf_mperf_retry_count = 0;
1382
1383                 t->aperf = t->aperf * aperf_mperf_multiplier;
1384                 t->mperf = t->mperf * aperf_mperf_multiplier;
1385         }
1386
1387         if (DO_BIC(BIC_IRQ))
1388                 t->irq_count = irqs_per_cpu[cpu];
1389         if (DO_BIC(BIC_SMI)) {
1390                 if (get_msr(cpu, MSR_SMI_COUNT, &msr))
1391                         return -5;
1392                 t->smi_count = msr & 0xFFFFFFFF;
1393         }
1394         if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
1395                 if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
1396                         return -6;
1397         }
1398
1399         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1400                 if (get_msr(cpu, mp->msr_num, &t->counter[i]))
1401                         return -10;
1402         }
1403
1404
1405         /* collect core counters only for 1st thread in core */
1406         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1407                 return 0;
1408
1409         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) {
1410                 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
1411                         return -6;
1412         }
1413
1414         if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
1415                 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
1416                         return -7;
1417         } else if (do_knl_cstates) {
1418                 if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
1419                         return -7;
1420         }
1421
1422         if (DO_BIC(BIC_CPU_c7))
1423                 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
1424                         return -8;
1425
1426         if (DO_BIC(BIC_Mod_c6))
1427                 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
1428                         return -8;
1429
1430         if (DO_BIC(BIC_CoreTmp)) {
1431                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1432                         return -9;
1433                 c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1434         }
1435
1436         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1437                 if (get_msr(cpu, mp->msr_num, &c->counter[i]))
1438                         return -10;
1439         }
1440
1441         /* collect package counters only for 1st core in package */
1442         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1443                 return 0;
1444
1445         if (do_skl_residency) {
1446                 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
1447                         return -10;
1448                 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
1449                         return -11;
1450                 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
1451                         return -12;
1452                 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
1453                         return -13;
1454         }
1455         if (DO_BIC(BIC_Pkgpc3))
1456                 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
1457                         return -9;
1458         if (DO_BIC(BIC_Pkgpc6)) {
1459                 if (do_slm_cstates) {
1460                         if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
1461                                 return -10;
1462                 } else {
1463                         if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
1464                                 return -10;
1465                 }
1466         }
1467
1468         if (DO_BIC(BIC_Pkgpc2))
1469                 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
1470                         return -11;
1471         if (DO_BIC(BIC_Pkgpc7))
1472                 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
1473                         return -12;
1474         if (DO_BIC(BIC_Pkgpc8))
1475                 if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
1476                         return -13;
1477         if (DO_BIC(BIC_Pkgpc9))
1478                 if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
1479                         return -13;
1480         if (DO_BIC(BIC_Pkgpc10))
1481                 if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
1482                         return -13;
1483
1484         if (do_rapl & RAPL_PKG) {
1485                 if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
1486                         return -13;
1487                 p->energy_pkg = msr & 0xFFFFFFFF;
1488         }
1489         if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
1490                 if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
1491                         return -14;
1492                 p->energy_cores = msr & 0xFFFFFFFF;
1493         }
1494         if (do_rapl & RAPL_DRAM) {
1495                 if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
1496                         return -15;
1497                 p->energy_dram = msr & 0xFFFFFFFF;
1498         }
1499         if (do_rapl & RAPL_GFX) {
1500                 if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
1501                         return -16;
1502                 p->energy_gfx = msr & 0xFFFFFFFF;
1503         }
1504         if (do_rapl & RAPL_PKG_PERF_STATUS) {
1505                 if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
1506                         return -16;
1507                 p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
1508         }
1509         if (do_rapl & RAPL_DRAM_PERF_STATUS) {
1510                 if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
1511                         return -16;
1512                 p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
1513         }
1514         if (DO_BIC(BIC_PkgTmp)) {
1515                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1516                         return -17;
1517                 p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1518         }
1519
1520         if (DO_BIC(BIC_GFX_rc6))
1521                 p->gfx_rc6_ms = gfx_cur_rc6_ms;
1522
1523         if (DO_BIC(BIC_GFXMHz))
1524                 p->gfx_mhz = gfx_cur_mhz;
1525
1526         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1527                 if (get_msr(cpu, mp->msr_num, &p->counter[i]))
1528                         return -10;
1529         }
1530
1531         return 0;
1532 }
1533
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 *
 * pkg_cstate_limit_strings[] must therefore list its names in exactly
 * the order of the PCL* values below: entry 0 is "unknown" (PCLUKN)
 * and entry 1 is "reserved" (PCLRSV).
 *
 * Each *_pkg_cstate_limits[] table has 16 entries holding PCL* values.
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCLUNL 14 /* Unlimited */

int pkg_cstate_limit = PCLUKN;
char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2",
	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "unlimited"};

int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1568
1569
1570 static void
1571 calculate_tsc_tweak()
1572 {
1573         tsc_tweak = base_hz / tsc_hz;
1574 }
1575
1576 static void
1577 dump_nhm_platform_info(void)
1578 {
1579         unsigned long long msr;
1580         unsigned int ratio;
1581
1582         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
1583
1584         fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
1585
1586         ratio = (msr >> 40) & 0xFF;
1587         fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
1588                 ratio, bclk, ratio * bclk);
1589
1590         ratio = (msr >> 8) & 0xFF;
1591         fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
1592                 ratio, bclk, ratio * bclk);
1593
1594         get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
1595         fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1596                 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
1597
1598         return;
1599 }
1600
1601 static void
1602 dump_hsw_turbo_ratio_limits(void)
1603 {
1604         unsigned long long msr;
1605         unsigned int ratio;
1606
1607         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
1608
1609         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
1610
1611         ratio = (msr >> 8) & 0xFF;
1612         if (ratio)
1613                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
1614                         ratio, bclk, ratio * bclk);
1615
1616         ratio = (msr >> 0) & 0xFF;
1617         if (ratio)
1618                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
1619                         ratio, bclk, ratio * bclk);
1620         return;
1621 }
1622
1623 static void
1624 dump_ivt_turbo_ratio_limits(void)
1625 {
1626         unsigned long long msr;
1627         unsigned int ratio;
1628
1629         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
1630
1631         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
1632
1633         ratio = (msr >> 56) & 0xFF;
1634         if (ratio)
1635                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
1636                         ratio, bclk, ratio * bclk);
1637
1638         ratio = (msr >> 48) & 0xFF;
1639         if (ratio)
1640                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
1641                         ratio, bclk, ratio * bclk);
1642
1643         ratio = (msr >> 40) & 0xFF;
1644         if (ratio)
1645                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
1646                         ratio, bclk, ratio * bclk);
1647
1648         ratio = (msr >> 32) & 0xFF;
1649         if (ratio)
1650                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
1651                         ratio, bclk, ratio * bclk);
1652
1653         ratio = (msr >> 24) & 0xFF;
1654         if (ratio)
1655                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
1656                         ratio, bclk, ratio * bclk);
1657
1658         ratio = (msr >> 16) & 0xFF;
1659         if (ratio)
1660                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
1661                         ratio, bclk, ratio * bclk);
1662
1663         ratio = (msr >> 8) & 0xFF;
1664         if (ratio)
1665                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
1666                         ratio, bclk, ratio * bclk);
1667
1668         ratio = (msr >> 0) & 0xFF;
1669         if (ratio)
1670                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
1671                         ratio, bclk, ratio * bclk);
1672         return;
1673 }
1674 int has_turbo_ratio_group_limits(int family, int model)
1675 {
1676
1677         if (!genuine_intel)
1678                 return 0;
1679
1680         switch (model) {
1681         case INTEL_FAM6_ATOM_GOLDMONT:
1682         case INTEL_FAM6_SKYLAKE_X:
1683         case INTEL_FAM6_ATOM_DENVERTON:
1684                 return 1;
1685         }
1686         return 0;
1687 }
1688
1689 static void
1690 dump_turbo_ratio_limits(int family, int model)
1691 {
1692         unsigned long long msr, core_counts;
1693         unsigned int ratio, group_size;
1694
1695         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
1696         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
1697
1698         if (has_turbo_ratio_group_limits(family, model)) {
1699                 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
1700                 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
1701         } else {
1702                 core_counts = 0x0807060504030201;
1703         }
1704
1705         ratio = (msr >> 56) & 0xFF;
1706         group_size = (core_counts >> 56) & 0xFF;
1707         if (ratio)
1708                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1709                         ratio, bclk, ratio * bclk, group_size);
1710
1711         ratio = (msr >> 48) & 0xFF;
1712         group_size = (core_counts >> 48) & 0xFF;
1713         if (ratio)
1714                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1715                         ratio, bclk, ratio * bclk, group_size);
1716
1717         ratio = (msr >> 40) & 0xFF;
1718         group_size = (core_counts >> 40) & 0xFF;
1719         if (ratio)
1720                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1721                         ratio, bclk, ratio * bclk, group_size);
1722
1723         ratio = (msr >> 32) & 0xFF;
1724         group_size = (core_counts >> 32) & 0xFF;
1725         if (ratio)
1726                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1727                         ratio, bclk, ratio * bclk, group_size);
1728
1729         ratio = (msr >> 24) & 0xFF;
1730         group_size = (core_counts >> 24) & 0xFF;
1731         if (ratio)
1732                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1733                         ratio, bclk, ratio * bclk, group_size);
1734
1735         ratio = (msr >> 16) & 0xFF;
1736         group_size = (core_counts >> 16) & 0xFF;
1737         if (ratio)
1738                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1739                         ratio, bclk, ratio * bclk, group_size);
1740
1741         ratio = (msr >> 8) & 0xFF;
1742         group_size = (core_counts >> 8) & 0xFF;
1743         if (ratio)
1744                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1745                         ratio, bclk, ratio * bclk, group_size);
1746
1747         ratio = (msr >> 0) & 0xFF;
1748         group_size = (core_counts >> 0) & 0xFF;
1749         if (ratio)
1750                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1751                         ratio, bclk, ratio * bclk, group_size);
1752         return;
1753 }
1754
1755 static void
1756 dump_atom_turbo_ratio_limits(void)
1757 {
1758         unsigned long long msr;
1759         unsigned int ratio;
1760
1761         get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
1762         fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
1763
1764         ratio = (msr >> 0) & 0x3F;
1765         if (ratio)
1766                 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
1767                         ratio, bclk, ratio * bclk);
1768
1769         ratio = (msr >> 8) & 0x3F;
1770         if (ratio)
1771                 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
1772                         ratio, bclk, ratio * bclk);
1773
1774         ratio = (msr >> 16) & 0x3F;
1775         if (ratio)
1776                 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
1777                         ratio, bclk, ratio * bclk);
1778
1779         get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
1780         fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
1781
1782         ratio = (msr >> 24) & 0x3F;
1783         if (ratio)
1784                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
1785                         ratio, bclk, ratio * bclk);
1786
1787         ratio = (msr >> 16) & 0x3F;
1788         if (ratio)
1789                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
1790                         ratio, bclk, ratio * bclk);
1791
1792         ratio = (msr >> 8) & 0x3F;
1793         if (ratio)
1794                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
1795                         ratio, bclk, ratio * bclk);
1796
1797         ratio = (msr >> 0) & 0x3F;
1798         if (ratio)
1799                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
1800                         ratio, bclk, ratio * bclk);
1801 }
1802
1803 static void
1804 dump_knl_turbo_ratio_limits(void)
1805 {
1806         const unsigned int buckets_no = 7;
1807
1808         unsigned long long msr;
1809         int delta_cores, delta_ratio;
1810         int i, b_nr;
1811         unsigned int cores[buckets_no];
1812         unsigned int ratio[buckets_no];
1813
1814         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
1815
1816         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
1817                 base_cpu, msr);
1818
1819         /**
1820          * Turbo encoding in KNL is as follows:
1821          * [0] -- Reserved
1822          * [7:1] -- Base value of number of active cores of bucket 1.
1823          * [15:8] -- Base value of freq ratio of bucket 1.
1824          * [20:16] -- +ve delta of number of active cores of bucket 2.
1825          * i.e. active cores of bucket 2 =
1826          * active cores of bucket 1 + delta
1827          * [23:21] -- Negative delta of freq ratio of bucket 2.
1828          * i.e. freq ratio of bucket 2 =
1829          * freq ratio of bucket 1 - delta
1830          * [28:24]-- +ve delta of number of active cores of bucket 3.
1831          * [31:29]-- -ve delta of freq ratio of bucket 3.
1832          * [36:32]-- +ve delta of number of active cores of bucket 4.
1833          * [39:37]-- -ve delta of freq ratio of bucket 4.
1834          * [44:40]-- +ve delta of number of active cores of bucket 5.
1835          * [47:45]-- -ve delta of freq ratio of bucket 5.
1836          * [52:48]-- +ve delta of number of active cores of bucket 6.
1837          * [55:53]-- -ve delta of freq ratio of bucket 6.
1838          * [60:56]-- +ve delta of number of active cores of bucket 7.
1839          * [63:61]-- -ve delta of freq ratio of bucket 7.
1840          */
1841
1842         b_nr = 0;
1843         cores[b_nr] = (msr & 0xFF) >> 1;
1844         ratio[b_nr] = (msr >> 8) & 0xFF;
1845
1846         for (i = 16; i < 64; i += 8) {
1847                 delta_cores = (msr >> i) & 0x1F;
1848                 delta_ratio = (msr >> (i + 5)) & 0x7;
1849
1850                 cores[b_nr + 1] = cores[b_nr] + delta_cores;
1851                 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
1852                 b_nr++;
1853         }
1854
1855         for (i = buckets_no - 1; i >= 0; i--)
1856                 if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
1857                         fprintf(outf,
1858                                 "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1859                                 ratio[i], bclk, ratio[i] * bclk, cores[i]);
1860 }
1861
1862 static void
1863 dump_nhm_cst_cfg(void)
1864 {
1865         unsigned long long msr;
1866
1867         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
1868
1869 #define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
1870 #define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
1871
1872         fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
1873
1874         fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
1875                 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
1876                 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
1877                 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
1878                 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
1879                 (msr & (1 << 15)) ? "" : "UN",
1880                 (unsigned int)msr & 0xF,
1881                 pkg_cstate_limit_strings[pkg_cstate_limit]);
1882         return;
1883 }
1884
1885 static void
1886 dump_config_tdp(void)
1887 {
1888         unsigned long long msr;
1889
1890         get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
1891         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
1892         fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
1893
1894         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
1895         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
1896         if (msr) {
1897                 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
1898                 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
1899                 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
1900                 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
1901         }
1902         fprintf(outf, ")\n");
1903
1904         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
1905         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
1906         if (msr) {
1907                 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
1908                 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
1909                 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
1910                 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
1911         }
1912         fprintf(outf, ")\n");
1913
1914         get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
1915         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
1916         if ((msr) & 0x3)
1917                 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
1918         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
1919         fprintf(outf, ")\n");
1920
1921         get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
1922         fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
1923         fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
1924         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
1925         fprintf(outf, ")\n");
1926 }
1927
1928 unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
1929
1930 void print_irtl(void)
1931 {
1932         unsigned long long msr;
1933
1934         get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
1935         fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
1936         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1937                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1938
1939         get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
1940         fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
1941         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1942                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1943
1944         get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
1945         fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
1946         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1947                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1948
1949         if (!do_irtl_hsw)
1950                 return;
1951
1952         get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
1953         fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
1954         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1955                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1956
1957         get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
1958         fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
1959         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1960                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1961
1962         get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
1963         fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
1964         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1965                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1966
1967 }
1968 void free_fd_percpu(void)
1969 {
1970         int i;
1971
1972         for (i = 0; i < topo.max_cpu_num + 1; ++i) {
1973                 if (fd_percpu[i] != 0)
1974                         close(fd_percpu[i]);
1975         }
1976
1977         free(fd_percpu);
1978 }
1979
1980 void free_all_buffers(void)
1981 {
1982         CPU_FREE(cpu_present_set);
1983         cpu_present_set = NULL;
1984         cpu_present_setsize = 0;
1985
1986         CPU_FREE(cpu_affinity_set);
1987         cpu_affinity_set = NULL;
1988         cpu_affinity_setsize = 0;
1989
1990         free(thread_even);
1991         free(core_even);
1992         free(package_even);
1993
1994         thread_even = NULL;
1995         core_even = NULL;
1996         package_even = NULL;
1997
1998         free(thread_odd);
1999         free(core_odd);
2000         free(package_odd);
2001
2002         thread_odd = NULL;
2003         core_odd = NULL;
2004         package_odd = NULL;
2005
2006         free(output_buffer);
2007         output_buffer = NULL;
2008         outp = NULL;
2009
2010         free_fd_percpu();
2011
2012         free(irq_column_2_cpu);
2013         free(irqs_per_cpu);
2014 }
2015
/*
 * fopen_or_die(path, mode)
 * fopen() wrapper that never returns NULL: when the open fails the
 * program terminates through err() with exit status 1.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
2026
2027 /*
2028  * Parse a file containing a single int.
2029  */
2030 int parse_int_file(const char *fmt, ...)
2031 {
2032         va_list args;
2033         char path[PATH_MAX];
2034         FILE *filep;
2035         int value;
2036
2037         va_start(args, fmt);
2038         vsnprintf(path, sizeof(path), fmt, args);
2039         va_end(args);
2040         filep = fopen_or_die(path, "r");
2041         if (fscanf(filep, "%d", &value) != 1)
2042                 err(1, "%s: failed to parse number from file", path);
2043         fclose(filep);
2044         return value;
2045 }
2046
2047 /*
2048  * get_cpu_position_in_core(cpu)
2049  * return the position of the CPU among its HT siblings in the core
2050  * return -1 if the sibling is not in list
2051  */
2052 int get_cpu_position_in_core(int cpu)
2053 {
2054         char path[64];
2055         FILE *filep;
2056         int this_cpu;
2057         char character;
2058         int i;
2059
2060         sprintf(path,
2061                 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
2062                 cpu);
2063         filep = fopen(path, "r");
2064         if (filep == NULL) {
2065                 perror(path);
2066                 exit(1);
2067         }
2068
2069         for (i = 0; i < topo.num_threads_per_core; i++) {
2070                 fscanf(filep, "%d", &this_cpu);
2071                 if (this_cpu == cpu) {
2072                         fclose(filep);
2073                         return i;
2074                 }
2075
2076                 /* Account for no separator after last thread*/
2077                 if (i != (topo.num_threads_per_core - 1))
2078                         fscanf(filep, "%c", &character);
2079         }
2080
2081         fclose(filep);
2082         return -1;
2083 }
2084
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if cpu equals the first number listed in its
 * topology/core_siblings_list file, else 0
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_listed;

	first_listed = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return first_listed == cpu;
}
2093
/*
 * get_physical_package_id(cpu)
 * return the integer stored in the cpu's topology/physical_package_id file
 */
int get_physical_package_id(int cpu)
{
	int package_id;

	package_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return package_id;
}
2098
/*
 * get_core_id(cpu)
 * return the integer stored in the cpu's topology/core_id file
 */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
2103
/*
 * Count the CPUs named by a cpu-list string: comma separated entries,
 * each either a single number ("3") or an inclusive range ("4-7").
 * Parsing stops at the end of the string, at a newline, or at the
 * first character that does not fit the format.
 */
static int count_cpus_in_list(const char *list)
{
	const char *p = list;
	int count = 0;

	while (*p != '\0' && *p != '\n') {
		char *end;
		long first, last;

		first = strtol(p, &end, 10);
		if (end == p)
			break;		/* not a number */
		last = first;
		p = end;

		if (*p == '-') {
			last = strtol(p + 1, &end, 10);
			if (end == p + 1)
				break;	/* dangling '-' */
			p = end;
		}

		if (last >= first)
			count += last - first + 1;

		if (*p == ',')
			p++;
	}

	return count;
}

/*
 * get_num_ht_siblings(cpu)
 * return the number of CPUs listed in the cpu's
 * topology/thread_siblings_list file (the cpu itself included).
 *
 * The list is parsed as a cpu-list, so a lone "0" yields 1 and a range
 * such as "0-3" yields 4.  Counting occurrences of the first separator
 * character mistook the trailing newline of a single-entry list for a
 * separator and could not expand ranges.
 *
 * Exits (status 1) if the file cannot be opened or read.  Never returns
 * less than 1.
 */
int get_num_ht_siblings(int cpu)
{
	char path[80];
	FILE *filep;
	char str[100];
	int siblings;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
	filep = fopen_or_die(path, "r");

	/*
	 * file format:
	 * A ',' separated or '-' separated set of numbers
	 * (eg 1-2 or 1,3,4,5)
	 */
	if (fgets(str, sizeof(str), filep) == NULL)
		err(1, "%s: failed to read sibling list", path);

	fclose(filep);

	siblings = count_cpus_in_list(str);

	/* the cpu is always its own sibling */
	return siblings < 1 ? 1 : siblings;
}
2134
2135 /*
2136  * run func(thread, core, package) in topology order
2137  * skip non-present cpus
2138  */
2139
2140 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
2141         struct pkg_data *, struct thread_data *, struct core_data *,
2142         struct pkg_data *), struct thread_data *thread_base,
2143         struct core_data *core_base, struct pkg_data *pkg_base,
2144         struct thread_data *thread_base2, struct core_data *core_base2,
2145         struct pkg_data *pkg_base2)
2146 {
2147         int retval, pkg_no, core_no, thread_no;
2148
2149         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2150                 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
2151                         for (thread_no = 0; thread_no <
2152                                 topo.num_threads_per_core; ++thread_no) {
2153                                 struct thread_data *t, *t2;
2154                                 struct core_data *c, *c2;
2155                                 struct pkg_data *p, *p2;
2156
2157                                 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
2158
2159                                 if (cpu_is_not_present(t->cpu_id))
2160                                         continue;
2161
2162                                 t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
2163
2164                                 c = GET_CORE(core_base, core_no, pkg_no);
2165                                 c2 = GET_CORE(core_base2, core_no, pkg_no);
2166
2167                                 p = GET_PKG(pkg_base, pkg_no);
2168                                 p2 = GET_PKG(pkg_base2, pkg_no);
2169
2170                                 retval = func(t, c, p, t2, c2, p2);
2171                                 if (retval)
2172                                         return retval;
2173                         }
2174                 }
2175         }
2176         return 0;
2177 }
2178
2179 /*
2180  * run func(cpu) on every cpu in /proc/stat
2181  * return max_cpu number
2182  */
2183 int for_all_proc_cpus(int (func)(int))
2184 {
2185         FILE *fp;
2186         int cpu_num;
2187         int retval;
2188
2189         fp = fopen_or_die(proc_stat, "r");
2190
2191         retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2192         if (retval != 0)
2193                 err(1, "%s: failed to parse format", proc_stat);
2194
2195         while (1) {
2196                 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
2197                 if (retval != 1)
2198                         break;
2199
2200                 retval = func(cpu_num);
2201                 if (retval) {
2202                         fclose(fp);
2203                         return(retval);
2204                 }
2205         }
2206         fclose(fp);
2207         return 0;
2208 }
2209
/*
 * re_initialize()
 * Throw away all buffers, set them up again, and report the resulting
 * topo.num_cpus on stdout.
 */
void re_initialize(void)
{
	free_all_buffers();
	setup_all_buffers();
	printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
}
2216
2217
2218 /*
2219  * count_cpus()
2220  * remember the last one seen, it will be the max
2221  */
2222 int count_cpus(int cpu)
2223 {
2224         if (topo.max_cpu_num < cpu)
2225                 topo.max_cpu_num = cpu;
2226
2227         topo.num_cpus += 1;
2228         return 0;
2229 }
/*
 * mark_cpu_present(cpu)
 * Set the bit for cpu in cpu_present_set.  Always returns 0.
 */
int mark_cpu_present(int cpu)
{
	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
	return 0;
}
2235
2236 /*
2237  * snapshot_proc_interrupts()
2238  *
2239  * read and record summary of /proc/interrupts
2240  *
2241  * return 1 if config change requires a restart, else return 0
2242  */
2243 int snapshot_proc_interrupts(void)
2244 {
2245         static FILE *fp;
2246         int column, retval;
2247
2248         if (fp == NULL)
2249                 fp = fopen_or_die("/proc/interrupts", "r");
2250         else
2251                 rewind(fp);
2252
2253         /* read 1st line of /proc/interrupts to get cpu* name for each column */
2254         for (column = 0; column < topo.num_cpus; ++column) {
2255                 int cpu_number;
2256
2257                 retval = fscanf(fp, " CPU%d", &cpu_number);
2258                 if (retval != 1)
2259                         break;
2260
2261                 if (cpu_number > topo.max_cpu_num) {
2262                         warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
2263                         return 1;
2264                 }
2265
2266                 irq_column_2_cpu[column] = cpu_number;
2267                 irqs_per_cpu[cpu_number] = 0;
2268         }
2269
2270         /* read /proc/interrupt count lines and sum up irqs per cpu */
2271         while (1) {
2272                 int column;
2273                 char buf[64];
2274
2275                 retval = fscanf(fp, " %s:", buf);       /* flush irq# "N:" */
2276                 if (retval != 1)
2277                         break;
2278
2279                 /* read the count per cpu */
2280                 for (column = 0; column < topo.num_cpus; ++column) {
2281
2282                         int cpu_number, irq_count;
2283
2284                         retval = fscanf(fp, " %d", &irq_count);
2285                         if (retval != 1)
2286                                 break;
2287
2288                         cpu_number = irq_column_2_cpu[column];
2289                         irqs_per_cpu[cpu_number] += irq_count;
2290
2291                 }
2292
2293                 while (getc(fp) != '\n')
2294                         ;       /* flush interrupt description */
2295
2296         }
2297         return 0;
2298 }
2299 /*
2300  * snapshot_gfx_rc6_ms()
2301  *
2302  * record snapshot of
2303  * /sys/class/drm/card0/power/rc6_residency_ms
2304  *
2305  * return 1 if config change requires a restart, else return 0
2306  */
2307 int snapshot_gfx_rc6_ms(void)
2308 {
2309         FILE *fp;
2310         int retval;
2311
2312         fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
2313
2314         retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
2315         if (retval != 1)
2316                 err(1, "GFX rc6");
2317
2318         fclose(fp);
2319
2320         return 0;
2321 }
2322 /*
2323  * snapshot_gfx_mhz()
2324  *
2325  * record snapshot of
2326  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
2327  *
2328  * return 1 if config change requires a restart, else return 0
2329  */
2330 int snapshot_gfx_mhz(void)
2331 {
2332         static FILE *fp;
2333         int retval;
2334
2335         if (fp == NULL)
2336                 fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
2337         else
2338                 rewind(fp);
2339
2340         retval = fscanf(fp, "%d", &gfx_cur_mhz);
2341         if (retval != 1)
2342                 err(1, "GFX MHz");
2343
2344         return 0;
2345 }
2346
2347 /*
2348  * snapshot /proc and /sys files
2349  *
2350  * return 1 if configuration restart needed, else return 0
2351  */
2352 int snapshot_proc_sysfs_files(void)
2353 {
2354         if (snapshot_proc_interrupts())
2355                 return 1;
2356
2357         if (DO_BIC(BIC_GFX_rc6))
2358                 snapshot_gfx_rc6_ms();
2359
2360         if (DO_BIC(BIC_GFXMHz))
2361                 snapshot_gfx_mhz();
2362
2363         return 0;
2364 }
2365
/*
 * turbostat_loop()
 * Endless measurement loop.  Counter snapshots are taken alternately
 * with EVEN_COUNTERS and ODD_COUNTERS, one interval_ts sleep apart; after
 * each snapshot the difference against the previous one is computed by
 * delta_cpu, averaged, formatted and flushed.
 *
 * Whenever a step reports a problem, control jumps back to "restart",
 * in most cases after re_initialize().  The function only leaves via
 * exit().
 */
void turbostat_loop()
{
	int retval;
	/* number of consecutive passes through "restart" without a good first snapshot */
	int restarted = 0;

restart:
	restarted++;

	/* initial snapshot */
	snapshot_proc_sysfs_files();
	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
	if (retval < -1) {
		exit(retval);
	} else if (retval == -1) {
		/* give up if the retry straight after a re-initialization fails too */
		if (restarted > 1) {
			exit(retval);
		}
		re_initialize();
		goto restart;
	}
	restarted = 0;
	gettimeofday(&tv_even, (struct timezone *)NULL);

	while (1) {
		/* re-initialize if any cpu listed in /proc/stat is not marked present */
		if (for_all_proc_cpus(cpu_is_not_present)) {
			re_initialize();
			goto restart;
		}
		nanosleep(&interval_ts, NULL);
		/* NOTE: this restart path does not call re_initialize() first */
		if (snapshot_proc_sysfs_files())
			goto restart;
		retval = for_all_cpus(get_counters, ODD_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_odd, (struct timezone *)NULL);
		/* tv_delta = tv_odd - tv_even */
		timersub(&tv_odd, &tv_even, &tv_delta);
		if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
			re_initialize();
			goto restart;
		}
		compute_average(EVEN_COUNTERS);
		format_all_counters(EVEN_COUNTERS);
		flush_output_stdout();
		/* second half of the cycle: same steps with even and odd swapped */
		nanosleep(&interval_ts, NULL);
		if (snapshot_proc_sysfs_files())
			goto restart;
		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_even, (struct timezone *)NULL);
		/* tv_delta = tv_even - tv_odd */
		timersub(&tv_even, &tv_odd, &tv_delta);
		if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
			re_initialize();
			goto restart;
		}
		compute_average(ODD_COUNTERS);
		format_all_counters(ODD_COUNTERS);
		flush_output_stdout();
	}
}
2433
2434 void check_dev_msr()
2435 {
2436         struct stat sb;
2437         char pathname[32];
2438
2439         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2440         if (stat(pathname, &sb))
2441                 if (system("/sbin/modprobe msr > /dev/null 2>&1"))
2442                         err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
2443 }
2444
2445 void check_permissions()
2446 {
2447         struct __user_cap_header_struct cap_header_data;
2448         cap_user_header_t cap_header = &cap_header_data;
2449         struct __user_cap_data_struct cap_data_data;
2450         cap_user_data_t cap_data = &cap_data_data;
2451         extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
2452         int do_exit = 0;
2453         char pathname[32];
2454
2455         /* check for CAP_SYS_RAWIO */
2456         cap_header->pid = getpid();
2457         cap_header->version = _LINUX_CAPABILITY_VERSION;
2458         if (capget(cap_header, cap_data) < 0)
2459                 err(-6, "capget(2) failed");
2460
2461         if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
2462                 do_exit++;
2463                 warnx("capget(CAP_SYS_RAWIO) failed,"
2464                         " try \"# setcap cap_sys_rawio=ep %s\"", progname);
2465         }
2466
2467         /* test file permissions */
2468         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2469         if (euidaccess(pathname, R_OK)) {
2470                 do_exit++;
2471                 warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
2472         }
2473
2474         /* if all else fails, thell them to be root */
2475         if (do_exit)
2476                 if (getuid() != 0)
2477                         warnx("... or simply run as root");
2478
2479         if (do_exit)
2480                 exit(-6);
2481 }
2482
2483 /*
2484  * NHM adds support for additional MSRs:
2485  *
2486  * MSR_SMI_COUNT                   0x00000034
2487  *
2488  * MSR_PLATFORM_INFO               0x000000ce
2489  * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
2490  *
2491  * MSR_MISC_PWR_MGMT               0x000001aa
2492  *
2493  * MSR_PKG_C3_RESIDENCY            0x000003f8
2494  * MSR_PKG_C6_RESIDENCY            0x000003f9
2495  * MSR_CORE_C3_RESIDENCY           0x000003fc
2496  * MSR_CORE_C6_RESIDENCY           0x000003fd
2497  *
2498  * Side effect:
2499  * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
2500  * sets has_misc_feature_control
2501  */
2502 int probe_nhm_msrs(unsigned int family, unsigned int model)
2503 {
2504         unsigned long long msr;
2505         unsigned int base_ratio;
2506         int *pkg_cstate_limits;
2507
2508         if (!genuine_intel)
2509                 return 0;
2510
2511         if (family != 6)
2512                 return 0;
2513
2514         bclk = discover_bclk(family, model);
2515
2516         switch (model) {
2517         case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
2518         case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
2519         case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
2520         case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
2521         case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
2522         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
2523         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
2524                 pkg_cstate_limits = nhm_pkg_cstate_limits;
2525                 break;
2526         case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
2527         case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
2528         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
2529         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
2530                 pkg_cstate_limits = snb_pkg_cstate_limits;
2531                 has_misc_feature_control = 1;
2532                 break;
2533         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
2534         case INTEL_FAM6_HASWELL_X:      /* HSX */
2535         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
2536         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
2537         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
2538         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
2539         case INTEL_FAM6_BROADWELL_X:    /* BDX */
2540         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
2541         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
2542         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
2543         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
2544         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
2545                 pkg_cstate_limits = hsw_pkg_cstate_limits;
2546                 has_misc_feature_control = 1;
2547                 break;
2548         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
2549                 pkg_cstate_limits = skx_pkg_cstate_limits;
2550                 has_misc_feature_control = 1;
2551                 break;
2552         case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
2553                 no_MSR_MISC_PWR_MGMT = 1;
2554         case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
2555                 pkg_cstate_limits = slv_pkg_cstate_limits;
2556                 break;
2557         case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
2558                 pkg_cstate_limits = amt_pkg_cstate_limits;
2559                 no_MSR_MISC_PWR_MGMT = 1;
2560                 break;
2561         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
2562         case INTEL_FAM6_XEON_PHI_KNM:
2563                 pkg_cstate_limits = phi_pkg_cstate_limits;
2564                 break;
2565         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
2566         case INTEL_FAM6_ATOM_GEMINI_LAKE:
2567         case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
2568                 pkg_cstate_limits = bxt_pkg_cstate_limits;
2569                 break;
2570         default:
2571                 return 0;
2572         }
2573         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2574         pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
2575
2576         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2577         base_ratio = (msr >> 8) & 0xFF;
2578
2579         base_hz = base_ratio * bclk * 1000000;
2580         has_base_hz = 1;
2581         return 1;
2582 }
2583 /*
2584  * SLV client has supporet for unique MSRs:
2585  *
2586  * MSR_CC6_DEMOTION_POLICY_CONFIG
2587  * MSR_MC6_DEMOTION_POLICY_CONFIG
2588  */
2589
2590 int has_slv_msrs(unsigned int family, unsigned int model)
2591 {
2592         if (!genuine_intel)
2593                 return 0;
2594
2595         switch (model) {
2596         case INTEL_FAM6_ATOM_SILVERMONT1:
2597         case INTEL_FAM6_ATOM_MERRIFIELD:
2598         case INTEL_FAM6_ATOM_MOOREFIELD:
2599                 return 1;
2600         }
2601         return 0;
2602 }
2603 int is_dnv(unsigned int family, unsigned int model)
2604 {
2605
2606         if (!genuine_intel)
2607                 return 0;
2608
2609         switch (model) {
2610         case INTEL_FAM6_ATOM_DENVERTON:
2611                 return 1;
2612         }
2613         return 0;
2614 }
2615 int is_skx(unsigned int family, unsigned int model)
2616 {
2617
2618         if (!genuine_intel)
2619                 return 0;
2620
2621         switch (model) {
2622         case INTEL_FAM6_SKYLAKE_X:
2623                 return 1;
2624         }
2625         return 0;
2626 }
2627
2628 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
2629 {
2630         if (has_slv_msrs(family, model))
2631                 return 0;
2632
2633         switch (model) {
2634         /* Nehalem compatible, but do not include turbo-ratio limit support */
2635         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
2636         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
2637         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
2638         case INTEL_FAM6_XEON_PHI_KNM:
2639                 return 0;
2640         default:
2641                 return 1;
2642         }
2643 }
/*
 * has_atom_turbo_ratio_limit()
 * Returns 1 exactly when has_slv_msrs() reports SLV MSRs, 0 otherwise.
 */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	return has_slv_msrs(family, model) ? 1 : 0;
}
2651 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
2652 {
2653         if (!genuine_intel)
2654                 return 0;
2655
2656         if (family != 6)
2657                 return 0;
2658
2659         switch (model) {
2660         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
2661         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
2662                 return 1;
2663         default:
2664                 return 0;
2665         }
2666 }
2667 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
2668 {
2669         if (!genuine_intel)
2670                 return 0;
2671
2672         if (family != 6)
2673                 return 0;
2674
2675         switch (model) {
2676         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
2677                 return 1;
2678         default:
2679                 return 0;
2680         }
2681 }
2682
2683 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
2684 {
2685         if (!genuine_intel)
2686                 return 0;
2687
2688         if (family != 6)
2689                 return 0;
2690
2691         switch (model) {
2692         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
2693         case INTEL_FAM6_XEON_PHI_KNM:
2694                 return 1;
2695         default:
2696                 return 0;
2697         }
2698 }
2699 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
2700 {
2701         if (!genuine_intel)
2702                 return 0;
2703
2704         if (family != 6)
2705                 return 0;
2706
2707         switch (model) {
2708         case INTEL_FAM6_ATOM_GOLDMONT:
2709         case INTEL_FAM6_SKYLAKE_X:
2710                 return 1;
2711         default:
2712                 return 0;
2713         }
2714 }
2715 int has_config_tdp(unsigned int family, unsigned int model)
2716 {
2717         if (!genuine_intel)
2718                 return 0;
2719
2720         if (family != 6)
2721                 return 0;
2722
2723         switch (model) {
2724         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
2725         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
2726         case INTEL_FAM6_HASWELL_X:      /* HSX */
2727         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
2728         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
2729         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
2730         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
2731         case INTEL_FAM6_BROADWELL_X:    /* BDX */
2732         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
2733         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
2734         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
2735         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
2736         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
2737         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
2738
2739         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
2740         case INTEL_FAM6_XEON_PHI_KNM:
2741                 return 1;
2742         default:
2743                 return 0;
2744         }
2745 }
2746
2747 static void
2748 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
2749 {
2750         if (!do_nhm_platform_info)
2751                 return;
2752
2753         dump_nhm_platform_info();
2754
2755         if (has_hsw_turbo_ratio_limit(family, model))
2756                 dump_hsw_turbo_ratio_limits();
2757
2758         if (has_ivt_turbo_ratio_limit(family, model))
2759                 dump_ivt_turbo_ratio_limits();
2760
2761         if (has_turbo_ratio_limit(family, model))
2762                 dump_turbo_ratio_limits(family, model);
2763
2764         if (has_atom_turbo_ratio_limit(family, model))
2765                 dump_atom_turbo_ratio_limits();
2766
2767         if (has_knl_turbo_ratio_limit(family, model))
2768                 dump_knl_turbo_ratio_limits();
2769
2770         if (has_config_tdp(family, model))
2771                 dump_config_tdp();
2772
2773         dump_nhm_cst_cfg();
2774 }
2775
2776
2777 /*
2778  * print_epb()
2779  * Decode the ENERGY_PERF_BIAS MSR
2780  */
2781 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2782 {
2783         unsigned long long msr;
2784         char *epb_string;
2785         int cpu;
2786
2787         if (!has_epb)
2788                 return 0;
2789
2790         cpu = t->cpu_id;
2791
2792         /* EPB is per-package */
2793         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2794                 return 0;
2795
2796         if (cpu_migrate(cpu)) {
2797                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2798                 return -1;
2799         }
2800
2801         if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
2802                 return 0;
2803
2804         switch (msr & 0xF) {
2805         case ENERGY_PERF_BIAS_PERFORMANCE:
2806                 epb_string = "performance";
2807                 break;
2808         case ENERGY_PERF_BIAS_NORMAL:
2809                 epb_string = "balanced";
2810                 break;
2811         case ENERGY_PERF_BIAS_POWERSAVE:
2812                 epb_string = "powersave";
2813                 break;
2814         default:
2815                 epb_string = "custom";
2816                 break;
2817         }
2818         fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
2819
2820         return 0;
2821 }
2822 /*
2823  * print_hwp()
2824  * Decode the MSR_HWP_CAPABILITIES
2825  */
2826 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2827 {
2828         unsigned long long msr;
2829         int cpu;
2830
2831         if (!has_hwp)
2832                 return 0;
2833
2834         cpu = t->cpu_id;
2835
2836         /* MSR_HWP_CAPABILITIES is per-package */
2837         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2838                 return 0;
2839
2840         if (cpu_migrate(cpu)) {
2841                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2842                 return -1;
2843         }
2844
2845         if (get_msr(cpu, MSR_PM_ENABLE, &msr))
2846                 return 0;
2847
2848         fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
2849                 cpu, msr, (msr & (1 << 0)) ? "" : "No-");
2850
2851         /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
2852         if ((msr & (1 << 0)) == 0)
2853                 return 0;
2854
2855         if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
2856                 return 0;
2857
2858         fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
2859                         "(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n",
2860                         cpu, msr,
2861                         (unsigned int)HWP_HIGHEST_PERF(msr),
2862                         (unsigned int)HWP_GUARANTEED_PERF(msr),
2863                         (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
2864                         (unsigned int)HWP_LOWEST_PERF(msr));
2865
2866         if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
2867                 return 0;
2868
2869         fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
2870                         "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n",
2871                         cpu, msr,
2872                         (unsigned int)(((msr) >> 0) & 0xff),
2873                         (unsigned int)(((msr) >> 8) & 0xff),
2874                         (unsigned int)(((msr) >> 16) & 0xff),
2875                         (unsigned int)(((msr) >> 24) & 0xff),
2876                         (unsigned int)(((msr) >> 32) & 0xff3),
2877                         (unsigned int)(((msr) >> 42) & 0x1));
2878
2879         if (has_hwp_pkg) {
2880                 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
2881                         return 0;
2882
2883                 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
2884                         "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n",
2885                         cpu, msr,
2886                         (unsigned int)(((msr) >> 0) & 0xff),
2887                         (unsigned int)(((msr) >> 8) & 0xff),
2888                         (unsigned int)(((msr) >> 16) & 0xff),
2889                         (unsigned int)(((msr) >> 24) & 0xff),
2890                         (unsigned int)(((msr) >> 32) & 0xff3));
2891         }
2892         if (has_hwp_notify) {
2893                 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
2894                         return 0;
2895
2896                 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
2897                         "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
2898                         cpu, msr,
2899                         ((msr) & 0x1) ? "EN" : "Dis",
2900                         ((msr) & 0x2) ? "EN" : "Dis");
2901         }
2902         if (get_msr(cpu, MSR_HWP_STATUS, &msr))
2903                 return 0;
2904
2905         fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
2906                         "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
2907                         cpu, msr,
2908                         ((msr) & 0x1) ? "" : "No-",
2909                         ((msr) & 0x2) ? "" : "No-");
2910
2911         return 0;
2912 }
2913
2914 /*
2915  * print_perf_limit()
2916  */
2917 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2918 {
2919         unsigned long long msr;
2920         int cpu;
2921
2922         cpu = t->cpu_id;
2923
2924         /* per-package */
2925         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2926                 return 0;
2927
2928         if (cpu_migrate(cpu)) {
2929                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2930                 return -1;
2931         }
2932
2933         if (do_core_perf_limit_reasons) {
2934                 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
2935                 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2936                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
2937                         (msr & 1 << 15) ? "bit15, " : "",
2938                         (msr & 1 << 14) ? "bit14, " : "",
2939                         (msr & 1 << 13) ? "Transitions, " : "",
2940                         (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
2941                         (msr & 1 << 11) ? "PkgPwrL2, " : "",
2942                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
2943                         (msr & 1 << 9) ? "CorePwr, " : "",
2944                         (msr & 1 << 8) ? "Amps, " : "",
2945                         (msr & 1 << 6) ? "VR-Therm, " : "",
2946                         (msr & 1 << 5) ? "Auto-HWP, " : "",
2947                         (msr & 1 << 4) ? "Graphics, " : "",
2948                         (msr & 1 << 2) ? "bit2, " : "",
2949                         (msr & 1 << 1) ? "ThermStatus, " : "",
2950                         (msr & 1 << 0) ? "PROCHOT, " : "");
2951                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
2952                         (msr & 1 << 31) ? "bit31, " : "",
2953                         (msr & 1 << 30) ? "bit30, " : "",
2954                         (msr & 1 << 29) ? "Transitions, " : "",
2955                         (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
2956                         (msr & 1 << 27) ? "PkgPwrL2, " : "",
2957                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
2958                         (msr & 1 << 25) ? "CorePwr, " : "",
2959                         (msr & 1 << 24) ? "Amps, " : "",
2960                         (msr & 1 << 22) ? "VR-Therm, " : "",
2961                         (msr & 1 << 21) ? "Auto-HWP, " : "",
2962                         (msr & 1 << 20) ? "Graphics, " : "",
2963                         (msr & 1 << 18) ? "bit18, " : "",
2964                         (msr & 1 << 17) ? "ThermStatus, " : "",
2965                         (msr & 1 << 16) ? "PROCHOT, " : "");
2966
2967         }
2968         if (do_gfx_perf_limit_reasons) {
2969                 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
2970                 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2971                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
2972                         (msr & 1 << 0) ? "PROCHOT, " : "",
2973                         (msr & 1 << 1) ? "ThermStatus, " : "",
2974                         (msr & 1 << 4) ? "Graphics, " : "",
2975                         (msr & 1 << 6) ? "VR-Therm, " : "",
2976                         (msr & 1 << 8) ? "Amps, " : "",
2977                         (msr & 1 << 9) ? "GFXPwr, " : "",
2978                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
2979                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
2980                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
2981                         (msr & 1 << 16) ? "PROCHOT, " : "",
2982                         (msr & 1 << 17) ? "ThermStatus, " : "",
2983                         (msr & 1 << 20) ? "Graphics, " : "",
2984                         (msr & 1 << 22) ? "VR-Therm, " : "",
2985                         (msr & 1 << 24) ? "Amps, " : "",
2986                         (msr & 1 << 25) ? "GFXPwr, " : "",
2987                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
2988                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
2989         }
2990         if (do_ring_perf_limit_reasons) {
2991                 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
2992                 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2993                 fprintf(outf, " (Active: %s%s%s%s%s%s)",
2994                         (msr & 1 << 0) ? "PROCHOT, " : "",
2995                         (msr & 1 << 1) ? "ThermStatus, " : "",
2996                         (msr & 1 << 6) ? "VR-Therm, " : "",
2997                         (msr & 1 << 8) ? "Amps, " : "",
2998                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
2999                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
3000                 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
3001                         (msr & 1 << 16) ? "PROCHOT, " : "",
3002                         (msr & 1 << 17) ? "ThermStatus, " : "",
3003                         (msr & 1 << 22) ? "VR-Therm, " : "",
3004                         (msr & 1 << 24) ? "Amps, " : "",
3005                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3006                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
3007         }
3008         return 0;
3009 }
3010
3011 #define RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
3012 #define RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
3013
3014 double get_tdp(unsigned int model)
3015 {
3016         unsigned long long msr;
3017
3018         if (do_rapl & RAPL_PKG_POWER_INFO)
3019                 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
3020                         return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
3021
3022         switch (model) {
3023         case INTEL_FAM6_ATOM_SILVERMONT1:
3024         case INTEL_FAM6_ATOM_SILVERMONT2:
3025                 return 30.0;
3026         default:
3027                 return 135.0;
3028         }
3029 }
3030
3031 /*
3032  * rapl_dram_energy_units_probe()
3033  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
3034  */
3035 static double
3036 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
3037 {
3038         /* only called for genuine_intel, family 6 */
3039
3040         switch (model) {
3041         case INTEL_FAM6_HASWELL_X:      /* HSX */
3042         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3043         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3044         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3045         case INTEL_FAM6_XEON_PHI_KNM:
3046                 return (rapl_dram_energy_units = 15.3 / 1000000);
3047         default:
3048                 return (rapl_energy_units);
3049         }
3050 }
3051
3052
3053 /*
3054  * rapl_probe()
3055  *
3056  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
3057  */
3058 void rapl_probe(unsigned int family, unsigned int model)
3059 {
3060         unsigned long long msr;
3061         unsigned int time_unit;
3062         double tdp;
3063
3064         if (!genuine_intel)
3065                 return;
3066
3067         if (family != 6)
3068                 return;
3069
3070         switch (model) {
3071         case INTEL_FAM6_SANDYBRIDGE:
3072         case INTEL_FAM6_IVYBRIDGE:
3073         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3074         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3075         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3076         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3077         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3078                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
3079                 if (rapl_joules) {
3080                         BIC_PRESENT(BIC_Pkg_J);
3081                         BIC_PRESENT(BIC_Cor_J);
3082                         BIC_PRESENT(BIC_GFX_J);
3083                 } else {
3084                         BIC_PRESENT(BIC_PkgWatt);
3085                         BIC_PRESENT(BIC_CorWatt);
3086                         BIC_PRESENT(BIC_GFXWatt);
3087                 }
3088                 break;
3089         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3090         case INTEL_FAM6_ATOM_GEMINI_LAKE:
3091                 do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3092                 if (rapl_joules)
3093                         BIC_PRESENT(BIC_Pkg_J);
3094                 else
3095                         BIC_PRESENT(BIC_PkgWatt);
3096                 break;
3097         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3098         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3099         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3100         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3101                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
3102                 BIC_PRESENT(BIC_PKG__);
3103                 BIC_PRESENT(BIC_RAM__);
3104                 if (rapl_joules) {
3105                         BIC_PRESENT(BIC_Pkg_J);
3106                         BIC_PRESENT(BIC_Cor_J);
3107                         BIC_PRESENT(BIC_RAM_J);
3108                 } else {
3109                         BIC_PRESENT(BIC_PkgWatt);
3110                         BIC_PRESENT(BIC_CorWatt);
3111                         BIC_PRESENT(BIC_RAMWatt);
3112                 }
3113                 break;
3114         case INTEL_FAM6_HASWELL_X:      /* HSX */
3115         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3116         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3117         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3118         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3119         case INTEL_FAM6_XEON_PHI_KNM:
3120                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
3121                 BIC_PRESENT(BIC_PKG__);
3122                 BIC_PRESENT(BIC_RAM__);
3123                 if (rapl_joules) {
3124                         BIC_PRESENT(BIC_Pkg_J);
3125                         BIC_PRESENT(BIC_RAM_J);
3126                 } else {
3127                         BIC_PRESENT(BIC_PkgWatt);
3128                         BIC_PRESENT(BIC_RAMWatt);
3129                 }
3130                 break;
3131         case INTEL_FAM6_SANDYBRIDGE_X:
3132         case INTEL_FAM6_IVYBRIDGE_X:
3133                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
3134                 BIC_PRESENT(BIC_PKG__);
3135                 BIC_PRESENT(BIC_RAM__);
3136                 if (rapl_joules) {
3137                         BIC_PRESENT(BIC_Pkg_J);
3138                         BIC_PRESENT(BIC_Cor_J);
3139                         BIC_PRESENT(BIC_RAM_J);
3140                 } else {
3141                         BIC_PRESENT(BIC_PkgWatt);
3142                         BIC_PRESENT(BIC_CorWatt);
3143                         BIC_PRESENT(BIC_RAMWatt);
3144                 }
3145                 break;
3146         case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
3147         case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
3148                 do_rapl = RAPL_PKG | RAPL_CORES;
3149                 if (rapl_joules) {
3150                         BIC_PRESENT(BIC_Pkg_J);
3151                         BIC_PRESENT(BIC_Cor_J);
3152                 } else {
3153                         BIC_PRESENT(BIC_PkgWatt);
3154                         BIC_PRESENT(BIC_CorWatt);
3155                 }
3156                 break;
3157         case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3158                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
3159                 BIC_PRESENT(BIC_PKG__);
3160                 BIC_PRESENT(BIC_RAM__);
3161                 if (rapl_joules) {
3162                         BIC_PRESENT(BIC_Pkg_J);
3163                         BIC_PRESENT(BIC_Cor_J);
3164                         BIC_PRESENT(BIC_RAM_J);
3165                 } else {
3166                         BIC_PRESENT(BIC_PkgWatt);
3167                         BIC_PRESENT(BIC_CorWatt);
3168                         BIC_PRESENT(BIC_RAMWatt);
3169                 }
3170                 break;
3171         default:
3172                 return;
3173         }
3174
3175         /* units on package 0, verify later other packages match */
3176         if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
3177                 return;
3178
3179         rapl_power_units = 1.0 / (1 << (msr & 0xF));
3180         if (model == INTEL_FAM6_ATOM_SILVERMONT1)
3181                 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
3182         else
3183                 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
3184
3185         rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
3186
3187         time_unit = msr >> 16 & 0xF;
3188         if (time_unit == 0)
3189                 time_unit = 0xA;
3190
3191         rapl_time_units = 1.0 / (1 << (time_unit));
3192
3193         tdp = get_tdp(model);
3194
3195         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
3196         if (!quiet)
3197                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
3198
3199         return;
3200 }
3201
3202 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
3203 {
3204         if (!genuine_intel)
3205                 return;
3206
3207         if (family != 6)
3208                 return;
3209
3210         switch (model) {
3211         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3212         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3213         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3214                 do_gfx_perf_limit_reasons = 1;
3215         case INTEL_FAM6_HASWELL_X:      /* HSX */
3216                 do_core_perf_limit_reasons = 1;
3217                 do_ring_perf_limit_reasons = 1;
3218         default:
3219                 return;
3220         }
3221 }
3222
3223 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3224 {
3225         unsigned long long msr;
3226         unsigned int dts;
3227         int cpu;
3228
3229         if (!(do_dts || do_ptm))
3230                 return 0;
3231
3232         cpu = t->cpu_id;
3233
3234         /* DTS is per-core, no need to print for each thread */
3235         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
3236                 return 0;
3237
3238         if (cpu_migrate(cpu)) {
3239                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3240                 return -1;
3241         }
3242
3243         if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
3244                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
3245                         return 0;
3246
3247                 dts = (msr >> 16) & 0x7F;
3248                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
3249                         cpu, msr, tcc_activation_temp - dts);
3250
3251 #ifdef  THERM_DEBUG
3252                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
3253                         return 0;
3254
3255                 dts = (msr >> 16) & 0x7F;
3256                 dts2 = (msr >> 8) & 0x7F;
3257                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3258                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3259 #endif
3260         }
3261
3262
3263         if (do_dts) {
3264                 unsigned int resolution;
3265
3266                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
3267                         return 0;
3268
3269                 dts = (msr >> 16) & 0x7F;
3270                 resolution = (msr >> 27) & 0xF;
3271                 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
3272                         cpu, msr, tcc_activation_temp - dts, resolution);
3273
3274 #ifdef THERM_DEBUG
3275                 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
3276                         return 0;
3277
3278                 dts = (msr >> 16) & 0x7F;
3279                 dts2 = (msr >> 8) & 0x7F;
3280                 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3281                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3282 #endif
3283         }
3284
3285         return 0;
3286 }
3287
3288 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
3289 {
3290         fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
3291                 cpu, label,
3292                 ((msr >> 15) & 1) ? "EN" : "DIS",
3293                 ((msr >> 0) & 0x7FFF) * rapl_power_units,
3294                 (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
3295                 (((msr >> 16) & 1) ? "EN" : "DIS"));
3296
3297         return;
3298 }
3299
3300 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3301 {
3302         unsigned long long msr;
3303         int cpu;
3304
3305         if (!do_rapl)
3306                 return 0;
3307
3308         /* RAPL counters are per package, so print only for 1st thread/package */
3309         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3310                 return 0;
3311
3312         cpu = t->cpu_id;
3313         if (cpu_migrate(cpu)) {
3314                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3315                 return -1;
3316         }
3317
3318         if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
3319                 return -1;
3320
3321         fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr,
3322                 rapl_power_units, rapl_energy_units, rapl_time_units);
3323
3324         if (do_rapl & RAPL_PKG_POWER_INFO) {
3325
3326                 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
3327                         return -5;
3328
3329
3330                 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
3331                         cpu, msr,
3332                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3333                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3334                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3335                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
3336
3337         }
3338         if (do_rapl & RAPL_PKG) {
3339
3340                 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
3341                         return -9;
3342
3343                 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
3344                         cpu, msr, (msr >> 63) & 1 ? "" : "UN");
3345
3346                 print_power_limit_msr(cpu, msr, "PKG Limit #1");
3347                 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
3348                         cpu,
3349                         ((msr >> 47) & 1) ? "EN" : "DIS",
3350                         ((msr >> 32) & 0x7FFF) * rapl_power_units,
3351                         (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
3352                         ((msr >> 48) & 1) ? "EN" : "DIS");
3353         }
3354
3355         if (do_rapl & RAPL_DRAM_POWER_INFO) {
3356                 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
3357                         return -6;
3358
3359                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
3360                         cpu, msr,
3361                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3362                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3363                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3364                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
3365         }
3366         if (do_rapl & RAPL_DRAM) {
3367                 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
3368                         return -9;
3369                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
3370                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
3371
3372                 print_power_limit_msr(cpu, msr, "DRAM Limit");
3373         }
3374         if (do_rapl & RAPL_CORE_POLICY) {
3375                 if (get_msr(cpu, MSR_PP0_POLICY, &msr))
3376                         return -7;
3377
3378                 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
3379         }
3380         if (do_rapl & RAPL_CORES_POWER_LIMIT) {
3381                 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
3382                         return -9;
3383                 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
3384                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
3385                 print_power_limit_msr(cpu, msr, "Cores Limit");
3386         }
3387         if (do_rapl & RAPL_GFX) {
3388                 if (get_msr(cpu, MSR_PP1_POLICY, &msr))
3389                         return -8;
3390
3391                 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
3392
3393                 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
3394                         return -9;
3395                 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
3396                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
3397                 print_power_limit_msr(cpu, msr, "GFX Limit");
3398         }
3399         return 0;
3400 }
3401
3402 /*
3403  * SNB adds support for additional MSRs:
3404  *
3405  * MSR_PKG_C7_RESIDENCY            0x000003fa
3406  * MSR_CORE_C7_RESIDENCY           0x000003fe
3407  * MSR_PKG_C2_RESIDENCY            0x0000060d
3408  */
3409
3410 int has_snb_msrs(unsigned int family, unsigned int model)
3411 {
3412         if (!genuine_intel)
3413                 return 0;
3414
3415         switch (model) {
3416         case INTEL_FAM6_SANDYBRIDGE:
3417         case INTEL_FAM6_SANDYBRIDGE_X:
3418         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3419         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3420         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3421         case INTEL_FAM6_HASWELL_X:      /* HSW */
3422         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3423         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3424         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3425         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3426         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3427         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3428         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3429         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3430         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3431         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3432         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3433         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3434         case INTEL_FAM6_ATOM_GEMINI_LAKE:
3435         case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3436                 return 1;
3437         }
3438         return 0;
3439 }
3440
3441 /*
3442  * HSW adds support for additional MSRs:
3443  *
3444  * MSR_PKG_C8_RESIDENCY         0x00000630
3445  * MSR_PKG_C9_RESIDENCY         0x00000631
3446  * MSR_PKG_C10_RESIDENCY        0x00000632
3447  *
3448  * MSR_PKGC8_IRTL               0x00000633
3449  * MSR_PKGC9_IRTL               0x00000634
3450  * MSR_PKGC10_IRTL              0x00000635
3451  *
3452  */
3453 int has_hsw_msrs(unsigned int family, unsigned int model)
3454 {
3455         if (!genuine_intel)
3456                 return 0;
3457
3458         switch (model) {
3459         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3460         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3461         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3462         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3463         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3464         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3465         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3466         case INTEL_FAM6_ATOM_GEMINI_LAKE:
3467                 return 1;
3468         }
3469         return 0;
3470 }
3471
3472 /*
3473  * SKL adds support for additional MSRS:
3474  *
3475  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
3476  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
3477  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
3478  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
3479  */
3480 int has_skl_msrs(unsigned int family, unsigned int model)
3481 {
3482         if (!genuine_intel)
3483                 return 0;
3484
3485         switch (model) {
3486         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3487         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3488         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3489         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3490                 return 1;
3491         }
3492         return 0;
3493 }
3494
3495 int is_slm(unsigned int family, unsigned int model)
3496 {
3497         if (!genuine_intel)
3498                 return 0;
3499         switch (model) {
3500         case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
3501         case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
3502                 return 1;
3503         }
3504         return 0;
3505 }
3506
3507 int is_knl(unsigned int family, unsigned int model)
3508 {
3509         if (!genuine_intel)
3510                 return 0;
3511         switch (model) {
3512         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3513         case INTEL_FAM6_XEON_PHI_KNM:
3514                 return 1;
3515         }
3516         return 0;
3517 }
3518
/*
 * Returns the APERF/MPERF multiplier for this CPU: 1024 when is_knl()
 * reports a Xeon Phi, 1 for everything else.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}
3525
3526 #define SLM_BCLK_FREQS 5
3527 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
3528
3529 double slm_bclk(void)
3530 {
3531         unsigned long long msr = 3;
3532         unsigned int i;
3533         double freq;
3534
3535         if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
3536                 fprintf(outf, "SLM BCLK: unknown\n");
3537
3538         i = msr & 0xf;
3539         if (i >= SLM_BCLK_FREQS) {
3540                 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
3541                 i = 3;
3542         }
3543         freq = slm_freq_table[i];
3544
3545         if (!quiet)
3546                 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
3547
3548         return freq;
3549 }
3550
/*
 * Returns the bus clock for this CPU: 100.00 when has_snb_msrs() or
 * is_knl() matches, the value from slm_bclk() when is_slm() matches,
 * and 133.33 for everything else.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model))
		return 100.00;

	if (is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
3560
3561 /*
3562  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
3563  * the Thermal Control Circuit (TCC) activates.
3564  * This is usually equal to tjMax.
3565  *
3566  * Older processors do not have this MSR, so there we guess,
3567  * but also allow cmdline over-ride with -T.
3568  *
3569  * Several MSR temperature values are in units of degrees-C
3570  * below this value, including the Digital Thermal Sensor (DTS),
3571  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
3572  */
3573 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3574 {
3575         unsigned long long msr;
3576         unsigned int target_c_local;
3577         int cpu;
3578
3579         /* tcc_activation_temp is used only for dts or ptm */
3580         if (!(do_dts || do_ptm))
3581                 return 0;
3582
3583         /* this is a per-package concept */
3584         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3585                 return 0;
3586
3587         cpu = t->cpu_id;
3588         if (cpu_migrate(cpu)) {
3589                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3590                 return -1;
3591         }
3592
3593         if (tcc_activation_temp_override != 0) {
3594                 tcc_activation_temp = tcc_activation_temp_override;
3595                 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
3596                         cpu, tcc_activation_temp);
3597                 return 0;
3598         }
3599
3600         /* Temperature Target MSR is Nehalem and newer only */
3601         if (!do_nhm_platform_info)
3602                 goto guess;
3603
3604         if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
3605                 goto guess;
3606
3607         target_c_local = (msr >> 16) & 0xFF;
3608
3609         if (!quiet)
3610                 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
3611                         cpu, msr, target_c_local);
3612
3613         if (!target_c_local)
3614                 goto guess;
3615
3616         tcc_activation_temp = target_c_local;
3617
3618         return 0;
3619
3620 guess:
3621         tcc_activation_temp = TJMAX_DEFAULT;
3622         fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
3623                 cpu, tcc_activation_temp);
3624
3625         return 0;
3626 }
3627
3628 void decode_feature_control_msr(void)
3629 {
3630         unsigned long long msr;
3631
3632         if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
3633                 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
3634                         base_cpu, msr,
3635                         msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
3636                         msr & (1 << 18) ? "SGX" : "");
3637 }
3638
3639 void decode_misc_enable_msr(void)
3640 {
3641         unsigned long long msr;
3642
3643         if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
3644                 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
3645                         base_cpu, msr,
3646                         msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
3647                         msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
3648                         msr & MSR_IA32_MISC_ENABLE_MWAIT ? "No-" : "",
3649                         msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
3650                         msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
3651 }
3652
3653 void decode_misc_feature_control(void)
3654 {
3655         unsigned long long msr;
3656
3657         if (!has_misc_feature_control)
3658                 return;
3659
3660         if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
3661                 fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
3662                         base_cpu, msr,
3663                         msr & (0 << 0) ? "No-" : "",
3664                         msr & (1 << 0) ? "No-" : "",
3665                         msr & (2 << 0) ? "No-" : "",
3666                         msr & (3 << 0) ? "No-" : "");
3667 }
3668 /*
3669  * Decode MSR_MISC_PWR_MGMT
3670  *
3671  * Decode the bits according to the Nehalem documentation
3672  * bit[0] seems to continue to have same meaning going forward
3673  * bit[1] less so...
3674  */
3675 void decode_misc_pwr_mgmt_msr(void)
3676 {
3677         unsigned long long msr;
3678
3679         if (!do_nhm_platform_info)
3680                 return;
3681
3682         if (no_MSR_MISC_PWR_MGMT)
3683                 return;
3684
3685         if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
3686                 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
3687                         base_cpu, msr,
3688                         msr & (1 << 0) ? "DIS" : "EN",
3689                         msr & (1 << 1) ? "EN" : "DIS",
3690                         msr & (1 << 8) ? "EN" : "DIS");
3691 }
3692 /*
3693  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
3694  *
3695  * This MSRs are present on Silvermont processors,
3696  * Intel Atom processor E3000 series (Baytrail), and friends.
3697  */
3698 void decode_c6_demotion_policy_msr(void)
3699 {
3700         unsigned long long msr;
3701
3702         if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
3703                 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
3704                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
3705
3706         if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
3707                 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
3708                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
3709 }
3710
3711 void process_cpuid()
3712 {
3713         unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
3714         unsigned int fms, family, model, stepping;
3715         unsigned int has_turbo;
3716
3717         eax = ebx = ecx = edx = 0;
3718
3719         __cpuid(0, max_level, ebx, ecx, edx);
3720
3721         if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
3722                 genuine_intel = 1;
3723
3724         if (!quiet)
3725                 fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
3726                         (char *)&ebx, (char *)&edx, (char *)&ecx);
3727
3728         __cpuid(1, fms, ebx, ecx, edx);
3729         family = (fms >> 8) & 0xf;
3730         model = (fms >> 4) & 0xf;
3731         stepping = fms & 0xf;
3732         if (family == 6 || family == 0xf)
3733                 model += ((fms >> 16) & 0xf) << 4;
3734
3735         if (!quiet) {
3736                 fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
3737                         max_level, family, model, stepping, family, model, stepping);
3738                 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
3739                         ecx & (1 << 0) ? "SSE3" : "-",
3740                         ecx & (1 << 3) ? "MONITOR" : "-",
3741                         ecx & (1 << 6) ? "SMX" : "-",
3742                         ecx & (1 << 7) ? "EIST" : "-",
3743                         ecx & (1 << 8) ? "TM2" : "-",
3744                         edx & (1 << 4) ? "TSC" : "-",
3745                         edx & (1 << 5) ? "MSR" : "-",
3746                         edx & (1 << 22) ? "ACPI-TM" : "-",
3747                         edx & (1 << 29) ? "TM" : "-");
3748         }
3749
3750         if (!(edx & (1 << 5)))
3751                 errx(1, "CPUID: no MSR");
3752
3753         /*
3754          * check max extended function levels of CPUID.
3755          * This is needed to check for invariant TSC.
3756          * This check is valid for both Intel and AMD.
3757          */
3758         ebx = ecx = edx = 0;
3759         __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
3760
3761         if (max_extended_level >= 0x80000007) {
3762
3763                 /*
3764                  * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
3765                  * this check is valid for both Intel and AMD
3766                  */
3767                 __cpuid(0x80000007, eax, ebx, ecx, edx);
3768                 has_invariant_tsc = edx & (1 << 8);
3769         }
3770
3771         /*
3772          * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
3773          * this check is valid for both Intel and AMD
3774          */
3775
3776         __cpuid(0x6, eax, ebx, ecx, edx);
3777         has_aperf = ecx & (1 << 0);
3778         if (has_aperf) {
3779                 BIC_PRESENT(BIC_Avg_MHz);
3780                 BIC_PRESENT(BIC_Busy);
3781                 BIC_PRESENT(BIC_Bzy_MHz);
3782         }
3783         do_dts = eax & (1 << 0);
3784         if (do_dts)
3785                 BIC_PRESENT(BIC_CoreTmp);
3786         has_turbo = eax & (1 << 1);
3787         do_ptm = eax & (1 << 6);
3788         if (do_ptm)
3789                 BIC_PRESENT(BIC_PkgTmp);
3790         has_hwp = eax & (1 << 7);
3791         has_hwp_notify = eax & (1 << 8);
3792         has_hwp_activity_window = eax & (1 << 9);
3793         has_hwp_epp = eax & (1 << 10);
3794         has_hwp_pkg = eax & (1 << 11);
3795         has_epb = ecx & (1 << 3);
3796
3797         if (!quiet)
3798                 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
3799                         "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
3800                         has_aperf ? "" : "No-",
3801                         has_turbo ? "" : "No-",
3802                         do_dts ? "" : "No-",
3803                         do_ptm ? "" : "No-",
3804                         has_hwp ? "" : "No-",
3805                         has_hwp_notify ? "" : "No-",
3806                         has_hwp_activity_window ? "" : "No-",
3807                         has_hwp_epp ? "" : "No-",
3808                         has_hwp_pkg ? "" : "No-",
3809                         has_epb ? "" : "No-");
3810
3811         if (!quiet)
3812                 decode_misc_enable_msr();
3813
3814
3815         if (max_level >= 0x7 && !quiet) {
3816                 int has_sgx;
3817
3818                 ecx = 0;
3819
3820                 __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
3821
3822                 has_sgx = ebx & (1 << 2);
3823                 fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
3824
3825                 if (has_sgx)
3826                         decode_feature_control_msr();
3827         }
3828
3829         if (max_level >= 0x15) {
3830                 unsigned int eax_crystal;
3831                 unsigned int ebx_tsc;
3832
3833                 /*
3834                  * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
3835                  */
3836                 eax_crystal = ebx_tsc = crystal_hz = edx = 0;
3837                 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
3838
3839                 if (ebx_tsc != 0) {
3840
3841                         if (!quiet && (ebx != 0))
3842                                 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
3843                                         eax_crystal, ebx_tsc, crystal_hz);
3844
3845                         if (crystal_hz == 0)
3846                                 switch(model) {
3847                                 case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3848                                 case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3849                                 case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3850                                 case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3851                                         crystal_hz = 24000000;  /* 24.0 MHz */
3852                                         break;
3853                                 case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3854                                 case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3855                                         crystal_hz = 25000000;  /* 25.0 MHz */
3856                                         break;
3857                                 case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3858                                 case INTEL_FAM6_ATOM_GEMINI_LAKE:
3859                                         crystal_hz = 19200000;  /* 19.2 MHz */
3860                                         break;
3861                                 default:
3862                                         crystal_hz = 0;
3863                         }
3864
3865                         if (crystal_hz) {
3866                                 tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
3867                                 if (!quiet)
3868                                         fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
3869                                                 tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
3870                         }
3871                 }
3872         }
3873         if (max_level >= 0x16) {
3874                 unsigned int base_mhz, max_mhz, bus_mhz, edx;
3875
3876                 /*
3877                  * CPUID 16H Base MHz, Max MHz, Bus MHz
3878                  */
3879                 base_mhz = max_mhz = bus_mhz = edx = 0;
3880
3881                 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
3882                 if (!quiet)
3883                         fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
3884                                 base_mhz, max_mhz, bus_mhz);
3885         }
3886
3887         if (has_aperf)
3888                 aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
3889
3890         BIC_PRESENT(BIC_IRQ);
3891         BIC_PRESENT(BIC_TSC_MHz);
3892
3893         if (probe_nhm_msrs(family, model)) {
3894                 do_nhm_platform_info = 1;
3895                 BIC_PRESENT(BIC_CPU_c1);
3896                 BIC_PRESENT(BIC_CPU_c3);
3897                 BIC_PRESENT(BIC_CPU_c6);
3898                 BIC_PRESENT(BIC_SMI);
3899         }
3900         do_snb_cstates = has_snb_msrs(family, model);
3901
3902         if (do_snb_cstates)
3903                 BIC_PRESENT(BIC_CPU_c7);
3904
3905         do_irtl_snb = has_snb_msrs(family, model);
3906         if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
3907                 BIC_PRESENT(BIC_Pkgpc2);
3908         if (pkg_cstate_limit >= PCL__3)
3909                 BIC_PRESENT(BIC_Pkgpc3);
3910         if (pkg_cstate_limit >= PCL__6)
3911                 BIC_PRESENT(BIC_Pkgpc6);
3912         if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
3913                 BIC_PRESENT(BIC_Pkgpc7);
3914         if (has_slv_msrs(family, model)) {
3915                 BIC_NOT_PRESENT(BIC_Pkgpc2);
3916                 BIC_NOT_PRESENT(BIC_Pkgpc3);
3917                 BIC_PRESENT(BIC_Pkgpc6);
3918                 BIC_NOT_PRESENT(BIC_Pkgpc7);
3919                 BIC_PRESENT(BIC_Mod_c6);
3920                 use_c1_residency_msr = 1;
3921         }
3922         if (is_dnv(family, model)) {
3923                 BIC_PRESENT(BIC_CPU_c1);
3924                 BIC_NOT_PRESENT(BIC_CPU_c3);
3925                 BIC_NOT_PRESENT(BIC_Pkgpc3);
3926                 BIC_NOT_PRESENT(BIC_CPU_c7);
3927                 BIC_NOT_PRESENT(BIC_Pkgpc7);
3928                 use_c1_residency_msr = 1;
3929         }
3930         if (is_skx(family, model)) {
3931                 BIC_NOT_PRESENT(BIC_CPU_c3);
3932                 BIC_NOT_PRESENT(BIC_Pkgpc3);
3933                 BIC_NOT_PRESENT(BIC_CPU_c7);
3934                 BIC_NOT_PRESENT(BIC_Pkgpc7);
3935         }
3936         if (has_hsw_msrs(family, model)) {
3937                 BIC_PRESENT(BIC_Pkgpc8);
3938                 BIC_PRESENT(BIC_Pkgpc9);
3939                 BIC_PRESENT(BIC_Pkgpc10);
3940         }
3941         do_irtl_hsw = has_hsw_msrs(family, model);
3942         do_skl_residency = has_skl_msrs(family, model);
3943         do_slm_cstates = is_slm(family, model);
3944         do_knl_cstates  = is_knl(family, model);
3945
3946         if (!quiet)
3947                 decode_misc_pwr_mgmt_msr();
3948
3949         if (!quiet && has_slv_msrs(family, model))
3950                 decode_c6_demotion_policy_msr();
3951
3952         rapl_probe(family, model);
3953         perf_limit_reasons_probe(family, model);
3954
3955         if (!quiet)
3956                 dump_cstate_pstate_config_info(family, model);
3957
3958         if (has_skl_msrs(family, model))
3959                 calculate_tsc_tweak();
3960
3961         if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
3962                 BIC_PRESENT(BIC_GFX_rc6);
3963
3964         if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
3965                 BIC_PRESENT(BIC_GFXMHz);
3966
3967         if (!quiet)
3968                 decode_misc_feature_control();
3969
3970         return;
3971 }
3972
3973 void help()
3974 {
3975         fprintf(outf,
3976         "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
3977         "\n"
3978         "Turbostat forks the specified COMMAND and prints statistics\n"
3979         "when COMMAND completes.\n"
3980         "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
3981         "to print statistics, until interrupted.\n"
3982         "--add          add a counter\n"
3983         "               eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
3984         "--quiet        skip decoding system configuration header\n"
3985         "--interval sec Override default 5-second measurement interval\n"
3986         "--help         print this help message\n"
3987         "--out file     create or truncate \"file\" for all output\n"
3988         "--version      print version information\n"
3989         "\n"
3990         "For more help, run \"man turbostat\"\n");
3991 }
3992
3993
/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Returns 1 when the first character of the entry name is a decimal
 * digit, 0 otherwise.
 */
int dir_filter(const struct dirent *dirp)
{
	/*
	 * isdigit() takes an int that must be representable as unsigned char
	 * (or EOF); passing a negative plain char is undefined behaviour.
	 */
	if (isdigit((unsigned char)dirp->d_name[0]))
		return 1;
	else
		return 0;
}
4005
/*
 * Placeholder that performs no work: the argument is ignored and the
 * result is always 0.
 */
int open_dev_cpu_msr(int dummy1)
{
	const int status = 0;

	(void)dummy1;	/* intentionally unused */

	return status;
}
4010
4011 void topology_probe()
4012 {
4013         int i;
4014         int max_core_id = 0;
4015         int max_package_id = 0;
4016         int max_siblings = 0;
4017         struct cpu_topology {
4018                 int core_id;
4019                 int physical_package_id;
4020         } *cpus;
4021
4022         /* Initialize num_cpus, max_cpu_num */
4023         topo.num_cpus = 0;
4024         topo.max_cpu_num = 0;
4025         for_all_proc_cpus(count_cpus);
4026         if (!summary_only && topo.num_cpus > 1)
4027                 BIC_PRESENT(BIC_CPU);
4028
4029         if (debug > 1)
4030                 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
4031
4032         cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
4033         if (cpus == NULL)
4034                 err(1, "calloc cpus");
4035
4036         /*
4037          * Allocate and initialize cpu_present_set
4038          */
4039         cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
4040         if (cpu_present_set == NULL)
4041                 err(3, "CPU_ALLOC");
4042         cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4043         CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
4044         for_all_proc_cpus(mark_cpu_present);
4045
4046         /*
4047          * Allocate and initialize cpu_affinity_set
4048          */
4049         cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
4050         if (cpu_affinity_set == NULL)
4051                 err(3, "CPU_ALLOC");
4052         cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4053         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
4054
4055
4056         /*
4057          * For online cpus
4058          * find max_core_id, max_package_id
4059          */
4060         for (i = 0; i <= topo.max_cpu_num; ++i) {
4061                 int siblings;
4062
4063                 if (cpu_is_not_present(i)) {
4064                         if (debug > 1)
4065                                 fprintf(outf, "cpu%d NOT PRESENT\n", i);
4066                         continue;
4067                 }
4068                 cpus[i].core_id = get_core_id(i);
4069                 if (cpus[i].core_id > max_core_id)
4070                         max_core_id = cpus[i].core_id;
4071
4072                 cpus[i].physical_package_id = get_physical_package_id(i);
4073                 if (cpus[i].physical_package_id > max_package_id)
4074                         max_package_id = cpus[i].physical_package_id;
4075
4076                 siblings = get_num_ht_siblings(i);
4077                 if (siblings > max_siblings)
4078                         max_siblings = siblings;
4079                 if (debug > 1)
4080                         fprintf(outf, "cpu %d pkg %d core %d\n",
4081                                 i, cpus[i].physical_package_id, cpus[i].core_id);
4082         }
4083         topo.num_cores_per_pkg = max_core_id + 1;
4084         if (debug > 1)
4085                 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
4086                         max_core_id, topo.num_cores_per_pkg);
4087         if (!summary_only && topo.num_cores_per_pkg > 1)
4088                 BIC_PRESENT(BIC_Core);
4089
4090         topo.num_packages = max_package_id + 1;
4091         if (debug > 1)
4092                 fprintf(outf, "max_package_id %d, sizing for %d packages\n",
4093                         max_package_id, topo.num_packages);
4094         if (debug && !summary_only && topo.num_packages > 1)
4095                 BIC_PRESENT(BIC_Package);
4096
4097         topo.num_threads_per_core = max_siblings;
4098         if (debug > 1)
4099                 fprintf(outf, "max_siblings %d\n", max_siblings);
4100
4101         free(cpus);
4102 }
4103
4104 void
4105 allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
4106 {
4107         int i;
4108
4109         *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
4110                 topo.num_packages, sizeof(struct thread_data));
4111         if (*t == NULL)
4112                 goto error;
4113
4114         for (i = 0; i < topo.num_threads_per_core *
4115                 topo.num_cores_per_pkg * topo.num_packages; i++)
4116                 (*t)[i].cpu_id = -1;
4117
4118         *c = calloc(topo.num_cores_per_pkg * topo.num_packages,
4119                 sizeof(struct core_data));
4120         if (*c == NULL)
4121                 goto error;
4122
4123         for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
4124                 (*c)[i].core_id = -1;
4125
4126         *p = calloc(topo.num_packages, sizeof(struct pkg_data));
4127         if (*p == NULL)
4128                 goto error;
4129
4130         for (i = 0; i < topo.num_packages; i++)
4131                 (*p)[i].package_id = i;
4132
4133         return;
4134 error:
4135         err(1, "calloc counters");
4136 }
4137 /*
4138  * init_counter()
4139  *
4140  * set cpu_id, core_num, pkg_num
4141  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
4142  *
4143  * increment topo.num_cores when 1st core in pkg seen
4144  */
4145 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
4146         struct pkg_data *pkg_base, int thread_num, int core_num,
4147         int pkg_num, int cpu_id)
4148 {
4149         struct thread_data *t;
4150         struct core_data *c;
4151         struct pkg_data *p;
4152
4153         t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
4154         c = GET_CORE(core_base, core_num, pkg_num);
4155         p = GET_PKG(pkg_base, pkg_num);
4156
4157         t->cpu_id = cpu_id;
4158         if (thread_num == 0) {
4159                 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
4160                 if (cpu_is_first_core_in_package(cpu_id))
4161                         t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
4162         }
4163
4164         c->core_id = core_num;
4165         p->package_id = pkg_num;
4166 }
4167
4168
4169 int initialize_counters(int cpu_id)
4170 {
4171         int my_thread_id, my_core_id, my_package_id;
4172
4173         my_package_id = get_physical_package_id(cpu_id);
4174         my_core_id = get_core_id(cpu_id);
4175         my_thread_id = get_cpu_position_in_core(cpu_id);
4176         if (!my_thread_id)
4177                 topo.num_cores++;
4178
4179         init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
4180         init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
4181         return 0;
4182 }
4183
4184 void allocate_output_buffer()
4185 {
4186         output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
4187         outp = output_buffer;
4188         if (outp == NULL)
4189                 err(-1, "calloc output buffer");
4190 }
4191 void allocate_fd_percpu(void)
4192 {
4193         fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4194         if (fd_percpu == NULL)
4195                 err(-1, "calloc fd_percpu");
4196 }
4197 void allocate_irq_buffers(void)
4198 {
4199         irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
4200         if (irq_column_2_cpu == NULL)
4201                 err(-1, "calloc %d", topo.num_cpus);
4202
4203         irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4204         if (irqs_per_cpu == NULL)
4205                 err(-1, "calloc %d", topo.max_cpu_num + 1);
4206 }
/*
 * setup_all_buffers()
 *
 * Probe the cpu topology, then allocate the IRQ buffers, the per-cpu fd
 * table, the even and odd counter sets and the output buffer, and finally
 * run initialize_counters() on each cpu via for_all_proc_cpus().
 */
void setup_all_buffers(void)
{
	/* must run first: it fills in the topo fields the allocators size from */
	topology_probe();
	allocate_irq_buffers();
	allocate_fd_percpu();
	allocate_counters(&thread_even, &core_even, &package_even);
	allocate_counters(&thread_odd, &core_odd, &package_odd);
	allocate_output_buffer();
	/* the counter sets must exist before their slots are initialized */
	for_all_proc_cpus(initialize_counters);
}
4217
4218 void set_base_cpu(void)
4219 {
4220         base_cpu = sched_getcpu();
4221         if (base_cpu < 0)
4222                 err(-ENODEV, "No valid cpus found");
4223
4224         if (debug > 1)
4225                 fprintf(outf, "base_cpu = %d\n", base_cpu);
4226 }
4227
4228 void turbostat_init()
4229 {
4230         setup_all_buffers();
4231         set_base_cpu();
4232         check_dev_msr();
4233         check_permissions();
4234         process_cpuid();
4235
4236
4237         if (!quiet)
4238                 for_all_cpus(print_hwp, ODD_COUNTERS);
4239
4240         if (!quiet)
4241                 for_all_cpus(print_epb, ODD_COUNTERS);
4242
4243         if (!quiet)
4244                 for_all_cpus(print_perf_limit, ODD_COUNTERS);
4245
4246         if (!quiet)
4247                 for_all_cpus(print_rapl, ODD_COUNTERS);
4248
4249         for_all_cpus(set_temperature_target, ODD_COUNTERS);
4250
4251         if (!quiet)
4252                 for_all_cpus(print_thermal, ODD_COUNTERS);
4253
4254         if (!quiet && do_irtl_snb)
4255                 print_irtl();
4256 }
4257
4258 int fork_it(char **argv)
4259 {
4260         pid_t child_pid;
4261         int status;
4262
4263         status = for_all_cpus(get_counters, EVEN_COUNTERS);
4264         if (status)
4265                 exit(status);
4266         /* clear affinity side-effect of get_counters() */
4267         sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
4268         gettimeofday(&tv_even, (struct timezone *)NULL);
4269
4270         child_pid = fork();
4271         if (!child_pid) {
4272                 /* child */
4273                 execvp(argv[0], argv);
4274         } else {
4275
4276                 /* parent */
4277                 if (child_pid == -1)
4278                         err(1, "fork");
4279
4280                 signal(SIGINT, SIG_IGN);
4281                 signal(SIGQUIT, SIG_IGN);
4282                 if (waitpid(child_pid, &status, 0) == -1)
4283                         err(status, "waitpid");
4284         }
4285         /*
4286          * n.b. fork_it() does not check for errors from for_all_cpus()
4287          * because re-starting is problematic when forking
4288          */
4289         for_all_cpus(get_counters, ODD_COUNTERS);
4290         gettimeofday(&tv_odd, (struct timezone *)NULL);
4291         timersub(&tv_odd, &tv_even, &tv_delta);
4292         if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
4293                 fprintf(outf, "%s: Counter reset detected\n", progname);
4294         else {
4295                 compute_average(EVEN_COUNTERS);
4296                 format_all_counters(EVEN_COUNTERS);
4297         }
4298
4299         fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
4300
4301         flush_output_stderr();
4302
4303         return status;
4304 }
4305
4306 int get_and_dump_counters(void)
4307 {
4308         int status;
4309
4310         status = for_all_cpus(get_counters, ODD_COUNTERS);
4311         if (status)
4312                 return status;
4313
4314         status = for_all_cpus(dump_counters, ODD_COUNTERS);
4315         if (status)
4316                 return status;
4317
4318         flush_output_stdout();
4319
4320         return status;
4321 }
4322
4323 void print_version() {
4324         fprintf(outf, "turbostat version 4.17 10 Jan 2017"
4325                 " - Len Brown <lenb@kernel.org>\n");
4326 }
4327
4328 int add_counter(unsigned int msr_num, char *name, unsigned int width,
4329         enum counter_scope scope, enum counter_type type,
4330         enum counter_format format)
4331 {
4332         struct msr_counter *msrp;
4333
4334         msrp = calloc(1, sizeof(struct msr_counter));
4335         if (msrp == NULL) {
4336                 perror("calloc");
4337                 exit(1);
4338         }
4339
4340         msrp->msr_num = msr_num;
4341         strncpy(msrp->name, name, NAME_BYTES);
4342         msrp->width = width;
4343         msrp->type = type;
4344         msrp->format = format;
4345
4346         switch (scope) {
4347
4348         case SCOPE_CPU:
4349                 msrp->next = sys.tp;
4350                 sys.tp = msrp;
4351                 sys.added_thread_counters++;
4352                 if (sys.added_thread_counters > MAX_ADDED_COUNTERS) {
4353                         fprintf(stderr, "exceeded max %d added thread counters\n",
4354                                 MAX_ADDED_COUNTERS);
4355                         exit(-1);
4356                 }
4357                 break;
4358
4359         case SCOPE_CORE:
4360                 msrp->next = sys.cp;
4361                 sys.cp = msrp;
4362                 sys.added_core_counters++;
4363                 if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
4364                         fprintf(stderr, "exceeded max %d added core counters\n",
4365                                 MAX_ADDED_COUNTERS);
4366                         exit(-1);
4367                 }
4368                 break;
4369
4370         case SCOPE_PACKAGE:
4371                 msrp->next = sys.pp;
4372                 sys.pp = msrp;
4373                 sys.added_package_counters++;
4374                 if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
4375                         fprintf(stderr, "exceeded max %d added package counters\n",
4376                                 MAX_ADDED_COUNTERS);
4377                         exit(-1);
4378                 }
4379                 break;
4380         }
4381
4382         return 0;
4383 }
4384
4385 void parse_add_command(char *add_command)
4386 {
4387         int msr_num = 0;
4388         char name_buffer[NAME_BYTES] = "";
4389         int width = 64;
4390         int fail = 0;
4391         enum counter_scope scope = SCOPE_CPU;
4392         enum counter_type type = COUNTER_CYCLES;
4393         enum counter_format format = FORMAT_DELTA;
4394
4395         while (add_command) {
4396
4397                 if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
4398                         goto next;
4399
4400                 if (sscanf(add_command, "msr%d", &msr_num) == 1)
4401                         goto next;
4402
4403                 if (sscanf(add_command, "u%d", &width) == 1) {
4404                         if ((width == 32) || (width == 64))
4405                                 goto next;
4406                         width = 64;
4407                 }
4408                 if (!strncmp(add_command, "cpu", strlen("cpu"))) {
4409                         scope = SCOPE_CPU;
4410                         goto next;
4411                 }
4412                 if (!strncmp(add_command, "core", strlen("core"))) {
4413                         scope = SCOPE_CORE;
4414                         goto next;
4415                 }
4416                 if (!strncmp(add_command, "package", strlen("package"))) {
4417                         scope = SCOPE_PACKAGE;
4418                         goto next;
4419                 }
4420                 if (!strncmp(add_command, "cycles", strlen("cycles"))) {
4421                         type = COUNTER_CYCLES;
4422                         goto next;
4423                 }
4424                 if (!strncmp(add_command, "seconds", strlen("seconds"))) {
4425                         type = COUNTER_SECONDS;
4426                         goto next;
4427                 }
4428                 if (!strncmp(add_command, "raw", strlen("raw"))) {
4429                         format = FORMAT_RAW;
4430                         goto next;
4431                 }
4432                 if (!strncmp(add_command, "delta", strlen("delta"))) {
4433                         format = FORMAT_DELTA;
4434                         goto next;
4435                 }
4436                 if (!strncmp(add_command, "percent", strlen("percent"))) {
4437                         format = FORMAT_PERCENT;
4438                         goto next;
4439                 }
4440
4441                 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {        /* 18 < NAME_BYTES */
4442                         char *eos;
4443
4444                         eos = strchr(name_buffer, ',');
4445                         if (eos)
4446                                 *eos = '\0';
4447                         goto next;
4448                 }
4449
4450 next:
4451                 add_command = strchr(add_command, ',');
4452                 if (add_command)
4453                         add_command++;
4454
4455         }
4456         if (msr_num == 0) {
4457                 fprintf(stderr, "--add: (msrDDD | msr0xXXX) required\n");
4458                 fail++;
4459         }
4460
4461         /* generate default column header */
4462         if (*name_buffer == '\0') {
4463                 if (format == FORMAT_RAW) {
4464                         if (width == 32)
4465                                 sprintf(name_buffer, "msr%d", msr_num);
4466                         else
4467                                 sprintf(name_buffer, "MSR%d", msr_num);
4468                 } else if (format == FORMAT_DELTA) {
4469                         if (width == 32)
4470                                 sprintf(name_buffer, "cnt%d", msr_num);
4471                         else
4472                                 sprintf(name_buffer, "CNT%d", msr_num);
4473                 } else if (format == FORMAT_PERCENT) {
4474                         if (width == 32)
4475                                 sprintf(name_buffer, "msr%d%%", msr_num);
4476                         else
4477                                 sprintf(name_buffer, "MSR%d%%", msr_num);
4478                 }
4479         }
4480
4481         if (add_counter(msr_num, name_buffer, width, scope, type, format))
4482                 fail++;
4483
4484         if (fail) {
4485                 help();
4486                 exit(1);
4487         }
4488 }
/*
 * HIDE_LIST - hide this list of counters, show the rest [default]
 * SHOW_LIST - show this list of counters, hide the rest
 */
enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;

/* set to 1 by parse_show_hide() once a SHOW_LIST request has been processed */
int shown;
4496 /*
4497  * parse_show_hide() - process cmdline to set default counter action
4498  */
4499 void parse_show_hide(char *optarg, enum show_hide_mode new_mode)
4500 {
4501         /*
4502          * --show: show only those specified
4503          *  The 1st invocation will clear and replace the enabled mask
4504          *  subsequent invocations can add to it.
4505          */
4506         if (new_mode == SHOW_LIST) {
4507                 if (shown == 0)
4508                         bic_enabled = bic_lookup(optarg);
4509                 else
4510                         bic_enabled |= bic_lookup(optarg);
4511                 shown = 1;
4512
4513                 return;
4514         }
4515
4516         /*
4517          * --hide: do not show those specified
4518          *  multiple invocations simply clear more bits in enabled mask
4519          */
4520         bic_enabled &= ~bic_lookup(optarg);
4521 }
4522
4523 void cmdline(int argc, char **argv)
4524 {
4525         int opt;
4526         int option_index = 0;
4527         static struct option long_options[] = {
4528                 {"add",         required_argument,      0, 'a'},
4529                 {"Dump",        no_argument,            0, 'D'},
4530                 {"debug",       no_argument,            0, 'd'},        /* internal, not documented */
4531                 {"interval",    required_argument,      0, 'i'},
4532                 {"help",        no_argument,            0, 'h'},
4533                 {"hide",        required_argument,      0, 'H'},        // meh, -h taken by --help
4534                 {"Joules",      no_argument,            0, 'J'},
4535                 {"out",         required_argument,      0, 'o'},
4536                 {"Package",     no_argument,            0, 'p'},
4537                 {"processor",   no_argument,            0, 'p'},
4538                 {"quiet",       no_argument,            0, 'q'},
4539                 {"show",        required_argument,      0, 's'},
4540                 {"Summary",     no_argument,            0, 'S'},
4541                 {"TCC",         required_argument,      0, 'T'},
4542                 {"version",     no_argument,            0, 'v' },
4543                 {0,             0,                      0,  0 }
4544         };
4545
4546         progname = argv[0];
4547
4548         while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpqST:v",
4549                                 long_options, &option_index)) != -1) {
4550                 switch (opt) {
4551                 case 'a':
4552                         parse_add_command(optarg);
4553                         break;
4554                 case 'D':
4555                         dump_only++;
4556                         break;
4557                 case 'd':
4558                         debug++;
4559                         break;
4560                 case 'H':
4561                         parse_show_hide(optarg, HIDE_LIST);
4562                         break;
4563                 case 'h':
4564                 default:
4565                         help();
4566                         exit(1);
4567                 case 'i':
4568                         {
4569                                 double interval = strtod(optarg, NULL);
4570
4571                                 if (interval < 0.001) {
4572                                         fprintf(outf, "interval %f seconds is too small\n",
4573                                                 interval);
4574                                         exit(2);
4575                                 }
4576
4577                                 interval_ts.tv_sec = interval;
4578                                 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
4579                         }
4580                         break;
4581                 case 'J':
4582                         rapl_joules++;
4583                         break;
4584                 case 'o':
4585                         outf = fopen_or_die(optarg, "w");
4586                         break;
4587                 case 'P':
4588                         show_pkg_only++;
4589                         break;
4590                 case 'p':
4591                         show_core_only++;
4592                         break;
4593                 case 'q':
4594                         quiet = 1;
4595                         break;
4596                 case 's':
4597                         parse_show_hide(optarg, SHOW_LIST);
4598                         break;
4599                 case 'S':
4600                         summary_only++;
4601                         break;
4602                 case 'T':
4603                         tcc_activation_temp_override = atoi(optarg);
4604                         break;
4605                 case 'v':
4606                         print_version();
4607                         exit(0);
4608                         break;
4609                 }
4610         }
4611 }
4612
4613 int main(int argc, char **argv)
4614 {
4615         outf = stderr;
4616
4617         cmdline(argc, argv);
4618
4619         if (!quiet)
4620                 print_version();
4621
4622         turbostat_init();
4623
4624         /* dump counters and exit */
4625         if (dump_only)
4626                 return get_and_dump_counters();
4627
4628         /*
4629          * if any params left, it must be a command to fork
4630          */
4631         if (argc - optind)
4632                 return fork_it(argv + optind);
4633         else
4634                 turbostat_loop();
4635
4636         return 0;
4637 }