]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - tools/power/x86/turbostat/turbostat.c
ASoC: sti-asoc-card: update tdm mode
[karo-tx-linux.git] / tools / power / x86 / turbostat / turbostat.c
1 /*
2  * turbostat -- show CPU frequency and C-state residency
3  * on modern Intel turbo-capable processors.
4  *
5  * Copyright (c) 2013 Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #define _GNU_SOURCE
23 #include MSRHEADER
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <err.h>
27 #include <unistd.h>
28 #include <sys/types.h>
29 #include <sys/wait.h>
30 #include <sys/stat.h>
31 #include <sys/resource.h>
32 #include <fcntl.h>
33 #include <signal.h>
34 #include <sys/time.h>
35 #include <stdlib.h>
36 #include <getopt.h>
37 #include <dirent.h>
38 #include <string.h>
39 #include <ctype.h>
40 #include <sched.h>
41 #include <time.h>
42 #include <cpuid.h>
43 #include <linux/capability.h>
44 #include <errno.h>
45
/* Pathname of the kernel statistics file. */
char *proc_stat = "/proc/stat";

/* Stream used for reports; compared against stderr when flushing. */
FILE *outf;

/* Cached /dev/cpu/N/msr descriptors, indexed by cpu number (0 == not opened). */
int *fd_percpu;

/* Default interval: 5 seconds, 0 nanoseconds. */
struct timespec interval_ts = { .tv_sec = 5, .tv_nsec = 0 };

/* Mode switches. */
unsigned int debug;
unsigned int rapl_joules;
unsigned int summary_only;
unsigned int dump_only;
unsigned int skip_c0;
unsigned int skip_c1;

/* Column/feature selectors tested by the header and row formatters. */
unsigned int do_nhm_cstates;
unsigned int do_snb_cstates;
unsigned int do_knl_cstates;
unsigned int do_pc2;
unsigned int do_pc3;
unsigned int do_pc6;
unsigned int do_pc7;
unsigned int do_c8_c9_c10;
unsigned int do_skl_residency;
unsigned int do_slm_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;

/* Divisor applied to Hz-scale values when printing (MHz etc). */
unsigned int units = 1000000;

unsigned int genuine_intel;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;

/* Offsets of optional extra MSR columns; zero means "column not shown". */
unsigned int extra_msr_offset32;
unsigned int extra_msr_offset64;
unsigned int extra_delta_offset32;
unsigned int extra_delta_offset64;

unsigned int aperf_mperf_multiplier = 1;

/* IRQ column is on by default, SMI column is off by default. */
int do_irq = 1;
int do_smi;

double bclk;
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;

/* Topology column selectors and row filters. */
unsigned int show_pkg;
unsigned int show_core;
unsigned int show_cpu;
unsigned int show_pkg_only;
unsigned int show_core_only;

/* Start of the text buffer and the current append position within it. */
char *output_buffer;
char *outp;

/* Bitmask of RAPL_* feature bits. */
unsigned int do_rapl;
unsigned int do_dts;
unsigned int do_ptm;

unsigned int do_gfx_rc6_ms;
unsigned long long gfx_cur_rc6_ms;
unsigned int do_gfx_mhz;
unsigned int gfx_cur_mhz;

unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;

/* Scale factors applied to raw RAPL counter values. */
double rapl_power_units;
double rapl_time_units;
double rapl_dram_energy_units;
double rapl_energy_units;
/* Intervals at or above this many seconds are flagged as suspect in RAPL columns. */
double rapl_joule_counter_range;

unsigned int do_core_perf_limit_reasons;
unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;

unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;

double discover_bclk(unsigned int family, unsigned int model);

unsigned int has_hwp;			/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
					/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify;		/* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;		/* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;		/* IA32_HWP_REQUEST_PKG */
115
/*
 * RAPL feature bits; the formatters test them against do_rapl.
 * The comment above each bit names the MSR(s) it stands for.
 */

/* 0x610 MSR_PKG_POWER_LIMIT, 0x611 MSR_PKG_ENERGY_STATUS */
#define RAPL_PKG		(1 << 0)
/* 0x613 MSR_PKG_PERF_STATUS */
#define RAPL_PKG_PERF_STATUS	(1 << 1)
/* 0x614 MSR_PKG_POWER_INFO */
#define RAPL_PKG_POWER_INFO	(1 << 2)

/* 0x618 MSR_DRAM_POWER_LIMIT, 0x619 MSR_DRAM_ENERGY_STATUS */
#define RAPL_DRAM		(1 << 3)
/* 0x61b MSR_DRAM_PERF_STATUS */
#define RAPL_DRAM_PERF_STATUS	(1 << 4)
/* 0x61c MSR_DRAM_POWER_INFO */
#define RAPL_DRAM_POWER_INFO	(1 << 5)

/* 0x638 MSR_PP0_POWER_LIMIT, 0x639 MSR_PP0_ENERGY_STATUS */
#define RAPL_CORES		(1 << 6)
/* 0x63a MSR_PP0_POLICY */
#define RAPL_CORE_POLICY	(1 << 7)

/* 0x640 MSR_PP1_POWER_LIMIT, 0x641 MSR_PP1_ENERGY_STATUS, 0x642 MSR_PP1_POLICY */
#define RAPL_GFX		(1 << 8)

#define TJMAX_DEFAULT	100

/* Larger of two values; note that each argument may be evaluated twice. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
145
/* Set to 1 once the "APERF or MPERF went backwards" warning has been printed. */
int aperf_mperf_unstable;
int backwards_count;
/* Program name used as a prefix in diagnostics. */
char *progname;

/* Dynamically sized CPU sets and their sizes in bytes (for the *_S macros). */
cpu_set_t *cpu_present_set;
cpu_set_t *cpu_affinity_set;
size_t cpu_present_setsize;
size_t cpu_affinity_setsize;
152
/* Bits kept in thread_data.flags. */
#define CPU_IS_FIRST_THREAD_IN_CORE	0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4

/*
 * Counters collected for one logical CPU.
 * After a delta pass the "old" copy holds interval differences
 * rather than raw samples.
 */
struct thread_data {
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;
	unsigned long long extra_msr64;
	unsigned long long extra_delta64;
	unsigned long long extra_msr32;
	unsigned long long extra_delta32;
	unsigned int irq_count;
	unsigned int smi_count;
	unsigned int cpu_id;
	unsigned int flags;	/* CPU_IS_FIRST_* bits */
};

/* The two alternating sample sets. */
struct thread_data *thread_even, *thread_odd;
169
/* Counters collected once per core. */
struct core_data {
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;
	unsigned int core_temp_c;
	unsigned int core_id;
};

/* The two alternating sample sets. */
struct core_data *core_even, *core_odd;
177
/* Counters collected once per package. */
struct pkg_data {
	/* package C-state residency counters */
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;
	unsigned long long pc8;
	unsigned long long pc9;
	unsigned long long pc10;

	/* printed as Totl%C0, Any%C0, GFX%C0 and CPUGFX% */
	unsigned long long pkg_wtd_core_c0;
	unsigned long long pkg_any_core_c0;
	unsigned long long pkg_any_gfxe_c0;
	unsigned long long pkg_both_core_gfxe_c0;

	unsigned long long gfx_rc6_ms;
	unsigned int gfx_mhz;
	unsigned int package_id;

	/* 32-bit RAPL counters; deltas are computed with wrap-around handling */
	unsigned int energy_pkg;		/* MSR_PKG_ENERGY_STATUS */
	unsigned int energy_dram;		/* MSR_DRAM_ENERGY_STATUS */
	unsigned int energy_cores;		/* MSR_PP0_ENERGY_STATUS */
	unsigned int energy_gfx;		/* MSR_PP1_ENERGY_STATUS */
	unsigned int rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
	unsigned int rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */

	unsigned int pkg_temp_c;
};

/* The two alternating sample sets. */
struct pkg_data *package_even, *package_odd;
202
/* Argument triples for functions taking (thread, core, package) base pointers. */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Element lookup in the flat counter arrays.
 *
 * Threads are laid out package-major, then core, then thread, using the
 * per-package and per-core counts held in topo; cores are laid out
 * package-major.  Every macro argument is parenthesized so that arbitrary
 * expressions (e.g. "a & b", "x ? y : z") can be passed safely.
 */
#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
	((thread_base) + (pkg_no) * topo.num_cores_per_pkg * \
		topo.num_threads_per_core + \
		(core_no) * topo.num_threads_per_core + (thread_no))
#define GET_CORE(core_base, core_no, pkg_no) \
	((core_base) + (pkg_no) * topo.num_cores_per_pkg + (core_no))
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
213
214 struct system_summary {
215         struct thread_data threads;
216         struct core_data cores;
217         struct pkg_data packages;
218 } sum, average;
219
220
/* Machine topology counts; they bound the loops and index math over the counter arrays. */
struct topo_params {
	int num_packages;
	int num_cpus;
	int num_cores;
	int max_cpu_num;
	int num_cores_per_pkg;
	int num_threads_per_core;
};

struct topo_params topo;
229
/* Sample timestamps and their difference; tv_delta supplies the interval length when formatting. */
struct timeval tv_even;
struct timeval tv_odd;
struct timeval tv_delta;

/* /proc/interrupts column numbers */
int *irq_column_2_cpu;
/* indexed by cpu_num */
int *irqs_per_cpu;

void setup_all_buffers(void);
236
237 int cpu_is_not_present(int cpu)
238 {
239         return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
240 }
241 /*
242  * run func(thread, core, package) in topology order
243  * skip non-present cpus
244  */
245
246 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
247         struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
248 {
249         int retval, pkg_no, core_no, thread_no;
250
251         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
252                 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
253                         for (thread_no = 0; thread_no <
254                                 topo.num_threads_per_core; ++thread_no) {
255                                 struct thread_data *t;
256                                 struct core_data *c;
257                                 struct pkg_data *p;
258
259                                 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
260
261                                 if (cpu_is_not_present(t->cpu_id))
262                                         continue;
263
264                                 c = GET_CORE(core_base, core_no, pkg_no);
265                                 p = GET_PKG(pkg_base, pkg_no);
266
267                                 retval = func(t, c, p);
268                                 if (retval)
269                                         return retval;
270                         }
271                 }
272         }
273         return 0;
274 }
275
276 int cpu_migrate(int cpu)
277 {
278         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
279         CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
280         if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
281                 return -1;
282         else
283                 return 0;
284 }
285 int get_msr_fd(int cpu)
286 {
287         char pathname[32];
288         int fd;
289
290         fd = fd_percpu[cpu];
291
292         if (fd)
293                 return fd;
294
295         sprintf(pathname, "/dev/cpu/%d/msr", cpu);
296         fd = open(pathname, O_RDONLY);
297         if (fd < 0)
298                 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
299
300         fd_percpu[cpu] = fd;
301
302         return fd;
303 }
304
/*
 * Read the 64-bit MSR at @offset on @cpu into *@msr.
 *
 * Always returns 0; a short or failed read terminates the program
 * through err().
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	ssize_t nread = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);

	if (nread != sizeof *msr)
		err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset);

	return 0;
}
316
317 /*
318  * Example Format w/ field column widths:
319  *
320  *  Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     IRQ   SMI   Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp  GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
321  * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
322  */
323
324 void print_header(void)
325 {
326         if (show_pkg)
327                 outp += sprintf(outp, " Package");
328         if (show_core)
329                 outp += sprintf(outp, "    Core");
330         if (show_cpu)
331                 outp += sprintf(outp, "     CPU");
332         if (has_aperf)
333                 outp += sprintf(outp, " Avg_MHz");
334         if (has_aperf)
335                 outp += sprintf(outp, "   Busy%%");
336         if (has_aperf)
337                 outp += sprintf(outp, " Bzy_MHz");
338         outp += sprintf(outp, " TSC_MHz");
339
340         if (extra_delta_offset32)
341                 outp += sprintf(outp, "  count 0x%03X", extra_delta_offset32);
342         if (extra_delta_offset64)
343                 outp += sprintf(outp, "  COUNT 0x%03X", extra_delta_offset64);
344         if (extra_msr_offset32)
345                 outp += sprintf(outp, "   MSR 0x%03X", extra_msr_offset32);
346         if (extra_msr_offset64)
347                 outp += sprintf(outp, "           MSR 0x%03X", extra_msr_offset64);
348
349         if (!debug)
350                 goto done;
351
352         if (do_irq)
353                 outp += sprintf(outp, "     IRQ");
354         if (do_smi)
355                 outp += sprintf(outp, "     SMI");
356
357         if (do_nhm_cstates)
358                 outp += sprintf(outp, "  CPU%%c1");
359         if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
360                 outp += sprintf(outp, "  CPU%%c3");
361         if (do_nhm_cstates)
362                 outp += sprintf(outp, "  CPU%%c6");
363         if (do_snb_cstates)
364                 outp += sprintf(outp, "  CPU%%c7");
365
366         if (do_dts)
367                 outp += sprintf(outp, " CoreTmp");
368         if (do_ptm)
369                 outp += sprintf(outp, "  PkgTmp");
370
371         if (do_gfx_rc6_ms)
372                 outp += sprintf(outp, " GFX%%rc6");
373
374         if (do_gfx_mhz)
375                 outp += sprintf(outp, "  GFXMHz");
376
377         if (do_skl_residency) {
378                 outp += sprintf(outp, " Totl%%C0");
379                 outp += sprintf(outp, "  Any%%C0");
380                 outp += sprintf(outp, "  GFX%%C0");
381                 outp += sprintf(outp, " CPUGFX%%");
382         }
383
384         if (do_pc2)
385                 outp += sprintf(outp, " Pkg%%pc2");
386         if (do_pc3)
387                 outp += sprintf(outp, " Pkg%%pc3");
388         if (do_pc6)
389                 outp += sprintf(outp, " Pkg%%pc6");
390         if (do_pc7)
391                 outp += sprintf(outp, " Pkg%%pc7");
392         if (do_c8_c9_c10) {
393                 outp += sprintf(outp, " Pkg%%pc8");
394                 outp += sprintf(outp, " Pkg%%pc9");
395                 outp += sprintf(outp, " Pk%%pc10");
396         }
397
398         if (do_rapl && !rapl_joules) {
399                 if (do_rapl & RAPL_PKG)
400                         outp += sprintf(outp, " PkgWatt");
401                 if (do_rapl & RAPL_CORES)
402                         outp += sprintf(outp, " CorWatt");
403                 if (do_rapl & RAPL_GFX)
404                         outp += sprintf(outp, " GFXWatt");
405                 if (do_rapl & RAPL_DRAM)
406                         outp += sprintf(outp, " RAMWatt");
407                 if (do_rapl & RAPL_PKG_PERF_STATUS)
408                         outp += sprintf(outp, "   PKG_%%");
409                 if (do_rapl & RAPL_DRAM_PERF_STATUS)
410                         outp += sprintf(outp, "   RAM_%%");
411         } else if (do_rapl && rapl_joules) {
412                 if (do_rapl & RAPL_PKG)
413                         outp += sprintf(outp, "   Pkg_J");
414                 if (do_rapl & RAPL_CORES)
415                         outp += sprintf(outp, "   Cor_J");
416                 if (do_rapl & RAPL_GFX)
417                         outp += sprintf(outp, "   GFX_J");
418                 if (do_rapl & RAPL_DRAM)
419                         outp += sprintf(outp, "   RAM_J");
420                 if (do_rapl & RAPL_PKG_PERF_STATUS)
421                         outp += sprintf(outp, "   PKG_%%");
422                 if (do_rapl & RAPL_DRAM_PERF_STATUS)
423                         outp += sprintf(outp, "   RAM_%%");
424                 outp += sprintf(outp, "   time");
425
426         }
427     done:
428         outp += sprintf(outp, "\n");
429 }
430
431 int dump_counters(struct thread_data *t, struct core_data *c,
432         struct pkg_data *p)
433 {
434         outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
435
436         if (t) {
437                 outp += sprintf(outp, "CPU: %d flags 0x%x\n",
438                         t->cpu_id, t->flags);
439                 outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
440                 outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
441                 outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
442                 outp += sprintf(outp, "c1: %016llX\n", t->c1);
443                 outp += sprintf(outp, "msr0x%x: %08llX\n",
444                         extra_delta_offset32, t->extra_delta32);
445                 outp += sprintf(outp, "msr0x%x: %016llX\n",
446                         extra_delta_offset64, t->extra_delta64);
447                 outp += sprintf(outp, "msr0x%x: %08llX\n",
448                         extra_msr_offset32, t->extra_msr32);
449                 outp += sprintf(outp, "msr0x%x: %016llX\n",
450                         extra_msr_offset64, t->extra_msr64);
451                 if (do_irq)
452                         outp += sprintf(outp, "IRQ: %08X\n", t->irq_count);
453                 if (do_smi)
454                         outp += sprintf(outp, "SMI: %08X\n", t->smi_count);
455         }
456
457         if (c) {
458                 outp += sprintf(outp, "core: %d\n", c->core_id);
459                 outp += sprintf(outp, "c3: %016llX\n", c->c3);
460                 outp += sprintf(outp, "c6: %016llX\n", c->c6);
461                 outp += sprintf(outp, "c7: %016llX\n", c->c7);
462                 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
463         }
464
465         if (p) {
466                 outp += sprintf(outp, "package: %d\n", p->package_id);
467
468                 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
469                 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
470                 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
471                 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
472
473                 outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
474                 if (do_pc3)
475                         outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
476                 if (do_pc6)
477                         outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
478                 if (do_pc7)
479                         outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
480                 outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
481                 outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
482                 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
483                 outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
484                 outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
485                 outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
486                 outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
487                 outp += sprintf(outp, "Throttle PKG: %0X\n",
488                         p->rapl_pkg_perf_status);
489                 outp += sprintf(outp, "Throttle RAM: %0X\n",
490                         p->rapl_dram_perf_status);
491                 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
492         }
493
494         outp += sprintf(outp, "\n");
495
496         return 0;
497 }
498
499 /*
500  * column formatting convention & formats
501  */
502 int format_counters(struct thread_data *t, struct core_data *c,
503         struct pkg_data *p)
504 {
505         double interval_float;
506         char *fmt8;
507
508          /* if showing only 1st thread in core and this isn't one, bail out */
509         if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
510                 return 0;
511
512          /* if showing only 1st thread in pkg and this isn't one, bail out */
513         if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
514                 return 0;
515
516         interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
517
518         /* topo columns, print blanks on 1st (average) line */
519         if (t == &average.threads) {
520                 if (show_pkg)
521                         outp += sprintf(outp, "       -");
522                 if (show_core)
523                         outp += sprintf(outp, "       -");
524                 if (show_cpu)
525                         outp += sprintf(outp, "       -");
526         } else {
527                 if (show_pkg) {
528                         if (p)
529                                 outp += sprintf(outp, "%8d", p->package_id);
530                         else
531                                 outp += sprintf(outp, "       -");
532                 }
533                 if (show_core) {
534                         if (c)
535                                 outp += sprintf(outp, "%8d", c->core_id);
536                         else
537                                 outp += sprintf(outp, "       -");
538                 }
539                 if (show_cpu)
540                         outp += sprintf(outp, "%8d", t->cpu_id);
541         }
542
543         /* Avg_MHz */
544         if (has_aperf)
545                 outp += sprintf(outp, "%8.0f",
546                         1.0 / units * t->aperf / interval_float);
547
548         /* Busy% */
549         if (has_aperf) {
550                 if (!skip_c0)
551                         outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
552                 else
553                         outp += sprintf(outp, "********");
554         }
555
556         /* Bzy_MHz */
557         if (has_aperf) {
558                 if (has_base_hz)
559                         outp += sprintf(outp, "%8.0f", base_hz / units * t->aperf / t->mperf);
560                 else
561                         outp += sprintf(outp, "%8.0f",
562                                 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
563         }
564
565         /* TSC_MHz */
566         outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
567
568         /* delta */
569         if (extra_delta_offset32)
570                 outp += sprintf(outp, "  %11llu", t->extra_delta32);
571
572         /* DELTA */
573         if (extra_delta_offset64)
574                 outp += sprintf(outp, "  %11llu", t->extra_delta64);
575         /* msr */
576         if (extra_msr_offset32)
577                 outp += sprintf(outp, "  0x%08llx", t->extra_msr32);
578
579         /* MSR */
580         if (extra_msr_offset64)
581                 outp += sprintf(outp, "  0x%016llx", t->extra_msr64);
582
583         if (!debug)
584                 goto done;
585
586         /* IRQ */
587         if (do_irq)
588                 outp += sprintf(outp, "%8d", t->irq_count);
589
590         /* SMI */
591         if (do_smi)
592                 outp += sprintf(outp, "%8d", t->smi_count);
593
594         if (do_nhm_cstates) {
595                 if (!skip_c1)
596                         outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc);
597                 else
598                         outp += sprintf(outp, "********");
599         }
600
601         /* print per-core data only for 1st thread in core */
602         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
603                 goto done;
604
605         if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
606                 outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc);
607         if (do_nhm_cstates)
608                 outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc);
609         if (do_snb_cstates)
610                 outp += sprintf(outp, "%8.2f", 100.0 * c->c7/t->tsc);
611
612         if (do_dts)
613                 outp += sprintf(outp, "%8d", c->core_temp_c);
614
615         /* print per-package data only for 1st core in package */
616         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
617                 goto done;
618
619         /* PkgTmp */
620         if (do_ptm)
621                 outp += sprintf(outp, "%8d", p->pkg_temp_c);
622
623         /* GFXrc6 */
624         if (do_gfx_rc6_ms)
625                 outp += sprintf(outp, "%8.2f", 100.0 * p->gfx_rc6_ms / 1000.0 / interval_float);
626
627         /* GFXMHz */
628         if (do_gfx_mhz)
629                 outp += sprintf(outp, "%8d", p->gfx_mhz);
630
631         /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
632         if (do_skl_residency) {
633                 outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
634                 outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc);
635                 outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc);
636                 outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc);
637         }
638
639         if (do_pc2)
640                 outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc);
641         if (do_pc3)
642                 outp += sprintf(outp, "%8.2f", 100.0 * p->pc3/t->tsc);
643         if (do_pc6)
644                 outp += sprintf(outp, "%8.2f", 100.0 * p->pc6/t->tsc);
645         if (do_pc7)
646                 outp += sprintf(outp, "%8.2f", 100.0 * p->pc7/t->tsc);
647         if (do_c8_c9_c10) {
648                 outp += sprintf(outp, "%8.2f", 100.0 * p->pc8/t->tsc);
649                 outp += sprintf(outp, "%8.2f", 100.0 * p->pc9/t->tsc);
650                 outp += sprintf(outp, "%8.2f", 100.0 * p->pc10/t->tsc);
651         }
652
653         /*
654          * If measurement interval exceeds minimum RAPL Joule Counter range,
655          * indicate that results are suspect by printing "**" in fraction place.
656          */
657         if (interval_float < rapl_joule_counter_range)
658                 fmt8 = "%8.2f";
659         else
660                 fmt8 = " %6.0f**";
661
662         if (do_rapl && !rapl_joules) {
663                 if (do_rapl & RAPL_PKG)
664                         outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float);
665                 if (do_rapl & RAPL_CORES)
666                         outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float);
667                 if (do_rapl & RAPL_GFX)
668                         outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
669                 if (do_rapl & RAPL_DRAM)
670                         outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float);
671                 if (do_rapl & RAPL_PKG_PERF_STATUS)
672                         outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
673                 if (do_rapl & RAPL_DRAM_PERF_STATUS)
674                         outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
675         } else if (do_rapl && rapl_joules) {
676                 if (do_rapl & RAPL_PKG)
677                         outp += sprintf(outp, fmt8,
678                                         p->energy_pkg * rapl_energy_units);
679                 if (do_rapl & RAPL_CORES)
680                         outp += sprintf(outp, fmt8,
681                                         p->energy_cores * rapl_energy_units);
682                 if (do_rapl & RAPL_GFX)
683                         outp += sprintf(outp, fmt8,
684                                         p->energy_gfx * rapl_energy_units);
685                 if (do_rapl & RAPL_DRAM)
686                         outp += sprintf(outp, fmt8,
687                                         p->energy_dram * rapl_dram_energy_units);
688                 if (do_rapl & RAPL_PKG_PERF_STATUS)
689                         outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
690                 if (do_rapl & RAPL_DRAM_PERF_STATUS)
691                         outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
692
693                 outp += sprintf(outp, fmt8, interval_float);
694         }
695 done:
696         outp += sprintf(outp, "\n");
697
698         return 0;
699 }
700
701 void flush_output_stdout(void)
702 {
703         FILE *filep;
704
705         if (outf == stderr)
706                 filep = stdout;
707         else
708                 filep = outf;
709
710         fputs(output_buffer, filep);
711         fflush(filep);
712
713         outp = output_buffer;
714 }
715 void flush_output_stderr(void)
716 {
717         fputs(output_buffer, outf);
718         fflush(outf);
719         outp = output_buffer;
720 }
721 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
722 {
723         static int printed;
724
725         if (!printed || !summary_only)
726                 print_header();
727
728         if (topo.num_cpus > 1)
729                 format_counters(&average.threads, &average.cores,
730                         &average.packages);
731
732         printed = 1;
733
734         if (summary_only)
735                 return;
736
737         for_all_cpus(format_counters, t, c, p);
738 }
739
/*
 * DELTA_WRAP32 - replace @old with the distance from @old to @new for a
 * counter that wraps at 32 bits.
 *
 * If @new is greater than @old the plain difference is stored; otherwise
 * the counter is assumed to have wrapped and 2^32 is added first.  @old
 * must be an lvalue.  Both arguments are evaluated more than once, so
 * they must be free of side effects.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement,
 * e.g. as the unbraced body of an if/else.
 */
#define DELTA_WRAP32(new, old)					\
	do {							\
		if ((new) > (old)) {				\
			(old) = (new) - (old);			\
		} else {					\
			(old) = 0x100000000 + (new) - (old);	\
		}						\
	} while (0)
746
747 void
748 delta_package(struct pkg_data *new, struct pkg_data *old)
749 {
750
751         if (do_skl_residency) {
752                 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
753                 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
754                 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
755                 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
756         }
757         old->pc2 = new->pc2 - old->pc2;
758         if (do_pc3)
759                 old->pc3 = new->pc3 - old->pc3;
760         if (do_pc6)
761                 old->pc6 = new->pc6 - old->pc6;
762         if (do_pc7)
763                 old->pc7 = new->pc7 - old->pc7;
764         old->pc8 = new->pc8 - old->pc8;
765         old->pc9 = new->pc9 - old->pc9;
766         old->pc10 = new->pc10 - old->pc10;
767         old->pkg_temp_c = new->pkg_temp_c;
768
769         old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
770         old->gfx_mhz = new->gfx_mhz;
771
772         DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
773         DELTA_WRAP32(new->energy_cores, old->energy_cores);
774         DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
775         DELTA_WRAP32(new->energy_dram, old->energy_dram);
776         DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
777         DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
778 }
779
780 void
781 delta_core(struct core_data *new, struct core_data *old)
782 {
783         old->c3 = new->c3 - old->c3;
784         old->c6 = new->c6 - old->c6;
785         old->c7 = new->c7 - old->c7;
786         old->core_temp_c = new->core_temp_c;
787 }
788
789 /*
790  * old = new - old
791  */
792 void
793 delta_thread(struct thread_data *new, struct thread_data *old,
794         struct core_data *core_delta)
795 {
796         old->tsc = new->tsc - old->tsc;
797
798         /* check for TSC < 1 Mcycles over interval */
799         if (old->tsc < (1000 * 1000))
800                 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
801                      "You can disable all c-states by booting with \"idle=poll\"\n"
802                      "or just the deep ones with \"processor.max_cstate=1\"");
803
804         old->c1 = new->c1 - old->c1;
805
806         if (has_aperf) {
807                 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
808                         old->aperf = new->aperf - old->aperf;
809                         old->mperf = new->mperf - old->mperf;
810                 } else {
811
812                         if (!aperf_mperf_unstable) {
813                                 fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname);
814                                 fprintf(outf, "* Frequency results do not cover entire interval *\n");
815                                 fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n");
816
817                                 aperf_mperf_unstable = 1;
818                         }
819                         /*
820                          * mperf delta is likely a huge "positive" number
821                          * can not use it for calculating c0 time
822                          */
823                         skip_c0 = 1;
824                         skip_c1 = 1;
825                 }
826         }
827
828
829         if (use_c1_residency_msr) {
830                 /*
831                  * Some models have a dedicated C1 residency MSR,
832                  * which should be more accurate than the derivation below.
833                  */
834         } else {
835                 /*
836                  * As counter collection is not atomic,
837                  * it is possible for mperf's non-halted cycles + idle states
838                  * to exceed TSC's all cycles: show c1 = 0% in that case.
839                  */
840                 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
841                         old->c1 = 0;
842                 else {
843                         /* normal case, derive c1 */
844                         old->c1 = old->tsc - old->mperf - core_delta->c3
845                                 - core_delta->c6 - core_delta->c7;
846                 }
847         }
848
849         if (old->mperf == 0) {
850                 if (debug > 1)
851                         fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
852                 old->mperf = 1; /* divide by 0 protection */
853         }
854
855         old->extra_delta32 = new->extra_delta32 - old->extra_delta32;
856         old->extra_delta32 &= 0xFFFFFFFF;
857
858         old->extra_delta64 = new->extra_delta64 - old->extra_delta64;
859
860         /*
861          * Extra MSR is just a snapshot, simply copy latest w/o subtracting
862          */
863         old->extra_msr32 = new->extra_msr32;
864         old->extra_msr64 = new->extra_msr64;
865
866         if (do_irq)
867                 old->irq_count = new->irq_count - old->irq_count;
868
869         if (do_smi)
870                 old->smi_count = new->smi_count - old->smi_count;
871 }
872
873 int delta_cpu(struct thread_data *t, struct core_data *c,
874         struct pkg_data *p, struct thread_data *t2,
875         struct core_data *c2, struct pkg_data *p2)
876 {
877         /* calculate core delta only for 1st thread in core */
878         if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
879                 delta_core(c, c2);
880
881         /* always calculate thread delta */
882         delta_thread(t, t2, c2);        /* c2 is core delta */
883
884         /* calculate package delta only for 1st core in package */
885         if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
886                 delta_package(p, p2);
887
888         return 0;
889 }
890
891 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
892 {
893         t->tsc = 0;
894         t->aperf = 0;
895         t->mperf = 0;
896         t->c1 = 0;
897
898         t->extra_delta32 = 0;
899         t->extra_delta64 = 0;
900
901         t->irq_count = 0;
902         t->smi_count = 0;
903
904         /* tells format_counters to dump all fields from this set */
905         t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
906
907         c->c3 = 0;
908         c->c6 = 0;
909         c->c7 = 0;
910         c->core_temp_c = 0;
911
912         p->pkg_wtd_core_c0 = 0;
913         p->pkg_any_core_c0 = 0;
914         p->pkg_any_gfxe_c0 = 0;
915         p->pkg_both_core_gfxe_c0 = 0;
916
917         p->pc2 = 0;
918         if (do_pc3)
919                 p->pc3 = 0;
920         if (do_pc6)
921                 p->pc6 = 0;
922         if (do_pc7)
923                 p->pc7 = 0;
924         p->pc8 = 0;
925         p->pc9 = 0;
926         p->pc10 = 0;
927
928         p->energy_pkg = 0;
929         p->energy_dram = 0;
930         p->energy_cores = 0;
931         p->energy_gfx = 0;
932         p->rapl_pkg_perf_status = 0;
933         p->rapl_dram_perf_status = 0;
934         p->pkg_temp_c = 0;
935
936         p->gfx_rc6_ms = 0;
937         p->gfx_mhz = 0;
938 }
939 int sum_counters(struct thread_data *t, struct core_data *c,
940         struct pkg_data *p)
941 {
942         average.threads.tsc += t->tsc;
943         average.threads.aperf += t->aperf;
944         average.threads.mperf += t->mperf;
945         average.threads.c1 += t->c1;
946
947         average.threads.extra_delta32 += t->extra_delta32;
948         average.threads.extra_delta64 += t->extra_delta64;
949
950         average.threads.irq_count += t->irq_count;
951         average.threads.smi_count += t->smi_count;
952
953         /* sum per-core values only for 1st thread in core */
954         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
955                 return 0;
956
957         average.cores.c3 += c->c3;
958         average.cores.c6 += c->c6;
959         average.cores.c7 += c->c7;
960
961         average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
962
963         /* sum per-pkg values only for 1st core in pkg */
964         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
965                 return 0;
966
967         if (do_skl_residency) {
968                 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
969                 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
970                 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
971                 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
972         }
973
974         average.packages.pc2 += p->pc2;
975         if (do_pc3)
976                 average.packages.pc3 += p->pc3;
977         if (do_pc6)
978                 average.packages.pc6 += p->pc6;
979         if (do_pc7)
980                 average.packages.pc7 += p->pc7;
981         average.packages.pc8 += p->pc8;
982         average.packages.pc9 += p->pc9;
983         average.packages.pc10 += p->pc10;
984
985         average.packages.energy_pkg += p->energy_pkg;
986         average.packages.energy_dram += p->energy_dram;
987         average.packages.energy_cores += p->energy_cores;
988         average.packages.energy_gfx += p->energy_gfx;
989
990         average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
991         average.packages.gfx_mhz = p->gfx_mhz;
992
993         average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
994
995         average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
996         average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
997         return 0;
998 }
999 /*
1000  * sum the counters for all cpus in the system
1001  * compute the weighted average
1002  */
1003 void compute_average(struct thread_data *t, struct core_data *c,
1004         struct pkg_data *p)
1005 {
1006         clear_counters(&average.threads, &average.cores, &average.packages);
1007
1008         for_all_cpus(sum_counters, t, c, p);
1009
1010         average.threads.tsc /= topo.num_cpus;
1011         average.threads.aperf /= topo.num_cpus;
1012         average.threads.mperf /= topo.num_cpus;
1013         average.threads.c1 /= topo.num_cpus;
1014
1015         average.threads.extra_delta32 /= topo.num_cpus;
1016         average.threads.extra_delta32 &= 0xFFFFFFFF;
1017
1018         average.threads.extra_delta64 /= topo.num_cpus;
1019
1020         average.cores.c3 /= topo.num_cores;
1021         average.cores.c6 /= topo.num_cores;
1022         average.cores.c7 /= topo.num_cores;
1023
1024         if (do_skl_residency) {
1025                 average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1026                 average.packages.pkg_any_core_c0 /= topo.num_packages;
1027                 average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1028                 average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1029         }
1030
1031         average.packages.pc2 /= topo.num_packages;
1032         if (do_pc3)
1033                 average.packages.pc3 /= topo.num_packages;
1034         if (do_pc6)
1035                 average.packages.pc6 /= topo.num_packages;
1036         if (do_pc7)
1037                 average.packages.pc7 /= topo.num_packages;
1038
1039         average.packages.pc8 /= topo.num_packages;
1040         average.packages.pc9 /= topo.num_packages;
1041         average.packages.pc10 /= topo.num_packages;
1042 }
1043
/*
 * rdtsc()
 * Execute the x86 RDTSC instruction on the current CPU and return the
 * 64-bit counter assembled from its two 32-bit outputs (EAX = low half,
 * EDX = high half, per the "=a"/"=d" constraints).
 */
static unsigned long long rdtsc(void)
{
	unsigned int low, high;

	/*
	 * Use the __asm__/__volatile__ spellings: unlike plain "asm" they
	 * are also accepted when compiling with strict -std=c99/-std=c11.
	 */
	__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));

	return ((unsigned long long)high << 32) | low;
}
1052
1053 /*
1054  * get_counters(...)
1055  * migrate to cpu
1056  * acquire and record local counters for that cpu
1057  */
1058 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1059 {
1060         int cpu = t->cpu_id;
1061         unsigned long long msr;
1062         int aperf_mperf_retry_count = 0;
1063
1064         if (cpu_migrate(cpu)) {
1065                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1066                 return -1;
1067         }
1068
1069 retry:
1070         t->tsc = rdtsc();       /* we are running on local CPU of interest */
1071
1072         if (has_aperf) {
1073                 unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
1074
1075                 /*
1076                  * The TSC, APERF and MPERF must be read together for
1077                  * APERF/MPERF and MPERF/TSC to give accurate results.
1078                  *
1079                  * Unfortunately, APERF and MPERF are read by
1080                  * individual system call, so delays may occur
1081                  * between them.  If the time to read them
1082                  * varies by a large amount, we re-read them.
1083                  */
1084
1085                 /*
1086                  * This initial dummy APERF read has been seen to
1087                  * reduce jitter in the subsequent reads.
1088                  */
1089
1090                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1091                         return -3;
1092
1093                 t->tsc = rdtsc();       /* re-read close to APERF */
1094
1095                 tsc_before = t->tsc;
1096
1097                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1098                         return -3;
1099
1100                 tsc_between = rdtsc();
1101
1102                 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1103                         return -4;
1104
1105                 tsc_after = rdtsc();
1106
1107                 aperf_time = tsc_between - tsc_before;
1108                 mperf_time = tsc_after - tsc_between;
1109
1110                 /*
1111                  * If the system call latency to read APERF and MPERF
1112                  * differ by more than 2x, then try again.
1113                  */
1114                 if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
1115                         aperf_mperf_retry_count++;
1116                         if (aperf_mperf_retry_count < 5)
1117                                 goto retry;
1118                         else
1119                                 warnx("cpu%d jitter %lld %lld",
1120                                         cpu, aperf_time, mperf_time);
1121                 }
1122                 aperf_mperf_retry_count = 0;
1123
1124                 t->aperf = t->aperf * aperf_mperf_multiplier;
1125                 t->mperf = t->mperf * aperf_mperf_multiplier;
1126         }
1127
1128         if (do_irq)
1129                 t->irq_count = irqs_per_cpu[cpu];
1130         if (do_smi) {
1131                 if (get_msr(cpu, MSR_SMI_COUNT, &msr))
1132                         return -5;
1133                 t->smi_count = msr & 0xFFFFFFFF;
1134         }
1135         if (extra_delta_offset32) {
1136                 if (get_msr(cpu, extra_delta_offset32, &msr))
1137                         return -5;
1138                 t->extra_delta32 = msr & 0xFFFFFFFF;
1139         }
1140
1141         if (extra_delta_offset64)
1142                 if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64))
1143                         return -5;
1144
1145         if (extra_msr_offset32) {
1146                 if (get_msr(cpu, extra_msr_offset32, &msr))
1147                         return -5;
1148                 t->extra_msr32 = msr & 0xFFFFFFFF;
1149         }
1150
1151         if (extra_msr_offset64)
1152                 if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
1153                         return -5;
1154
1155         if (use_c1_residency_msr) {
1156                 if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
1157                         return -6;
1158         }
1159
1160         /* collect core counters only for 1st thread in core */
1161         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1162                 return 0;
1163
1164         if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) {
1165                 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
1166                         return -6;
1167         }
1168
1169         if (do_nhm_cstates && !do_knl_cstates) {
1170                 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
1171                         return -7;
1172         } else if (do_knl_cstates) {
1173                 if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
1174                         return -7;
1175         }
1176
1177         if (do_snb_cstates)
1178                 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
1179                         return -8;
1180
1181         if (do_dts) {
1182                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1183                         return -9;
1184                 c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1185         }
1186
1187
1188         /* collect package counters only for 1st core in package */
1189         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1190                 return 0;
1191
1192         if (do_skl_residency) {
1193                 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
1194                         return -10;
1195                 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
1196                         return -11;
1197                 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
1198                         return -12;
1199                 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
1200                         return -13;
1201         }
1202         if (do_pc3)
1203                 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
1204                         return -9;
1205         if (do_pc6)
1206                 if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
1207                         return -10;
1208         if (do_pc2)
1209                 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
1210                         return -11;
1211         if (do_pc7)
1212                 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
1213                         return -12;
1214         if (do_c8_c9_c10) {
1215                 if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
1216                         return -13;
1217                 if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
1218                         return -13;
1219                 if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
1220                         return -13;
1221         }
1222         if (do_rapl & RAPL_PKG) {
1223                 if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
1224                         return -13;
1225                 p->energy_pkg = msr & 0xFFFFFFFF;
1226         }
1227         if (do_rapl & RAPL_CORES) {
1228                 if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
1229                         return -14;
1230                 p->energy_cores = msr & 0xFFFFFFFF;
1231         }
1232         if (do_rapl & RAPL_DRAM) {
1233                 if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
1234                         return -15;
1235                 p->energy_dram = msr & 0xFFFFFFFF;
1236         }
1237         if (do_rapl & RAPL_GFX) {
1238                 if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
1239                         return -16;
1240                 p->energy_gfx = msr & 0xFFFFFFFF;
1241         }
1242         if (do_rapl & RAPL_PKG_PERF_STATUS) {
1243                 if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
1244                         return -16;
1245                 p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
1246         }
1247         if (do_rapl & RAPL_DRAM_PERF_STATUS) {
1248                 if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
1249                         return -16;
1250                 p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
1251         }
1252         if (do_ptm) {
1253                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1254                         return -17;
1255                 p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1256         }
1257
1258         if (do_gfx_rc6_ms)
1259                 p->gfx_rc6_ms = gfx_cur_rc6_ms;
1260
1261         if (do_gfx_mhz)
1262                 p->gfx_mhz = gfx_cur_mhz;
1263
1264         return 0;
1265 }
1266
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCLUNL 14 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable name for each PCL* code, indexed by that code.
 * Designated initializers tie every name to its code, so PCLUKN (0) prints
 * "unknown" and PCLRSV (1) prints "reserved" (the positional list had these
 * two swapped).
 */
char *pkg_cstate_limit_strings[] = {
	[PCLUKN] = "unknown",
	[PCLRSV] = "reserved",
	[PCL__0] = "pc0",
	[PCL__1] = "pc1",
	[PCL__2] = "pc2",
	[PCL__3] = "pc3",
	[PCL__4] = "pc4",
	[PCL__6] = "pc6",
	[PCL_6N] = "pc6n",
	[PCL_6R] = "pc6r",
	[PCL__7] = "pc7",
	[PCL_7S] = "pc7s",
	[PCL__8] = "pc8",
	[PCL__9] = "pc9",
	[PCLUNL] = "unlimited",
};

/*
 * Per-family translation tables: each maps a 4-bit limit field value (0..15)
 * to a PCL* code.
 */
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1299
1300
1301 static void
1302 calculate_tsc_tweak()
1303 {
1304         tsc_tweak = base_hz / tsc_hz;
1305 }
1306
1307 static void
1308 dump_nhm_platform_info(void)
1309 {
1310         unsigned long long msr;
1311         unsigned int ratio;
1312
1313         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
1314
1315         fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
1316
1317         ratio = (msr >> 40) & 0xFF;
1318         fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n",
1319                 ratio, bclk, ratio * bclk);
1320
1321         ratio = (msr >> 8) & 0xFF;
1322         fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n",
1323                 ratio, bclk, ratio * bclk);
1324
1325         get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
1326         fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1327                 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
1328
1329         return;
1330 }
1331
1332 static void
1333 dump_hsw_turbo_ratio_limits(void)
1334 {
1335         unsigned long long msr;
1336         unsigned int ratio;
1337
1338         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
1339
1340         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
1341
1342         ratio = (msr >> 8) & 0xFF;
1343         if (ratio)
1344                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
1345                         ratio, bclk, ratio * bclk);
1346
1347         ratio = (msr >> 0) & 0xFF;
1348         if (ratio)
1349                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
1350                         ratio, bclk, ratio * bclk);
1351         return;
1352 }
1353
1354 static void
1355 dump_ivt_turbo_ratio_limits(void)
1356 {
1357         unsigned long long msr;
1358         unsigned int ratio;
1359
1360         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
1361
1362         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
1363
1364         ratio = (msr >> 56) & 0xFF;
1365         if (ratio)
1366                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
1367                         ratio, bclk, ratio * bclk);
1368
1369         ratio = (msr >> 48) & 0xFF;
1370         if (ratio)
1371                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
1372                         ratio, bclk, ratio * bclk);
1373
1374         ratio = (msr >> 40) & 0xFF;
1375         if (ratio)
1376                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
1377                         ratio, bclk, ratio * bclk);
1378
1379         ratio = (msr >> 32) & 0xFF;
1380         if (ratio)
1381                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
1382                         ratio, bclk, ratio * bclk);
1383
1384         ratio = (msr >> 24) & 0xFF;
1385         if (ratio)
1386                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
1387                         ratio, bclk, ratio * bclk);
1388
1389         ratio = (msr >> 16) & 0xFF;
1390         if (ratio)
1391                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
1392                         ratio, bclk, ratio * bclk);
1393
1394         ratio = (msr >> 8) & 0xFF;
1395         if (ratio)
1396                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
1397                         ratio, bclk, ratio * bclk);
1398
1399         ratio = (msr >> 0) & 0xFF;
1400         if (ratio)
1401                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
1402                         ratio, bclk, ratio * bclk);
1403         return;
1404 }
1405
1406 static void
1407 dump_nhm_turbo_ratio_limits(void)
1408 {
1409         unsigned long long msr;
1410         unsigned int ratio;
1411
1412         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
1413
1414         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
1415
1416         ratio = (msr >> 56) & 0xFF;
1417         if (ratio)
1418                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
1419                         ratio, bclk, ratio * bclk);
1420
1421         ratio = (msr >> 48) & 0xFF;
1422         if (ratio)
1423                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
1424                         ratio, bclk, ratio * bclk);
1425
1426         ratio = (msr >> 40) & 0xFF;
1427         if (ratio)
1428                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
1429                         ratio, bclk, ratio * bclk);
1430
1431         ratio = (msr >> 32) & 0xFF;
1432         if (ratio)
1433                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
1434                         ratio, bclk, ratio * bclk);
1435
1436         ratio = (msr >> 24) & 0xFF;
1437         if (ratio)
1438                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
1439                         ratio, bclk, ratio * bclk);
1440
1441         ratio = (msr >> 16) & 0xFF;
1442         if (ratio)
1443                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
1444                         ratio, bclk, ratio * bclk);
1445
1446         ratio = (msr >> 8) & 0xFF;
1447         if (ratio)
1448                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
1449                         ratio, bclk, ratio * bclk);
1450
1451         ratio = (msr >> 0) & 0xFF;
1452         if (ratio)
1453                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
1454                         ratio, bclk, ratio * bclk);
1455         return;
1456 }
1457
1458 static void
1459 dump_knl_turbo_ratio_limits(void)
1460 {
1461         const unsigned int buckets_no = 7;
1462
1463         unsigned long long msr;
1464         int delta_cores, delta_ratio;
1465         int i, b_nr;
1466         unsigned int cores[buckets_no];
1467         unsigned int ratio[buckets_no];
1468
1469         get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
1470
1471         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
1472                 base_cpu, msr);
1473
1474         /**
1475          * Turbo encoding in KNL is as follows:
1476          * [0] -- Reserved
1477          * [7:1] -- Base value of number of active cores of bucket 1.
1478          * [15:8] -- Base value of freq ratio of bucket 1.
1479          * [20:16] -- +ve delta of number of active cores of bucket 2.
1480          * i.e. active cores of bucket 2 =
1481          * active cores of bucket 1 + delta
1482          * [23:21] -- Negative delta of freq ratio of bucket 2.
1483          * i.e. freq ratio of bucket 2 =
1484          * freq ratio of bucket 1 - delta
1485          * [28:24]-- +ve delta of number of active cores of bucket 3.
1486          * [31:29]-- -ve delta of freq ratio of bucket 3.
1487          * [36:32]-- +ve delta of number of active cores of bucket 4.
1488          * [39:37]-- -ve delta of freq ratio of bucket 4.
1489          * [44:40]-- +ve delta of number of active cores of bucket 5.
1490          * [47:45]-- -ve delta of freq ratio of bucket 5.
1491          * [52:48]-- +ve delta of number of active cores of bucket 6.
1492          * [55:53]-- -ve delta of freq ratio of bucket 6.
1493          * [60:56]-- +ve delta of number of active cores of bucket 7.
1494          * [63:61]-- -ve delta of freq ratio of bucket 7.
1495          */
1496
1497         b_nr = 0;
1498         cores[b_nr] = (msr & 0xFF) >> 1;
1499         ratio[b_nr] = (msr >> 8) & 0xFF;
1500
1501         for (i = 16; i < 64; i += 8) {
1502                 delta_cores = (msr >> i) & 0x1F;
1503                 delta_ratio = (msr >> (i + 5)) & 0x7;
1504
1505                 cores[b_nr + 1] = cores[b_nr] + delta_cores;
1506                 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
1507                 b_nr++;
1508         }
1509
1510         for (i = buckets_no - 1; i >= 0; i--)
1511                 if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
1512                         fprintf(outf,
1513                                 "%d * %.0f = %.0f MHz max turbo %d active cores\n",
1514                                 ratio[i], bclk, ratio[i] * bclk, cores[i]);
1515 }
1516
1517 static void
1518 dump_nhm_cst_cfg(void)
1519 {
1520         unsigned long long msr;
1521
1522         get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
1523
1524 #define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
1525 #define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
1526
1527         fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr);
1528
1529         fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
1530                 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
1531                 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
1532                 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
1533                 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
1534                 (msr & (1 << 15)) ? "" : "UN",
1535                 (unsigned int)msr & 0xF,
1536                 pkg_cstate_limit_strings[pkg_cstate_limit]);
1537         return;
1538 }
1539
1540 static void
1541 dump_config_tdp(void)
1542 {
1543         unsigned long long msr;
1544
1545         get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
1546         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
1547         fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
1548
1549         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
1550         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
1551         if (msr) {
1552                 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
1553                 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
1554                 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
1555                 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
1556         }
1557         fprintf(outf, ")\n");
1558
1559         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
1560         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
1561         if (msr) {
1562                 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
1563                 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
1564                 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
1565                 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
1566         }
1567         fprintf(outf, ")\n");
1568
1569         get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
1570         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
1571         if ((msr) & 0x3)
1572                 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
1573         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
1574         fprintf(outf, ")\n");
1575
1576         get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
1577         fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
1578         fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
1579         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
1580         fprintf(outf, ")\n");
1581 }
1582 void free_fd_percpu(void)
1583 {
1584         int i;
1585
1586         for (i = 0; i < topo.max_cpu_num; ++i) {
1587                 if (fd_percpu[i] != 0)
1588                         close(fd_percpu[i]);
1589         }
1590
1591         free(fd_percpu);
1592 }
1593
1594 void free_all_buffers(void)
1595 {
1596         CPU_FREE(cpu_present_set);
1597         cpu_present_set = NULL;
1598         cpu_present_setsize = 0;
1599
1600         CPU_FREE(cpu_affinity_set);
1601         cpu_affinity_set = NULL;
1602         cpu_affinity_setsize = 0;
1603
1604         free(thread_even);
1605         free(core_even);
1606         free(package_even);
1607
1608         thread_even = NULL;
1609         core_even = NULL;
1610         package_even = NULL;
1611
1612         free(thread_odd);
1613         free(core_odd);
1614         free(package_odd);
1615
1616         thread_odd = NULL;
1617         core_odd = NULL;
1618         package_odd = NULL;
1619
1620         free(output_buffer);
1621         output_buffer = NULL;
1622         outp = NULL;
1623
1624         free_fd_percpu();
1625
1626         free(irq_column_2_cpu);
1627         free(irqs_per_cpu);
1628 }
1629
/*
 * fopen_or_die()
 * fopen() wrapper that never returns NULL: when the file cannot be opened
 * the program terminates through err() with exit status 1.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
1640
1641 /*
1642  * Parse a file containing a single int.
1643  */
1644 int parse_int_file(const char *fmt, ...)
1645 {
1646         va_list args;
1647         char path[PATH_MAX];
1648         FILE *filep;
1649         int value;
1650
1651         va_start(args, fmt);
1652         vsnprintf(path, sizeof(path), fmt, args);
1653         va_end(args);
1654         filep = fopen_or_die(path, "r");
1655         if (fscanf(filep, "%d", &value) != 1)
1656                 err(1, "%s: failed to parse number from file", path);
1657         fclose(filep);
1658         return value;
1659 }
1660
1661 /*
1662  * get_cpu_position_in_core(cpu)
1663  * return the position of the CPU among its HT siblings in the core
1664  * return -1 if the sibling is not in list
1665  */
1666 int get_cpu_position_in_core(int cpu)
1667 {
1668         char path[64];
1669         FILE *filep;
1670         int this_cpu;
1671         char character;
1672         int i;
1673
1674         sprintf(path,
1675                 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
1676                 cpu);
1677         filep = fopen(path, "r");
1678         if (filep == NULL) {
1679                 perror(path);
1680                 exit(1);
1681         }
1682
1683         for (i = 0; i < topo.num_threads_per_core; i++) {
1684                 fscanf(filep, "%d", &this_cpu);
1685                 if (this_cpu == cpu) {
1686                         fclose(filep);
1687                         return i;
1688                 }
1689
1690                 /* Account for no separator after last thread*/
1691                 if (i != (topo.num_threads_per_core - 1))
1692                         fscanf(filep, "%c", &character);
1693         }
1694
1695         fclose(filep);
1696         return -1;
1697 }
1698
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if given CPU is 1st core in package, i.e. if its number equals
 * the leading integer of its core_siblings_list; return 0 otherwise
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_listed;

	first_listed = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return first_listed == cpu;
}
1707
/*
 * get_physical_package_id(cpu)
 * return the integer stored in the cpu's sysfs physical_package_id file
 */
int get_physical_package_id(int cpu)
{
	int package_id;

	package_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return package_id;
}
1712
/*
 * get_core_id(cpu)
 * return the integer stored in the cpu's sysfs core_id file
 */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
1717
/*
 * get_num_ht_siblings(cpu)
 * return the number of hardware threads listed in the cpu's
 * thread_siblings_list, the cpu itself included.
 *
 * file format:
 * A ',' separated list whose entries are either a single number or an
 * inclusive 'first-last' range (eg 1-2 or 1,3,4,5 or 0-3).
 *
 * Every entry is counted in full, so a one-cpu list yields 1 and a range
 * contributes all of its members.  An empty or unparsable file is reported
 * as 1.
 */
int get_num_ht_siblings(int cpu)
{
	char path[80];
	char str[100];
	FILE *filep;
	char *pos;
	int siblings = 0;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
	filep = fopen_or_die(path, "r");

	if (fgets(str, sizeof(str), filep) == NULL)
		str[0] = '\0';
	fclose(filep);

	pos = str;
	while (*pos != '\0') {
		char *end;
		long first, last;

		first = strtol(pos, &end, 10);
		if (end == pos)
			break;		/* no number here: stop parsing */
		pos = end;
		last = first;

		if (*pos == '-') {
			last = strtol(pos + 1, &end, 10);
			if (end == pos + 1)
				break;	/* dangling '-' */
			pos = end;
		}

		if (last >= first)
			siblings += (int)(last - first + 1);

		if (*pos != ',')
			break;		/* newline or end of list */
		pos++;
	}

	/* a cpu is always a member of its own sibling list */
	if (siblings < 1)
		siblings = 1;

	return siblings;
}
1748
1749 /*
1750  * run func(thread, core, package) in topology order
1751  * skip non-present cpus
1752  */
1753
1754 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
1755         struct pkg_data *, struct thread_data *, struct core_data *,
1756         struct pkg_data *), struct thread_data *thread_base,
1757         struct core_data *core_base, struct pkg_data *pkg_base,
1758         struct thread_data *thread_base2, struct core_data *core_base2,
1759         struct pkg_data *pkg_base2)
1760 {
1761         int retval, pkg_no, core_no, thread_no;
1762
1763         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
1764                 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
1765                         for (thread_no = 0; thread_no <
1766                                 topo.num_threads_per_core; ++thread_no) {
1767                                 struct thread_data *t, *t2;
1768                                 struct core_data *c, *c2;
1769                                 struct pkg_data *p, *p2;
1770
1771                                 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
1772
1773                                 if (cpu_is_not_present(t->cpu_id))
1774                                         continue;
1775
1776                                 t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
1777
1778                                 c = GET_CORE(core_base, core_no, pkg_no);
1779                                 c2 = GET_CORE(core_base2, core_no, pkg_no);
1780
1781                                 p = GET_PKG(pkg_base, pkg_no);
1782                                 p2 = GET_PKG(pkg_base2, pkg_no);
1783
1784                                 retval = func(t, c, p, t2, c2, p2);
1785                                 if (retval)
1786                                         return retval;
1787                         }
1788                 }
1789         }
1790         return 0;
1791 }
1792
1793 /*
1794  * run func(cpu) on every cpu in /proc/stat
1795  * return max_cpu number
1796  */
1797 int for_all_proc_cpus(int (func)(int))
1798 {
1799         FILE *fp;
1800         int cpu_num;
1801         int retval;
1802
1803         fp = fopen_or_die(proc_stat, "r");
1804
1805         retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
1806         if (retval != 0)
1807                 err(1, "%s: failed to parse format", proc_stat);
1808
1809         while (1) {
1810                 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
1811                 if (retval != 1)
1812                         break;
1813
1814                 retval = func(cpu_num);
1815                 if (retval) {
1816                         fclose(fp);
1817                         return(retval);
1818                 }
1819         }
1820         fclose(fp);
1821         return 0;
1822 }
1823
1824 void re_initialize(void)
1825 {
1826         free_all_buffers();
1827         setup_all_buffers();
1828         printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
1829 }
1830
1831
1832 /*
1833  * count_cpus()
1834  * remember the last one seen, it will be the max
1835  */
1836 int count_cpus(int cpu)
1837 {
1838         if (topo.max_cpu_num < cpu)
1839                 topo.max_cpu_num = cpu;
1840
1841         topo.num_cpus += 1;
1842         return 0;
1843 }
1844 int mark_cpu_present(int cpu)
1845 {
1846         CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
1847         return 0;
1848 }
1849
1850 /*
1851  * snapshot_proc_interrupts()
1852  *
1853  * read and record summary of /proc/interrupts
1854  *
1855  * return 1 if config change requires a restart, else return 0
1856  */
1857 int snapshot_proc_interrupts(void)
1858 {
1859         static FILE *fp;
1860         int column, retval;
1861
1862         if (fp == NULL)
1863                 fp = fopen_or_die("/proc/interrupts", "r");
1864         else
1865                 rewind(fp);
1866
1867         /* read 1st line of /proc/interrupts to get cpu* name for each column */
1868         for (column = 0; column < topo.num_cpus; ++column) {
1869                 int cpu_number;
1870
1871                 retval = fscanf(fp, " CPU%d", &cpu_number);
1872                 if (retval != 1)
1873                         break;
1874
1875                 if (cpu_number > topo.max_cpu_num) {
1876                         warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
1877                         return 1;
1878                 }
1879
1880                 irq_column_2_cpu[column] = cpu_number;
1881                 irqs_per_cpu[cpu_number] = 0;
1882         }
1883
1884         /* read /proc/interrupt count lines and sum up irqs per cpu */
1885         while (1) {
1886                 int column;
1887                 char buf[64];
1888
1889                 retval = fscanf(fp, " %s:", buf);       /* flush irq# "N:" */
1890                 if (retval != 1)
1891                         break;
1892
1893                 /* read the count per cpu */
1894                 for (column = 0; column < topo.num_cpus; ++column) {
1895
1896                         int cpu_number, irq_count;
1897
1898                         retval = fscanf(fp, " %d", &irq_count);
1899                         if (retval != 1)
1900                                 break;
1901
1902                         cpu_number = irq_column_2_cpu[column];
1903                         irqs_per_cpu[cpu_number] += irq_count;
1904
1905                 }
1906
1907                 while (getc(fp) != '\n')
1908                         ;       /* flush interrupt description */
1909
1910         }
1911         return 0;
1912 }
1913 /*
1914  * snapshot_gfx_rc6_ms()
1915  *
1916  * record snapshot of
1917  * /sys/class/drm/card0/power/rc6_residency_ms
1918  *
1919  * return 1 if config change requires a restart, else return 0
1920  */
1921 int snapshot_gfx_rc6_ms(void)
1922 {
1923         FILE *fp;
1924         int retval;
1925
1926         fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
1927
1928         retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
1929         if (retval != 1)
1930                 err(1, "GFX rc6");
1931
1932         fclose(fp);
1933
1934         return 0;
1935 }
1936 /*
1937  * snapshot_gfx_mhz()
1938  *
1939  * record snapshot of
1940  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
1941  *
1942  * return 1 if config change requires a restart, else return 0
1943  */
1944 int snapshot_gfx_mhz(void)
1945 {
1946         static FILE *fp;
1947         int retval;
1948
1949         if (fp == NULL)
1950                 fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
1951         else
1952                 rewind(fp);
1953
1954         retval = fscanf(fp, "%d", &gfx_cur_mhz);
1955         if (retval != 1)
1956                 err(1, "GFX MHz");
1957
1958         return 0;
1959 }
1960
1961 /*
1962  * snapshot /proc and /sys files
1963  *
1964  * return 1 if configuration restart needed, else return 0
1965  */
1966 int snapshot_proc_sysfs_files(void)
1967 {
1968         if (snapshot_proc_interrupts())
1969                 return 1;
1970
1971         if (do_gfx_rc6_ms)
1972                 snapshot_gfx_rc6_ms();
1973
1974         if (do_gfx_mhz)
1975                 snapshot_gfx_mhz();
1976
1977         return 0;
1978 }
1979
1980 void turbostat_loop()
1981 {
1982         int retval;
1983         int restarted = 0;
1984
1985 restart:
1986         restarted++;
1987
1988         snapshot_proc_sysfs_files();
1989         retval = for_all_cpus(get_counters, EVEN_COUNTERS);
1990         if (retval < -1) {
1991                 exit(retval);
1992         } else if (retval == -1) {
1993                 if (restarted > 1) {
1994                         exit(retval);
1995                 }
1996                 re_initialize();
1997                 goto restart;
1998         }
1999         restarted = 0;
2000         gettimeofday(&tv_even, (struct timezone *)NULL);
2001
2002         while (1) {
2003                 if (for_all_proc_cpus(cpu_is_not_present)) {
2004                         re_initialize();
2005                         goto restart;
2006                 }
2007                 nanosleep(&interval_ts, NULL);
2008                 if (snapshot_proc_sysfs_files())
2009                         goto restart;
2010                 retval = for_all_cpus(get_counters, ODD_COUNTERS);
2011                 if (retval < -1) {
2012                         exit(retval);
2013                 } else if (retval == -1) {
2014                         re_initialize();
2015                         goto restart;
2016                 }
2017                 gettimeofday(&tv_odd, (struct timezone *)NULL);
2018                 timersub(&tv_odd, &tv_even, &tv_delta);
2019                 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
2020                 compute_average(EVEN_COUNTERS);
2021                 format_all_counters(EVEN_COUNTERS);
2022                 flush_output_stdout();
2023                 nanosleep(&interval_ts, NULL);
2024                 if (snapshot_proc_sysfs_files())
2025                         goto restart;
2026                 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2027                 if (retval < -1) {
2028                         exit(retval);
2029                 } else if (retval == -1) {
2030                         re_initialize();
2031                         goto restart;
2032                 }
2033                 gettimeofday(&tv_even, (struct timezone *)NULL);
2034                 timersub(&tv_even, &tv_odd, &tv_delta);
2035                 for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS);
2036                 compute_average(ODD_COUNTERS);
2037                 format_all_counters(ODD_COUNTERS);
2038                 flush_output_stdout();
2039         }
2040 }
2041
2042 void check_dev_msr()
2043 {
2044         struct stat sb;
2045         char pathname[32];
2046
2047         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2048         if (stat(pathname, &sb))
2049                 if (system("/sbin/modprobe msr > /dev/null 2>&1"))
2050                         err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
2051 }
2052
2053 void check_permissions()
2054 {
2055         struct __user_cap_header_struct cap_header_data;
2056         cap_user_header_t cap_header = &cap_header_data;
2057         struct __user_cap_data_struct cap_data_data;
2058         cap_user_data_t cap_data = &cap_data_data;
2059         extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
2060         int do_exit = 0;
2061         char pathname[32];
2062
2063         /* check for CAP_SYS_RAWIO */
2064         cap_header->pid = getpid();
2065         cap_header->version = _LINUX_CAPABILITY_VERSION;
2066         if (capget(cap_header, cap_data) < 0)
2067                 err(-6, "capget(2) failed");
2068
2069         if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
2070                 do_exit++;
2071                 warnx("capget(CAP_SYS_RAWIO) failed,"
2072                         " try \"# setcap cap_sys_rawio=ep %s\"", progname);
2073         }
2074
2075         /* test file permissions */
2076         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2077         if (euidaccess(pathname, R_OK)) {
2078                 do_exit++;
2079                 warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
2080         }
2081
2082         /* if all else fails, thell them to be root */
2083         if (do_exit)
2084                 if (getuid() != 0)
2085                         warnx("... or simply run as root");
2086
2087         if (do_exit)
2088                 exit(-6);
2089 }
2090
2091 /*
2092  * NHM adds support for additional MSRs:
2093  *
2094  * MSR_SMI_COUNT                   0x00000034
2095  *
2096  * MSR_PLATFORM_INFO               0x000000ce
2097  * MSR_NHM_SNB_PKG_CST_CFG_CTL     0x000000e2
2098  *
2099  * MSR_PKG_C3_RESIDENCY            0x000003f8
2100  * MSR_PKG_C6_RESIDENCY            0x000003f9
2101  * MSR_CORE_C3_RESIDENCY           0x000003fc
2102  * MSR_CORE_C6_RESIDENCY           0x000003fd
2103  *
2104  * Side effect:
2105  * sets global pkg_cstate_limit to decode MSR_NHM_SNB_PKG_CST_CFG_CTL
2106  */
2107 int probe_nhm_msrs(unsigned int family, unsigned int model)
2108 {
2109         unsigned long long msr;
2110         unsigned int base_ratio;
2111         int *pkg_cstate_limits;
2112
2113         if (!genuine_intel)
2114                 return 0;
2115
2116         if (family != 6)
2117                 return 0;
2118
2119         bclk = discover_bclk(family, model);
2120
2121         switch (model) {
2122         case 0x1A:      /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
2123         case 0x1E:      /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
2124         case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
2125         case 0x25:      /* Westmere Client - Clarkdale, Arrandale */
2126         case 0x2C:      /* Westmere EP - Gulftown */
2127         case 0x2E:      /* Nehalem-EX Xeon - Beckton */
2128         case 0x2F:      /* Westmere-EX Xeon - Eagleton */
2129                 pkg_cstate_limits = nhm_pkg_cstate_limits;
2130                 break;
2131         case 0x2A:      /* SNB */
2132         case 0x2D:      /* SNB Xeon */
2133         case 0x3A:      /* IVB */
2134         case 0x3E:      /* IVB Xeon */
2135                 pkg_cstate_limits = snb_pkg_cstate_limits;
2136                 break;
2137         case 0x3C:      /* HSW */
2138         case 0x3F:      /* HSX */
2139         case 0x45:      /* HSW */
2140         case 0x46:      /* HSW */
2141         case 0x3D:      /* BDW */
2142         case 0x47:      /* BDW */
2143         case 0x4F:      /* BDX */
2144         case 0x56:      /* BDX-DE */
2145         case 0x4E:      /* SKL */
2146         case 0x5E:      /* SKL */
2147                 pkg_cstate_limits = hsw_pkg_cstate_limits;
2148                 break;
2149         case 0x37:      /* BYT */
2150         case 0x4D:      /* AVN */
2151                 pkg_cstate_limits = slv_pkg_cstate_limits;
2152                 break;
2153         case 0x4C:      /* AMT */
2154                 pkg_cstate_limits = amt_pkg_cstate_limits;
2155                 break;
2156         case 0x57:      /* PHI */
2157                 pkg_cstate_limits = phi_pkg_cstate_limits;
2158                 break;
2159         default:
2160                 return 0;
2161         }
2162         get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
2163         pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
2164
2165         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2166         base_ratio = (msr >> 8) & 0xFF;
2167
2168         base_hz = base_ratio * bclk * 1000000;
2169         has_base_hz = 1;
2170         return 1;
2171 }
/*
 * has_nhm_turbo_ratio_limit()
 * return 0 for the models excluded below, 1 for every other model.
 * The family argument is not consulted.
 */
int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	/* Nehalem compatible, but do not include turbo-ratio limit support */
	if (model == 0x2E)	/* Nehalem-EX Xeon - Beckton */
		return 0;
	if (model == 0x2F)	/* Westmere-EX Xeon - Eagleton */
		return 0;
	if (model == 0x57)	/* PHI - Knights Landing (different MSR definition) */
		return 0;

	return 1;
}
2184 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
2185 {
2186         if (!genuine_intel)
2187                 return 0;
2188
2189         if (family != 6)
2190                 return 0;
2191
2192         switch (model) {
2193         case 0x3E:      /* IVB Xeon */
2194         case 0x3F:      /* HSW Xeon */
2195                 return 1;
2196         default:
2197                 return 0;
2198         }
2199 }
2200 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
2201 {
2202         if (!genuine_intel)
2203                 return 0;
2204
2205         if (family != 6)
2206                 return 0;
2207
2208         switch (model) {
2209         case 0x3F:      /* HSW Xeon */
2210                 return 1;
2211         default:
2212                 return 0;
2213         }
2214 }
2215
2216 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
2217 {
2218         if (!genuine_intel)
2219                 return 0;
2220
2221         if (family != 6)
2222                 return 0;
2223
2224         switch (model) {
2225         case 0x57:      /* Knights Landing */
2226                 return 1;
2227         default:
2228                 return 0;
2229         }
2230 }
2231 int has_config_tdp(unsigned int family, unsigned int model)
2232 {
2233         if (!genuine_intel)
2234                 return 0;
2235
2236         if (family != 6)
2237                 return 0;
2238
2239         switch (model) {
2240         case 0x3A:      /* IVB */
2241         case 0x3C:      /* HSW */
2242         case 0x3F:      /* HSX */
2243         case 0x45:      /* HSW */
2244         case 0x46:      /* HSW */
2245         case 0x3D:      /* BDW */
2246         case 0x47:      /* BDW */
2247         case 0x4F:      /* BDX */
2248         case 0x56:      /* BDX-DE */
2249         case 0x4E:      /* SKL */
2250         case 0x5E:      /* SKL */
2251
2252         case 0x57:      /* Knights Landing */
2253                 return 1;
2254         default:
2255                 return 0;
2256         }
2257 }
2258
2259 static void
2260 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
2261 {
2262         if (!do_nhm_platform_info)
2263                 return;
2264
2265         dump_nhm_platform_info();
2266
2267         if (has_hsw_turbo_ratio_limit(family, model))
2268                 dump_hsw_turbo_ratio_limits();
2269
2270         if (has_ivt_turbo_ratio_limit(family, model))
2271                 dump_ivt_turbo_ratio_limits();
2272
2273         if (has_nhm_turbo_ratio_limit(family, model))
2274                 dump_nhm_turbo_ratio_limits();
2275
2276         if (has_knl_turbo_ratio_limit(family, model))
2277                 dump_knl_turbo_ratio_limits();
2278
2279         if (has_config_tdp(family, model))
2280                 dump_config_tdp();
2281
2282         dump_nhm_cst_cfg();
2283 }
2284
2285
2286 /*
2287  * print_epb()
2288  * Decode the ENERGY_PERF_BIAS MSR
2289  */
2290 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2291 {
2292         unsigned long long msr;
2293         char *epb_string;
2294         int cpu;
2295
2296         if (!has_epb)
2297                 return 0;
2298
2299         cpu = t->cpu_id;
2300
2301         /* EPB is per-package */
2302         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2303                 return 0;
2304
2305         if (cpu_migrate(cpu)) {
2306                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2307                 return -1;
2308         }
2309
2310         if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
2311                 return 0;
2312
2313         switch (msr & 0xF) {
2314         case ENERGY_PERF_BIAS_PERFORMANCE:
2315                 epb_string = "performance";
2316                 break;
2317         case ENERGY_PERF_BIAS_NORMAL:
2318                 epb_string = "balanced";
2319                 break;
2320         case ENERGY_PERF_BIAS_POWERSAVE:
2321                 epb_string = "powersave";
2322                 break;
2323         default:
2324                 epb_string = "custom";
2325                 break;
2326         }
2327         fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
2328
2329         return 0;
2330 }
2331 /*
2332  * print_hwp()
2333  * Decode the MSR_HWP_CAPABILITIES
2334  */
2335 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2336 {
2337         unsigned long long msr;
2338         int cpu;
2339
2340         if (!has_hwp)
2341                 return 0;
2342
2343         cpu = t->cpu_id;
2344
2345         /* MSR_HWP_CAPABILITIES is per-package */
2346         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2347                 return 0;
2348
2349         if (cpu_migrate(cpu)) {
2350                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2351                 return -1;
2352         }
2353
2354         if (get_msr(cpu, MSR_PM_ENABLE, &msr))
2355                 return 0;
2356
2357         fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
2358                 cpu, msr, (msr & (1 << 0)) ? "" : "No-");
2359
2360         /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
2361         if ((msr & (1 << 0)) == 0)
2362                 return 0;
2363
2364         if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
2365                 return 0;
2366
2367         fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
2368                         "(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n",
2369                         cpu, msr,
2370                         (unsigned int)HWP_HIGHEST_PERF(msr),
2371                         (unsigned int)HWP_GUARANTEED_PERF(msr),
2372                         (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
2373                         (unsigned int)HWP_LOWEST_PERF(msr));
2374
2375         if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
2376                 return 0;
2377
2378         fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
2379                         "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n",
2380                         cpu, msr,
2381                         (unsigned int)(((msr) >> 0) & 0xff),
2382                         (unsigned int)(((msr) >> 8) & 0xff),
2383                         (unsigned int)(((msr) >> 16) & 0xff),
2384                         (unsigned int)(((msr) >> 24) & 0xff),
2385                         (unsigned int)(((msr) >> 32) & 0xff3),
2386                         (unsigned int)(((msr) >> 42) & 0x1));
2387
2388         if (has_hwp_pkg) {
2389                 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
2390                         return 0;
2391
2392                 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
2393                         "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n",
2394                         cpu, msr,
2395                         (unsigned int)(((msr) >> 0) & 0xff),
2396                         (unsigned int)(((msr) >> 8) & 0xff),
2397                         (unsigned int)(((msr) >> 16) & 0xff),
2398                         (unsigned int)(((msr) >> 24) & 0xff),
2399                         (unsigned int)(((msr) >> 32) & 0xff3));
2400         }
2401         if (has_hwp_notify) {
2402                 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
2403                         return 0;
2404
2405                 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
2406                         "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
2407                         cpu, msr,
2408                         ((msr) & 0x1) ? "EN" : "Dis",
2409                         ((msr) & 0x2) ? "EN" : "Dis");
2410         }
2411         if (get_msr(cpu, MSR_HWP_STATUS, &msr))
2412                 return 0;
2413
2414         fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
2415                         "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
2416                         cpu, msr,
2417                         ((msr) & 0x1) ? "" : "No-",
2418                         ((msr) & 0x2) ? "" : "No-");
2419
2420         return 0;
2421 }
2422
2423 /*
2424  * print_perf_limit()
2425  */
2426 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2427 {
2428         unsigned long long msr;
2429         int cpu;
2430
2431         cpu = t->cpu_id;
2432
2433         /* per-package */
2434         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2435                 return 0;
2436
2437         if (cpu_migrate(cpu)) {
2438                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2439                 return -1;
2440         }
2441
2442         if (do_core_perf_limit_reasons) {
2443                 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
2444                 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2445                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
2446                         (msr & 1 << 15) ? "bit15, " : "",
2447                         (msr & 1 << 14) ? "bit14, " : "",
2448                         (msr & 1 << 13) ? "Transitions, " : "",
2449                         (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
2450                         (msr & 1 << 11) ? "PkgPwrL2, " : "",
2451                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
2452                         (msr & 1 << 9) ? "CorePwr, " : "",
2453                         (msr & 1 << 8) ? "Amps, " : "",
2454                         (msr & 1 << 6) ? "VR-Therm, " : "",
2455                         (msr & 1 << 5) ? "Auto-HWP, " : "",
2456                         (msr & 1 << 4) ? "Graphics, " : "",
2457                         (msr & 1 << 2) ? "bit2, " : "",
2458                         (msr & 1 << 1) ? "ThermStatus, " : "",
2459                         (msr & 1 << 0) ? "PROCHOT, " : "");
2460                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
2461                         (msr & 1 << 31) ? "bit31, " : "",
2462                         (msr & 1 << 30) ? "bit30, " : "",
2463                         (msr & 1 << 29) ? "Transitions, " : "",
2464                         (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
2465                         (msr & 1 << 27) ? "PkgPwrL2, " : "",
2466                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
2467                         (msr & 1 << 25) ? "CorePwr, " : "",
2468                         (msr & 1 << 24) ? "Amps, " : "",
2469                         (msr & 1 << 22) ? "VR-Therm, " : "",
2470                         (msr & 1 << 21) ? "Auto-HWP, " : "",
2471                         (msr & 1 << 20) ? "Graphics, " : "",
2472                         (msr & 1 << 18) ? "bit18, " : "",
2473                         (msr & 1 << 17) ? "ThermStatus, " : "",
2474                         (msr & 1 << 16) ? "PROCHOT, " : "");
2475
2476         }
2477         if (do_gfx_perf_limit_reasons) {
2478                 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
2479                 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2480                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
2481                         (msr & 1 << 0) ? "PROCHOT, " : "",
2482                         (msr & 1 << 1) ? "ThermStatus, " : "",
2483                         (msr & 1 << 4) ? "Graphics, " : "",
2484                         (msr & 1 << 6) ? "VR-Therm, " : "",
2485                         (msr & 1 << 8) ? "Amps, " : "",
2486                         (msr & 1 << 9) ? "GFXPwr, " : "",
2487                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
2488                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
2489                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
2490                         (msr & 1 << 16) ? "PROCHOT, " : "",
2491                         (msr & 1 << 17) ? "ThermStatus, " : "",
2492                         (msr & 1 << 20) ? "Graphics, " : "",
2493                         (msr & 1 << 22) ? "VR-Therm, " : "",
2494                         (msr & 1 << 24) ? "Amps, " : "",
2495                         (msr & 1 << 25) ? "GFXPwr, " : "",
2496                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
2497                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
2498         }
2499         if (do_ring_perf_limit_reasons) {
2500                 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
2501                 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2502                 fprintf(outf, " (Active: %s%s%s%s%s%s)",
2503                         (msr & 1 << 0) ? "PROCHOT, " : "",
2504                         (msr & 1 << 1) ? "ThermStatus, " : "",
2505                         (msr & 1 << 6) ? "VR-Therm, " : "",
2506                         (msr & 1 << 8) ? "Amps, " : "",
2507                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
2508                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
2509                 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
2510                         (msr & 1 << 16) ? "PROCHOT, " : "",
2511                         (msr & 1 << 17) ? "ThermStatus, " : "",
2512                         (msr & 1 << 22) ? "VR-Therm, " : "",
2513                         (msr & 1 << 24) ? "Amps, " : "",
2514                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
2515                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
2516         }
2517         return 0;
2518 }
2519
2520 #define RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
2521 #define RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
2522
2523 double get_tdp(unsigned int model)
2524 {
2525         unsigned long long msr;
2526
2527         if (do_rapl & RAPL_PKG_POWER_INFO)
2528                 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
2529                         return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
2530
2531         switch (model) {
2532         case 0x37:
2533         case 0x4D:
2534                 return 30.0;
2535         default:
2536                 return 135.0;
2537         }
2538 }
2539
2540 /*
2541  * rapl_dram_energy_units_probe()
2542  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
2543  */
2544 static double
2545 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
2546 {
2547         /* only called for genuine_intel, family 6 */
2548
2549         switch (model) {
2550         case 0x3F:      /* HSX */
2551         case 0x4F:      /* BDX */
2552         case 0x56:      /* BDX-DE */
2553         case 0x57:      /* KNL */
2554                 return (rapl_dram_energy_units = 15.3 / 1000000);
2555         default:
2556                 return (rapl_energy_units);
2557         }
2558 }
2559
2560
2561 /*
2562  * rapl_probe()
2563  *
2564  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
2565  */
2566 void rapl_probe(unsigned int family, unsigned int model)
2567 {
2568         unsigned long long msr;
2569         unsigned int time_unit;
2570         double tdp;
2571
2572         if (!genuine_intel)
2573                 return;
2574
2575         if (family != 6)
2576                 return;
2577
2578         switch (model) {
2579         case 0x2A:
2580         case 0x3A:
2581         case 0x3C:      /* HSW */
2582         case 0x45:      /* HSW */
2583         case 0x46:      /* HSW */
2584         case 0x3D:      /* BDW */
2585         case 0x47:      /* BDW */
2586                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
2587                 break;
2588         case 0x4E:      /* SKL */
2589         case 0x5E:      /* SKL */
2590                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
2591                 break;
2592         case 0x3F:      /* HSX */
2593         case 0x4F:      /* BDX */
2594         case 0x56:      /* BDX-DE */
2595         case 0x57:      /* KNL */
2596                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
2597                 break;
2598         case 0x2D:
2599         case 0x3E:
2600                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
2601                 break;
2602         case 0x37:      /* BYT */
2603         case 0x4D:      /* AVN */
2604                 do_rapl = RAPL_PKG | RAPL_CORES ;
2605                 break;
2606         default:
2607                 return;
2608         }
2609
2610         /* units on package 0, verify later other packages match */
2611         if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
2612                 return;
2613
2614         rapl_power_units = 1.0 / (1 << (msr & 0xF));
2615         if (model == 0x37)
2616                 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
2617         else
2618                 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
2619
2620         rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
2621
2622         time_unit = msr >> 16 & 0xF;
2623         if (time_unit == 0)
2624                 time_unit = 0xA;
2625
2626         rapl_time_units = 1.0 / (1 << (time_unit));
2627
2628         tdp = get_tdp(model);
2629
2630         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
2631         if (debug)
2632                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
2633
2634         return;
2635 }
2636
2637 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
2638 {
2639         if (!genuine_intel)
2640                 return;
2641
2642         if (family != 6)
2643                 return;
2644
2645         switch (model) {
2646         case 0x3C:      /* HSW */
2647         case 0x45:      /* HSW */
2648         case 0x46:      /* HSW */
2649                 do_gfx_perf_limit_reasons = 1;
2650         case 0x3F:      /* HSX */
2651                 do_core_perf_limit_reasons = 1;
2652                 do_ring_perf_limit_reasons = 1;
2653         default:
2654                 return;
2655         }
2656 }
2657
2658 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2659 {
2660         unsigned long long msr;
2661         unsigned int dts;
2662         int cpu;
2663
2664         if (!(do_dts || do_ptm))
2665                 return 0;
2666
2667         cpu = t->cpu_id;
2668
2669         /* DTS is per-core, no need to print for each thread */
2670         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 
2671                 return 0;
2672
2673         if (cpu_migrate(cpu)) {
2674                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2675                 return -1;
2676         }
2677
2678         if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
2679                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
2680                         return 0;
2681
2682                 dts = (msr >> 16) & 0x7F;
2683                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
2684                         cpu, msr, tcc_activation_temp - dts);
2685
2686 #ifdef  THERM_DEBUG
2687                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
2688                         return 0;
2689
2690                 dts = (msr >> 16) & 0x7F;
2691                 dts2 = (msr >> 8) & 0x7F;
2692                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
2693                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
2694 #endif
2695         }
2696
2697
2698         if (do_dts) {
2699                 unsigned int resolution;
2700
2701                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
2702                         return 0;
2703
2704                 dts = (msr >> 16) & 0x7F;
2705                 resolution = (msr >> 27) & 0xF;
2706                 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
2707                         cpu, msr, tcc_activation_temp - dts, resolution);
2708
2709 #ifdef THERM_DEBUG
2710                 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
2711                         return 0;
2712
2713                 dts = (msr >> 16) & 0x7F;
2714                 dts2 = (msr >> 8) & 0x7F;
2715                 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
2716                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
2717 #endif
2718         }
2719
2720         return 0;
2721 }
2722
2723 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
2724 {
2725         fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
2726                 cpu, label,
2727                 ((msr >> 15) & 1) ? "EN" : "DIS",
2728                 ((msr >> 0) & 0x7FFF) * rapl_power_units,
2729                 (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
2730                 (((msr >> 16) & 1) ? "EN" : "DIS"));
2731
2732         return;
2733 }
2734
2735 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2736 {
2737         unsigned long long msr;
2738         int cpu;
2739
2740         if (!do_rapl)
2741                 return 0;
2742
2743         /* RAPL counters are per package, so print only for 1st thread/package */
2744         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2745                 return 0;
2746
2747         cpu = t->cpu_id;
2748         if (cpu_migrate(cpu)) {
2749                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2750                 return -1;
2751         }
2752
2753         if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
2754                 return -1;
2755
2756         if (debug) {
2757                 fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
2758                         "(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
2759                         rapl_power_units, rapl_energy_units, rapl_time_units);
2760         }
2761         if (do_rapl & RAPL_PKG_POWER_INFO) {
2762
2763                 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
2764                         return -5;
2765
2766
2767                 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
2768                         cpu, msr,
2769                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2770                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2771                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2772                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
2773
2774         }
2775         if (do_rapl & RAPL_PKG) {
2776
2777                 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
2778                         return -9;
2779
2780                 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
2781                         cpu, msr, (msr >> 63) & 1 ? "": "UN");
2782
2783                 print_power_limit_msr(cpu, msr, "PKG Limit #1");
2784                 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
2785                         cpu,
2786                         ((msr >> 47) & 1) ? "EN" : "DIS",
2787                         ((msr >> 32) & 0x7FFF) * rapl_power_units,
2788                         (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
2789                         ((msr >> 48) & 1) ? "EN" : "DIS");
2790         }
2791
2792         if (do_rapl & RAPL_DRAM_POWER_INFO) {
2793                 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
2794                         return -6;
2795
2796                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
2797                         cpu, msr,
2798                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2799                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2800                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2801                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
2802         }
2803         if (do_rapl & RAPL_DRAM) {
2804                 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
2805                         return -9;
2806                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
2807                                 cpu, msr, (msr >> 31) & 1 ? "": "UN");
2808
2809                 print_power_limit_msr(cpu, msr, "DRAM Limit");
2810         }
2811         if (do_rapl & RAPL_CORE_POLICY) {
2812                 if (debug) {
2813                         if (get_msr(cpu, MSR_PP0_POLICY, &msr))
2814                                 return -7;
2815
2816                         fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
2817                 }
2818         }
2819         if (do_rapl & RAPL_CORES) {
2820                 if (debug) {
2821
2822                         if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
2823                                 return -9;
2824                         fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
2825                                         cpu, msr, (msr >> 31) & 1 ? "": "UN");
2826                         print_power_limit_msr(cpu, msr, "Cores Limit");
2827                 }
2828         }
2829         if (do_rapl & RAPL_GFX) {
2830                 if (debug) {
2831                         if (get_msr(cpu, MSR_PP1_POLICY, &msr))
2832                                 return -8;
2833
2834                         fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
2835
2836                         if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
2837                                 return -9;
2838                         fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
2839                                         cpu, msr, (msr >> 31) & 1 ? "": "UN");
2840                         print_power_limit_msr(cpu, msr, "GFX Limit");
2841                 }
2842         }
2843         return 0;
2844 }
2845
2846 /*
2847  * SNB adds support for additional MSRs:
2848  *
2849  * MSR_PKG_C7_RESIDENCY            0x000003fa
2850  * MSR_CORE_C7_RESIDENCY           0x000003fe
2851  * MSR_PKG_C2_RESIDENCY            0x0000060d
2852  */
2853
2854 int has_snb_msrs(unsigned int family, unsigned int model)
2855 {
2856         if (!genuine_intel)
2857                 return 0;
2858
2859         switch (model) {
2860         case 0x2A:
2861         case 0x2D:
2862         case 0x3A:      /* IVB */
2863         case 0x3E:      /* IVB Xeon */
2864         case 0x3C:      /* HSW */
2865         case 0x3F:      /* HSW */
2866         case 0x45:      /* HSW */
2867         case 0x46:      /* HSW */
2868         case 0x3D:      /* BDW */
2869         case 0x47:      /* BDW */
2870         case 0x4F:      /* BDX */
2871         case 0x56:      /* BDX-DE */
2872         case 0x4E:      /* SKL */
2873         case 0x5E:      /* SKL */
2874                 return 1;
2875         }
2876         return 0;
2877 }
2878
2879 /*
2880  * HSW adds support for additional MSRs:
2881  *
2882  * MSR_PKG_C8_RESIDENCY            0x00000630
2883  * MSR_PKG_C9_RESIDENCY            0x00000631
2884  * MSR_PKG_C10_RESIDENCY           0x00000632
2885  */
2886 int has_hsw_msrs(unsigned int family, unsigned int model)
2887 {
2888         if (!genuine_intel)
2889                 return 0;
2890
2891         switch (model) {
2892         case 0x45:      /* HSW */
2893         case 0x3D:      /* BDW */
2894         case 0x4E:      /* SKL */
2895         case 0x5E:      /* SKL */
2896                 return 1;
2897         }
2898         return 0;
2899 }
2900
2901 /*
2902  * SKL adds support for additional MSRS:
2903  *
2904  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
2905  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
2906  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
2907  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
2908  */
2909 int has_skl_msrs(unsigned int family, unsigned int model)
2910 {
2911         if (!genuine_intel)
2912                 return 0;
2913
2914         switch (model) {
2915         case 0x4E:      /* SKL */
2916         case 0x5E:      /* SKL */
2917                 return 1;
2918         }
2919         return 0;
2920 }
2921
2922
2923
2924 int is_slm(unsigned int family, unsigned int model)
2925 {
2926         if (!genuine_intel)
2927                 return 0;
2928         switch (model) {
2929         case 0x37:      /* BYT */
2930         case 0x4D:      /* AVN */
2931                 return 1;
2932         }
2933         return 0;
2934 }
2935
2936 int is_knl(unsigned int family, unsigned int model)
2937 {
2938         if (!genuine_intel)
2939                 return 0;
2940         switch (model) {
2941         case 0x57:      /* KNL */
2942                 return 1;
2943         }
2944         return 0;
2945 }
2946
/*
 * get_aperf_mperf_multiplier()
 *
 * Returns 1024 when is_knl() reports a KNL part, 1 for everything else.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}
2953
2954 #define SLM_BCLK_FREQS 5
2955 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
2956
2957 double slm_bclk(void)
2958 {
2959         unsigned long long msr = 3;
2960         unsigned int i;
2961         double freq;
2962
2963         if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
2964                 fprintf(outf, "SLM BCLK: unknown\n");
2965
2966         i = msr & 0xf;
2967         if (i >= SLM_BCLK_FREQS) {
2968                 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
2969                 msr = 3;
2970         }
2971         freq = slm_freq_table[i];
2972
2973         fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
2974
2975         return freq;
2976 }
2977
/*
 * discover_bclk()
 *
 * Returns the bus clock in MHz: 100.00 when has_snb_msrs() or is_knl()
 * matches, the value from slm_bclk() when is_slm() matches, and 133.33
 * for everything else.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model))
		return 100.00;

	if (is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
2987
2988 /*
2989  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
2990  * the Thermal Control Circuit (TCC) activates.
2991  * This is usually equal to tjMax.
2992  *
2993  * Older processors do not have this MSR, so there we guess,
2994  * but also allow cmdline over-ride with -T.
2995  *
2996  * Several MSR temperature values are in units of degrees-C
2997  * below this value, including the Digital Thermal Sensor (DTS),
2998  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
2999  */
3000 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3001 {
3002         unsigned long long msr;
3003         unsigned int target_c_local;
3004         int cpu;
3005
3006         /* tcc_activation_temp is used only for dts or ptm */
3007         if (!(do_dts || do_ptm))
3008                 return 0;
3009
3010         /* this is a per-package concept */
3011         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3012                 return 0;
3013
3014         cpu = t->cpu_id;
3015         if (cpu_migrate(cpu)) {
3016                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3017                 return -1;
3018         }
3019
3020         if (tcc_activation_temp_override != 0) {
3021                 tcc_activation_temp = tcc_activation_temp_override;
3022                 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
3023                         cpu, tcc_activation_temp);
3024                 return 0;
3025         }
3026
3027         /* Temperature Target MSR is Nehalem and newer only */
3028         if (!do_nhm_platform_info)
3029                 goto guess;
3030
3031         if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
3032                 goto guess;
3033
3034         target_c_local = (msr >> 16) & 0xFF;
3035
3036         if (debug)
3037                 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
3038                         cpu, msr, target_c_local);
3039
3040         if (!target_c_local)
3041                 goto guess;
3042
3043         tcc_activation_temp = target_c_local;
3044
3045         return 0;
3046
3047 guess:
3048         tcc_activation_temp = TJMAX_DEFAULT;
3049         fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
3050                 cpu, tcc_activation_temp);
3051
3052         return 0;
3053 }
3054
3055 void decode_feature_control_msr(void)
3056 {
3057         unsigned long long msr;
3058
3059         if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
3060                 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
3061                         base_cpu, msr,
3062                         msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
3063                         msr & (1 << 18) ? "SGX" : "");
3064 }
3065
3066 void decode_misc_enable_msr(void)
3067 {
3068         unsigned long long msr;
3069
3070         if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
3071                 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n",
3072                         base_cpu, msr,
3073                         msr & (1 << 3) ? "TCC" : "",
3074                         msr & (1 << 16) ? "EIST" : "",
3075                         msr & (1 << 18) ? "MONITOR" : "");
3076 }
3077
3078 /*
3079  * Decode MSR_MISC_PWR_MGMT
3080  *
3081  * Decode the bits according to the Nehalem documentation
3082  * bit[0] seems to continue to have same meaning going forward
3083  * bit[1] less so...
3084  */
3085 void decode_misc_pwr_mgmt_msr(void)
3086 {
3087         unsigned long long msr;
3088
3089         if (!do_nhm_platform_info)
3090                 return;
3091
3092         if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
3093                 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n",
3094                         base_cpu, msr,
3095                         msr & (1 << 0) ? "DIS" : "EN",
3096                         msr & (1 << 1) ? "EN" : "DIS");
3097 }
3098
3099 void process_cpuid()
3100 {
3101         unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
3102         unsigned int fms, family, model, stepping;
3103
3104         eax = ebx = ecx = edx = 0;
3105
3106         __cpuid(0, max_level, ebx, ecx, edx);
3107
3108         if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
3109                 genuine_intel = 1;
3110
3111         if (debug)
3112                 fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
3113                         (char *)&ebx, (char *)&edx, (char *)&ecx);
3114
3115         __cpuid(1, fms, ebx, ecx, edx);
3116         family = (fms >> 8) & 0xf;
3117         model = (fms >> 4) & 0xf;
3118         stepping = fms & 0xf;
3119         if (family == 6 || family == 0xf)
3120                 model += ((fms >> 16) & 0xf) << 4;
3121
3122         if (debug) {
3123                 fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
3124                         max_level, family, model, stepping, family, model, stepping);
3125                 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
3126                         ecx & (1 << 0) ? "SSE3" : "-",
3127                         ecx & (1 << 3) ? "MONITOR" : "-",
3128                         ecx & (1 << 6) ? "SMX" : "-",
3129                         ecx & (1 << 7) ? "EIST" : "-",
3130                         ecx & (1 << 8) ? "TM2" : "-",
3131                         edx & (1 << 4) ? "TSC" : "-",
3132                         edx & (1 << 5) ? "MSR" : "-",
3133                         edx & (1 << 22) ? "ACPI-TM" : "-",
3134                         edx & (1 << 29) ? "TM" : "-");
3135         }
3136
3137         if (!(edx & (1 << 5)))
3138                 errx(1, "CPUID: no MSR");
3139
3140         /*
3141          * check max extended function levels of CPUID.
3142          * This is needed to check for invariant TSC.
3143          * This check is valid for both Intel and AMD.
3144          */
3145         ebx = ecx = edx = 0;
3146         __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
3147
3148         if (max_extended_level >= 0x80000007) {
3149
3150                 /*
3151                  * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
3152                  * this check is valid for both Intel and AMD
3153                  */
3154                 __cpuid(0x80000007, eax, ebx, ecx, edx);
3155                 has_invariant_tsc = edx & (1 << 8);
3156         }
3157
3158         /*
3159          * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
3160          * this check is valid for both Intel and AMD
3161          */
3162
3163         __cpuid(0x6, eax, ebx, ecx, edx);
3164         has_aperf = ecx & (1 << 0);
3165         do_dts = eax & (1 << 0);
3166         do_ptm = eax & (1 << 6);
3167         has_hwp = eax & (1 << 7);
3168         has_hwp_notify = eax & (1 << 8);
3169         has_hwp_activity_window = eax & (1 << 9);
3170         has_hwp_epp = eax & (1 << 10);
3171         has_hwp_pkg = eax & (1 << 11);
3172         has_epb = ecx & (1 << 3);
3173
3174         if (debug)
3175                 fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, "
3176                         "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
3177                         has_aperf ? "" : "No-",
3178                         do_dts ? "" : "No-",
3179                         do_ptm ? "" : "No-",
3180                         has_hwp ? "" : "No-",
3181                         has_hwp_notify ? "" : "No-",
3182                         has_hwp_activity_window ? "" : "No-",
3183                         has_hwp_epp ? "" : "No-",
3184                         has_hwp_pkg ? "" : "No-",
3185                         has_epb ? "" : "No-");
3186
3187         if (debug)
3188                 decode_misc_enable_msr();
3189
3190         if (max_level >= 0x7) {
3191                 int has_sgx;
3192
3193                 ecx = 0;
3194
3195                 __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
3196
3197                 has_sgx = ebx & (1 << 2);
3198                 fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
3199
3200                 if (has_sgx)
3201                         decode_feature_control_msr();
3202         }
3203
3204         if (max_level >= 0x15) {
3205                 unsigned int eax_crystal;
3206                 unsigned int ebx_tsc;
3207
3208                 /*
3209                  * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
3210                  */
3211                 eax_crystal = ebx_tsc = crystal_hz = edx = 0;
3212                 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
3213
3214                 if (ebx_tsc != 0) {
3215
3216                         if (debug && (ebx != 0))
3217                                 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
3218                                         eax_crystal, ebx_tsc, crystal_hz);
3219
3220                         if (crystal_hz == 0)
3221                                 switch(model) {
3222                                 case 0x4E:      /* SKL */
3223                                 case 0x5E:      /* SKL */
3224                                         crystal_hz = 24000000;  /* 24 MHz */
3225                                         break;
3226                                 default:
3227                                         crystal_hz = 0;
3228                         }
3229
3230                         if (crystal_hz) {
3231                                 tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
3232                                 if (debug)
3233                                         fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
3234                                                 tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
3235                         }
3236                 }
3237         }
3238         if (max_level >= 0x16) {
3239                 unsigned int base_mhz, max_mhz, bus_mhz, edx;
3240
3241                 /*
3242                  * CPUID 16H Base MHz, Max MHz, Bus MHz
3243                  */
3244                 base_mhz = max_mhz = bus_mhz = edx = 0;
3245
3246                 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
3247                 if (debug)
3248                         fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
3249                                 base_mhz, max_mhz, bus_mhz);
3250         }
3251
3252         if (has_aperf)
3253                 aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
3254
3255         do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model);
3256         do_snb_cstates = has_snb_msrs(family, model);
3257         do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2);
3258         do_pc3 = (pkg_cstate_limit >= PCL__3);
3259         do_pc6 = (pkg_cstate_limit >= PCL__6);
3260         do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7);
3261         do_c8_c9_c10 = has_hsw_msrs(family, model);
3262         do_skl_residency = has_skl_msrs(family, model);
3263         do_slm_cstates = is_slm(family, model);
3264         do_knl_cstates  = is_knl(family, model);
3265
3266         if (debug)
3267                 decode_misc_pwr_mgmt_msr();
3268
3269         rapl_probe(family, model);
3270         perf_limit_reasons_probe(family, model);
3271
3272         if (debug)
3273                 dump_cstate_pstate_config_info(family, model);
3274
3275         if (has_skl_msrs(family, model))
3276                 calculate_tsc_tweak();
3277
3278         do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK);
3279
3280         do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK);
3281
3282         return;
3283 }
3284
3285 void help()
3286 {
3287         fprintf(outf,
3288         "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
3289         "\n"
3290         "Turbostat forks the specified COMMAND and prints statistics\n"
3291         "when COMMAND completes.\n"
3292         "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
3293         "to print statistics, until interrupted.\n"
3294         "--debug        run in \"debug\" mode\n"
3295         "--interval sec Override default 5-second measurement interval\n"
3296         "--help         print this help message\n"
3297         "--counter msr  print 32-bit counter at address \"msr\"\n"
3298         "--Counter msr  print 64-bit Counter at address \"msr\"\n"
3299         "--out file     create or truncate \"file\" for all output\n"
3300         "--msr msr      print 32-bit value at address \"msr\"\n"
3301         "--MSR msr      print 64-bit Value at address \"msr\"\n"
3302         "--version      print version information\n"
3303         "\n"
3304         "For more help, run \"man turbostat\"\n");
3305 }
3306
3307
/*
 * dir_filter()
 *
 * Directory-entry filter for /dev/cpu/: accept entries whose name starts
 * with a decimal digit, ie. filter out ".", "..", "microcode".
 *
 * dirp: directory entry to examine.
 *
 * Returns 1 when the entry should be kept, 0 when it should be skipped.
 */
int dir_filter(const struct dirent *dirp)
{
        /*
         * The <ctype.h> classifiers are only defined for values representable
         * as unsigned char (or EOF); d_name[] is plain char, which may be
         * negative, so convert before classifying.
         */
        return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
3319
/*
 * open_dev_cpu_msr()
 *
 * Stub: the argument is ignored and 0 is always returned.
 */
int open_dev_cpu_msr(int dummy1)
{
        (void)dummy1;

        return 0;
}
3324
3325 void topology_probe()
3326 {
3327         int i;
3328         int max_core_id = 0;
3329         int max_package_id = 0;
3330         int max_siblings = 0;
3331         struct cpu_topology {
3332                 int core_id;
3333                 int physical_package_id;
3334         } *cpus;
3335
3336         /* Initialize num_cpus, max_cpu_num */
3337         topo.num_cpus = 0;
3338         topo.max_cpu_num = 0;
3339         for_all_proc_cpus(count_cpus);
3340         if (!summary_only && topo.num_cpus > 1)
3341                 show_cpu = 1;
3342
3343         if (debug > 1)
3344                 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
3345
3346         cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
3347         if (cpus == NULL)
3348                 err(1, "calloc cpus");
3349
3350         /*
3351          * Allocate and initialize cpu_present_set
3352          */
3353         cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
3354         if (cpu_present_set == NULL)
3355                 err(3, "CPU_ALLOC");
3356         cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
3357         CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
3358         for_all_proc_cpus(mark_cpu_present);
3359
3360         /*
3361          * Allocate and initialize cpu_affinity_set
3362          */
3363         cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
3364         if (cpu_affinity_set == NULL)
3365                 err(3, "CPU_ALLOC");
3366         cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
3367         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
3368
3369
3370         /*
3371          * For online cpus
3372          * find max_core_id, max_package_id
3373          */
3374         for (i = 0; i <= topo.max_cpu_num; ++i) {
3375                 int siblings;
3376
3377                 if (cpu_is_not_present(i)) {
3378                         if (debug > 1)
3379                                 fprintf(outf, "cpu%d NOT PRESENT\n", i);
3380                         continue;
3381                 }
3382                 cpus[i].core_id = get_core_id(i);
3383                 if (cpus[i].core_id > max_core_id)
3384                         max_core_id = cpus[i].core_id;
3385
3386                 cpus[i].physical_package_id = get_physical_package_id(i);
3387                 if (cpus[i].physical_package_id > max_package_id)
3388                         max_package_id = cpus[i].physical_package_id;
3389
3390                 siblings = get_num_ht_siblings(i);
3391                 if (siblings > max_siblings)
3392                         max_siblings = siblings;
3393                 if (debug > 1)
3394                         fprintf(outf, "cpu %d pkg %d core %d\n",
3395                                 i, cpus[i].physical_package_id, cpus[i].core_id);
3396         }
3397         topo.num_cores_per_pkg = max_core_id + 1;
3398         if (debug > 1)
3399                 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
3400                         max_core_id, topo.num_cores_per_pkg);
3401         if (debug && !summary_only && topo.num_cores_per_pkg > 1)
3402                 show_core = 1;
3403
3404         topo.num_packages = max_package_id + 1;
3405         if (debug > 1)
3406                 fprintf(outf, "max_package_id %d, sizing for %d packages\n",
3407                         max_package_id, topo.num_packages);
3408         if (debug && !summary_only && topo.num_packages > 1)
3409                 show_pkg = 1;
3410
3411         topo.num_threads_per_core = max_siblings;
3412         if (debug > 1)
3413                 fprintf(outf, "max_siblings %d\n", max_siblings);
3414
3415         free(cpus);
3416 }
3417
3418 void
3419 allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
3420 {
3421         int i;
3422
3423         *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
3424                 topo.num_packages, sizeof(struct thread_data));
3425         if (*t == NULL)
3426                 goto error;
3427
3428         for (i = 0; i < topo.num_threads_per_core *
3429                 topo.num_cores_per_pkg * topo.num_packages; i++)
3430                 (*t)[i].cpu_id = -1;
3431
3432         *c = calloc(topo.num_cores_per_pkg * topo.num_packages,
3433                 sizeof(struct core_data));
3434         if (*c == NULL)
3435                 goto error;
3436
3437         for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
3438                 (*c)[i].core_id = -1;
3439
3440         *p = calloc(topo.num_packages, sizeof(struct pkg_data));
3441         if (*p == NULL)
3442                 goto error;
3443
3444         for (i = 0; i < topo.num_packages; i++)
3445                 (*p)[i].package_id = i;
3446
3447         return;
3448 error:
3449         err(1, "calloc counters");
3450 }
3451 /*
3452  * init_counter()
3453  *
3454  * set cpu_id, core_num, pkg_num
3455  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
3456  *
3457  * increment topo.num_cores when 1st core in pkg seen
3458  */
3459 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
3460         struct pkg_data *pkg_base, int thread_num, int core_num,
3461         int pkg_num, int cpu_id)
3462 {
3463         struct thread_data *t;
3464         struct core_data *c;
3465         struct pkg_data *p;
3466
3467         t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
3468         c = GET_CORE(core_base, core_num, pkg_num);
3469         p = GET_PKG(pkg_base, pkg_num);
3470
3471         t->cpu_id = cpu_id;
3472         if (thread_num == 0) {
3473                 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
3474                 if (cpu_is_first_core_in_package(cpu_id))
3475                         t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
3476         }
3477
3478         c->core_id = core_num;
3479         p->package_id = pkg_num;
3480 }
3481
3482
3483 int initialize_counters(int cpu_id)
3484 {
3485         int my_thread_id, my_core_id, my_package_id;
3486
3487         my_package_id = get_physical_package_id(cpu_id);
3488         my_core_id = get_core_id(cpu_id);
3489         my_thread_id = get_cpu_position_in_core(cpu_id);
3490         if (!my_thread_id)
3491                 topo.num_cores++;
3492
3493         init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
3494         init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
3495         return 0;
3496 }
3497
3498 void allocate_output_buffer()
3499 {
3500         output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
3501         outp = output_buffer;
3502         if (outp == NULL)
3503                 err(-1, "calloc output buffer");
3504 }
3505 void allocate_fd_percpu(void)
3506 {
3507         fd_percpu = calloc(topo.max_cpu_num, sizeof(int));
3508         if (fd_percpu == NULL)
3509                 err(-1, "calloc fd_percpu");
3510 }
3511 void allocate_irq_buffers(void)
3512 {
3513         irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
3514         if (irq_column_2_cpu == NULL)
3515                 err(-1, "calloc %d", topo.num_cpus);
3516
3517         irqs_per_cpu = calloc(topo.max_cpu_num, sizeof(int));
3518         if (irqs_per_cpu == NULL)
3519                 err(-1, "calloc %d", topo.max_cpu_num);
3520 }
/*
 * setup_all_buffers()
 *
 * Probe the CPU topology and then allocate and initialise every buffer
 * that is sized from it.
 */
void setup_all_buffers(void)
{
        /* Must run first: the allocators below size their buffers from "topo". */
        topology_probe();
        allocate_irq_buffers();
        allocate_fd_percpu();
        /* Two counter sets (even and odd) are allocated. */
        allocate_counters(&thread_even, &core_even, &package_even);
        allocate_counters(&thread_odd, &core_odd, &package_odd);
        allocate_output_buffer();
        /* Fill in the per-CPU identity fields of both counter sets. */
        for_all_proc_cpus(initialize_counters);
}
3531
3532 void set_base_cpu(void)
3533 {
3534         base_cpu = sched_getcpu();
3535         if (base_cpu < 0)
3536                 err(-ENODEV, "No valid cpus found");
3537
3538         if (debug > 1)
3539                 fprintf(outf, "base_cpu = %d\n", base_cpu);
3540 }
3541
3542 void turbostat_init()
3543 {
3544         setup_all_buffers();
3545         set_base_cpu();
3546         check_dev_msr();
3547         check_permissions();
3548         process_cpuid();
3549
3550
3551         if (debug)
3552                 for_all_cpus(print_hwp, ODD_COUNTERS);
3553
3554         if (debug)
3555                 for_all_cpus(print_epb, ODD_COUNTERS);
3556
3557         if (debug)
3558                 for_all_cpus(print_perf_limit, ODD_COUNTERS);
3559
3560         if (debug)
3561                 for_all_cpus(print_rapl, ODD_COUNTERS);
3562
3563         for_all_cpus(set_temperature_target, ODD_COUNTERS);
3564
3565         if (debug)
3566                 for_all_cpus(print_thermal, ODD_COUNTERS);
3567 }
3568
3569 int fork_it(char **argv)
3570 {
3571         pid_t child_pid;
3572         int status;
3573
3574         status = for_all_cpus(get_counters, EVEN_COUNTERS);
3575         if (status)
3576                 exit(status);
3577         /* clear affinity side-effect of get_counters() */
3578         sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
3579         gettimeofday(&tv_even, (struct timezone *)NULL);
3580
3581         child_pid = fork();
3582         if (!child_pid) {
3583                 /* child */
3584                 execvp(argv[0], argv);
3585         } else {
3586
3587                 /* parent */
3588                 if (child_pid == -1)
3589                         err(1, "fork");
3590
3591                 signal(SIGINT, SIG_IGN);
3592                 signal(SIGQUIT, SIG_IGN);
3593                 if (waitpid(child_pid, &status, 0) == -1)
3594                         err(status, "waitpid");
3595         }
3596         /*
3597          * n.b. fork_it() does not check for errors from for_all_cpus()
3598          * because re-starting is problematic when forking
3599          */
3600         for_all_cpus(get_counters, ODD_COUNTERS);
3601         gettimeofday(&tv_odd, (struct timezone *)NULL);
3602         timersub(&tv_odd, &tv_even, &tv_delta);
3603         for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
3604         compute_average(EVEN_COUNTERS);
3605         format_all_counters(EVEN_COUNTERS);
3606
3607         fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
3608
3609         flush_output_stderr();
3610
3611         return status;
3612 }
3613
3614 int get_and_dump_counters(void)
3615 {
3616         int status;
3617
3618         status = for_all_cpus(get_counters, ODD_COUNTERS);
3619         if (status)
3620                 return status;
3621
3622         status = for_all_cpus(dump_counters, ODD_COUNTERS);
3623         if (status)
3624                 return status;
3625
3626         flush_output_stdout();
3627
3628         return status;
3629 }
3630
3631 void print_version() {
3632         fprintf(outf, "turbostat version 4.11 27 Feb 2016"
3633                 " - Len Brown <lenb@kernel.org>\n");
3634 }
3635
3636 void cmdline(int argc, char **argv)
3637 {
3638         int opt;
3639         int option_index = 0;
3640         static struct option long_options[] = {
3641                 {"Counter",     required_argument,      0, 'C'},
3642                 {"counter",     required_argument,      0, 'c'},
3643                 {"Dump",        no_argument,            0, 'D'},
3644                 {"debug",       no_argument,            0, 'd'},
3645                 {"interval",    required_argument,      0, 'i'},
3646                 {"help",        no_argument,            0, 'h'},
3647                 {"Joules",      no_argument,            0, 'J'},
3648                 {"MSR",         required_argument,      0, 'M'},
3649                 {"msr",         required_argument,      0, 'm'},
3650                 {"out",         required_argument,      0, 'o'},
3651                 {"Package",     no_argument,            0, 'p'},
3652                 {"processor",   no_argument,            0, 'p'},
3653                 {"Summary",     no_argument,            0, 'S'},
3654                 {"TCC",         required_argument,      0, 'T'},
3655                 {"version",     no_argument,            0, 'v' },
3656                 {0,             0,                      0,  0 }
3657         };
3658
3659         progname = argv[0];
3660
3661         while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v",
3662                                 long_options, &option_index)) != -1) {
3663                 switch (opt) {
3664                 case 'C':
3665                         sscanf(optarg, "%x", &extra_delta_offset64);
3666                         break;
3667                 case 'c':
3668                         sscanf(optarg, "%x", &extra_delta_offset32);
3669                         break;
3670                 case 'D':
3671                         dump_only++;
3672                         break;
3673                 case 'd':
3674                         debug++;
3675                         break;
3676                 case 'h':
3677                 default:
3678                         help();
3679                         exit(1);
3680                 case 'i':
3681                         {
3682                                 double interval = strtod(optarg, NULL);
3683
3684                                 if (interval < 0.001) {
3685                                         fprintf(outf, "interval %f seconds is too small\n",
3686                                                 interval);
3687                                         exit(2);
3688                                 }
3689
3690                                 interval_ts.tv_sec = interval;
3691                                 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
3692                         }
3693                         break;
3694                 case 'J':
3695                         rapl_joules++;
3696                         break;
3697                 case 'M':
3698                         sscanf(optarg, "%x", &extra_msr_offset64);
3699                         break;
3700                 case 'm':
3701                         sscanf(optarg, "%x", &extra_msr_offset32);
3702                         break;
3703                 case 'o':
3704                         outf = fopen_or_die(optarg, "w");
3705                         break;
3706                 case 'P':
3707                         show_pkg_only++;
3708                         break;
3709                 case 'p':
3710                         show_core_only++;
3711                         break;
3712                 case 'S':
3713                         summary_only++;
3714                         break;
3715                 case 'T':
3716                         tcc_activation_temp_override = atoi(optarg);
3717                         break;
3718                 case 'v':
3719                         print_version();
3720                         exit(0);
3721                         break;
3722                 }
3723         }
3724 }
3725
3726 int main(int argc, char **argv)
3727 {
3728         outf = stderr;
3729
3730         cmdline(argc, argv);
3731
3732         if (debug)
3733                 print_version();
3734
3735         turbostat_init();
3736
3737         /* dump counters and exit */
3738         if (dump_only)
3739                 return get_and_dump_counters();
3740
3741         /*
3742          * if any params left, it must be a command to fork
3743          */
3744         if (argc - optind)
3745                 return fork_it(argv + optind);
3746         else
3747                 turbostat_loop();
3748
3749         return 0;
3750 }