Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
index 121f1be4da19430426c45d1c17fce6d8d2d7037e..3bd37bdf1b8ee170765fc064aeef7ca01cee1b7f 100644 (file)
@@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]  = 0x00c4,
   [PERF_COUNT_HW_BRANCH_MISSES]                = 0x00c5,
   [PERF_COUNT_HW_BUS_CYCLES]           = 0x013c,
+  [PERF_COUNT_HW_REF_CPU_CYCLES]       = 0x0300, /* pseudo-encoding */
 };
 
 static struct event_constraint intel_core_event_constraints[] __read_mostly =
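
The new 0x0300 value for PERF_COUNT_HW_REF_CPU_CYCLES is a pseudo-encoding: event select 0x00 with umask 0x03 names no programmable event, so it can never be scheduled on a general-purpose counter and is pinned to fixed counter 2 by the constraint tables changed below. A minimal userspace sketch of how the two fields unpack, assuming the standard x86 config layout (event select in bits 0-7, umask in bits 8-15); the decode() helper is hypothetical:

#include <stdio.h>

/* Unpack the event-select and unit-mask fields of a raw perfmon
 * event code (standard x86 layout: event in bits 0-7, umask in 8-15). */
static void decode(unsigned int code)
{
	unsigned int event = code & 0xff;
	unsigned int umask = (code >> 8) & 0xff;

	printf("0x%04x -> event=0x%02x umask=0x%02x\n", code, event, umask);
}

int main(void)
{
	decode(0x003c);	/* CPU_CLK_UNHALTED.CORE: real programmable event */
	decode(0x013c);	/* CPU_CLK_UNHALTED.BUS: real programmable event */
	decode(0x0300);	/* pseudo-encoding: event 0x00 is not programmable */
	return 0;
}
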
@@ -45,12 +46,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly =
 {
        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       /*
-        * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
-        * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
-        * ratio between these counters.
-        */
-       /* FIXED_EVENT_CONSTRAINT(0x013c, 2),  CPU_CLK_UNHALTED.REF */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
        INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
        INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
        INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -68,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 {
        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
        INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
        INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
        INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -90,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly
 {
        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
        INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
        INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
        INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -102,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 {
        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
        INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
        INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
        INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -125,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 {
        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
        EVENT_CONSTRAINT_END
 };
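
Each constraint table above now pins the 0x0300 pseudo-encoding to fixed counter 2 instead of carrying a commented-out 0x013c entry. Below is a simplified userspace model of what FIXED_EVENT_CONSTRAINT expresses; the real macro lives in arch/x86/kernel/cpu/perf_event.h, placing fixed counters at bit 32 and up of the index mask per the kernel's convention, and FIXED_CONSTRAINT here is an illustrative stand-in:

#include <stdint.h>
#include <stdio.h>

/* Simplified model of an event constraint: an event code plus a
 * bitmask of counters allowed to host it.  General-purpose counters
 * occupy mask bits 0-31, fixed counters bits 32 and up. */
struct constraint {
	uint64_t code;
	uint64_t idxmsk;
};

#define FIXED_CONSTRAINT(c, n) \
	((struct constraint){ .code = (c), .idxmsk = 1ULL << (32 + (n)) })

int main(void)
{
	struct constraint ref = FIXED_CONSTRAINT(0x0300, 2);

	printf("event 0x%04llx allowed only on counter mask %#llx\n",
	       (unsigned long long)ref.code,
	       (unsigned long long)ref.idxmsk);
	return 0;
}
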
 
@@ -1519,7 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = {
        .guest_get_msrs         = intel_guest_get_msrs,
 };
 
-static void intel_clovertown_quirks(void)
+static __init void intel_clovertown_quirk(void)
 {
        /*
         * PEBS is unreliable due to:
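
Renaming the handlers from *_quirks to *_quirk and tagging them __init (with the table below marked __initconst) lets the kernel discard them along with the rest of the init sections once boot completes, since they are only ever called from intel_pmu_init(). A minimal sketch of the pattern as a stand-alone module, with hypothetical demo_* names:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>

/* Init-only data: placed in .init.rodata, freed after initialization. */
static const int demo_table[] __initconst = { 1, 2, 3 };

/* Init-only code: placed in .init.text, freed after initialization. */
static int __init demo_init(void)
{
	pr_info("demo: %zu init-only entries\n", ARRAY_SIZE(demo_table));
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
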
@@ -1545,19 +1541,60 @@ static void intel_clovertown_quirks(void)
        x86_pmu.pebs_constraints = NULL;
 }
 
-static void intel_sandybridge_quirks(void)
+static __init void intel_sandybridge_quirk(void)
 {
        printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
        x86_pmu.pebs = 0;
        x86_pmu.pebs_constraints = NULL;
 }
 
+static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
+       { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
+       { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
+       { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
+       { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
+       { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
+       { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
+       { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
+};
+
+static __init void intel_arch_events_quirk(void)
+{
+       int bit;
+
+       /* disable events that CPUID reports as not present */
+       for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
+               intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
+               printk(KERN_WARNING "CPUID marked event: '%s' unavailable\n",
+                               intel_arch_events_map[bit].name);
+       }
+}
+
+static __init void intel_nehalem_quirk(void)
+{
+       union cpuid10_ebx ebx;
+
+       ebx.full = x86_pmu.events_maskl;
+       if (ebx.split.no_branch_misses_retired) {
+               /*
+                * Erratum AAJ80 detected, we work around it by using
+                * the BR_MISP_EXEC.ANY event. This will over-count
+                * branch-misses, but it's still much better than the
+                * architectural event which is often completely bogus:
+                */
+               intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+               ebx.split.no_branch_misses_retired = 0;
+               x86_pmu.events_maskl = ebx.full;
+               printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
+       }
+}
+
 __init int intel_pmu_init(void)
 {
        union cpuid10_edx edx;
        union cpuid10_eax eax;
+       union cpuid10_ebx ebx;
        unsigned int unused;
-       unsigned int ebx;
        int version;
 
        if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
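
intel_arch_events_quirk() walks the CPUID.0AH:EBX availability mask, where a set bit means the architectural event is NOT supported, and zeroes the event-map entry for each flagged event. A hedged userspace equivalent using GCC's <cpuid.h>, with the bit-to-name table mirroring intel_arch_events_map above:

#include <cpuid.h>
#include <stdio.h>

/* One name per CPUID.0AH:EBX bit, in architectural-event order. */
static const char *arch_event_names[] = {
	"cpu cycles",		/* bit 0 */
	"instructions",		/* bit 1 */
	"bus cycles",		/* bit 2 (reference cycles) */
	"cache references",	/* bit 3 */
	"cache misses",		/* bit 4 */
	"branch instructions",	/* bit 5 */
	"branch misses",	/* bit 6 */
};

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int mask_length, bit;

	if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx))
		return 1;

	mask_length = (eax >> 24) & 0xff;	/* EAX[31:24] */
	if (mask_length > 7)
		mask_length = 7;

	/* A set bit in EBX means the event is not available. */
	for (bit = 0; bit < mask_length; bit++)
		if (ebx & (1u << bit))
			printf("CPUID marked event '%s' unavailable\n",
			       arch_event_names[bit]);
	return 0;
}
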
@@ -1574,8 +1611,8 @@ __init int intel_pmu_init(void)
         * Check whether the Architectural PerfMon supports
         * Branch Misses Retired hw_event or not.
         */
-       cpuid(10, &eax.full, &ebx, &unused, &edx.full);
-       if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
+       cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
+       if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
                return -ENODEV;
 
        version = eax.split.version_id;
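
The gate also changes here: instead of only requiring enumeration up to the branch-misses-retired bit, the PMU is now rejected unless CPUID enumerates all ARCH_PERFMON_EVENTS_COUNT (seven) architectural events. For reference, a sketch of the bitfield unions the code relies on, as defined in arch/x86/include/asm/perf_event.h around this series (consult the header for the authoritative layout):

#include <cpuid.h>
#include <stdio.h>

/* CPUID.0AH:EAX -- PMU version and counter geometry. */
union cpuid10_eax {
	struct {
		unsigned int version_id:8;
		unsigned int num_counters:8;
		unsigned int bit_width:8;
		unsigned int mask_length:8;
	} split;
	unsigned int full;
};

/* CPUID.0AH:EBX -- one "unavailable" bit per architectural event. */
union cpuid10_ebx {
	struct {
		unsigned int no_unhalted_core_cycles:1;
		unsigned int no_instructions_retired:1;
		unsigned int no_unhalted_reference_cycles:1;
		unsigned int no_llc_reference:1;
		unsigned int no_llc_misses:1;
		unsigned int no_branch_instruction_retired:1;
		unsigned int no_branch_misses_retired:1;
	} split;
	unsigned int full;
};

int main(void)
{
	union cpuid10_eax eax;
	union cpuid10_ebx ebx;
	unsigned int unused, edx;

	if (!__get_cpuid(0xa, &eax.full, &ebx.full, &unused, &edx))
		return 1;

	printf("version %u, mask_length %u, branch misses unavailable: %u\n",
	       eax.split.version_id, eax.split.mask_length,
	       ebx.split.no_branch_misses_retired);
	return 0;
}
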
@@ -1589,6 +1626,9 @@ __init int intel_pmu_init(void)
        x86_pmu.cntval_bits             = eax.split.bit_width;
        x86_pmu.cntval_mask             = (1ULL << eax.split.bit_width) - 1;
 
+       x86_pmu.events_maskl            = ebx.full;
+       x86_pmu.events_mask_len         = eax.split.mask_length;
+
        /*
         * Quirk: v2 perfmon does not report fixed-purpose events, so
         * assume at least 3 events:
@@ -1608,6 +1648,8 @@ __init int intel_pmu_init(void)
 
        intel_ds_init();
 
+       x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
+
        /*
         * Install the hw-cache-events table:
         */
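
The "Install first, so it runs last" comment holds because x86_add_quirk() pushes each entry onto the head of a singly linked list that is later walked from the head, so quirks execute in reverse registration order. A minimal userspace sketch of that pattern; the add_quirk()/run_quirks() names are hypothetical stand-ins for the kernel's versions in perf_event.c:

#include <stdio.h>
#include <stdlib.h>

struct quirk {
	void (*func)(void);
	struct quirk *next;
};

static struct quirk *quirks;	/* list head */

/* Head insertion: the first quirk registered ends up at the tail,
 * so it is the last one to run. */
static void add_quirk(void (*func)(void))
{
	struct quirk *q = malloc(sizeof(*q));

	if (!q)
		exit(1);
	q->func = func;
	q->next = quirks;
	quirks = q;
}

static void run_quirks(void)
{
	struct quirk *q;

	for (q = quirks; q; q = q->next)
		q->func();
}

static void first(void)  { puts("registered first, runs last"); }
static void second(void) { puts("registered second, runs first"); }

int main(void)
{
	add_quirk(first);
	add_quirk(second);
	run_quirks();
	return 0;
}
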
@@ -1617,7 +1659,7 @@ __init int intel_pmu_init(void)
                break;
 
        case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
-               x86_pmu.quirks = intel_clovertown_quirks;
+               x86_add_quirk(intel_clovertown_quirk);
        case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
        case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
        case 29: /* six-core 45 nm xeon "Dunnington" */
@@ -1651,17 +1693,8 @@ __init int intel_pmu_init(void)
                /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
                intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
 
-               if (ebx & 0x40) {
-                       /*
-                        * Erratum AAJ80 detected, we work it around by using
-                        * the BR_MISP_EXEC.ANY event. This will over-count
-                        * branch-misses, but it's still much better than the
-                        * architectural event which is often completely bogus:
-                        */
-                       intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+               x86_add_quirk(intel_nehalem_quirk);
 
-                       pr_cont("erratum AAJ80 worked around, ");
-               }
                pr_cont("Nehalem events, ");
                break;
 
@@ -1701,7 +1734,7 @@ __init int intel_pmu_init(void)
                break;
 
        case 42: /* SandyBridge */
-               x86_pmu.quirks = intel_sandybridge_quirks;
+               x86_add_quirk(intel_sandybridge_quirk);
        case 45: /* SandyBridge, "Romley-EP" */
                memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
@@ -1738,5 +1771,6 @@ __init int intel_pmu_init(void)
                        break;
                }
        }
+
        return 0;
 }