git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - arch/x86/kernel/cpu/perf_event.c
Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9d977a2ea693c4db1d6cdf1d86b76672885a76e2..eed3673a8656f5688d54f6b4d72fd9774c831fcb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -30,6 +30,7 @@
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
 #include <asm/compat.h>
+#include <asm/smp.h>
 
 #if 0
 #undef wrmsrl
@@ -93,6 +94,8 @@ struct amd_nb {
        struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
+struct intel_percore;
+
 #define MAX_LBR_ENTRIES                16
 
 struct cpu_hw_events {
@@ -127,6 +130,13 @@ struct cpu_hw_events {
        struct perf_branch_stack        lbr_stack;
        struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
 
+       /*
+        * Intel percore register state.
+        * Coordinate shared resources between HT threads.
+        */
+       int                             percore_used; /* Used by this CPU? */
+       struct intel_percore            *per_core;
+
        /*
         * AMD specific bits
         */
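
The new per_core pointer lets both hyper-threads of a physical core see the
same shared state, and percore_used records whether this CPU has taken a
reference to it. The structure itself lives in the Intel-specific code; the
sketch below only illustrates what such per-core shared state could hold
(field names are illustrative, not taken from this patch):

/*
 * Illustrative only: one instance shared by all SMT siblings of a core,
 * protected against concurrent updates so that a core-wide resource
 * (e.g. an offcore-response MSR) is programmed consistently no matter
 * which thread schedules the event.
 */
struct intel_percore {
	raw_spinlock_t	lock;		/* protects the fields below      */
	int		refcnt;		/* CPUs currently referencing it  */
	unsigned int	core_id;	/* physical core this belongs to  */
	/* ... per-core shared register bookkeeping ... */
};
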
@@ -166,7 +176,7 @@ struct cpu_hw_events {
 /*
  * Constraint on the Event code + UMask
  */
-#define PEBS_EVENT_CONSTRAINT(c, n)    \
+#define INTEL_UEVENT_CONSTRAINT(c, n)  \
        EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
 #define EVENT_CONSTRAINT_END           \
@@ -175,6 +185,28 @@ struct cpu_hw_events {
 #define for_each_event_constraint(e, c)        \
        for ((e) = (c); (e)->weight; (e)++)
 
+/*
+ * Extra registers for specific events.
+ * Some events need large masks and require external MSRs.
+ * Define a mapping to these extra registers.
+ */
+struct extra_reg {
+       unsigned int            event;
+       unsigned int            msr;
+       u64                     config_mask;
+       u64                     valid_mask;
+};
+
+#define EVENT_EXTRA_REG(e, ms, m, vm) {        \
+       .event = (e),           \
+       .msr = (ms),            \
+       .config_mask = (m),     \
+       .valid_mask = (vm),     \
+       }
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm)  \
+       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
 union perf_capabilities {
        struct {
                u64     lbr_format    : 6;
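
A CPU model that needs these would terminate its table with EVENT_EXTRA_END
and hook it up via the extra_regs pointer added to struct x86_pmu further
down. A minimal sketch of such a table (the event code, MSR name and mask are
placeholders, not values defined in this hunk):

/*
 * Hypothetical table: for any event whose architectural event code is
 * 0xb7, route the extra bits supplied in attr->config1 into a companion
 * MSR, allowing only the low 16 bits.
 */
static struct extra_reg demo_extra_regs[] __read_mostly = {
	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
	EVENT_EXTRA_END
};

/* ... and in the model-specific setup:  x86_pmu.extra_regs = demo_extra_regs; */
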
@@ -219,6 +251,7 @@ struct x86_pmu {
        void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
                                                 struct perf_event *event);
        struct event_constraint *event_constraints;
+       struct event_constraint *percore_constraints;
        void            (*quirks)(void);
        int             perfctr_second_write;
 
@@ -247,6 +280,11 @@ struct x86_pmu {
         */
        unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
        int             lbr_nr;                    /* hardware stack size */
+
+       /*
+        * Extra registers for events
+        */
+       struct extra_reg *extra_regs;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -271,6 +309,10 @@ static u64 __read_mostly hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];
+static u64 __read_mostly hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
 
 /*
  * Propagate event elapsed time into the generic event.
@@ -298,7 +340,7 @@ x86_perf_event_update(struct perf_event *event)
         */
 again:
        prev_raw_count = local64_read(&hwc->prev_count);
-       rdmsrl(hwc->event_base + idx, new_raw_count);
+       rdmsrl(hwc->event_base, new_raw_count);
 
        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                                        new_raw_count) != prev_raw_count)
@@ -321,6 +363,49 @@ again:
        return new_raw_count;
 }
 
+/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
+static inline int x86_pmu_addr_offset(int index)
+{
+       if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+               return index << 1;
+       return index;
+}
+
+static inline unsigned int x86_pmu_config_addr(int index)
+{
+       return x86_pmu.eventsel + x86_pmu_addr_offset(index);
+}
+
+static inline unsigned int x86_pmu_event_addr(int index)
+{
+       return x86_pmu.perfctr + x86_pmu_addr_offset(index);
+}
+
+/*
+ * Find and validate any extra registers to set up.
+ */
+static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
+{
+       struct extra_reg *er;
+
+       event->hw.extra_reg = 0;
+       event->hw.extra_config = 0;
+
+       if (!x86_pmu.extra_regs)
+               return 0;
+
+       for (er = x86_pmu.extra_regs; er->msr; er++) {
+               if (er->event != (config & er->config_mask))
+                       continue;
+               if (event->attr.config1 & ~er->valid_mask)
+                       return -EINVAL;
+               event->hw.extra_reg = er->msr;
+               event->hw.extra_config = event->attr.config1;
+               break;
+       }
+       return 0;
+}
+
 static atomic_t active_events;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
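
On CPUs advertising X86_FEATURE_PERFCTR_CORE (the AMD core-counter extension)
the control and counter MSRs are interleaved, so counter i sits at base + 2*i;
everywhere else the registers stay contiguous at base + i. A standalone worked
example of the mapping, with the MSR bases hard-coded for illustration:

#include <stdio.h>

/*
 * Illustrative base addresses:
 *   AMD fam15h core PMU: PERF_CTL0 = 0xc0010200, PERF_CTR0 = 0xc0010201,
 *                        PERF_CTL1 = 0xc0010202, ... (interleaved)
 *   legacy AMD K7/K8:    EVNTSEL0  = 0xc0010000, PERFCTR0 = 0xc0010004
 */
static unsigned int addr_offset(int index, int perfctr_core)
{
	return perfctr_core ? index << 1 : index;	/* same rule as above */
}

int main(void)
{
	int i;

	for (i = 0; i < 3; i++)
		printf("ctr %d: fam15h ctl=%#x ctr=%#x | legacy sel=%#x ctr=%#x\n",
		       i,
		       0xc0010200 + addr_offset(i, 1),
		       0xc0010201 + addr_offset(i, 1),
		       0xc0010000 + addr_offset(i, 0),
		       0xc0010004 + addr_offset(i, 0));
	return 0;
}
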
@@ -331,12 +416,12 @@ static bool reserve_pmc_hardware(void)
        int i;
 
        for (i = 0; i < x86_pmu.num_counters; i++) {
-               if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
+               if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
                        goto perfctr_fail;
        }
 
        for (i = 0; i < x86_pmu.num_counters; i++) {
-               if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
+               if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
                        goto eventsel_fail;
        }
 
@@ -344,13 +429,13 @@ static bool reserve_pmc_hardware(void)
 
 eventsel_fail:
        for (i--; i >= 0; i--)
-               release_evntsel_nmi(x86_pmu.eventsel + i);
+               release_evntsel_nmi(x86_pmu_config_addr(i));
 
        i = x86_pmu.num_counters;
 
 perfctr_fail:
        for (i--; i >= 0; i--)
-               release_perfctr_nmi(x86_pmu.perfctr + i);
+               release_perfctr_nmi(x86_pmu_event_addr(i));
 
        return false;
 }
@@ -360,8 +445,8 @@ static void release_pmc_hardware(void)
        int i;
 
        for (i = 0; i < x86_pmu.num_counters; i++) {
-               release_perfctr_nmi(x86_pmu.perfctr + i);
-               release_evntsel_nmi(x86_pmu.eventsel + i);
+               release_perfctr_nmi(x86_pmu_event_addr(i));
+               release_evntsel_nmi(x86_pmu_config_addr(i));
        }
 }
 
@@ -382,7 +467,7 @@ static bool check_hw_exists(void)
         * complain and bail.
         */
        for (i = 0; i < x86_pmu.num_counters; i++) {
-               reg = x86_pmu.eventsel + i;
+               reg = x86_pmu_config_addr(i);
                ret = rdmsrl_safe(reg, &val);
                if (ret)
                        goto msr_fail;
@@ -407,20 +492,25 @@ static bool check_hw_exists(void)
         * that don't trap on the MSR access and always return 0s.
         */
        val = 0xabcdUL;
-       ret = checking_wrmsrl(x86_pmu.perfctr, val);
-       ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
+       ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
+       ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
        if (ret || val != val_new)
                goto msr_fail;
 
        return true;
 
 bios_fail:
-       printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
+       /*
+        * We still allow the PMU driver to operate:
+        */
+       printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
        printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
-       return false;
+
+       return true;
 
 msr_fail:
        printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
+
        return false;
 }
 
@@ -442,8 +532,9 @@ static inline int x86_pmu_initialized(void)
 }
 
 static inline int
-set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 {
+       struct perf_event_attr *attr = &event->attr;
        unsigned int cache_type, cache_op, cache_result;
        u64 config, val;
 
@@ -470,8 +561,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
                return -EINVAL;
 
        hwc->config |= val;
-
-       return 0;
+       attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
+       return x86_pmu_extra_regs(val, event);
 }
 
 static int x86_setup_perfctr(struct perf_event *event)
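
Hardware cache events encode attr->config as id | (op << 8) | (result << 16),
and the same three indices that select the event encoding from
hw_cache_event_ids now also select a default config1 from hw_cache_extra_regs.
A standalone sketch of just the decode step (the commented table lookups stand
in for the kernel arrays):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t config = 2 | (0 << 8) | (1 << 16);	/* e.g. LL + read + miss */
	unsigned int type   = config & 0xff;
	unsigned int op     = (config >> 8)  & 0xff;
	unsigned int result = (config >> 16) & 0xff;

	/*
	 * The kernel then indexes both tables with these values:
	 *   val           = hw_cache_event_ids [type][op][result];
	 *   attr->config1 = hw_cache_extra_regs[type][op][result];
	 */
	printf("cache_type=%u cache_op=%u cache_result=%u\n", type, op, result);
	return 0;
}
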
@@ -496,10 +587,10 @@ static int x86_setup_perfctr(struct perf_event *event)
        }
 
        if (attr->type == PERF_TYPE_RAW)
-               return 0;
+               return x86_pmu_extra_regs(event->attr.config, event);
 
        if (attr->type == PERF_TYPE_HW_CACHE)
-               return set_ext_hw_attr(hwc, attr);
+               return set_ext_hw_attr(hwc, event);
 
        if (attr->config >= x86_pmu.max_events)
                return -EINVAL;
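
With raw events now passed to x86_pmu_extra_regs(), userspace can hand the
wide mask for a matching event in via attr.config1 and have it validated
against valid_mask. A hedged userspace sketch (the event code 0x01b7 and the
mask are examples and need not exist on a given CPU):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size    = sizeof(attr);
	attr.type    = PERF_TYPE_RAW;
	attr.config  = 0x01b7;	/* example: an event wired to an extra MSR */
	attr.config1 = 0xffff;	/* extra bits forwarded to that MSR        */

	fd = syscall(__NR_perf_event_open, &attr, 0 /* self */, -1 /* any cpu */,
		     -1 /* no group */, 0 /* flags */);
	if (fd < 0)
		perror("perf_event_open");
	else
		close(fd);
	return 0;
}
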
@@ -617,11 +708,11 @@ static void x86_pmu_disable_all(void)
 
                if (!test_bit(idx, cpuc->active_mask))
                        continue;
-               rdmsrl(x86_pmu.eventsel + idx, val);
+               rdmsrl(x86_pmu_config_addr(idx), val);
                if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
                        continue;
                val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
-               wrmsrl(x86_pmu.eventsel + idx, val);
+               wrmsrl(x86_pmu_config_addr(idx), val);
        }
 }
 
@@ -642,21 +733,26 @@ static void x86_pmu_disable(struct pmu *pmu)
        x86_pmu.disable_all();
 }
 
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
+                                         u64 enable_mask)
+{
+       if (hwc->extra_reg)
+               wrmsrl(hwc->extra_reg, hwc->extra_config);
+       wrmsrl(hwc->config_base, hwc->config | enable_mask);
+}
+
 static void x86_pmu_enable_all(int added)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int idx;
 
        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               struct perf_event *event = cpuc->events[idx];
-               u64 val;
+               struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
 
                if (!test_bit(idx, cpuc->active_mask))
                        continue;
 
-               val = event->hw.config;
-               val |= ARCH_PERFMON_EVENTSEL_ENABLE;
-               wrmsrl(x86_pmu.eventsel + idx, val);
+               __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
        }
 }
 
@@ -821,15 +917,10 @@ static inline void x86_assign_hw_event(struct perf_event *event,
                hwc->event_base = 0;
        } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
                hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-               /*
-                * We set it so that event_base + idx in wrmsr/rdmsr maps to
-                * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
-                */
-               hwc->event_base =
-                       MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+               hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
        } else {
-               hwc->config_base = x86_pmu.eventsel;
-               hwc->event_base  = x86_pmu.perfctr;
+               hwc->config_base = x86_pmu_config_addr(hwc->idx);
+               hwc->event_base  = x86_pmu_event_addr(hwc->idx);
        }
 }
 
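
config_base and event_base now hold complete MSR addresses, so the read/write
sites no longer add the counter index themselves; that is what makes the
non-contiguous fam15h layout work. For the fixed counters the arithmetic is
direct, as the small worked example below shows (X86_PMC_IDX_FIXED is 32 and
MSR_ARCH_PERFMON_FIXED_CTR0 is 0x309):

#include <stdio.h>

#define X86_PMC_IDX_FIXED		32
#define MSR_ARCH_PERFMON_FIXED_CTR0	0x309

int main(void)
{
	int idx;

	/* fixed counters 0..2 occupy generic indices 32..34 */
	for (idx = X86_PMC_IDX_FIXED; idx < X86_PMC_IDX_FIXED + 3; idx++)
		printf("idx %d -> MSR %#x\n",
		       idx, MSR_ARCH_PERFMON_FIXED_CTR0 + (idx - X86_PMC_IDX_FIXED));
	/* prints 0x309, 0x30a and 0x30b: FIXED_CTR0..FIXED_CTR2 */
	return 0;
}
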
@@ -915,17 +1006,11 @@ static void x86_pmu_enable(struct pmu *pmu)
        x86_pmu.enable_all(added);
 }
 
-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
-                                         u64 enable_mask)
-{
-       wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
-}
-
 static inline void x86_pmu_disable_event(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
 
-       wrmsrl(hwc->config_base + hwc->idx, hwc->config);
+       wrmsrl(hwc->config_base, hwc->config);
 }
 
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -978,7 +1063,7 @@ x86_perf_event_set_period(struct perf_event *event)
         */
        local64_set(&hwc->prev_count, (u64)-left);
 
-       wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+       wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
 
        /*
         * Due to erratum on certan cpu we need
@@ -986,7 +1071,7 @@ x86_perf_event_set_period(struct perf_event *event)
         * is updated properly
         */
        if (x86_pmu.perfctr_second_write) {
-               wrmsrl(hwc->event_base + idx,
+               wrmsrl(hwc->event_base,
                        (u64)(-left) & x86_pmu.cntval_mask);
        }
 
@@ -1029,7 +1114,7 @@ static int x86_pmu_add(struct perf_event *event, int flags)
 
        /*
         * If group events scheduling transaction was started,
-        * skip the schedulability test here, it will be peformed
+        * skip the schedulability test here, it will be performed
         * at commit time (->commit_txn) as a whole
         */
        if (cpuc->group_flag & PERF_EVENT_TXN)
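
For context, the transaction this comment refers to is driven from the perf
core when a whole event group is scheduled in; roughly (simplified pseudocode,
not the exact core code, and for_each_sibling is shorthand for the
sibling-list walk):

/*
 *	pmu->start_txn(pmu);              // sets PERF_EVENT_TXN in group_flag
 *	for_each_sibling(event, group)    // shorthand for the sibling-list walk
 *		if (pmu->add(event, PERF_EF_START))
 *			goto fail;        // each ->add() skips the test above
 *	if (!pmu->commit_txn(pmu))        // one x86_schedule_events() for all
 *		return 0;                 // the group was accepted atomically
 * fail:
 *	pmu->cancel_txn(pmu);             // roll back the partial adds
 */
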
@@ -1113,8 +1198,8 @@ void perf_event_print_debug(void)
        pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
-               rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
+               rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
+               rdmsrl(x86_pmu_event_addr(idx), pmc_count);
 
                prev_left = per_cpu(pmc_prev_left[idx], cpu);
 
@@ -1389,7 +1474,7 @@ static void __init pmu_check_apic(void)
        pr_info("no hardware sampling interrupt available.\n");
 }
 
-int __init init_hw_perf_events(void)
+static int __init init_hw_perf_events(void)
 {
        struct event_constraint *c;
        int err;
@@ -1608,7 +1693,7 @@ out:
        return ret;
 }
 
-int x86_pmu_event_init(struct perf_event *event)
+static int x86_pmu_event_init(struct perf_event *event)
 {
        struct pmu *tmp;
        int err;
@@ -1710,7 +1795,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
        perf_callchain_store(entry, regs->ip);
 
-       dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
+       dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
 }
 
 #ifdef CONFIG_COMPAT