x86, nmi: Wire up NMI handlers to new routines
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index cfa62ec090ece1dfb48420c5641cdb52b58feffa..640891014b2ae3dccdef498db11f05a0942145e5 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -32,6 +32,8 @@
 #include <asm/smp.h>
 #include <asm/alternative.h>
 
+#include "perf_event.h"
+
 #if 0
 #undef wrmsrl
 #define wrmsrl(msr, val)                                       \
@@ -43,283 +45,17 @@ do {                                                               \
 } while (0)
 #endif
 
-/*
- *          |   NHM/WSM    |      SNB     |
- * register -------------------------------
- *          |  HT  | no HT |  HT  | no HT |
- *-----------------------------------------
- * offcore  | core | core  | cpu  | core  |
- * lbr_sel  | core | core  | cpu  | core  |
- * ld_lat   | cpu  | core  | cpu  | core  |
- *-----------------------------------------
- *
- * Given that there is a small number of shared regs,
- * we can pre-allocate their slot in the per-cpu
- * per-core reg tables.
- */
-enum extra_reg_type {
-       EXTRA_REG_NONE  = -1,   /* not used */
-
-       EXTRA_REG_RSP_0 = 0,    /* offcore_response_0 */
-       EXTRA_REG_RSP_1 = 1,    /* offcore_response_1 */
-
-       EXTRA_REG_MAX           /* number of entries needed */
-};
-
-struct event_constraint {
-       union {
-               unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-               u64             idxmsk64;
-       };
-       u64     code;
-       u64     cmask;
-       int     weight;
-};
-
-struct amd_nb {
-       int nb_id;  /* NorthBridge id */
-       int refcnt; /* reference count */
-       struct perf_event *owners[X86_PMC_IDX_MAX];
-       struct event_constraint event_constraints[X86_PMC_IDX_MAX];
-};
-
-struct intel_percore;
-
-#define MAX_LBR_ENTRIES                16
-
-struct cpu_hw_events {
-       /*
-        * Generic x86 PMC bits
-        */
-       struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
-       unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-       unsigned long           running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-       int                     enabled;
-
-       int                     n_events;
-       int                     n_added;
-       int                     n_txn;
-       int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
-       u64                     tags[X86_PMC_IDX_MAX];
-       struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
-
-       unsigned int            group_flag;
-
-       /*
-        * Intel DebugStore bits
-        */
-       struct debug_store      *ds;
-       u64                     pebs_enabled;
-
-       /*
-        * Intel LBR bits
-        */
-       int                             lbr_users;
-       void                            *lbr_context;
-       struct perf_branch_stack        lbr_stack;
-       struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
-
-       /*
-        * manage shared (per-core, per-cpu) registers
-        * used on Intel NHM/WSM/SNB
-        */
-       struct intel_shared_regs        *shared_regs;
-
-       /*
-        * AMD specific bits
-        */
-       struct amd_nb           *amd_nb;
-};
-
-#define __EVENT_CONSTRAINT(c, n, m, w) {\
-       { .idxmsk64 = (n) },            \
-       .code = (c),                    \
-       .cmask = (m),                   \
-       .weight = (w),                  \
-}
-
-#define EVENT_CONSTRAINT(c, n, m)      \
-       __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
-
-/*
- * Constraint on the Event code.
- */
-#define INTEL_EVENT_CONSTRAINT(c, n)   \
-       EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
-
-/*
- * Constraint on the Event code + UMask + fixed-mask
- *
- * filter mask to validate fixed counter events.
- * the following filters disqualify for fixed counters:
- *  - inv
- *  - edge
- *  - cnt-mask
- *  The other filters are supported by fixed counters.
- *  The any-thread option is supported starting with v3.
- */
-#define FIXED_EVENT_CONSTRAINT(c, n)   \
-       EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
-
-/*
- * Constraint on the Event code + UMask
- */
-#define INTEL_UEVENT_CONSTRAINT(c, n)  \
-       EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
-
-#define EVENT_CONSTRAINT_END           \
-       EVENT_CONSTRAINT(0, 0, 0)
-
-#define for_each_event_constraint(e, c)        \
-       for ((e) = (c); (e)->weight; (e)++)
-
-/*
- * Per register state.
- */
-struct er_account {
-       raw_spinlock_t          lock;   /* per-core: protect structure */
-       u64                     config; /* extra MSR config */
-       u64                     reg;    /* extra MSR number */
-       atomic_t                ref;    /* reference count */
-};
-
-/*
- * Extra registers for specific events.
- *
- * Some events need large masks and require external MSRs.
- * Those extra MSRs end up being shared for all events on
- * a PMU and sometimes between PMU of sibling HT threads.
- * In either case, the kernel needs to handle conflicting
- * accesses to those extra, shared, regs. The data structure
- * to manage those registers is stored in cpu_hw_event.
- */
-struct extra_reg {
-       unsigned int            event;
-       unsigned int            msr;
-       u64                     config_mask;
-       u64                     valid_mask;
-       int                     idx;  /* per_xxx->regs[] reg index */
-};
-
-#define EVENT_EXTRA_REG(e, ms, m, vm, i) {     \
-       .event = (e),           \
-       .msr = (ms),            \
-       .config_mask = (m),     \
-       .valid_mask = (vm),     \
-       .idx = EXTRA_REG_##i    \
-       }
-
-#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)     \
-       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
-
-#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
-
-union perf_capabilities {
-       struct {
-               u64     lbr_format    : 6;
-               u64     pebs_trap     : 1;
-               u64     pebs_arch_reg : 1;
-               u64     pebs_format   : 4;
-               u64     smm_freeze    : 1;
-       };
-       u64     capabilities;
-};
-
-/*
- * struct x86_pmu - generic x86 pmu
- */
-struct x86_pmu {
-       /*
-        * Generic x86 PMC bits
-        */
-       const char      *name;
-       int             version;
-       int             (*handle_irq)(struct pt_regs *);
-       void            (*disable_all)(void);
-       void            (*enable_all)(int added);
-       void            (*enable)(struct perf_event *);
-       void            (*disable)(struct perf_event *);
-       int             (*hw_config)(struct perf_event *event);
-       int             (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
-       unsigned        eventsel;
-       unsigned        perfctr;
-       u64             (*event_map)(int);
-       int             max_events;
-       int             num_counters;
-       int             num_counters_fixed;
-       int             cntval_bits;
-       u64             cntval_mask;
-       int             apic;
-       u64             max_period;
-       struct event_constraint *
-                       (*get_event_constraints)(struct cpu_hw_events *cpuc,
-                                                struct perf_event *event);
-
-       void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
-                                                struct perf_event *event);
-       struct event_constraint *event_constraints;
-       void            (*quirks)(void);
-       int             perfctr_second_write;
-
-       int             (*cpu_prepare)(int cpu);
-       void            (*cpu_starting)(int cpu);
-       void            (*cpu_dying)(int cpu);
-       void            (*cpu_dead)(int cpu);
-
-       /*
-        * Intel Arch Perfmon v2+
-        */
-       u64                     intel_ctrl;
-       union perf_capabilities intel_cap;
+struct x86_pmu x86_pmu __read_mostly;
 
-       /*
-        * Intel DebugStore bits
-        */
-       int             bts, pebs;
-       int             bts_active, pebs_active;
-       int             pebs_record_size;
-       void            (*drain_pebs)(struct pt_regs *regs);
-       struct event_constraint *pebs_constraints;
-
-       /*
-        * Intel LBR
-        */
-       unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
-       int             lbr_nr;                    /* hardware stack size */
-
-       /*
-        * Extra registers for events
-        */
-       struct extra_reg *extra_regs;
-       unsigned int er_flags;
-};
-
-#define ERF_NO_HT_SHARING      1
-#define ERF_HAS_RSP_1          2
-
-static struct x86_pmu x86_pmu __read_mostly;
-
-static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
        .enabled = 1,
 };
 
-static int x86_perf_event_set_period(struct perf_event *event);
-
-/*
- * Generalized hw caching related hw_event table, filled
- * in on a per model basis. A value of 0 means
- * 'not supported', -1 means 'hw_event makes no sense on
- * this CPU', any other value means the raw hw_event
- * ID.
- */
-
-#define C(x) PERF_COUNT_HW_CACHE_##x
-
-static u64 __read_mostly hw_cache_event_ids
+u64 __read_mostly hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];
-static u64 __read_mostly hw_cache_extra_regs
+u64 __read_mostly hw_cache_extra_regs
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];
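The comment removed above spells out the semantics of these 3-D tables: 0 means "not supported", -1 means "hw_event makes no sense on this CPU", anything else is the raw hw_event ID. As a sketch (not part of the patch) of roughly how set_ext_hw_attr() further down consumes them, with a hypothetical helper name and the enum bounds taken from <linux/perf_event.h>:

        /* Hypothetical helper illustrating the table lookup. */
        static int cache_config_to_hw_event(u64 config, u64 *hw_event)
        {
                unsigned int type, op, result;

                type   =  config        & 0xff; /* PERF_COUNT_HW_CACHE_*        */
                op     = (config >>  8) & 0xff; /* PERF_COUNT_HW_CACHE_OP_*     */
                result = (config >> 16) & 0xff; /* PERF_COUNT_HW_CACHE_RESULT_* */

                if (type   >= PERF_COUNT_HW_CACHE_MAX ||
                    op     >= PERF_COUNT_HW_CACHE_OP_MAX ||
                    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                        return -EINVAL;

                *hw_event = hw_cache_event_ids[type][op][result];
                if (*hw_event == 0)
                        return -ENOENT;         /* not supported on this model  */
                if (*hw_event == (u64)-1)
                        return -EINVAL;         /* makes no sense on this CPU   */
                return 0;
        }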
@@ -329,8 +65,7 @@ static u64 __read_mostly hw_cache_extra_regs
  * Can only be executed on the CPU where the event is active.
  * Returns the delta events processed.
  */
-static u64
-x86_perf_event_update(struct perf_event *event)
+u64 x86_perf_event_update(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
        int shift = 64 - x86_pmu.cntval_bits;
@@ -373,30 +108,6 @@ again:
        return new_raw_count;
 }
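The `again:` label in the hunk header hints at the cmpxchg retry loop elided here; the `shift = 64 - x86_pmu.cntval_bits` set up above is what makes the delta arithmetic wrap-safe for counters narrower than 64 bits. A sketch of that arithmetic (illustrative, not quoted from the file):

        /* With 48-bit counters shift == 16.  Shifting both samples into the
         * top bits before subtracting, then arithmetic-shifting back down,
         * yields the correct event delta even when the counter has wrapped
         * between the two reads. */
        s64 delta;

        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;                        /* sign-extend to 64 bits */

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);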
 
-static inline int x86_pmu_addr_offset(int index)
-{
-       int offset;
-
-       /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
-       alternative_io(ASM_NOP2,
-                      "shll $1, %%eax",
-                      X86_FEATURE_PERFCTR_CORE,
-                      "=a" (offset),
-                      "a"  (index));
-
-       return offset;
-}
-
-static inline unsigned int x86_pmu_config_addr(int index)
-{
-       return x86_pmu.eventsel + x86_pmu_addr_offset(index);
-}
-
-static inline unsigned int x86_pmu_event_addr(int index)
-{
-       return x86_pmu.perfctr + x86_pmu_addr_offset(index);
-}
-
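The alternative_io() in the helper deleted above self-patches at boot; its intent is stated in the removed one-line comment and, spelled out without the asm, looks like this (sketch only, keying on the same feature bit):

        /* Sketch of the deleted helper, without the runtime patching: CPUs
         * advertising X86_FEATURE_PERFCTR_CORE interleave the eventsel and
         * perfctr MSRs, so consecutive counters sit two MSRs apart; on
         * everything else the stride is one. */
        static inline int x86_pmu_addr_offset_sketch(int index)
        {
                if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
                        return index << 1;

                return index;
        }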
 /*
  * Find and validate any extra registers to set up.
  */
@@ -532,9 +243,6 @@ msr_fail:
        return false;
 }
 
-static void reserve_ds_buffers(void);
-static void release_ds_buffers(void);
-
 static void hw_perf_event_destroy(struct perf_event *event)
 {
        if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
@@ -583,7 +291,7 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
        return x86_pmu_extra_regs(val, event);
 }
 
-static int x86_setup_perfctr(struct perf_event *event)
+int x86_setup_perfctr(struct perf_event *event)
 {
        struct perf_event_attr *attr = &event->attr;
        struct hw_perf_event *hwc = &event->hw;
@@ -647,7 +355,7 @@ static int x86_setup_perfctr(struct perf_event *event)
        return 0;
 }
 
-static int x86_pmu_hw_config(struct perf_event *event)
+int x86_pmu_hw_config(struct perf_event *event)
 {
        if (event->attr.precise_ip) {
                int precise = 0;
@@ -723,7 +431,7 @@ static int __x86_pmu_event_init(struct perf_event *event)
        return x86_pmu.hw_config(event);
 }
 
-static void x86_pmu_disable_all(void)
+void x86_pmu_disable_all(void)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int idx;
@@ -758,15 +466,7 @@ static void x86_pmu_disable(struct pmu *pmu)
        x86_pmu.disable_all();
 }
 
-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
-                                         u64 enable_mask)
-{
-       if (hwc->extra_reg.reg)
-               wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
-       wrmsrl(hwc->config_base, hwc->config | enable_mask);
-}
-
-static void x86_pmu_enable_all(int added)
+void x86_pmu_enable_all(int added)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int idx;
@@ -788,7 +488,7 @@ static inline int is_x86_event(struct perf_event *event)
        return event->pmu == &pmu;
 }
 
-static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
        struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
        unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -959,7 +659,6 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
 }
 
 static void x86_pmu_start(struct perf_event *event, int flags);
-static void x86_pmu_stop(struct perf_event *event, int flags);
 
 static void x86_pmu_enable(struct pmu *pmu)
 {
@@ -1031,21 +730,13 @@ static void x86_pmu_enable(struct pmu *pmu)
        x86_pmu.enable_all(added);
 }
 
-static inline void x86_pmu_disable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       wrmsrl(hwc->config_base, hwc->config);
-}
-
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
 
 /*
  * Set the next IRQ period, based on the hwc->period_left value.
  * To be called with the event disabled in hw:
  */
-static int
-x86_perf_event_set_period(struct perf_event *event)
+int x86_perf_event_set_period(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
        s64 left = local64_read(&hwc->period_left);
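The body elided between this hunk and the next arms the counter so that it overflows, and raises the PMI, after `left` more events. Since the PMCs count upward, that amounts to writing the negated value truncated to the counter width; a sketch of the essential step, where `hwc->prev_count` and `hwc->event_base` are field names assumed from the surrounding kernel rather than shown in this diff:

        /* Arm the counter: write -(left) so that counting up by 'left'
         * events overflows the implemented counter width and fires the
         * interrupt. */
        local64_set(&hwc->prev_count, (u64)-left);
        wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);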
@@ -1105,7 +796,7 @@ x86_perf_event_set_period(struct perf_event *event)
        return ret;
 }
 
-static void x86_pmu_enable_event(struct perf_event *event)
+void x86_pmu_enable_event(struct perf_event *event)
 {
        if (__this_cpu_read(cpu_hw_events.enabled))
                __x86_pmu_enable_event(&event->hw,
@@ -1244,7 +935,7 @@ void perf_event_print_debug(void)
        local_irq_restore(flags);
 }
 
-static void x86_pmu_stop(struct perf_event *event, int flags)
+void x86_pmu_stop(struct perf_event *event, int flags)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
@@ -1297,7 +988,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
        perf_event_update_userpage(event);
 }
 
-static int x86_pmu_handle_irq(struct pt_regs *regs)
+int x86_pmu_handle_irq(struct pt_regs *regs)
 {
        struct perf_sample_data data;
        struct cpu_hw_events *cpuc;
@@ -1367,109 +1058,28 @@ void perf_events_lapic_init(void)
        apic_write(APIC_LVTPC, APIC_DM_NMI);
 }
 
-struct pmu_nmi_state {
-       unsigned int    marked;
-       int             handled;
-};
-
-static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
-
 static int __kprobes
-perf_event_nmi_handler(struct notifier_block *self,
-                        unsigned long cmd, void *__args)
+perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
-       struct die_args *args = __args;
-       unsigned int this_nmi;
-       int handled;
-
        if (!atomic_read(&active_events))
-               return NOTIFY_DONE;
-
-       switch (cmd) {
-       case DIE_NMI:
-               break;
-       case DIE_NMIUNKNOWN:
-               this_nmi = percpu_read(irq_stat.__nmi_count);
-               if (this_nmi != __this_cpu_read(pmu_nmi.marked))
-                       /* let the kernel handle the unknown nmi */
-                       return NOTIFY_DONE;
-               /*
-                * This one is a PMU back-to-back nmi. Two events
-                * trigger 'simultaneously' raising two back-to-back
-                * NMIs. If the first NMI handles both, the latter
-                * will be empty and daze the CPU. So, we drop it to
-                * avoid false-positive 'unknown nmi' messages.
-                */
-               return NOTIFY_STOP;
-       default:
-               return NOTIFY_DONE;
-       }
-
-       handled = x86_pmu.handle_irq(args->regs);
-       if (!handled)
-               return NOTIFY_DONE;
-
-       this_nmi = percpu_read(irq_stat.__nmi_count);
-       if ((handled > 1) ||
-               /* the next nmi could be a back-to-back nmi */
-           ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
-            (__this_cpu_read(pmu_nmi.handled) > 1))) {
-               /*
-                * We could have two subsequent back-to-back nmis: The
-                * first handles more than one counter, the 2nd
-                * handles only one counter and the 3rd handles no
-                * counter.
-                *
-                * This is the 2nd nmi because the previous was
-                * handling more than one counter. We will mark the
-                * next (3rd) and then drop it if unhandled.
-                */
-               __this_cpu_write(pmu_nmi.marked, this_nmi + 1);
-               __this_cpu_write(pmu_nmi.handled, handled);
-       }
+               return NMI_DONE;
 
-       return NOTIFY_STOP;
+       return x86_pmu.handle_irq(regs);
 }
 
-static __read_mostly struct notifier_block perf_event_nmi_notifier = {
-       .notifier_call          = perf_event_nmi_handler,
-       .next                   = NULL,
-       .priority               = NMI_LOCAL_LOW_PRIOR,
-};
-
-static struct event_constraint unconstrained;
-static struct event_constraint emptyconstraint;
-
-static struct event_constraint *
-x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
-{
-       struct event_constraint *c;
-
-       if (x86_pmu.event_constraints) {
-               for_each_event_constraint(c, x86_pmu.event_constraints) {
-                       if ((event->hw.config & c->cmask) == c->code)
-                               return c;
-               }
-       }
-
-       return &unconstrained;
-}
-
-#include "perf_event_amd.c"
-#include "perf_event_p6.c"
-#include "perf_event_p4.c"
-#include "perf_event_intel_lbr.c"
-#include "perf_event_intel_ds.c"
-#include "perf_event_intel.c"
+struct event_constraint emptyconstraint;
+struct event_constraint unconstrained;
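For a concrete reading of the matcher removed just above together with the constraint macros removed earlier in the file, here is a made-up table (sketch only, not from the patch):

        /* Worked example: constrain event code 0xC0 to counters 0 and 1.
         * EVENT_CONSTRAINT() records the counter bitmask in idxmsk64, the
         * code/cmask pair used for matching, and HWEIGHT(0x3) == 2 as the
         * scheduling weight. */
        static struct event_constraint example_constraints[] = {
                INTEL_EVENT_CONSTRAINT(0xc0, 0x3),      /* only PMC0 or PMC1 */
                EVENT_CONSTRAINT_END
        };

        /* x86_get_event_constraints() picks the first entry for which
         * (event->hw.config & c->cmask) == c->code, so any event with code
         * 0xC0, whatever its umask, lands on PMC0/PMC1; if nothing matches
         * it falls back to 'unconstrained' (any counter). */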
 
 static int __cpuinit
 x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 {
        unsigned int cpu = (long)hcpu;
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
        int ret = NOTIFY_OK;
 
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_UP_PREPARE:
+               cpuc->kfree_on_online = NULL;
                if (x86_pmu.cpu_prepare)
                        ret = x86_pmu.cpu_prepare(cpu);
                break;
@@ -1479,6 +1089,10 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
                        x86_pmu.cpu_starting(cpu);
                break;
 
+       case CPU_ONLINE:
+               kfree(cpuc->kfree_on_online);
+               break;
+
        case CPU_DYING:
                if (x86_pmu.cpu_dying)
                        x86_pmu.cpu_dying(cpu);
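The new kfree_on_online slot exists because the CPU_STARTING callback runs on the booting CPU with interrupts disabled, where kfree() is not safe to call: a vendor cpu_starting hook that finds its freshly allocated per-core structure is redundant (an HT sibling already owns one) can only park the pointer, and the CPU_ONLINE leg added above frees it later from process context. A hedged sketch of that producer side; struct intel_shared_regs and its core_id field are assumptions beyond what this diff shows:

        /* Sketch of a cpu_starting hook handing a buffer off for deferred
         * freeing: adopt the sibling's shared structure and park our own
         * allocation for CPU_ONLINE to kfree(). */
        static void example_cpu_starting(int cpu)
        {
                struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
                int core_id = topology_core_id(cpu);
                int i;

                for_each_cpu(i, topology_thread_cpumask(cpu)) {
                        struct intel_shared_regs *pc;

                        pc = per_cpu(cpu_hw_events, i).shared_regs;
                        if (pc && pc->core_id == core_id) {
                                cpuc->kfree_on_online = cpuc->shared_regs;
                                cpuc->shared_regs = pc;
                                break;
                        }
                }
        }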
@@ -1557,7 +1171,7 @@ static int __init init_hw_perf_events(void)
                ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
 
        perf_events_lapic_init();
-       register_die_notifier(&perf_event_nmi_notifier);
+       register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
 
        unconstrained = (struct event_constraint)
                __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
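The swap in this last hunk is the point of the commit: instead of a die-notifier that has to classify DIE_NMI/DIE_NMIUNKNOWN and track back-to-back NMIs itself, the perf handler is attached to the NMI_LOCAL list and merely reports whether the NMI was its own; the core NMI code now owns the unknown-NMI and back-to-back bookkeeping. A hedged sketch of the new registration pattern as some other, hypothetical client would use it (example_pending() is invented; the NMI_DONE/NMI_HANDLED return convention is assumed from the new asm/nmi.h):

        #include <linux/init.h>
        #include <asm/nmi.h>

        static bool example_pending(void);      /* hypothetical device check */

        /* Claim the NMI with NMI_HANDLED, decline it with NMI_DONE so the
         * core can try the remaining NMI_LOCAL handlers and only then the
         * NMI_UNKNOWN ones. */
        static int example_nmi_handler(unsigned int cmd, struct pt_regs *regs)
        {
                if (!example_pending())
                        return NMI_DONE;

                /* ... acknowledge and service the device ... */
                return NMI_HANDLED;
        }

        static int __init example_register(void)
        {
                /* same call shape as the "PMI" registration above */
                return register_nmi_handler(NMI_LOCAL, example_nmi_handler,
                                            0, "example");
        }

        static void __exit example_unregister(void)
        {
                unregister_nmi_handler(NMI_LOCAL, "example");
        }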