perf: Handle stopped state with tracepoints

[karo-tx-linux.git] / kernel / perf_event.c
diff --git a/kernel/perf_event.c b/kernel/perf_event.c

index eac7e3364335a7a3f94d902e69b9ed4eac4df74a..ee489d0847fe2fba89a1361808e15c563ac2a073 100644 (file)
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -652,6 +652,10 @@ retry:
         raw_spin_unlock_irq(&ctx->lock);
  }
  
+#define MAX_INTERRUPTS (~0ULL)
+
+static void perf_log_throttle(struct perf_event *event, int enable);
+
  static int
  event_sched_in(struct perf_event *event,
                  struct perf_cpu_context *cpuctx,
@@ -662,6 +666,17 @@ event_sched_in(struct perf_event *event,
  
         event->state = PERF_EVENT_STATE_ACTIVE;
         event->oncpu = smp_processor_id();
+
+       /*
+        * Unthrottle events: since we scheduled, we might have missed several
+        * ticks already. Also, for a heavily scheduling task there is little
+        * guarantee it'll get a tick in a timely manner.
+        */
+       if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) {
+               perf_log_throttle(event, 1);
+               event->hw.interrupts = 0;
+       }
+
         /*
          * The new state must be visible before we turn it on in the hardware:
          */
@@ -1469,10 +1484,6 @@ void __perf_event_task_sched_in(struct task_struct *task)
         }
  }
  
-#define MAX_INTERRUPTS (~0ULL)
-
-static void perf_log_throttle(struct perf_event *event, int enable);
-
  static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
  {
         u64 frequency = event->attr.sample_freq;
@@ -1872,8 +1883,7 @@ static int alloc_callchain_buffers(void)
          * accessed from NMI. Use a temporary manual per cpu allocation
          * until that gets sorted out.
          */
-       size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) *
-               num_possible_cpus();
+       size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);
  
         entries = kzalloc(size, GFP_KERNEL);
         if (!entries)
@@ -2101,14 +2111,11 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
         unsigned long flags;
         int ctxn, err;
  
-       if (!task && cpu != -1) {
+       if (!task) {
                 /* Must be root to operate on a CPU event: */
                 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
                         return ERR_PTR(-EACCES);
  
-               if (cpu < 0 || cpu >= nr_cpumask_bits)
-                       return ERR_PTR(-EINVAL);
-
                 /*
                  * We could be clever and allow to attach a event to an
                  * offline CPU and activate it when the CPU comes up, but
@@ -3824,6 +3831,8 @@ static void perf_event_task_event(struct perf_task_event *task_event)
         rcu_read_lock();
         list_for_each_entry_rcu(pmu, &pmus, entry) {
                 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+               if (cpuctx->active_pmu != pmu)
+                       goto next;
                 perf_event_task_ctx(&cpuctx->ctx, task_event);
  
                 ctx = task_event->task_ctx;
@@ -3959,6 +3968,8 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
         rcu_read_lock();
         list_for_each_entry_rcu(pmu, &pmus, entry) {
                 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+               if (cpuctx->active_pmu != pmu)
+                       goto next;
                 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
  
                 ctxn = pmu->task_ctx_nr;
@@ -4144,6 +4155,8 @@ got_name:
         rcu_read_lock();
         list_for_each_entry_rcu(pmu, &pmus, entry) {
                 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+               if (cpuctx->active_pmu != pmu)
+                       goto next;
                 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
                                         vma->vm_flags & VM_EXEC);
  
@@ -4401,7 +4414,7 @@ static int perf_exclude_event(struct perf_event *event,
                               struct pt_regs *regs)
  {
         if (event->hw.state & PERF_HES_STOPPED)
-               return 0;
+               return 1;
  
         if (regs) {
                 if (event->attr.exclude_user && user_mode(regs))
@@ -4713,7 +4726,7 @@ static int perf_swevent_init(struct perf_event *event)
                 break;
         }
  
-       if (event_id > PERF_COUNT_SW_MAX)
+       if (event_id >= PERF_COUNT_SW_MAX)
                 return -ENOENT;
  
         if (!event->parent) {
@@ -4757,6 +4770,8 @@ static int perf_tp_event_match(struct perf_event *event,
                                 struct perf_sample_data *data,
                                 struct pt_regs *regs)
  {
+       if (event->hw.state & PERF_HES_STOPPED)
+               return 0;
         /*
          * All tracepoints are from kernel-space.
          */
@@ -5145,20 +5160,36 @@ static void *find_pmu_context(int ctxn)
         return NULL;
  }
  
-static void free_pmu_context(void * __percpu cpu_context)
+static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
  {
-       struct pmu *pmu;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct perf_cpu_context *cpuctx;
+
+               cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+
+               if (cpuctx->active_pmu == old_pmu)
+                       cpuctx->active_pmu = pmu;
+       }
+}
+
+static void free_pmu_context(struct pmu *pmu)
+{
+       struct pmu *i;
  
         mutex_lock(&pmus_lock);
         /*
          * Like a real lame refcount.
          */
-       list_for_each_entry(pmu, &pmus, entry) {
-               if (pmu->pmu_cpu_context == cpu_context)
+       list_for_each_entry(i, &pmus, entry) {
+               if (i->pmu_cpu_context == pmu->pmu_cpu_context) {
+                       update_pmu_context(i, pmu);
                         goto out;
+               }
         }
  
-       free_percpu(cpu_context);
+       free_percpu(pmu->pmu_cpu_context);
  out:
         mutex_unlock(&pmus_lock);
  }
@@ -5190,6 +5221,7 @@ int perf_pmu_register(struct pmu *pmu)
                 cpuctx->ctx.pmu = pmu;
                 cpuctx->jiffies_interval = 1;
                 INIT_LIST_HEAD(&cpuctx->rotation_list);
+               cpuctx->active_pmu = pmu;
         }
  
  got_cpu_context:
@@ -5241,7 +5273,7 @@ void perf_pmu_unregister(struct pmu *pmu)
         synchronize_rcu();
  
         free_percpu(pmu->pmu_disable_count);
-       free_pmu_context(pmu->pmu_cpu_context);
+       free_pmu_context(pmu);
  }
  
  struct pmu *perf_init_event(struct perf_event *event)
@@ -5282,6 +5314,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
         struct hw_perf_event *hwc;
         long err;
  
+       if ((unsigned)cpu >= nr_cpu_ids) {
+               if (!task || cpu != -1)
+                       return ERR_PTR(-EINVAL);
+       }
+
         event = kzalloc(sizeof(*event), GFP_KERNEL);
         if (!event)
                 return ERR_PTR(-ENOMEM);
@@ -5330,7 +5367,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
  
         if (!overflow_handler && parent_event)
                 overflow_handler = parent_event->overflow_handler;
-       
+
         event->overflow_handler = overflow_handler;
  
         if (attr->disabled)