Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 440eefc67397e48f15b58bc8cf31712bec91286b..689237a0c5e853e1dd079d40cb9151bbd29a6ebf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -39,6 +39,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/mm_types.h>
 #include <linux/cgroup.h>
+#include <linux/module.h>
 
 #include "internal.h"
 
@@ -1677,6 +1678,8 @@ event_sched_in(struct perf_event *event,
        u64 tstamp = perf_event_time(event);
        int ret = 0;
 
+       lockdep_assert_held(&ctx->lock);
+
        if (event->state <= PERF_EVENT_STATE_OFF)
                return 0;
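
The new assertion makes event_sched_in()'s locking contract explicit: ctx->lock must already be held by every caller. A minimal sketch of the idiom, with a hypothetical struct foo standing in for the perf context:

#include <linux/lockdep.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct foo {
        raw_spinlock_t lock;
        u64 count;
};

/*
 * Must be called with f->lock held. lockdep_assert_held() compiles
 * away without CONFIG_LOCKDEP; with it enabled, a caller that does
 * not hold the lock triggers a one-time lockdep warning.
 */
static void foo_advance(struct foo *f)
{
        lockdep_assert_held(&f->lock);
        f->count++;
}
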
 
@@ -3244,9 +3247,13 @@ static void __free_event(struct perf_event *event)
        if (event->ctx)
                put_ctx(event->ctx);
 
+       if (event->pmu)
+               module_put(event->pmu->module);
+
        call_rcu(&event->rcu_head, free_event_rcu);
 }
-static void free_event(struct perf_event *event)
+
+static void _free_event(struct perf_event *event)
 {
        irq_work_sync(&event->pending);
 
@@ -3267,42 +3274,31 @@ static void free_event(struct perf_event *event)
        if (is_cgroup_event(event))
                perf_detach_cgroup(event);
 
-
        __free_event(event);
 }
 
-int perf_event_release_kernel(struct perf_event *event)
+/*
+ * Used to free events which have a known refcount of 1, such as events in
+ * error paths that aren't exposed yet, and inherited events.
+ */
+static void free_event(struct perf_event *event)
 {
-       struct perf_event_context *ctx = event->ctx;
-
-       WARN_ON_ONCE(ctx->parent_ctx);
-       /*
-        * There are two ways this annotation is useful:
-        *
-        *  1) there is a lock recursion from perf_event_exit_task
-        *     see the comment there.
-        *
-        *  2) there is a lock-inversion with mmap_sem through
-        *     perf_event_read_group(), which takes faults while
-        *     holding ctx->mutex, however this is called after
-        *     the last filedesc died, so there is no possibility
-        *     to trigger the AB-BA case.
-        */
-       mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
-       perf_remove_from_context(event, true);
-       mutex_unlock(&ctx->mutex);
-
-       free_event(event);
+       if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
+                               "unexpected event refcount: %ld; ptr=%p\n",
+                               atomic_long_read(&event->refcount), event)) {
+               /* leak to avoid use-after-free */
+               return;
+       }
 
-       return 0;
+       _free_event(event);
 }
-EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 
 /*
  * Called when the last reference to the file is gone.
  */
 static void put_event(struct perf_event *event)
 {
+       struct perf_event_context *ctx = event->ctx;
        struct task_struct *owner;
 
        if (!atomic_long_dec_and_test(&event->refcount))
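
free_event() is now reserved for events whose refcount is known to be exactly 1: it claims the final reference with a compare-and-swap, and anything else warns and deliberately leaks the event, since a leak is diagnosable while a use-after-free is not. A standalone sketch of that claim-or-leak idiom; struct obj and its teardown are hypothetical stand-ins:

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/slab.h>

struct obj {
        atomic_long_t refcount;
};

/*
 * Free an object whose refcount is known to be exactly 1. If someone
 * still holds a reference, warn and leak it: better a leak than a
 * use-after-free.
 */
static void obj_free_known_single_ref(struct obj *o)
{
        if (WARN(atomic_long_cmpxchg(&o->refcount, 1, 0) != 1,
                 "unexpected refcount: %ld\n",
                 atomic_long_read(&o->refcount)))
                return;

        kfree(o);                       /* hypothetical final teardown */
}
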
@@ -3341,9 +3337,33 @@ static void put_event(struct perf_event *event)
                put_task_struct(owner);
        }
 
-       perf_event_release_kernel(event);
+       WARN_ON_ONCE(ctx->parent_ctx);
+       /*
+        * There are two ways this annotation is useful:
+        *
+        *  1) there is a lock recursion from perf_event_exit_task;
+        *     see the comment there.
+        *
+        *  2) there is a lock-inversion with mmap_sem through
+        *     perf_event_read_group(), which takes faults while
+        *     holding ctx->mutex, however this is called after
+        *     the last filedesc died, so there is no possibility
+        *     to trigger the AB-BA case.
+        */
+       mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
+       perf_remove_from_context(event, true);
+       mutex_unlock(&ctx->mutex);
+
+       _free_event(event);
 }
 
+int perf_event_release_kernel(struct perf_event *event)
+{
+       put_event(event);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+
 static int perf_release(struct inode *inode, struct file *file)
 {
        put_event(file->private_data);
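
With the rework, both teardown entry points converge on put_event(): perf_release() runs when the last file descriptor goes away, and perf_event_release_kernel() becomes a thin exported wrapper, so in-kernel users follow the same refcounting as userspace. A hedged usage sketch for an in-kernel counter; this caller-side code is illustrative, not part of the patch:

#include <linux/err.h>
#include <linux/perf_event.h>

static struct perf_event *cycles_event;

static int start_cycles_counter(void)
{
        struct perf_event_attr attr = {
                .type   = PERF_TYPE_HARDWARE,
                .config = PERF_COUNT_HW_CPU_CYCLES,
                .size   = sizeof(struct perf_event_attr),
        };

        /* Count cycles on CPU 0; no overflow handler. */
        cycles_event = perf_event_create_kernel_counter(&attr, 0, NULL,
                                                        NULL, NULL);
        return IS_ERR(cycles_event) ? PTR_ERR(cycles_event) : 0;
}

static void stop_cycles_counter(void)
{
        /* Drops the final reference via put_event(). */
        perf_event_release_kernel(cycles_event);
}
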
@@ -6578,6 +6598,7 @@ free_pdc:
        free_percpu(pmu->pmu_disable_count);
        goto unlock;
 }
+EXPORT_SYMBOL_GPL(perf_pmu_register);
 
 void perf_pmu_unregister(struct pmu *pmu)
 {
@@ -6599,6 +6620,7 @@ void perf_pmu_unregister(struct pmu *pmu)
        put_device(pmu->dev);
        free_pmu_context(pmu);
 }
+EXPORT_SYMBOL_GPL(perf_pmu_unregister);
 
 struct pmu *perf_init_event(struct perf_event *event)
 {
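
Exporting perf_pmu_register() and perf_pmu_unregister(), together with the module reference counting elsewhere in this diff, is what lets PMU drivers be built as loadable modules. A hypothetical minimal skeleton; a real driver must also supply .event_init, .add, .del, .start, .stop and .read callbacks:

#include <linux/module.h>
#include <linux/perf_event.h>

static struct pmu demo_pmu = {
        .module         = THIS_MODULE,  /* pinned via try_module_get() */
        .task_ctx_nr    = perf_invalid_context,
        /* .event_init, .add, .del, .start, .stop, .read ... */
};

static int __init demo_pmu_init(void)
{
        /* type -1: let the core allocate a dynamic PMU type id */
        return perf_pmu_register(&demo_pmu, "demo_pmu", -1);
}

static void __exit demo_pmu_exit(void)
{
        perf_pmu_unregister(&demo_pmu);
}

module_init(demo_pmu_init);
module_exit(demo_pmu_exit);
MODULE_LICENSE("GPL");
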
@@ -6612,6 +6634,10 @@ struct pmu *perf_init_event(struct perf_event *event)
        pmu = idr_find(&pmu_idr, event->attr.type);
        rcu_read_unlock();
        if (pmu) {
+               if (!try_module_get(pmu->module)) {
+                       pmu = ERR_PTR(-ENODEV);
+                       goto unlock;
+               }
                event->pmu = pmu;
                ret = pmu->event_init(event);
                if (ret)
@@ -6620,6 +6646,10 @@ struct pmu *perf_init_event(struct perf_event *event)
        }
 
        list_for_each_entry_rcu(pmu, &pmus, entry) {
+               if (!try_module_get(pmu->module)) {
+                       pmu = ERR_PTR(-ENODEV);
+                       goto unlock;
+               }
                event->pmu = pmu;
                ret = pmu->event_init(event);
                if (!ret)
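
Both lookup paths now pin the PMU's owning module before calling event_init(): an event that successfully binds to a PMU holds one module reference for its whole lifetime. That reference is dropped in perf_event_alloc()'s err_pmu unwind (next hunk) or, for events that lived, in __free_event() earlier in this diff. Built-in PMUs have a NULL module and try_module_get(NULL) succeeds, so they are unaffected. A sketch of the ownership rule, with hypothetical helper names:

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/perf_event.h>

static int event_bind_pmu(struct perf_event *event, struct pmu *pmu)
{
        if (!try_module_get(pmu->module))       /* driver unloading? */
                return -ENODEV;

        event->pmu = pmu;
        return 0;
}

static void event_unbind_pmu(struct perf_event *event)
{
        module_put(event->pmu->module);         /* balances the get */
}
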
@@ -6798,6 +6828,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 err_pmu:
        if (event->destroy)
                event->destroy(event);
+       module_put(pmu->module);
 err_ns:
        if (event->ns)
                put_pid_ns(event->ns);
@@ -7067,20 +7098,26 @@ SYSCALL_DEFINE5(perf_event_open,
                }
        }
 
+       if (task && group_leader &&
+           group_leader->attr.inherit != attr.inherit) {
+               err = -EINVAL;
+               goto err_task;
+       }
+
        get_online_cpus();
 
        event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
                                 NULL, NULL);
        if (IS_ERR(event)) {
                err = PTR_ERR(event);
-               goto err_task;
+               goto err_cpus;
        }
 
        if (flags & PERF_FLAG_PID_CGROUP) {
                err = perf_cgroup_connect(pid, event, &attr, group_leader);
                if (err) {
                        __free_event(event);
-                       goto err_task;
+                       goto err_cpus;
                }
        }
 
@@ -7242,8 +7279,9 @@ err_context:
        put_ctx(ctx);
 err_alloc:
        free_event(event);
-err_task:
+err_cpus:
        put_online_cpus();
+err_task:
        if (task)
                put_task_struct(task);
 err_group_fd:
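
The label shuffle fixes the unwind order: get_online_cpus() is taken after the task reference, so the error path must call put_online_cpus() before put_task_struct(). A generic sketch of the reverse-order unwind pattern, with made-up acquire/release names:

static int acquire_a(void) { return 0; }
static void release_a(void) { }
static int acquire_b(void) { return 0; }        /* e.g. get_online_cpus() */
static void release_b(void) { }
static int do_work(void) { return 0; }

static int setup(void)
{
        int err;

        err = acquire_a();
        if (err)
                return err;

        err = acquire_b();
        if (err)
                goto err_a;

        err = do_work();
        if (err)
                goto err_b;

        return 0;

err_b:
        release_b();                    /* reverse order: B before A */
err_a:
        release_a();
        return err;
}
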
@@ -7379,7 +7417,7 @@ __perf_event_exit_task(struct perf_event *child_event,
                         struct perf_event_context *child_ctx,
                         struct task_struct *child)
 {
-       perf_remove_from_context(child_event, !!child_event->parent);
+       perf_remove_from_context(child_event, true);
 
        /*
         * It can happen that the parent exits first, and has events
@@ -7394,7 +7432,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-       struct perf_event *child_event, *tmp;
+       struct perf_event *child_event;
        struct perf_event_context *child_ctx;
        unsigned long flags;
 
@@ -7448,24 +7486,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
         */
        mutex_lock(&child_ctx->mutex);
 
-again:
-       list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
-                                group_entry)
-               __perf_event_exit_task(child_event, child_ctx, child);
-
-       list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
-                                group_entry)
+       list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
                __perf_event_exit_task(child_event, child_ctx, child);
 
-       /*
-        * If the last event was a group event, it will have appended all
-        * its siblings to the list, but we obtained 'tmp' before that which
-        * will still point to the list head terminating the iteration.
-        */
-       if (!list_empty(&child_ctx->pinned_groups) ||
-           !list_empty(&child_ctx->flexible_groups))
-               goto again;
-
        mutex_unlock(&child_ctx->mutex);
 
        put_ctx(child_ctx);
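
Since __perf_event_exit_task() now always detaches whole groups (its detach_group argument is unconditionally true in the hunk above), a single pass over the flat ctx->event_list visits every child event, and the restart loop over the pinned and flexible group lists disappears. The hazard that loop worked around is generic: a _safe list iterator only protects against deletion of the cursor itself, not of other nodes the loop body may unlink. One common alternative shape, with a hypothetical struct item and handler:

#include <linux/list.h>

struct item {
        struct list_head node;
};

/* A real handler might unlink several nodes, e.g. a whole group. */
static void destroy_one(struct item *it)
{
        list_del(&it->node);
}

/*
 * Drain a list whose per-node handler can remove nodes other than the
 * cursor: restart from the head each time instead of trusting a
 * list_for_each_entry_safe() lookahead that may already be gone.
 */
static void drain(struct list_head *head)
{
        while (!list_empty(head))
                destroy_one(list_first_entry(head, struct item, node));
}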