From: Linus Torvalds
Date: Tue, 28 Feb 2017 05:41:08 +0000 (-0800)
Subject: Merge branch 'for-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
X-Git-Tag: v4.11-rc1~61
X-Git-Url: https://git.kernelconcepts.de/?a=commitdiff_plain;h=f7878dc3a9d3d900c86a66d9742f7e06681b06cd;p=karo-tx-linux.git

Merge branch 'for-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:
 "Several noteworthy changes.

  - Parav's rdma controller is finally merged. It is very
    straightforward and can limit the absolute numbers of common rdma
    constructs used by different cgroups.

  - kernel/cgroup.c got too chubby and disorganized. Created the
    kernel/cgroup/ subdirectory, moved all cgroup related files under
    kernel/ there and reorganized the core code. This hurts for
    backporting patches but was long overdue.

  - cgroup v2 process listing reimplemented so that it no longer
    depends on allocating a buffer large enough to cache the entire
    result to sort and uniq the output. v2 has always mangled the sort
    order to ensure that users don't depend on the sorted output, so
    this shouldn't surprise anybody. This makes the pid listing
    functions use the same iterators that are used internally, which
    have to have the same iterating capabilities anyway.

  - perf cgroup filtering now works automatically on cgroup v2. This
    patch was posted a long time ago but somehow fell through the
    cracks.

  - misc fixes and documentation updates"

* 'for-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (27 commits)
  kernfs: fix locking around kernfs_ops->release() callback
  cgroup: drop the matching uid requirement on migration for cgroup v2
  cgroup, perf_event: make perf_event controller work on cgroup2 hierarchy
  cgroup: misc cleanups
  cgroup: call subsys->*attach() only for subsystems which are actually affected by migration
  cgroup: track migration context in cgroup_mgctx
  cgroup: cosmetic update to cgroup_taskset_add()
  rdmacg: Fixed uninitialized current resource usage
  cgroup: Add missing cgroup-v2 PID controller documentation.
  rdmacg: Added documentation for rdmacg
  IB/core: added support to use rdma cgroup controller
  rdmacg: Added rdma cgroup controller
  cgroup: fix a comment typo
  cgroup: fix RCU related sparse warnings
  cgroup: move namespace code to kernel/cgroup/namespace.c
  cgroup: rename functions for consistency
  cgroup: move v1 mount functions to kernel/cgroup/cgroup-v1.c
  cgroup: separate out cgroup1_kf_syscall_ops
  cgroup: refactor mount path and clearly distinguish v1 and v2 paths
  cgroup: move cgroup v1 specific code to kernel/cgroup/cgroup-v1.c
  ...
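For readers new to the rdma controller mentioned above: it exposes per-device
limits through an rdma.max interface file in each cgroup directory. The
snippet below is a minimal userspace sketch of programming such a limit,
assuming a cgroup mounted at /sys/fs/cgroup with the rdma controller enabled
and a hypothetical child group "mygroup"; the device name mlx4_0 and the
numbers are placeholders, and the rdmacg documentation added in this series
is the authoritative reference for the key syntax.

/*
 * Hypothetical sketch, not part of this merge: cap the rdma constructs
 * a cgroup may create by writing to its rdma.max file.  Mount point,
 * group name, device name and limits are illustrative only.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        FILE *f = fopen("/sys/fs/cgroup/mygroup/rdma.max", "w");

        if (!f) {
                perror("open rdma.max");
                return EXIT_FAILURE;
        }

        /* at most 2 HCA handles and 2000 HCA objects on device mlx4_0 */
        fprintf(f, "mlx4_0 hca_handle=2 hca_object=2000\n");

        if (fclose(f) != 0) {
                perror("write rdma.max");
                return EXIT_FAILURE;
        }
        return EXIT_SUCCESS;
}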
---
f7878dc3a9d3d900c86a66d9742f7e06681b06cd
diff --cc kernel/cgroup/cgroup.c
index 53bbca7c4859,fe374f803b20..e8f87bf9840c
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@@ -6349,310 -5052,15 +5052,16 @@@ void cgroup_sk_free(struct sock_cgroup_
  #endif  /* CONFIG_SOCK_CGROUP_DATA */
  
- /* cgroup namespaces */
- 
- static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns)
- {
-         return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES);
- }
- 
- static void dec_cgroup_namespaces(struct ucounts *ucounts)
- {
-         dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES);
- }
- 
- static struct cgroup_namespace *alloc_cgroup_ns(void)
- {
-         struct cgroup_namespace *new_ns;
-         int ret;
- 
-         new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL);
-         if (!new_ns)
-                 return ERR_PTR(-ENOMEM);
-         ret = ns_alloc_inum(&new_ns->ns);
-         if (ret) {
-                 kfree(new_ns);
-                 return ERR_PTR(ret);
-         }
-         atomic_set(&new_ns->count, 1);
-         new_ns->ns.ops = &cgroupns_operations;
-         return new_ns;
- }
- 
- void free_cgroup_ns(struct cgroup_namespace *ns)
- {
-         put_css_set(ns->root_cset);
-         dec_cgroup_namespaces(ns->ucounts);
-         put_user_ns(ns->user_ns);
-         ns_free_inum(&ns->ns);
-         kfree(ns);
- }
- EXPORT_SYMBOL(free_cgroup_ns);
- 
- struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
-                                         struct user_namespace *user_ns,
-                                         struct cgroup_namespace *old_ns)
- {
-         struct cgroup_namespace *new_ns;
-         struct ucounts *ucounts;
-         struct css_set *cset;
- 
-         BUG_ON(!old_ns);
- 
-         if (!(flags & CLONE_NEWCGROUP)) {
-                 get_cgroup_ns(old_ns);
-                 return old_ns;
-         }
- 
-         /* Allow only sysadmin to create cgroup namespace. */
-         if (!ns_capable(user_ns, CAP_SYS_ADMIN))
-                 return ERR_PTR(-EPERM);
- 
-         ucounts = inc_cgroup_namespaces(user_ns);
-         if (!ucounts)
-                 return ERR_PTR(-ENOSPC);
- 
-         /* It is not safe to take cgroup_mutex here */
-         spin_lock_irq(&css_set_lock);
-         cset = task_css_set(current);
-         get_css_set(cset);
-         spin_unlock_irq(&css_set_lock);
- 
-         new_ns = alloc_cgroup_ns();
-         if (IS_ERR(new_ns)) {
-                 put_css_set(cset);
-                 dec_cgroup_namespaces(ucounts);
-                 return new_ns;
-         }
- 
-         new_ns->user_ns = get_user_ns(user_ns);
-         new_ns->ucounts = ucounts;
-         new_ns->root_cset = cset;
- 
-         return new_ns;
- }
- 
- static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
- {
-         return container_of(ns, struct cgroup_namespace, ns);
- }
- 
- static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns)
- {
-         struct cgroup_namespace *cgroup_ns = to_cg_ns(ns);
- 
-         if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) ||
-             !ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN))
-                 return -EPERM;
- 
-         /* Don't need to do anything if we are attaching to our own cgroupns. */
-         if (cgroup_ns == nsproxy->cgroup_ns)
-                 return 0;
- 
-         get_cgroup_ns(cgroup_ns);
-         put_cgroup_ns(nsproxy->cgroup_ns);
-         nsproxy->cgroup_ns = cgroup_ns;
- 
-         return 0;
- }
- 
- static struct ns_common *cgroupns_get(struct task_struct *task)
- {
-         struct cgroup_namespace *ns = NULL;
-         struct nsproxy *nsproxy;
- 
-         task_lock(task);
-         nsproxy = task->nsproxy;
-         if (nsproxy) {
-                 ns = nsproxy->cgroup_ns;
-                 get_cgroup_ns(ns);
-         }
-         task_unlock(task);
- 
-         return ns ? &ns->ns : NULL;
- }
- 
- static void cgroupns_put(struct ns_common *ns)
- {
-         put_cgroup_ns(to_cg_ns(ns));
- }
- 
- static struct user_namespace *cgroupns_owner(struct ns_common *ns)
- {
-         return to_cg_ns(ns)->user_ns;
- }
- 
- const struct proc_ns_operations cgroupns_operations = {
-         .name = "cgroup",
-         .type = CLONE_NEWCGROUP,
-         .get = cgroupns_get,
-         .put = cgroupns_put,
-         .install = cgroupns_install,
-         .owner = cgroupns_owner,
- };
- 
- static __init int cgroup_namespaces_init(void)
- {
-         return 0;
- }
- subsys_initcall(cgroup_namespaces_init);
- 
  #ifdef CONFIG_CGROUP_BPF
 -void cgroup_bpf_update(struct cgroup *cgrp,
 -                       struct bpf_prog *prog,
 -                       enum bpf_attach_type type)
 +int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
 +                      enum bpf_attach_type type, bool overridable)
  {
          struct cgroup *parent = cgroup_parent(cgrp);
 +        int ret;
  
          mutex_lock(&cgroup_mutex);
 -        __cgroup_bpf_update(cgrp, parent, prog, type);
 +        ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable);
          mutex_unlock(&cgroup_mutex);
 +        return ret;
  }
  #endif /* CONFIG_CGROUP_BPF */
- 
- #ifdef CONFIG_CGROUP_DEBUG
- static struct cgroup_subsys_state *
- debug_css_alloc(struct cgroup_subsys_state *parent_css)
- {
-         struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
- 
-         if (!css)
-                 return ERR_PTR(-ENOMEM);
- 
-         return css;
- }
- 
- static void debug_css_free(struct cgroup_subsys_state *css)
- {
-         kfree(css);
- }
- 
- static u64 debug_taskcount_read(struct cgroup_subsys_state *css,
-                                 struct cftype *cft)
- {
-         return cgroup_task_count(css->cgroup);
- }
- 
- static u64 current_css_set_read(struct cgroup_subsys_state *css,
-                                 struct cftype *cft)
- {
-         return (u64)(unsigned long)current->cgroups;
- }
- 
- static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
-                                          struct cftype *cft)
- {
-         u64 count;
- 
-         rcu_read_lock();
-         count = atomic_read(&task_css_set(current)->refcount);
-         rcu_read_unlock();
-         return count;
- }
- 
- static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
- {
-         struct cgrp_cset_link *link;
-         struct css_set *cset;
-         char *name_buf;
- 
-         name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
-         if (!name_buf)
-                 return -ENOMEM;
- 
-         spin_lock_irq(&css_set_lock);
-         rcu_read_lock();
-         cset = rcu_dereference(current->cgroups);
-         list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
-                 struct cgroup *c = link->cgrp;
- 
-                 cgroup_name(c, name_buf, NAME_MAX + 1);
-                 seq_printf(seq, "Root %d group %s\n",
-                            c->root->hierarchy_id, name_buf);
-         }
-         rcu_read_unlock();
-         spin_unlock_irq(&css_set_lock);
-         kfree(name_buf);
-         return 0;
- }
- 
- #define MAX_TASKS_SHOWN_PER_CSS 25
- static int cgroup_css_links_read(struct seq_file *seq, void *v)
- {
-         struct cgroup_subsys_state *css = seq_css(seq);
-         struct cgrp_cset_link *link;
- 
-         spin_lock_irq(&css_set_lock);
-         list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
-                 struct css_set *cset = link->cset;
-                 struct task_struct *task;
-                 int count = 0;
- 
-                 seq_printf(seq, "css_set %p\n", cset);
- 
-                 list_for_each_entry(task, &cset->tasks, cg_list) {
-                         if (count++ > MAX_TASKS_SHOWN_PER_CSS)
-                                 goto overflow;
-                         seq_printf(seq, "  task %d\n", task_pid_vnr(task));
-                 }
- 
-                 list_for_each_entry(task, &cset->mg_tasks, cg_list) {
-                         if (count++ > MAX_TASKS_SHOWN_PER_CSS)
-                                 goto overflow;
-                         seq_printf(seq, "  task %d\n", task_pid_vnr(task));
-                 }
-                 continue;
-         overflow:
-                 seq_puts(seq, "  ...\n");
-         }
-         spin_unlock_irq(&css_set_lock);
-         return 0;
- }
- 
- static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
- {
-         return (!cgroup_is_populated(css->cgroup) &&
-                 !css_has_online_children(&css->cgroup->self));
- }
- 
- static struct cftype debug_files[] = {
-         {
-                 .name = "taskcount",
-                 .read_u64 = debug_taskcount_read,
-         },
- 
-         {
-                 .name = "current_css_set",
-                 .read_u64 = current_css_set_read,
-         },
- 
-         {
-                 .name = "current_css_set_refcount",
-                 .read_u64 = current_css_set_refcount_read,
-         },
- 
-         {
-                 .name = "current_css_set_cg_links",
-                 .seq_show = current_css_set_cg_links_read,
-         },
- 
-         {
-                 .name = "cgroup_css_links",
-                 .seq_show = cgroup_css_links_read,
-         },
- 
-         {
-                 .name = "releasable",
-                 .read_u64 = releasable_read,
-         },
- 
-         { }     /* terminate */
- };
- 
- struct cgroup_subsys debug_cgrp_subsys = {
-         .css_alloc = debug_css_alloc,
-         .css_free = debug_css_free,
-         .legacy_cftypes = debug_files,
- };
- #endif /* CONFIG_CGROUP_DEBUG */
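The conflict resolved in the hunk above is the cgroup_bpf_update() signature
change: it now takes an overridable flag and returns an error code instead of
void. The sketch below only illustrates how a caller is expected to consume
the new contract; the wrapper name and its arguments are invented for this
example, and the real attach path in the bpf syscall code differs in detail.

/*
 * Illustration only: example_attach_to_cgroup() is a hypothetical
 * helper showing that the result of cgroup_bpf_update() must now be
 * checked and propagated rather than ignored.
 */
static int example_attach_to_cgroup(struct cgroup *cgrp, struct bpf_prog *prog,
                                    enum bpf_attach_type type,
                                    bool allow_override)
{
        int ret;

        /*
         * cgroup_bpf_update() can now refuse the update; the exact
         * conditions are decided inside __cgroup_bpf_update().
         */
        ret = cgroup_bpf_update(cgrp, prog, type, allow_override);
        if (ret)
                return ret;

        return 0;
}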