Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm...
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 17 Dec 2012 23:44:47 +0000 (15:44 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 17 Dec 2012 23:44:47 +0000 (15:44 -0800)
Pull user namespace changes from Eric Biederman:
 "While small this set of changes is very significant with respect to
  containers in general and user namespaces in particular.  The user
  space interface is now complete.

  This set of changes adds support for unprivileged users to create user
  namespaces and as a user namespace root to create other namespaces.
  The tyranny of supporting suid root preventing unprivileged users from
  using cool new kernel features is broken.

  This set of changes completes the work on setns, adding support for
  the pid, user, mount namespaces.

  This set of changes includes a bunch of basic pid namespace
  cleanups/simplifications.  Of particular significance is the rework of
  the pid namespace cleanup so it no longer requires sending out
  tendrils into all kinds of unexpected cleanup paths for operation.  At
  least one case of broken error handling is fixed by this cleanup.

  The files under /proc/<pid>/ns/ have been converted from regular files
  to magic symlinks which prevents incorrect caching by the VFS,
  ensuring the files always refer to the namespace the process is
  currently using and ensuring that the ptrace_may_access permission
  checks are always applied.

  The files under /proc/<pid>/ns/ have been given stable inode numbers
  so it is now possible to see if different processes share the same
  namespaces.

  Coming through David Miller's net tree are changes that relax many of
  the permission checks in the networking stack, allowing the user
  namespace root to usefully use the networking stack.  Similar changes
  for the mount namespace and the pid namespace are coming through my
  tree.

  Two small changes to add user namespace support were committed here and
  in David Miller's -net tree so that I could complete the work on the
  /proc/<pid>/ns/ files in this tree.

  Work remains to make it safe to build user namespaces together with
  9p, afs, ceph, cifs, coda, gfs2, ncpfs, nfs, nfsd, ocfs2, and xfs, so
  the Kconfig guard remains in place, preventing user namespaces from
  being built when any of those filesystems are enabled.

  Future design work remains to allow root users outside of the initial
  user namespace to mount more than just /proc and /sys."
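
A minimal sketch of the new user-space interface described above (illustrative
code, not taken from this tree; error handling and the single-entry uid/gid
mappings are simplified): an unprivileged process creates a user namespace,
maps its own uid/gid to 0, and then, as user-namespace root, unshares a mount
namespace.

/* Hedged sketch: unprivileged creation of a user namespace, mapping the
 * caller's uid/gid to 0 inside it.  The one-line mappings are illustrative;
 * much newer kernels also require writing /proc/self/setgroups before
 * gid_map, but that file postdates this merge.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void write_map(const char *path, const char *map)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, map, strlen(map)) != (ssize_t)strlen(map))
		perror(path);
	if (fd >= 0)
		close(fd);
}

int main(void)
{
	char map[64];
	unsigned uid = getuid(), gid = getgid();

	if (unshare(CLONE_NEWUSER) < 0) {	/* no privilege required now */
		perror("unshare(CLONE_NEWUSER)");
		return 1;
	}
	/* Map our outside uid/gid to 0, making us root in the new userns. */
	snprintf(map, sizeof(map), "0 %u 1\n", uid);
	write_map("/proc/self/uid_map", map);
	snprintf(map, sizeof(map), "0 %u 1\n", gid);
	write_map("/proc/self/gid_map", map);

	/* As user-namespace root we may now create other namespaces too. */
	if (unshare(CLONE_NEWNS) < 0)
		perror("unshare(CLONE_NEWNS)");

	return 0;
}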

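The completed setns() support and the stable /proc/<pid>/ns/ inode numbers
combine naturally.  The sketch below (again illustrative, not code from the
series) compares mount namespaces by stat()ing the ns files and joins the
target's namespace only if it differs:

/* Hedged sketch: compare namespaces via the stable inode numbers of
 * /proc/<pid>/ns/mnt and join the target's mount namespace with setns().
 * Whether the join is permitted depends on capabilities in the user
 * namespace that owns the target mount namespace.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	char path[64];
	struct stat self_st, target_st;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <pid>\n", argv[0]);
		return 1;
	}
	snprintf(path, sizeof(path), "/proc/%s/ns/mnt", argv[1]);

	if (stat("/proc/self/ns/mnt", &self_st) || stat(path, &target_st)) {
		perror("stat");
		return 1;
	}
	/* Same device and inode number means the same mount namespace. */
	if (self_st.st_dev == target_st.st_dev &&
	    self_st.st_ino == target_st.st_ino) {
		printf("already in the same mount namespace\n");
		return 0;
	}

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror(path);
		return 1;
	}
	if (setns(fd, CLONE_NEWNS) < 0) {
		perror("setns");
		return 1;
	}
	close(fd);
	execlp("sh", "sh", (char *)NULL);	/* shell in the joined namespace */
	perror("execlp");
	return 1;
}
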
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (38 commits)
  proc: Usable inode numbers for the namespace file descriptors.
  proc: Fix the namespace inode permission checks.
  proc: Generalize proc inode allocation
  userns: Allow unprivileged mounts of proc and sysfs
  userns: For /proc/self/{uid,gid}_map derive the lower userns from the struct file
  procfs: Print task uids and gids in the userns that opened the proc file
  userns: Implement unshare of the user namespace
  userns: Implement proc namespace operations
  userns: Kill task_user_ns
  userns: Make create_new_namespaces take a user_ns parameter
  userns: Allow unprivileged use of setns.
  userns: Allow unprivileged users to create new namespaces
  userns: Allow setting a userns mapping to your current uid.
  userns: Allow chown and setgid preservation
  userns: Allow unprivileged users to create user namespaces.
  userns: Ignore suid and sgid on binaries if the uid or gid can not be mapped
  userns: fix return value on mntns_install() failure
  vfs: Allow unprivileged manipulation of the mount namespace.
  vfs: Only support slave subtrees across different user namespaces
  vfs: Add a user namespace reference from struct mnt_namespace
  ...

17 files changed:
arch/um/drivers/mconsole_kern.c
drivers/staging/android/binder.c
fs/exec.c
fs/proc/array.c
fs/proc/base.c
include/linux/cred.h
include/linux/fs.h
init/Kconfig
init/main.c
kernel/cgroup.c
kernel/events/core.c
kernel/exit.c
kernel/fork.c
kernel/pid.c
kernel/sched/core.c
kernel/signal.c
security/yama/yama_lsm.c

index 49e3b49e552f7f81dea63e708bbb0abf1e32a3f4,7fc71c628267faadd44e31709a7cc95ce4983a3b..4bd82ac0210f27c8ef7c755480399ad9dce2f15b
@@@ -123,7 -123,7 +123,7 @@@ void mconsole_log(struct mc_request *re
  
  void mconsole_proc(struct mc_request *req)
  {
-       struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt;
+       struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt;
        char *buf;
        int len;
        struct file *file;
@@@ -648,7 -648,7 +648,7 @@@ static void stack_proc(void *arg
        struct task_struct *from = current, *to = arg;
  
        to->thread.saved_task = from;
 -      rcu_switch(from, to);
 +      rcu_user_hooks_switch(from, to);
        switch_to(from, to, from);
  }
  
index 4a36e9ab8cf7d5ffa66723ca3fd7be255c13aa72,a97bbcd1c9ea3f9bbeead7dba687b804fdeef318..2d12e8a1f82ee06b89f8be1127e644c16ead8994
@@@ -15,8 -15,6 +15,8 @@@
   *
   */
  
 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 +
  #include <asm/cacheflush.h>
  #include <linux/fdtable.h>
  #include <linux/file.h>
  #include <linux/uaccess.h>
  #include <linux/vmalloc.h>
  #include <linux/slab.h>
+ #include <linux/pid_namespace.h>
  
  #include "binder.h"
 +#include "binder_trace.h"
  
 -static DEFINE_MUTEX(binder_lock);
 +static DEFINE_MUTEX(binder_main_lock);
  static DEFINE_MUTEX(binder_deferred_lock);
  static DEFINE_MUTEX(binder_mmap_lock);
  
@@@ -414,19 -412,6 +415,19 @@@ static long task_close_fd(struct binder
        return retval;
  }
  
 +static inline void binder_lock(const char *tag)
 +{
 +      trace_binder_lock(tag);
 +      mutex_lock(&binder_main_lock);
 +      trace_binder_locked(tag);
 +}
 +
 +static inline void binder_unlock(const char *tag)
 +{
 +      trace_binder_unlock(tag);
 +      mutex_unlock(&binder_main_lock);
 +}
 +
  static void binder_set_nice(long nice)
  {
        long min_nice;
        }
        min_nice = 20 - current->signal->rlim[RLIMIT_NICE].rlim_cur;
        binder_debug(BINDER_DEBUG_PRIORITY_CAP,
 -                   "binder: %d: nice value %ld not allowed use "
 -                   "%ld instead\n", current->pid, nice, min_nice);
 +                   "%d: nice value %ld not allowed use %ld instead\n",
 +                    current->pid, nice, min_nice);
        set_user_nice(current, min_nice);
        if (min_nice < 20)
                return;
 -      binder_user_error("binder: %d RLIMIT_NICE not set\n", current->pid);
 +      binder_user_error("%d RLIMIT_NICE not set\n", current->pid);
  }
  
  static size_t binder_buffer_size(struct binder_proc *proc,
@@@ -468,8 -453,8 +469,8 @@@ static void binder_insert_free_buffer(s
        new_buffer_size = binder_buffer_size(proc, new_buffer);
  
        binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
 -                   "binder: %d: add free buffer, size %zd, "
 -                   "at %p\n", proc->pid, new_buffer_size, new_buffer);
 +                   "%d: add free buffer, size %zd, at %p\n",
 +                    proc->pid, new_buffer_size, new_buffer);
  
        while (*p) {
                parent = *p;
@@@ -547,14 -532,12 +548,14 @@@ static int binder_update_page_range(str
        struct mm_struct *mm;
  
        binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
 -                   "binder: %d: %s pages %p-%p\n", proc->pid,
 +                   "%d: %s pages %p-%p\n", proc->pid,
                     allocate ? "allocate" : "free", start, end);
  
        if (end <= start)
                return 0;
  
 +      trace_binder_update_page_range(proc, allocate, start, end);
 +
        if (vma)
                mm = NULL;
        else
                down_write(&mm->mmap_sem);
                vma = proc->vma;
                if (vma && mm != proc->vma_vm_mm) {
 -                      pr_err("binder: %d: vma mm and task mm mismatch\n",
 +                      pr_err("%d: vma mm and task mm mismatch\n",
                                proc->pid);
                        vma = NULL;
                }
                goto free_range;
  
        if (vma == NULL) {
 -              pr_err("binder: %d: binder_alloc_buf failed to "
 -                     "map pages in userspace, no vma\n", proc->pid);
 +              pr_err("%d: binder_alloc_buf failed to map pages in userspace, no vma\n",
 +                      proc->pid);
                goto err_no_vma;
        }
  
                BUG_ON(*page);
                *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
                if (*page == NULL) {
 -                      pr_err("binder: %d: binder_alloc_buf failed "
 -                             "for page at %p\n", proc->pid, page_addr);
 +                      pr_err("%d: binder_alloc_buf failed for page at %p\n",
 +                              proc->pid, page_addr);
                        goto err_alloc_page_failed;
                }
                tmp_area.addr = page_addr;
                page_array_ptr = page;
                ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr);
                if (ret) {
 -                      pr_err("binder: %d: binder_alloc_buf failed "
 -                             "to map page at %p in kernel\n",
 +                      pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n",
                               proc->pid, page_addr);
                        goto err_map_kernel_failed;
                }
                        (uintptr_t)page_addr + proc->user_buffer_offset;
                ret = vm_insert_page(vma, user_page_addr, page[0]);
                if (ret) {
 -                      pr_err("binder: %d: binder_alloc_buf failed "
 -                             "to map page at %lx in userspace\n",
 +                      pr_err("%d: binder_alloc_buf failed to map page at %lx in userspace\n",
                               proc->pid, user_page_addr);
                        goto err_vm_insert_page_failed;
                }
@@@ -652,7 -637,7 +653,7 @@@ static struct binder_buffer *binder_all
        size_t size;
  
        if (proc->vma == NULL) {
 -              pr_err("binder: %d: binder_alloc_buf, no vma\n",
 +              pr_err("%d: binder_alloc_buf, no vma\n",
                       proc->pid);
                return NULL;
        }
                ALIGN(offsets_size, sizeof(void *));
  
        if (size < data_size || size < offsets_size) {
 -              binder_user_error("binder: %d: got transaction with invalid "
 -                      "size %zd-%zd\n", proc->pid, data_size, offsets_size);
 +              binder_user_error("%d: got transaction with invalid size %zd-%zd\n",
 +                              proc->pid, data_size, offsets_size);
                return NULL;
        }
  
        if (is_async &&
            proc->free_async_space < size + sizeof(struct binder_buffer)) {
                binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
 -                           "binder: %d: binder_alloc_buf size %zd"
 -                           "failed, no async space left\n", proc->pid, size);
 +                           "%d: binder_alloc_buf size %zd failed, no async space left\n",
 +                            proc->pid, size);
                return NULL;
        }
  
                }
        }
        if (best_fit == NULL) {
 -              pr_err("binder: %d: binder_alloc_buf size %zd failed, "
 -                     "no address space\n", proc->pid, size);
 +              pr_err("%d: binder_alloc_buf size %zd failed, no address space\n",
 +                      proc->pid, size);
                return NULL;
        }
        if (n == NULL) {
        }
  
        binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
 -                   "binder: %d: binder_alloc_buf size %zd got buff"
 -                   "er %p size %zd\n", proc->pid, size, buffer, buffer_size);
 +                   "%d: binder_alloc_buf size %zd got buffer %p size %zd\n",
 +                    proc->pid, size, buffer, buffer_size);
  
        has_page_addr =
                (void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK);
                binder_insert_free_buffer(proc, new_buffer);
        }
        binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
 -                   "binder: %d: binder_alloc_buf size %zd got "
 -                   "%p\n", proc->pid, size, buffer);
 +                   "%d: binder_alloc_buf size %zd got %p\n",
 +                    proc->pid, size, buffer);
        buffer->data_size = data_size;
        buffer->offsets_size = offsets_size;
        buffer->async_transaction = is_async;
        if (is_async) {
                proc->free_async_space -= size + sizeof(struct binder_buffer);
                binder_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
 -                           "binder: %d: binder_alloc_buf size %zd "
 -                           "async free %zd\n", proc->pid, size,
 -                           proc->free_async_space);
 +                           "%d: binder_alloc_buf size %zd async free %zd\n",
 +                            proc->pid, size, proc->free_async_space);
        }
  
        return buffer;
@@@ -769,8 -755,8 +770,8 @@@ static void binder_delete_free_buffer(s
                if (buffer_end_page(prev) == buffer_end_page(buffer))
                        free_page_end = 0;
                binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
 -                           "binder: %d: merge free, buffer %p "
 -                           "share page with %p\n", proc->pid, buffer, prev);
 +                           "%d: merge free, buffer %p share page with %p\n",
 +                            proc->pid, buffer, prev);
        }
  
        if (!list_is_last(&buffer->entry, &proc->buffers)) {
                            buffer_start_page(buffer))
                                free_page_start = 0;
                        binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
 -                                   "binder: %d: merge free, buffer"
 -                                   " %p share page with %p\n", proc->pid,
 -                                   buffer, prev);
 +                                   "%d: merge free, buffer %p share page with %p\n",
 +                                    proc->pid, buffer, prev);
                }
        }
        list_del(&buffer->entry);
        if (free_page_start || free_page_end) {
                binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
 -                           "binder: %d: merge free, buffer %p do "
 -                           "not share page%s%s with with %p or %p\n",
 +                           "%d: merge free, buffer %p do not share page%s%s with with %p or %p\n",
                             proc->pid, buffer, free_page_start ? "" : " end",
                             free_page_end ? "" : " start", prev, next);
                binder_update_page_range(proc, 0, free_page_start ?
@@@ -810,8 -798,8 +811,8 @@@ static void binder_free_buf(struct bind
                ALIGN(buffer->offsets_size, sizeof(void *));
  
        binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
 -                   "binder: %d: binder_free_buf %p size %zd buffer"
 -                   "_size %zd\n", proc->pid, buffer, size, buffer_size);
 +                   "%d: binder_free_buf %p size %zd buffer_size %zd\n",
 +                    proc->pid, buffer, size, buffer_size);
  
        BUG_ON(buffer->free);
        BUG_ON(size > buffer_size);
                proc->free_async_space += size + sizeof(struct binder_buffer);
  
                binder_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
 -                           "binder: %d: binder_free_buf size %zd "
 -                           "async free %zd\n", proc->pid, size,
 -                           proc->free_async_space);
 +                           "%d: binder_free_buf size %zd async free %zd\n",
 +                            proc->pid, size, proc->free_async_space);
        }
  
        binder_update_page_range(proc, 0,
@@@ -906,7 -895,7 +907,7 @@@ static struct binder_node *binder_new_n
        INIT_LIST_HEAD(&node->work.entry);
        INIT_LIST_HEAD(&node->async_todo);
        binder_debug(BINDER_DEBUG_INTERNAL_REFS,
 -                   "binder: %d:%d node %d u%p c%p created\n",
 +                   "%d:%d node %d u%p c%p created\n",
                     proc->pid, current->pid, node->debug_id,
                     node->ptr, node->cookie);
        return node;
@@@ -921,8 -910,8 +922,8 @@@ static int binder_inc_node(struct binde
                            node->internal_strong_refs == 0 &&
                            !(node == binder_context_mgr_node &&
                            node->has_strong_ref)) {
 -                              pr_err("binder: invalid inc strong "
 -                                      "node for %d\n", node->debug_id);
 +                              pr_err("invalid inc strong node for %d\n",
 +                                      node->debug_id);
                                return -EINVAL;
                        }
                        node->internal_strong_refs++;
                        node->local_weak_refs++;
                if (!node->has_weak_ref && list_empty(&node->work.entry)) {
                        if (target_list == NULL) {
 -                              pr_err("binder: invalid inc weak node "
 -                                      "for %d\n", node->debug_id);
 +                              pr_err("invalid inc weak node for %d\n",
 +                                      node->debug_id);
                                return -EINVAL;
                        }
                        list_add_tail(&node->work.entry, target_list);
@@@ -974,12 -963,12 +975,12 @@@ static int binder_dec_node(struct binde
                        if (node->proc) {
                                rb_erase(&node->rb_node, &node->proc->nodes);
                                binder_debug(BINDER_DEBUG_INTERNAL_REFS,
 -                                           "binder: refless node %d deleted\n",
 +                                           "refless node %d deleted\n",
                                             node->debug_id);
                        } else {
                                hlist_del(&node->dead_node);
                                binder_debug(BINDER_DEBUG_INTERNAL_REFS,
 -                                           "binder: dead node %d deleted\n",
 +                                           "dead node %d deleted\n",
                                             node->debug_id);
                        }
                        kfree(node);
@@@ -1065,13 -1054,14 +1066,13 @@@ static struct binder_ref *binder_get_re
                hlist_add_head(&new_ref->node_entry, &node->refs);
  
                binder_debug(BINDER_DEBUG_INTERNAL_REFS,
 -                           "binder: %d new ref %d desc %d for "
 -                           "node %d\n", proc->pid, new_ref->debug_id,
 -                           new_ref->desc, node->debug_id);
 +                           "%d new ref %d desc %d for node %d\n",
 +                            proc->pid, new_ref->debug_id, new_ref->desc,
 +                            node->debug_id);
        } else {
                binder_debug(BINDER_DEBUG_INTERNAL_REFS,
 -                           "binder: %d new ref %d desc %d for "
 -                           "dead node\n", proc->pid, new_ref->debug_id,
 -                            new_ref->desc);
 +                           "%d new ref %d desc %d for dead node\n",
 +                            proc->pid, new_ref->debug_id, new_ref->desc);
        }
        return new_ref;
  }
  static void binder_delete_ref(struct binder_ref *ref)
  {
        binder_debug(BINDER_DEBUG_INTERNAL_REFS,
 -                   "binder: %d delete ref %d desc %d for "
 -                   "node %d\n", ref->proc->pid, ref->debug_id,
 -                   ref->desc, ref->node->debug_id);
 +                   "%d delete ref %d desc %d for node %d\n",
 +                    ref->proc->pid, ref->debug_id, ref->desc,
 +                    ref->node->debug_id);
  
        rb_erase(&ref->rb_node_desc, &ref->proc->refs_by_desc);
        rb_erase(&ref->rb_node_node, &ref->proc->refs_by_node);
        binder_dec_node(ref->node, 0, 1);
        if (ref->death) {
                binder_debug(BINDER_DEBUG_DEAD_BINDER,
 -                           "binder: %d delete ref %d desc %d "
 -                           "has death notification\n", ref->proc->pid,
 -                           ref->debug_id, ref->desc);
 +                           "%d delete ref %d desc %d has death notification\n",
 +                            ref->proc->pid, ref->debug_id, ref->desc);
                list_del(&ref->death->work.entry);
                kfree(ref->death);
                binder_stats_deleted(BINDER_STAT_DEATH);
@@@ -1128,7 -1119,8 +1129,7 @@@ static int binder_dec_ref(struct binder
  {
        if (strong) {
                if (ref->strong == 0) {
 -                      binder_user_error("binder: %d invalid dec strong, "
 -                                        "ref %d desc %d s %d w %d\n",
 +                      binder_user_error("%d invalid dec strong, ref %d desc %d s %d w %d\n",
                                          ref->proc->pid, ref->debug_id,
                                          ref->desc, ref->strong, ref->weak);
                        return -EINVAL;
                }
        } else {
                if (ref->weak == 0) {
 -                      binder_user_error("binder: %d invalid dec weak, "
 -                                        "ref %d desc %d s %d w %d\n",
 +                      binder_user_error("%d invalid dec weak, ref %d desc %d s %d w %d\n",
                                          ref->proc->pid, ref->debug_id,
                                          ref->desc, ref->strong, ref->weak);
                        return -EINVAL;
@@@ -1187,7 -1180,8 +1188,7 @@@ static void binder_send_failed_reply(st
                        }
                        if (target_thread->return_error == BR_OK) {
                                binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
 -                                           "binder: send failed reply for "
 -                                           "transaction %d to %d:%d\n",
 +                                           "send failed reply for transaction %d to %d:%d\n",
                                              t->debug_id, target_thread->proc->pid,
                                              target_thread->pid);
  
                                target_thread->return_error = error_code;
                                wake_up_interruptible(&target_thread->wait);
                        } else {
 -                              pr_err("binder: reply failed, target "
 -                                      "thread, %d:%d, has error code %d "
 -                                      "already\n", target_thread->proc->pid,
 +                              pr_err("reply failed, target thread, %d:%d, has error code %d already\n",
 +                                      target_thread->proc->pid,
                                        target_thread->pid,
                                        target_thread->return_error);
                        }
                        struct binder_transaction *next = t->from_parent;
  
                        binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
 -                                   "binder: send failed reply "
 -                                   "for transaction %d, target dead\n",
 +                                   "send failed reply for transaction %d, target dead\n",
                                     t->debug_id);
  
                        binder_pop_transaction(target_thread, t);
                        if (next == NULL) {
                                binder_debug(BINDER_DEBUG_DEAD_BINDER,
 -                                           "binder: reply failed,"
 -                                           " no target thread at root\n");
 +                                           "reply failed, no target thread at root\n");
                                return;
                        }
                        t = next;
                        binder_debug(BINDER_DEBUG_DEAD_BINDER,
 -                                   "binder: reply failed, no target "
 -                                   "thread -- retry %d\n", t->debug_id);
 +                                   "reply failed, no target thread -- retry %d\n",
 +                                    t->debug_id);
                }
        }
  }
@@@ -1230,7 -1227,7 +1231,7 @@@ static void binder_transaction_buffer_r
        int debug_id = buffer->debug_id;
  
        binder_debug(BINDER_DEBUG_TRANSACTION,
 -                   "binder: %d buffer release %d, size %zd-%zd, failed at %p\n",
 +                   "%d buffer release %d, size %zd-%zd, failed at %p\n",
                     proc->pid, buffer->debug_id,
                     buffer->data_size, buffer->offsets_size, failed_at);
  
                if (*offp > buffer->data_size - sizeof(*fp) ||
                    buffer->data_size < sizeof(*fp) ||
                    !IS_ALIGNED(*offp, sizeof(void *))) {
 -                      pr_err("binder: transaction release %d bad"
 -                                      "offset %zd, size %zd\n", debug_id,
 -                                      *offp, buffer->data_size);
 +                      pr_err("transaction release %d bad offset %zd, size %zd\n",
 +                       debug_id, *offp, buffer->data_size);
                        continue;
                }
                fp = (struct flat_binder_object *)(buffer->data + *offp);
                case BINDER_TYPE_WEAK_BINDER: {
                        struct binder_node *node = binder_get_node(proc, fp->binder);
                        if (node == NULL) {
 -                              pr_err("binder: transaction release %d"
 -                                     " bad node %p\n", debug_id, fp->binder);
 +                              pr_err("transaction release %d bad node %p\n",
 +                                      debug_id, fp->binder);
                                break;
                        }
                        binder_debug(BINDER_DEBUG_TRANSACTION,
                case BINDER_TYPE_WEAK_HANDLE: {
                        struct binder_ref *ref = binder_get_ref(proc, fp->handle);
                        if (ref == NULL) {
 -                              pr_err("binder: transaction release %d"
 -                                     " bad handle %ld\n", debug_id,
 -                                     fp->handle);
 +                              pr_err("transaction release %d bad handle %ld\n",
 +                               debug_id, fp->handle);
                                break;
                        }
                        binder_debug(BINDER_DEBUG_TRANSACTION,
                        break;
  
                default:
 -                      pr_err("binder: transaction release %d bad "
 -                             "object type %lx\n", debug_id, fp->type);
 +                      pr_err("transaction release %d bad object type %lx\n",
 +                              debug_id, fp->type);
                        break;
                }
        }
@@@ -1322,14 -1321,17 +1323,14 @@@ static void binder_transaction(struct b
        if (reply) {
                in_reply_to = thread->transaction_stack;
                if (in_reply_to == NULL) {
 -                      binder_user_error("binder: %d:%d got reply transaction "
 -                                        "with no transaction stack\n",
 +                      binder_user_error("%d:%d got reply transaction with no transaction stack\n",
                                          proc->pid, thread->pid);
                        return_error = BR_FAILED_REPLY;
                        goto err_empty_call_stack;
                }
                binder_set_nice(in_reply_to->saved_priority);
                if (in_reply_to->to_thread != thread) {
 -                      binder_user_error("binder: %d:%d got reply transaction "
 -                              "with bad transaction stack,"
 -                              " transaction %d has target %d:%d\n",
 +                      binder_user_error("%d:%d got reply transaction with bad transaction stack, transaction %d has target %d:%d\n",
                                proc->pid, thread->pid, in_reply_to->debug_id,
                                in_reply_to->to_proc ?
                                in_reply_to->to_proc->pid : 0,
                        goto err_dead_binder;
                }
                if (target_thread->transaction_stack != in_reply_to) {
 -                      binder_user_error("binder: %d:%d got reply transaction "
 -                              "with bad target transaction stack %d, "
 -                              "expected %d\n",
 +                      binder_user_error("%d:%d got reply transaction with bad target transaction stack %d, expected %d\n",
                                proc->pid, thread->pid,
                                target_thread->transaction_stack ?
                                target_thread->transaction_stack->debug_id : 0,
                        struct binder_ref *ref;
                        ref = binder_get_ref(proc, tr->target.handle);
                        if (ref == NULL) {
 -                              binder_user_error("binder: %d:%d got "
 -                                      "transaction to invalid handle\n",
 +                              binder_user_error("%d:%d got transaction to invalid handle\n",
                                        proc->pid, thread->pid);
                                return_error = BR_FAILED_REPLY;
                                goto err_invalid_target_handle;
                        struct binder_transaction *tmp;
                        tmp = thread->transaction_stack;
                        if (tmp->to_thread != thread) {
 -                              binder_user_error("binder: %d:%d got new "
 -                                      "transaction with bad transaction stack"
 -                                      ", transaction %d has target %d:%d\n",
 +                              binder_user_error("%d:%d got new transaction with bad transaction stack, transaction %d has target %d:%d\n",
                                        proc->pid, thread->pid, tmp->debug_id,
                                        tmp->to_proc ? tmp->to_proc->pid : 0,
                                        tmp->to_thread ?
  
        if (reply)
                binder_debug(BINDER_DEBUG_TRANSACTION,
 -                           "binder: %d:%d BC_REPLY %d -> %d:%d, "
 -                           "data %p-%p size %zd-%zd\n",
 +                           "%d:%d BC_REPLY %d -> %d:%d, data %p-%p size %zd-%zd\n",
                             proc->pid, thread->pid, t->debug_id,
                             target_proc->pid, target_thread->pid,
                             tr->data.ptr.buffer, tr->data.ptr.offsets,
                             tr->data_size, tr->offsets_size);
        else
                binder_debug(BINDER_DEBUG_TRANSACTION,
 -                           "binder: %d:%d BC_TRANSACTION %d -> "
 -                           "%d - node %d, data %p-%p size %zd-%zd\n",
 +                           "%d:%d BC_TRANSACTION %d -> %d - node %d, data %p-%p size %zd-%zd\n",
                             proc->pid, thread->pid, t->debug_id,
                             target_proc->pid, target_node->debug_id,
                             tr->data.ptr.buffer, tr->data.ptr.offsets,
        t->code = tr->code;
        t->flags = tr->flags;
        t->priority = task_nice(current);
 +
 +      trace_binder_transaction(reply, t, target_node);
 +
        t->buffer = binder_alloc_buf(target_proc, tr->data_size,
                tr->offsets_size, !reply && (t->flags & TF_ONE_WAY));
        if (t->buffer == NULL) {
        t->buffer->debug_id = t->debug_id;
        t->buffer->transaction = t;
        t->buffer->target_node = target_node;
 +      trace_binder_transaction_alloc_buf(t->buffer);
        if (target_node)
                binder_inc_node(target_node, 1, 0, NULL);
  
        offp = (size_t *)(t->buffer->data + ALIGN(tr->data_size, sizeof(void *)));
  
        if (copy_from_user(t->buffer->data, tr->data.ptr.buffer, tr->data_size)) {
 -              binder_user_error("binder: %d:%d got transaction with invalid "
 -                      "data ptr\n", proc->pid, thread->pid);
 +              binder_user_error("%d:%d got transaction with invalid data ptr\n",
 +                              proc->pid, thread->pid);
                return_error = BR_FAILED_REPLY;
                goto err_copy_data_failed;
        }
        if (copy_from_user(offp, tr->data.ptr.offsets, tr->offsets_size)) {
 -              binder_user_error("binder: %d:%d got transaction with invalid "
 -                      "offsets ptr\n", proc->pid, thread->pid);
 +              binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
 +                              proc->pid, thread->pid);
                return_error = BR_FAILED_REPLY;
                goto err_copy_data_failed;
        }
        if (!IS_ALIGNED(tr->offsets_size, sizeof(size_t))) {
 -              binder_user_error("binder: %d:%d got transaction with "
 -                      "invalid offsets size, %zd\n",
 -                      proc->pid, thread->pid, tr->offsets_size);
 +              binder_user_error("%d:%d got transaction with invalid offsets size, %zd\n",
 +                              proc->pid, thread->pid, tr->offsets_size);
                return_error = BR_FAILED_REPLY;
                goto err_bad_offset;
        }
                if (*offp > t->buffer->data_size - sizeof(*fp) ||
                    t->buffer->data_size < sizeof(*fp) ||
                    !IS_ALIGNED(*offp, sizeof(void *))) {
 -                      binder_user_error("binder: %d:%d got transaction with "
 -                              "invalid offset, %zd\n",
 -                              proc->pid, thread->pid, *offp);
 +                      binder_user_error("%d:%d got transaction with invalid offset, %zd\n",
 +                                      proc->pid, thread->pid, *offp);
                        return_error = BR_FAILED_REPLY;
                        goto err_bad_offset;
                }
                                node->accept_fds = !!(fp->flags & FLAT_BINDER_FLAG_ACCEPTS_FDS);
                        }
                        if (fp->cookie != node->cookie) {
 -                              binder_user_error("binder: %d:%d sending u%p "
 -                                      "node %d, cookie mismatch %p != %p\n",
 +                              binder_user_error("%d:%d sending u%p node %d, cookie mismatch %p != %p\n",
                                        proc->pid, thread->pid,
                                        fp->binder, node->debug_id,
                                        fp->cookie, node->cookie);
                        binder_inc_ref(ref, fp->type == BINDER_TYPE_HANDLE,
                                       &thread->todo);
  
 +                      trace_binder_transaction_node_to_ref(t, node, ref);
                        binder_debug(BINDER_DEBUG_TRANSACTION,
                                     "        node %d u%p -> ref %d desc %d\n",
                                     node->debug_id, node->ptr, ref->debug_id,
                case BINDER_TYPE_WEAK_HANDLE: {
                        struct binder_ref *ref = binder_get_ref(proc, fp->handle);
                        if (ref == NULL) {
 -                              binder_user_error("binder: %d:%d got "
 -                                      "transaction with invalid "
 -                                      "handle, %ld\n", proc->pid,
 -                                      thread->pid, fp->handle);
 +                              binder_user_error("%d:%d got transaction with invalid handle, %ld\n",
 +                                              proc->pid,
 +                                              thread->pid, fp->handle);
                                return_error = BR_FAILED_REPLY;
                                goto err_binder_get_ref_failed;
                        }
                                fp->binder = ref->node->ptr;
                                fp->cookie = ref->node->cookie;
                                binder_inc_node(ref->node, fp->type == BINDER_TYPE_BINDER, 0, NULL);
 +                              trace_binder_transaction_ref_to_node(t, ref);
                                binder_debug(BINDER_DEBUG_TRANSACTION,
                                             "        ref %d desc %d -> node %d u%p\n",
                                             ref->debug_id, ref->desc, ref->node->debug_id,
                                }
                                fp->handle = new_ref->desc;
                                binder_inc_ref(new_ref, fp->type == BINDER_TYPE_HANDLE, NULL);
 +                              trace_binder_transaction_ref_to_ref(t, ref,
 +                                                                  new_ref);
                                binder_debug(BINDER_DEBUG_TRANSACTION,
                                             "        ref %d desc %d -> ref %d desc %d (node %d)\n",
                                             ref->debug_id, ref->desc, new_ref->debug_id,
  
                        if (reply) {
                                if (!(in_reply_to->flags & TF_ACCEPT_FDS)) {
 -                                      binder_user_error("binder: %d:%d got reply with fd, %ld, but target does not allow fds\n",
 +                                      binder_user_error("%d:%d got reply with fd, %ld, but target does not allow fds\n",
                                                proc->pid, thread->pid, fp->handle);
                                        return_error = BR_FAILED_REPLY;
                                        goto err_fd_not_allowed;
                                }
                        } else if (!target_node->accept_fds) {
 -                              binder_user_error("binder: %d:%d got transaction with fd, %ld, but target does not allow fds\n",
 +                              binder_user_error("%d:%d got transaction with fd, %ld, but target does not allow fds\n",
                                        proc->pid, thread->pid, fp->handle);
                                return_error = BR_FAILED_REPLY;
                                goto err_fd_not_allowed;
  
                        file = fget(fp->handle);
                        if (file == NULL) {
 -                              binder_user_error("binder: %d:%d got transaction with invalid fd, %ld\n",
 +                              binder_user_error("%d:%d got transaction with invalid fd, %ld\n",
                                        proc->pid, thread->pid, fp->handle);
                                return_error = BR_FAILED_REPLY;
                                goto err_fget_failed;
                                goto err_get_unused_fd_failed;
                        }
                        task_fd_install(target_proc, target_fd, file);
 +                      trace_binder_transaction_fd(t, fp->handle, target_fd);
                        binder_debug(BINDER_DEBUG_TRANSACTION,
                                     "        fd %ld -> %d\n", fp->handle, target_fd);
                        /* TODO: fput? */
                } break;
  
                default:
 -                      binder_user_error("binder: %d:%d got transactio"
 -                              "n with invalid object type, %lx\n",
 +                      binder_user_error("%d:%d got transaction with invalid object type, %lx\n",
                                proc->pid, thread->pid, fp->type);
                        return_error = BR_FAILED_REPLY;
                        goto err_bad_object_type;
@@@ -1663,7 -1668,6 +1664,7 @@@ err_binder_new_node_failed
  err_bad_object_type:
  err_bad_offset:
  err_copy_data_failed:
 +      trace_binder_transaction_failed_buffer_release(t->buffer);
        binder_transaction_buffer_release(target_proc, t->buffer, offp);
        t->buffer->transaction = NULL;
        binder_free_buf(target_proc, t->buffer);
@@@ -1680,7 -1684,7 +1681,7 @@@ err_dead_binder
  err_invalid_target_handle:
  err_no_context_mgr_node:
        binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
 -                   "binder: %d:%d transaction failed %d, size %zd-%zd\n",
 +                   "%d:%d transaction failed %d, size %zd-%zd\n",
                     proc->pid, thread->pid, return_error,
                     tr->data_size, tr->offsets_size);
  
@@@ -1709,7 -1713,6 +1710,7 @@@ int binder_thread_write(struct binder_p
                if (get_user(cmd, (uint32_t __user *)ptr))
                        return -EFAULT;
                ptr += sizeof(uint32_t);
 +              trace_binder_command(cmd);
                if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.bc)) {
                        binder_stats.bc[_IOC_NR(cmd)]++;
                        proc->stats.bc[_IOC_NR(cmd)]++;
                                ref = binder_get_ref_for_node(proc,
                                               binder_context_mgr_node);
                                if (ref->desc != target) {
 -                                      binder_user_error("binder: %d:"
 -                                              "%d tried to acquire "
 -                                              "reference to desc 0, "
 -                                              "got %d instead\n",
 +                                      binder_user_error("%d:%d tried to acquire reference to desc 0, got %d instead\n",
                                                proc->pid, thread->pid,
                                                ref->desc);
                                }
                        } else
                                ref = binder_get_ref(proc, target);
                        if (ref == NULL) {
 -                              binder_user_error("binder: %d:%d refcou"
 -                                      "nt change on invalid ref %d\n",
 +                              binder_user_error("%d:%d refcount change on invalid ref %d\n",
                                        proc->pid, thread->pid, target);
                                break;
                        }
                                break;
                        }
                        binder_debug(BINDER_DEBUG_USER_REFS,
 -                                   "binder: %d:%d %s ref %d desc %d s %d w %d for node %d\n",
 +                                   "%d:%d %s ref %d desc %d s %d w %d for node %d\n",
                                     proc->pid, thread->pid, debug_string, ref->debug_id,
                                     ref->desc, ref->strong, ref->weak, ref->node->debug_id);
                        break;
                        ptr += sizeof(void *);
                        node = binder_get_node(proc, node_ptr);
                        if (node == NULL) {
 -                              binder_user_error("binder: %d:%d "
 -                                      "%s u%p no match\n",
 +                              binder_user_error("%d:%d %s u%p no match\n",
                                        proc->pid, thread->pid,
                                        cmd == BC_INCREFS_DONE ?
                                        "BC_INCREFS_DONE" :
                                break;
                        }
                        if (cookie != node->cookie) {
 -                              binder_user_error("binder: %d:%d %s u%p node %d"
 -                                      " cookie mismatch %p != %p\n",
 +                              binder_user_error("%d:%d %s u%p node %d cookie mismatch %p != %p\n",
                                        proc->pid, thread->pid,
                                        cmd == BC_INCREFS_DONE ?
                                        "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE",
                        }
                        if (cmd == BC_ACQUIRE_DONE) {
                                if (node->pending_strong_ref == 0) {
 -                                      binder_user_error("binder: %d:%d "
 -                                              "BC_ACQUIRE_DONE node %d has "
 -                                              "no pending acquire request\n",
 +                                      binder_user_error("%d:%d BC_ACQUIRE_DONE node %d has no pending acquire request\n",
                                                proc->pid, thread->pid,
                                                node->debug_id);
                                        break;
                                node->pending_strong_ref = 0;
                        } else {
                                if (node->pending_weak_ref == 0) {
 -                                      binder_user_error("binder: %d:%d "
 -                                              "BC_INCREFS_DONE node %d has "
 -                                              "no pending increfs request\n",
 +                                      binder_user_error("%d:%d BC_INCREFS_DONE node %d has no pending increfs request\n",
                                                proc->pid, thread->pid,
                                                node->debug_id);
                                        break;
                        }
                        binder_dec_node(node, cmd == BC_ACQUIRE_DONE, 0);
                        binder_debug(BINDER_DEBUG_USER_REFS,
 -                                   "binder: %d:%d %s node %d ls %d lw %d\n",
 +                                   "%d:%d %s node %d ls %d lw %d\n",
                                     proc->pid, thread->pid,
                                     cmd == BC_INCREFS_DONE ? "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE",
                                     node->debug_id, node->local_strong_refs, node->local_weak_refs);
                        break;
                }
                case BC_ATTEMPT_ACQUIRE:
 -                      pr_err("binder: BC_ATTEMPT_ACQUIRE not supported\n");
 +                      pr_err("BC_ATTEMPT_ACQUIRE not supported\n");
                        return -EINVAL;
                case BC_ACQUIRE_RESULT:
 -                      pr_err("binder: BC_ACQUIRE_RESULT not supported\n");
 +                      pr_err("BC_ACQUIRE_RESULT not supported\n");
                        return -EINVAL;
  
                case BC_FREE_BUFFER: {
  
                        buffer = binder_buffer_lookup(proc, data_ptr);
                        if (buffer == NULL) {
 -                              binder_user_error("binder: %d:%d "
 -                                      "BC_FREE_BUFFER u%p no match\n",
 +                              binder_user_error("%d:%d BC_FREE_BUFFER u%p no match\n",
                                        proc->pid, thread->pid, data_ptr);
                                break;
                        }
                        if (!buffer->allow_user_free) {
 -                              binder_user_error("binder: %d:%d "
 -                                      "BC_FREE_BUFFER u%p matched "
 -                                      "unreturned buffer\n",
 +                              binder_user_error("%d:%d BC_FREE_BUFFER u%p matched unreturned buffer\n",
                                        proc->pid, thread->pid, data_ptr);
                                break;
                        }
                        binder_debug(BINDER_DEBUG_FREE_BUFFER,
 -                                   "binder: %d:%d BC_FREE_BUFFER u%p found buffer %d for %s transaction\n",
 +                                   "%d:%d BC_FREE_BUFFER u%p found buffer %d for %s transaction\n",
                                     proc->pid, thread->pid, data_ptr, buffer->debug_id,
                                     buffer->transaction ? "active" : "finished");
  
                                else
                                        list_move_tail(buffer->target_node->async_todo.next, &thread->todo);
                        }
 +                      trace_binder_transaction_buffer_release(buffer);
                        binder_transaction_buffer_release(proc, buffer, NULL);
                        binder_free_buf(proc, buffer);
                        break;
  
                case BC_REGISTER_LOOPER:
                        binder_debug(BINDER_DEBUG_THREADS,
 -                                   "binder: %d:%d BC_REGISTER_LOOPER\n",
 +                                   "%d:%d BC_REGISTER_LOOPER\n",
                                     proc->pid, thread->pid);
                        if (thread->looper & BINDER_LOOPER_STATE_ENTERED) {
                                thread->looper |= BINDER_LOOPER_STATE_INVALID;
 -                              binder_user_error("binder: %d:%d ERROR:"
 -                                      " BC_REGISTER_LOOPER called "
 -                                      "after BC_ENTER_LOOPER\n",
 +                              binder_user_error("%d:%d ERROR: BC_REGISTER_LOOPER called after BC_ENTER_LOOPER\n",
                                        proc->pid, thread->pid);
                        } else if (proc->requested_threads == 0) {
                                thread->looper |= BINDER_LOOPER_STATE_INVALID;
 -                              binder_user_error("binder: %d:%d ERROR:"
 -                                      " BC_REGISTER_LOOPER called "
 -                                      "without request\n",
 +                              binder_user_error("%d:%d ERROR: BC_REGISTER_LOOPER called without request\n",
                                        proc->pid, thread->pid);
                        } else {
                                proc->requested_threads--;
                        break;
                case BC_ENTER_LOOPER:
                        binder_debug(BINDER_DEBUG_THREADS,
 -                                   "binder: %d:%d BC_ENTER_LOOPER\n",
 +                                   "%d:%d BC_ENTER_LOOPER\n",
                                     proc->pid, thread->pid);
                        if (thread->looper & BINDER_LOOPER_STATE_REGISTERED) {
                                thread->looper |= BINDER_LOOPER_STATE_INVALID;
 -                              binder_user_error("binder: %d:%d ERROR:"
 -                                      " BC_ENTER_LOOPER called after "
 -                                      "BC_REGISTER_LOOPER\n",
 +                              binder_user_error("%d:%d ERROR: BC_ENTER_LOOPER called after BC_REGISTER_LOOPER\n",
                                        proc->pid, thread->pid);
                        }
                        thread->looper |= BINDER_LOOPER_STATE_ENTERED;
                        break;
                case BC_EXIT_LOOPER:
                        binder_debug(BINDER_DEBUG_THREADS,
 -                                   "binder: %d:%d BC_EXIT_LOOPER\n",
 +                                   "%d:%d BC_EXIT_LOOPER\n",
                                     proc->pid, thread->pid);
                        thread->looper |= BINDER_LOOPER_STATE_EXITED;
                        break;
                        ptr += sizeof(void *);
                        ref = binder_get_ref(proc, target);
                        if (ref == NULL) {
 -                              binder_user_error("binder: %d:%d %s "
 -                                      "invalid ref %d\n",
 +                              binder_user_error("%d:%d %s invalid ref %d\n",
                                        proc->pid, thread->pid,
                                        cmd == BC_REQUEST_DEATH_NOTIFICATION ?
                                        "BC_REQUEST_DEATH_NOTIFICATION" :
                        }
  
                        binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION,
 -                                   "binder: %d:%d %s %p ref %d desc %d s %d w %d for node %d\n",
 +                                   "%d:%d %s %p ref %d desc %d s %d w %d for node %d\n",
                                     proc->pid, thread->pid,
                                     cmd == BC_REQUEST_DEATH_NOTIFICATION ?
                                     "BC_REQUEST_DEATH_NOTIFICATION" :
  
                        if (cmd == BC_REQUEST_DEATH_NOTIFICATION) {
                                if (ref->death) {
 -                                      binder_user_error("binder: %d:%"
 -                                              "d BC_REQUEST_DEATH_NOTI"
 -                                              "FICATION death notific"
 -                                              "ation already set\n",
 +                                      binder_user_error("%d:%d BC_REQUEST_DEATH_NOTIFICATION death notification already set\n",
                                                proc->pid, thread->pid);
                                        break;
                                }
                                if (death == NULL) {
                                        thread->return_error = BR_ERROR;
                                        binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
 -                                                   "binder: %d:%d "
 -                                                   "BC_REQUEST_DEATH_NOTIFICATION failed\n",
 +                                                   "%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n",
                                                     proc->pid, thread->pid);
                                        break;
                                }
                                }
                        } else {
                                if (ref->death == NULL) {
 -                                      binder_user_error("binder: %d:%"
 -                                              "d BC_CLEAR_DEATH_NOTIFI"
 -                                              "CATION death notificat"
 -                                              "ion not active\n",
 +                                      binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification not active\n",
                                                proc->pid, thread->pid);
                                        break;
                                }
                                death = ref->death;
                                if (death->cookie != cookie) {
 -                                      binder_user_error("binder: %d:%"
 -                                              "d BC_CLEAR_DEATH_NOTIFI"
 -                                              "CATION death notificat"
 -                                              "ion cookie mismatch "
 -                                              "%p != %p\n",
 +                                      binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification cookie mismatch %p != %p\n",
                                                proc->pid, thread->pid,
                                                death->cookie, cookie);
                                        break;
                                }
                        }
                        binder_debug(BINDER_DEBUG_DEAD_BINDER,
 -                                   "binder: %d:%d BC_DEAD_BINDER_DONE %p found %p\n",
 +                                   "%d:%d BC_DEAD_BINDER_DONE %p found %p\n",
                                     proc->pid, thread->pid, cookie, death);
                        if (death == NULL) {
 -                              binder_user_error("binder: %d:%d BC_DEAD"
 -                                      "_BINDER_DONE %p not found\n",
 +                              binder_user_error("%d:%d BC_DEAD_BINDER_DONE %p not found\n",
                                        proc->pid, thread->pid, cookie);
                                break;
                        }
                } break;
  
                default:
 -                      pr_err("binder: %d:%d unknown command %d\n",
 +                      pr_err("%d:%d unknown command %d\n",
                               proc->pid, thread->pid, cmd);
                        return -EINVAL;
                }
  void binder_stat_br(struct binder_proc *proc, struct binder_thread *thread,
                    uint32_t cmd)
  {
 +      trace_binder_return(cmd);
        if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.br)) {
                binder_stats.br[_IOC_NR(cmd)]++;
                proc->stats.br[_IOC_NR(cmd)]++;
@@@ -2103,7 -2136,6 +2104,7 @@@ retry
                        if (put_user(thread->return_error2, (uint32_t __user *)ptr))
                                return -EFAULT;
                        ptr += sizeof(uint32_t);
 +                      binder_stat_br(proc, thread, thread->return_error2);
                        if (ptr == end)
                                goto done;
                        thread->return_error2 = BR_OK;
                if (put_user(thread->return_error, (uint32_t __user *)ptr))
                        return -EFAULT;
                ptr += sizeof(uint32_t);
 +              binder_stat_br(proc, thread, thread->return_error);
                thread->return_error = BR_OK;
                goto done;
        }
        thread->looper |= BINDER_LOOPER_STATE_WAITING;
        if (wait_for_proc_work)
                proc->ready_threads++;
 -      mutex_unlock(&binder_lock);
 +
 +      binder_unlock(__func__);
 +
 +      trace_binder_wait_for_work(wait_for_proc_work,
 +                                 !!thread->transaction_stack,
 +                                 !list_empty(&thread->todo));
        if (wait_for_proc_work) {
                if (!(thread->looper & (BINDER_LOOPER_STATE_REGISTERED |
                                        BINDER_LOOPER_STATE_ENTERED))) {
 -                      binder_user_error("binder: %d:%d ERROR: Thread waiting "
 -                              "for process work before calling BC_REGISTER_"
 -                              "LOOPER or BC_ENTER_LOOPER (state %x)\n",
 +                      binder_user_error("%d:%d ERROR: Thread waiting for process work before calling BC_REGISTER_LOOPER or BC_ENTER_LOOPER (state %x)\n",
                                proc->pid, thread->pid, thread->looper);
                        wait_event_interruptible(binder_user_error_wait,
                                                 binder_stop_on_user_error < 2);
                } else
                        ret = wait_event_interruptible(thread->wait, binder_has_thread_work(thread));
        }
 -      mutex_lock(&binder_lock);
 +
 +      binder_lock(__func__);
 +
        if (wait_for_proc_work)
                proc->ready_threads--;
        thread->looper &= ~BINDER_LOOPER_STATE_WAITING;
  
                        binder_stat_br(proc, thread, cmd);
                        binder_debug(BINDER_DEBUG_TRANSACTION_COMPLETE,
 -                                   "binder: %d:%d BR_TRANSACTION_COMPLETE\n",
 +                                   "%d:%d BR_TRANSACTION_COMPLETE\n",
                                     proc->pid, thread->pid);
  
                        list_del(&w->entry);
  
                                binder_stat_br(proc, thread, cmd);
                                binder_debug(BINDER_DEBUG_USER_REFS,
 -                                           "binder: %d:%d %s %d u%p c%p\n",
 +                                           "%d:%d %s %d u%p c%p\n",
                                             proc->pid, thread->pid, cmd_name, node->debug_id, node->ptr, node->cookie);
                        } else {
                                list_del_init(&w->entry);
                                if (!weak && !strong) {
                                        binder_debug(BINDER_DEBUG_INTERNAL_REFS,
 -                                                   "binder: %d:%d node %d u%p c%p deleted\n",
 +                                                   "%d:%d node %d u%p c%p deleted\n",
                                                     proc->pid, thread->pid, node->debug_id,
                                                     node->ptr, node->cookie);
                                        rb_erase(&node->rb_node, &proc->nodes);
                                        binder_stats_deleted(BINDER_STAT_NODE);
                                } else {
                                        binder_debug(BINDER_DEBUG_INTERNAL_REFS,
 -                                                   "binder: %d:%d node %d u%p c%p state unchanged\n",
 +                                                   "%d:%d node %d u%p c%p state unchanged\n",
                                                     proc->pid, thread->pid, node->debug_id, node->ptr,
                                                     node->cookie);
                                }
                        if (put_user(death->cookie, (void * __user *)ptr))
                                return -EFAULT;
                        ptr += sizeof(void *);
 +                      binder_stat_br(proc, thread, cmd);
                        binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION,
 -                                   "binder: %d:%d %s %p\n",
 +                                   "%d:%d %s %p\n",
                                      proc->pid, thread->pid,
                                      cmd == BR_DEAD_BINDER ?
                                      "BR_DEAD_BINDER" :
                if (t->from) {
                        struct task_struct *sender = t->from->proc->tsk;
                        tr.sender_pid = task_tgid_nr_ns(sender,
-                                                       current->nsproxy->pid_ns);
+                                                       task_active_pid_ns(current));
                } else {
                        tr.sender_pid = 0;
                }
                        return -EFAULT;
                ptr += sizeof(tr);
  
 +              trace_binder_transaction_received(t);
                binder_stat_br(proc, thread, cmd);
                binder_debug(BINDER_DEBUG_TRANSACTION,
 -                           "binder: %d:%d %s %d %d:%d, cmd %d"
 -                           "size %zd-%zd ptr %p-%p\n",
 +                           "%d:%d %s %d %d:%d, cmd %d size %zd-%zd ptr %p-%p\n",
                             proc->pid, thread->pid,
                             (cmd == BR_TRANSACTION) ? "BR_TRANSACTION" :
                             "BR_REPLY",
@@@ -2376,11 -2401,10 +2377,11 @@@ done
             /*spawn a new thread if we leave this out */) {
                proc->requested_threads++;
                binder_debug(BINDER_DEBUG_THREADS,
 -                           "binder: %d:%d BR_SPAWN_LOOPER\n",
 +                           "%d:%d BR_SPAWN_LOOPER\n",
                             proc->pid, thread->pid);
                if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer))
                        return -EFAULT;
 +              binder_stat_br(proc, thread, BR_SPAWN_LOOPER);
        }
        return 0;
  }
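
The hunks above and below replace the bare mutex_lock(&binder_lock)/mutex_unlock(&binder_lock) pairs with binder_lock(__func__)/binder_unlock(__func__). The wrapper bodies are not part of this excerpt; as a minimal sketch, assuming they keep a single global mutex and merely tag the caller for tracing (the mutex and trace hook names below are assumptions, not taken from this patch):

static void binder_lock(const char *tag)
{
	trace_binder_lock(tag);		/* assumed tracepoint, fires before blocking */
	mutex_lock(&binder_main_lock);	/* assumed name of the global binder mutex */
	trace_binder_locked(tag);	/* assumed tracepoint, fires once held */
}

static void binder_unlock(const char *tag)
{
	trace_binder_unlock(tag);	/* assumed tracepoint */
	mutex_unlock(&binder_main_lock);
}

Passing __func__ at every call site gives the trace (or debug log) the name of the function that took or released the lock at essentially no cost.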
@@@ -2401,7 -2425,7 +2402,7 @@@ static void binder_release_work(struct 
                                binder_send_failed_reply(t, BR_DEAD_REPLY);
                        } else {
                                binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
 -                                      "binder: undelivered transaction %d\n",
 +                                      "undelivered transaction %d\n",
                                        t->debug_id);
                                t->buffer->transaction = NULL;
                                kfree(t);
                } break;
                case BINDER_WORK_TRANSACTION_COMPLETE: {
                        binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
 -                              "binder: undelivered TRANSACTION_COMPLETE\n");
 +                              "undelivered TRANSACTION_COMPLETE\n");
                        kfree(w);
                        binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
                } break;
  
                        death = container_of(w, struct binder_ref_death, work);
                        binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
 -                              "binder: undelivered death notification, %p\n",
 +                              "undelivered death notification, %p\n",
                                death->cookie);
                        kfree(death);
                        binder_stats_deleted(BINDER_STAT_DEATH);
                } break;
                default:
 -                      pr_err("binder: unexpected work type, %d, not freed\n",
 +                      pr_err("unexpected work type, %d, not freed\n",
                               w->type);
                        break;
                }
@@@ -2483,8 -2507,8 +2484,8 @@@ static int binder_free_thread(struct bi
        while (t) {
                active_transactions++;
                binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
 -                           "binder: release %d:%d transaction %d "
 -                           "%s, still active\n", proc->pid, thread->pid,
 +                           "release %d:%d transaction %d %s, still active\n",
 +                            proc->pid, thread->pid,
                             t->debug_id,
                             (t->to_thread == thread) ? "in" : "out");
  
@@@ -2517,14 -2541,12 +2518,14 @@@ static unsigned int binder_poll(struct 
        struct binder_thread *thread = NULL;
        int wait_for_proc_work;
  
 -      mutex_lock(&binder_lock);
 +      binder_lock(__func__);
 +
        thread = binder_get_thread(proc);
  
        wait_for_proc_work = thread->transaction_stack == NULL &&
                list_empty(&thread->todo) && thread->return_error == BR_OK;
 -      mutex_unlock(&binder_lock);
 +
 +      binder_unlock(__func__);
  
        if (wait_for_proc_work) {
                if (binder_has_proc_work(proc, thread))
@@@ -2552,13 -2574,11 +2553,13 @@@ static long binder_ioctl(struct file *f
  
        /*pr_info("binder_ioctl: %d:%d %x %lx\n", proc->pid, current->pid, cmd, arg);*/
  
 +      trace_binder_ioctl(cmd, arg);
 +
        ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2);
        if (ret)
 -              return ret;
 +              goto err_unlocked;
  
 -      mutex_lock(&binder_lock);
 +      binder_lock(__func__);
        thread = binder_get_thread(proc);
        if (thread == NULL) {
                ret = -ENOMEM;
                        goto err;
                }
                binder_debug(BINDER_DEBUG_READ_WRITE,
 -                           "binder: %d:%d write %ld at %08lx, read %ld at %08lx\n",
 -                           proc->pid, thread->pid, bwr.write_size, bwr.write_buffer,
 -                           bwr.read_size, bwr.read_buffer);
 +                           "%d:%d write %ld at %08lx, read %ld at %08lx\n",
 +                           proc->pid, thread->pid, bwr.write_size,
 +                           bwr.write_buffer, bwr.read_size, bwr.read_buffer);
  
                if (bwr.write_size > 0) {
                        ret = binder_thread_write(proc, thread, (void __user *)bwr.write_buffer, bwr.write_size, &bwr.write_consumed);
 +                      trace_binder_write_done(ret);
                        if (ret < 0) {
                                bwr.read_consumed = 0;
                                if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
                }
                if (bwr.read_size > 0) {
                        ret = binder_thread_read(proc, thread, (void __user *)bwr.read_buffer, bwr.read_size, &bwr.read_consumed, filp->f_flags & O_NONBLOCK);
 +                      trace_binder_read_done(ret);
                        if (!list_empty(&proc->todo))
                                wake_up_interruptible(&proc->wait);
                        if (ret < 0) {
                        }
                }
                binder_debug(BINDER_DEBUG_READ_WRITE,
 -                           "binder: %d:%d wrote %ld of %ld, read return %ld of %ld\n",
 +                           "%d:%d wrote %ld of %ld, read return %ld of %ld\n",
                             proc->pid, thread->pid, bwr.write_consumed, bwr.write_size,
                             bwr.read_consumed, bwr.read_size);
                if (copy_to_user(ubuf, &bwr, sizeof(bwr))) {
                break;
        case BINDER_SET_CONTEXT_MGR:
                if (binder_context_mgr_node != NULL) {
 -                      pr_err("binder: BINDER_SET_CONTEXT_MGR already set\n");
 +                      pr_err("BINDER_SET_CONTEXT_MGR already set\n");
                        ret = -EBUSY;
                        goto err;
                }
                if (uid_valid(binder_context_mgr_uid)) {
                        if (!uid_eq(binder_context_mgr_uid, current->cred->euid)) {
 -                              pr_err("binder: BINDER_SET_"
 -                                     "CONTEXT_MGR bad uid %d != %d\n",
 +                              pr_err("BINDER_SET_CONTEXT_MGR bad uid %d != %d\n",
                                       from_kuid(&init_user_ns, current->cred->euid),
                                       from_kuid(&init_user_ns, binder_context_mgr_uid));
                                ret = -EPERM;
                binder_context_mgr_node->has_weak_ref = 1;
                break;
        case BINDER_THREAD_EXIT:
 -              binder_debug(BINDER_DEBUG_THREADS, "binder: %d:%d exit\n",
 +              binder_debug(BINDER_DEBUG_THREADS, "%d:%d exit\n",
                             proc->pid, thread->pid);
                binder_free_thread(proc, thread);
                thread = NULL;
  err:
        if (thread)
                thread->looper &= ~BINDER_LOOPER_STATE_NEED_RETURN;
 -      mutex_unlock(&binder_lock);
 +      binder_unlock(__func__);
        wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2);
        if (ret && ret != -ERESTARTSYS)
 -              pr_info("binder: %d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret);
 +              pr_info("%d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret);
 +err_unlocked:
 +      trace_binder_ioctl_done(ret);
        return ret;
  }
  
@@@ -2681,7 -2698,7 +2682,7 @@@ static void binder_vma_open(struct vm_a
  {
        struct binder_proc *proc = vma->vm_private_data;
        binder_debug(BINDER_DEBUG_OPEN_CLOSE,
 -                   "binder: %d open vm area %lx-%lx (%ld K) vma %lx pagep %lx\n",
 +                   "%d open vm area %lx-%lx (%ld K) vma %lx pagep %lx\n",
                     proc->pid, vma->vm_start, vma->vm_end,
                     (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags,
                     (unsigned long)pgprot_val(vma->vm_page_prot));
@@@ -2691,7 -2708,7 +2692,7 @@@ static void binder_vma_close(struct vm_
  {
        struct binder_proc *proc = vma->vm_private_data;
        binder_debug(BINDER_DEBUG_OPEN_CLOSE,
 -                   "binder: %d close vm area %lx-%lx (%ld K) vma %lx pagep %lx\n",
 +                   "%d close vm area %lx-%lx (%ld K) vma %lx pagep %lx\n",
                     proc->pid, vma->vm_start, vma->vm_end,
                     (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags,
                     (unsigned long)pgprot_val(vma->vm_page_prot));
@@@ -2819,16 -2836,13 +2820,16 @@@ static int binder_open(struct inode *no
        INIT_LIST_HEAD(&proc->todo);
        init_waitqueue_head(&proc->wait);
        proc->default_priority = task_nice(current);
 -      mutex_lock(&binder_lock);
 +
 +      binder_lock(__func__);
 +
        binder_stats_created(BINDER_STAT_PROC);
        hlist_add_head(&proc->proc_node, &binder_procs);
        proc->pid = current->group_leader->pid;
        INIT_LIST_HEAD(&proc->delivered_death);
        filp->private_data = proc;
 -      mutex_unlock(&binder_lock);
 +
 +      binder_unlock(__func__);
  
        if (binder_debugfs_dir_entry_proc) {
                char strbuf[11];
@@@ -2936,8 -2950,9 +2937,8 @@@ static void binder_deferred_release(str
                                }
                        }
                        binder_debug(BINDER_DEBUG_DEAD_BINDER,
 -                                   "binder: node %d now dead, "
 -                                   "refs %d, death %d\n", node->debug_id,
 -                                   incoming_refs, death);
 +                                   "node %d now dead, refs %d, death %d\n",
 +                                    node->debug_id, incoming_refs, death);
                }
        }
        outgoing_refs = 0;
                if (t) {
                        t->buffer = NULL;
                        buffer->transaction = NULL;
 -                      pr_err("binder: release proc %d, "
 -                             "transaction %d, not freed\n",
 +                      pr_err("release proc %d, transaction %d, not freed\n",
                               proc->pid, t->debug_id);
                        /*BUG();*/
                }
                        if (proc->pages[i]) {
                                void *page_addr = proc->buffer + i * PAGE_SIZE;
                                binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
 -                                           "binder_release: %d: "
 -                                           "page %d at %p not freed\n",
 +                                           "binder_release: %d: page %d at %p not freed\n",
                                             proc->pid, i,
                                             page_addr);
                                unmap_kernel_range((unsigned long)page_addr,
        put_task_struct(proc->tsk);
  
        binder_debug(BINDER_DEBUG_OPEN_CLOSE,
 -                   "binder_release: %d threads %d, nodes %d (ref %d), "
 -                   "refs %d, active transactions %d, buffers %d, "
 -                   "pages %d\n",
 +                   "binder_release: %d threads %d, nodes %d (ref %d), refs %d, active transactions %d, buffers %d, pages %d\n",
                     proc->pid, threads, nodes, incoming_refs, outgoing_refs,
                     active_transactions, buffers, page_count);
  
@@@ -3005,7 -3024,7 +3006,7 @@@ static void binder_deferred_func(struc
  
        int defer;
        do {
 -              mutex_lock(&binder_lock);
 +              binder_lock(__func__);
                mutex_lock(&binder_deferred_lock);
                if (!hlist_empty(&binder_deferred_list)) {
                        proc = hlist_entry(binder_deferred_list.first,
                if (defer & BINDER_DEFERRED_RELEASE)
                        binder_deferred_release(proc); /* frees proc */
  
 -              mutex_unlock(&binder_lock);
 +              binder_unlock(__func__);
                if (files)
                        put_files_struct(files);
        } while (proc);
@@@ -3373,7 -3392,7 +3374,7 @@@ static int binder_state_show(struct seq
        int do_lock = !binder_debug_no_lock;
  
        if (do_lock)
 -              mutex_lock(&binder_lock);
 +              binder_lock(__func__);
  
        seq_puts(m, "binder state:\n");
  
        hlist_for_each_entry(proc, pos, &binder_procs, proc_node)
                print_binder_proc(m, proc, 1);
        if (do_lock)
 -              mutex_unlock(&binder_lock);
 +              binder_unlock(__func__);
        return 0;
  }
  
@@@ -3396,7 -3415,7 +3397,7 @@@ static int binder_stats_show(struct seq
        int do_lock = !binder_debug_no_lock;
  
        if (do_lock)
 -              mutex_lock(&binder_lock);
 +              binder_lock(__func__);
  
        seq_puts(m, "binder stats:\n");
  
        hlist_for_each_entry(proc, pos, &binder_procs, proc_node)
                print_binder_proc_stats(m, proc);
        if (do_lock)
 -              mutex_unlock(&binder_lock);
 +              binder_unlock(__func__);
        return 0;
  }
  
@@@ -3416,13 -3435,13 +3417,13 @@@ static int binder_transactions_show(str
        int do_lock = !binder_debug_no_lock;
  
        if (do_lock)
 -              mutex_lock(&binder_lock);
 +              binder_lock(__func__);
  
        seq_puts(m, "binder transactions:\n");
        hlist_for_each_entry(proc, pos, &binder_procs, proc_node)
                print_binder_proc(m, proc, 0);
        if (do_lock)
 -              mutex_unlock(&binder_lock);
 +              binder_unlock(__func__);
        return 0;
  }
  
@@@ -3432,11 -3451,11 +3433,11 @@@ static int binder_proc_show(struct seq_
        int do_lock = !binder_debug_no_lock;
  
        if (do_lock)
 -              mutex_lock(&binder_lock);
 +              binder_lock(__func__);
        seq_puts(m, "binder proc state:\n");
        print_binder_proc(m, proc, 1);
        if (do_lock)
 -              mutex_unlock(&binder_lock);
 +              binder_unlock(__func__);
        return 0;
  }
  
@@@ -3531,7 -3550,4 +3532,7 @@@ static int __init binder_init(void
  
  device_initcall(binder_init);
  
 +#define CREATE_TRACE_POINTS
 +#include "binder_trace.h"
 +
  MODULE_LICENSE("GPL v2");
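
The CREATE_TRACE_POINTS/binder_trace.h lines added at the end of the file follow the standard kernel tracepoint pattern: the event definitions live in a self-including trace header, and exactly one .c file defines CREATE_TRACE_POINTS before including that header so the event bodies are emitted only once. A minimal sketch of the pattern with a made-up event (the real definitions are in binder_trace.h, which is not shown in this diff):

/* foo_trace.h -- layout sketch only; "foo_ioctl" is a hypothetical event */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM foo

#if !defined(_FOO_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _FOO_TRACE_H

#include <linux/tracepoint.h>

TRACE_EVENT(foo_ioctl,
	TP_PROTO(unsigned int cmd, unsigned long arg),
	TP_ARGS(cmd, arg),
	TP_STRUCT__entry(
		__field(unsigned int, cmd)
		__field(unsigned long, arg)
	),
	TP_fast_assign(
		__entry->cmd = cmd;
		__entry->arg = arg;
	),
	TP_printk("cmd=0x%x arg=0x%lx", __entry->cmd, __entry->arg)
);

#endif /* _FOO_TRACE_H */

/* must stay outside the include guard */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE foo_trace
#include <trace/define_trace.h>

Every other file that wants to call trace_foo_ioctl() simply includes foo_trace.h without defining CREATE_TRACE_POINTS.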
diff --combined fs/exec.c
index 721a299295117f92d271f17afd224db1787712a1,aef0c2f19750f40a83970749f6aec112fd75baac..b71b08ce71204824c7c66c8d101a7ebecdbb4db0
+++ b/fs/exec.c
@@@ -1266,14 -1266,13 +1266,13 @@@ int prepare_binprm(struct linux_binprm 
        bprm->cred->egid = current_egid();
  
        if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
-           !current->no_new_privs) {
+           !current->no_new_privs &&
+           kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
+           kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
                /* Set-uid? */
                if (mode & S_ISUID) {
-                       if (!kuid_has_mapping(bprm->cred->user_ns, inode->i_uid))
-                               return -EPERM;
                        bprm->per_clear |= PER_CLEAR_ON_SETID;
                        bprm->cred->euid = inode->i_uid;
                }
  
                /* Set-gid? */
                 * executable.
                 */
                if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
-                       if (!kgid_has_mapping(bprm->cred->user_ns, inode->i_gid))
-                               return -EPERM;
                        bprm->per_clear |= PER_CLEAR_ON_SETID;
                        bprm->cred->egid = inode->i_gid;
                }
@@@ -1349,7 -1346,7 +1346,7 @@@ EXPORT_SYMBOL(remove_arg_zero)
  /*
   * cycle the list of binary formats handler, until one recognizes the image
   */
 -int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 +int search_binary_handler(struct linux_binprm *bprm)
  {
        unsigned int depth = bprm->recursion_depth;
        int try,retval;
        for (try=0; try<2; try++) {
                read_lock(&binfmt_lock);
                list_for_each_entry(fmt, &formats, lh) {
 -                      int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
 +                      int (*fn)(struct linux_binprm *) = fmt->load_binary;
                        if (!fn)
                                continue;
                        if (!try_module_get(fmt->module))
                                continue;
                        read_unlock(&binfmt_lock);
 -                      retval = fn(bprm, regs);
 +                      retval = fn(bprm);
                        /*
                         * Restore the depth counter to its starting value
                         * in this call, so we don't have to rely on every
@@@ -1439,7 -1436,8 +1436,7 @@@ EXPORT_SYMBOL(search_binary_handler)
   */
  static int do_execve_common(const char *filename,
                                struct user_arg_ptr argv,
 -                              struct user_arg_ptr envp,
 -                              struct pt_regs *regs)
 +                              struct user_arg_ptr envp)
  {
        struct linux_binprm *bprm;
        struct file *file;
        if (retval < 0)
                goto out;
  
 -      retval = search_binary_handler(bprm,regs);
 +      retval = search_binary_handler(bprm);
        if (retval < 0)
                goto out;
  
@@@ -1565,17 -1563,19 +1562,17 @@@ out_ret
  
  int do_execve(const char *filename,
        const char __user *const __user *__argv,
 -      const char __user *const __user *__envp,
 -      struct pt_regs *regs)
 +      const char __user *const __user *__envp)
  {
        struct user_arg_ptr argv = { .ptr.native = __argv };
        struct user_arg_ptr envp = { .ptr.native = __envp };
 -      return do_execve_common(filename, argv, envp, regs);
 +      return do_execve_common(filename, argv, envp);
  }
  
  #ifdef CONFIG_COMPAT
 -int compat_do_execve(const char *filename,
 +static int compat_do_execve(const char *filename,
        const compat_uptr_t __user *__argv,
 -      const compat_uptr_t __user *__envp,
 -      struct pt_regs *regs)
 +      const compat_uptr_t __user *__envp)
  {
        struct user_arg_ptr argv = {
                .is_compat = true,
                .is_compat = true,
                .ptr.compat = __envp,
        };
 -      return do_execve_common(filename, argv, envp, regs);
 +      return do_execve_common(filename, argv, envp);
  }
  #endif
  
@@@ -1666,7 -1666,7 +1663,7 @@@ SYSCALL_DEFINE3(execve
        struct filename *path = getname(filename);
        int error = PTR_ERR(path);
        if (!IS_ERR(path)) {
 -              error = do_execve(path->name, argv, envp, current_pt_regs());
 +              error = do_execve(path->name, argv, envp);
                putname(path);
        }
        return error;
@@@ -1679,7 -1679,8 +1676,7 @@@ asmlinkage long compat_sys_execve(cons
        struct filename *path = getname(filename);
        int error = PTR_ERR(path);
        if (!IS_ERR(path)) {
 -              error = compat_do_execve(path->name, argv, envp,
 -                                                      current_pt_regs());
 +              error = compat_do_execve(path->name, argv, envp);
                putname(path);
        }
        return error;
@@@ -1692,9 -1693,12 +1689,9 @@@ int kernel_execve(const char *filename
                  const char *const argv[],
                  const char *const envp[])
  {
 -      struct pt_regs *p = current_pt_regs();
 -      int ret;
 -
 -      ret = do_execve(filename,
 +      int ret = do_execve(filename,
                        (const char __user *const __user *)argv,
 -                      (const char __user *const __user *)envp, p);
 +                      (const char __user *const __user *)envp);
        if (ret < 0)
                return ret;
  
         * We were successful.  We won't be returning to our caller, but
         * instead to user space by manipulating the kernel stack.
         */
 -      ret_from_kernel_execve(p);
 +      ret_from_kernel_execve(current_pt_regs());
  }
  #endif
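
With pt_regs threaded out of do_execve_common() and search_binary_handler() above, each binary-format handler's load_binary hook also loses its pt_regs argument and reaches register state through current_pt_regs() when it needs it. A sketch of a handler registration after this change (a hypothetical "foo" format, not one touched by this patch):

static int load_foo_binary(struct linux_binprm *bprm)
{
	/* inspect bprm->buf / bprm->file and set up the new image;
	 * register state is reached via current_pt_regs() instead of a
	 * passed-in regs pointer */
	return -ENOEXEC;	/* placeholder: not our format */
}

static struct linux_binfmt foo_format = {
	.module      = THIS_MODULE,
	.load_binary = load_foo_binary,
};

static int __init foo_binfmt_init(void)
{
	register_binfmt(&foo_format);
	return 0;
}
core_initcall(foo_binfmt_init);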
diff --combined fs/proc/array.c
index d3696708fc1ae4bff76a1d5a253103e792629ae4,554434265613331c22c87e860c219b83652f3e93..d66248a1919b3a3028d07ae1d4dc59f36ce57aea
@@@ -162,7 -162,7 +162,7 @@@ static inline const char *get_task_stat
  static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
                                struct pid *pid, struct task_struct *p)
  {
-       struct user_namespace *user_ns = current_user_ns();
+       struct user_namespace *user_ns = seq_user_ns(m);
        struct group_info *group_info;
        int g;
        struct fdtable *fdt = NULL;
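
The switch from current_user_ns() to seq_user_ns(m) above means the Uid:/Gid: values in /proc/<pid>/status are translated into the user namespace of the process that opened the file, not of whichever process happens to read it. A small runnable check from userspace (assumes a kernel with this series and unprivileged user namespace creation enabled):

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static void write_file(const char *path, const char *buf)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf)) {
		perror(path);
		exit(1);
	}
	close(fd);
}

int main(void)
{
	char map[64];
	uid_t outer_uid = getuid();

	if (unshare(CLONE_NEWUSER) < 0) {
		perror("unshare(CLONE_NEWUSER)");
		return 1;
	}
	/* map our outer uid to uid 0 inside the new namespace */
	snprintf(map, sizeof(map), "0 %u 1", outer_uid);
	write_file("/proc/self/uid_map", map);

	/* the Uid: line now reads 0: the open() happens in the new user
	 * namespace and task_state() converts with seq_user_ns() */
	execlp("grep", "grep", "^Uid:", "/proc/self/status", (char *)NULL);
	perror("execlp");
	return 1;
}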
@@@ -438,7 -438,7 +438,7 @@@ static int do_task_stat(struct seq_fil
  
                        min_flt += sig->min_flt;
                        maj_flt += sig->maj_flt;
 -                      thread_group_times(task, &utime, &stime);
 +                      thread_group_cputime_adjusted(task, &utime, &stime);
                        gtime += sig->gtime;
                }
  
        if (!whole) {
                min_flt = task->min_flt;
                maj_flt = task->maj_flt;
 -              task_times(task, &utime, &stime);
 +              task_cputime_adjusted(task, &utime, &stime);
                gtime = task->gtime;
        }
  
diff --combined fs/proc/base.c
index aa63d25157b8d396a9a7d0f1728fe673fa577e31,7621dc51cff8c7feac70b5c93e275167bb3e1766..5a5a0be40e405f4693bad85fcc7d04703b362523
@@@ -873,119 -873,12 +873,119 @@@ static const struct file_operations pro
        .release        = mem_release,
  };
  
 +static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
 +                          loff_t *ppos)
 +{
 +      struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 +      char buffer[PROC_NUMBUF];
 +      int oom_adj = OOM_ADJUST_MIN;
 +      size_t len;
 +      unsigned long flags;
 +
 +      if (!task)
 +              return -ESRCH;
 +      if (lock_task_sighand(task, &flags)) {
 +              if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
 +                      oom_adj = OOM_ADJUST_MAX;
 +              else
 +                      oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
 +                                OOM_SCORE_ADJ_MAX;
 +              unlock_task_sighand(task, &flags);
 +      }
 +      put_task_struct(task);
 +      len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
 +      return simple_read_from_buffer(buf, count, ppos, buffer, len);
 +}
 +
 +static ssize_t oom_adj_write(struct file *file, const char __user *buf,
 +                           size_t count, loff_t *ppos)
 +{
 +      struct task_struct *task;
 +      char buffer[PROC_NUMBUF];
 +      int oom_adj;
 +      unsigned long flags;
 +      int err;
 +
 +      memset(buffer, 0, sizeof(buffer));
 +      if (count > sizeof(buffer) - 1)
 +              count = sizeof(buffer) - 1;
 +      if (copy_from_user(buffer, buf, count)) {
 +              err = -EFAULT;
 +              goto out;
 +      }
 +
 +      err = kstrtoint(strstrip(buffer), 0, &oom_adj);
 +      if (err)
 +              goto out;
 +      if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
 +           oom_adj != OOM_DISABLE) {
 +              err = -EINVAL;
 +              goto out;
 +      }
 +
 +      task = get_proc_task(file->f_path.dentry->d_inode);
 +      if (!task) {
 +              err = -ESRCH;
 +              goto out;
 +      }
 +
 +      task_lock(task);
 +      if (!task->mm) {
 +              err = -EINVAL;
 +              goto err_task_lock;
 +      }
 +
 +      if (!lock_task_sighand(task, &flags)) {
 +              err = -ESRCH;
 +              goto err_task_lock;
 +      }
 +
 +      /*
 +       * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
 +       * value is always attainable.
 +       */
 +      if (oom_adj == OOM_ADJUST_MAX)
 +              oom_adj = OOM_SCORE_ADJ_MAX;
 +      else
 +              oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
 +
 +      if (oom_adj < task->signal->oom_score_adj &&
 +          !capable(CAP_SYS_RESOURCE)) {
 +              err = -EACCES;
 +              goto err_sighand;
 +      }
 +
 +      /*
 +       * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
 +       * /proc/pid/oom_score_adj instead.
 +       */
 +      printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
 +                current->comm, task_pid_nr(current), task_pid_nr(task),
 +                task_pid_nr(task));
 +
 +      task->signal->oom_score_adj = oom_adj;
 +      trace_oom_score_adj_update(task);
 +err_sighand:
 +      unlock_task_sighand(task, &flags);
 +err_task_lock:
 +      task_unlock(task);
 +      put_task_struct(task);
 +out:
 +      return err < 0 ? err : count;
 +}
 +
 +static const struct file_operations proc_oom_adj_operations = {
 +      .read           = oom_adj_read,
 +      .write          = oom_adj_write,
 +      .llseek         = generic_file_llseek,
 +};
 +
  static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
                                        size_t count, loff_t *ppos)
  {
        struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
        char buffer[PROC_NUMBUF];
 -      int oom_score_adj = OOM_SCORE_ADJ_MIN;
 +      short oom_score_adj = OOM_SCORE_ADJ_MIN;
        unsigned long flags;
        size_t len;
  
                unlock_task_sighand(task, &flags);
        }
        put_task_struct(task);
 -      len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj);
 +      len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
        return simple_read_from_buffer(buf, count, ppos, buffer, len);
  }
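
oom_adj_read()/oom_adj_write() above bring back the legacy /proc/<pid>/oom_adj interface as a scaled view of oom_score_adj: the old [-16, 15] range (plus OOM_DISABLE == -17) is mapped onto [-1000, 1000] with the * -OOM_DISABLE / OOM_SCORE_ADJ_MAX conversion shown. A small runnable illustration of the mapping (raising the value is allowed unprivileged; lowering it needs CAP_SYS_RESOURCE, per the -EACCES check above):

#include <stdio.h>
#include <stdlib.h>

static void show(const char *path)
{
	char buf[32] = "";
	FILE *f = fopen(path, "r");

	if (!f || !fgets(buf, sizeof(buf), f)) {
		perror(path);
		exit(1);
	}
	fclose(f);
	printf("%-26s %s", path, buf);
}

int main(void)
{
	FILE *f = fopen("/proc/self/oom_score_adj", "w");

	if (!f || fputs("500\n", f) == EOF || fclose(f) == EOF) {
		perror("/proc/self/oom_score_adj");
		return 1;
	}
	/* with the scaling above, 500 * 17 / 1000 == 8 shows up in oom_adj */
	show("/proc/self/oom_score_adj");
	show("/proc/self/oom_adj");
	return 0;
}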
  
@@@ -1043,15 -936,15 +1043,15 @@@ static ssize_t oom_score_adj_write(stru
                goto err_task_lock;
        }
  
 -      if (oom_score_adj < task->signal->oom_score_adj_min &&
 +      if ((short)oom_score_adj < task->signal->oom_score_adj_min &&
                        !capable(CAP_SYS_RESOURCE)) {
                err = -EACCES;
                goto err_sighand;
        }
  
 -      task->signal->oom_score_adj = oom_score_adj;
 +      task->signal->oom_score_adj = (short)oom_score_adj;
        if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
 -              task->signal->oom_score_adj_min = oom_score_adj;
 +              task->signal->oom_score_adj_min = (short)oom_score_adj;
        trace_oom_score_adj_update(task);
  
  err_sighand:
@@@ -1877,9 -1770,8 +1877,9 @@@ static struct dentry *proc_map_files_lo
        if (!vma)
                goto out_no_vma;
  
 -      result = proc_map_files_instantiate(dir, dentry, task,
 -                      (void *)(unsigned long)vma->vm_file->f_mode);
 +      if (vma->vm_file)
 +              result = proc_map_files_instantiate(dir, dentry, task,
 +                              (void *)(unsigned long)vma->vm_file->f_mode);
  
  out_no_vma:
        up_read(&mm->mmap_sem);
@@@ -2345,146 -2237,6 +2345,6 @@@ static const struct file_operations pro
  };
  #endif
  
- /*
-  * /proc/self:
-  */
- static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
-                             int buflen)
- {
-       struct pid_namespace *ns = dentry->d_sb->s_fs_info;
-       pid_t tgid = task_tgid_nr_ns(current, ns);
-       char tmp[PROC_NUMBUF];
-       if (!tgid)
-               return -ENOENT;
-       sprintf(tmp, "%d", tgid);
-       return vfs_readlink(dentry,buffer,buflen,tmp);
- }
- static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
- {
-       struct pid_namespace *ns = dentry->d_sb->s_fs_info;
-       pid_t tgid = task_tgid_nr_ns(current, ns);
-       char *name = ERR_PTR(-ENOENT);
-       if (tgid) {
-               /* 11 for max length of signed int in decimal + NULL term */
-               name = kmalloc(12, GFP_KERNEL);
-               if (!name)
-                       name = ERR_PTR(-ENOMEM);
-               else
-                       sprintf(name, "%d", tgid);
-       }
-       nd_set_link(nd, name);
-       return NULL;
- }
- static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
-                               void *cookie)
- {
-       char *s = nd_get_link(nd);
-       if (!IS_ERR(s))
-               kfree(s);
- }
- static const struct inode_operations proc_self_inode_operations = {
-       .readlink       = proc_self_readlink,
-       .follow_link    = proc_self_follow_link,
-       .put_link       = proc_self_put_link,
- };
- /*
-  * proc base
-  *
-  * These are the directory entries in the root directory of /proc
-  * that properly belong to the /proc filesystem, as they describe
-  * describe something that is process related.
-  */
- static const struct pid_entry proc_base_stuff[] = {
-       NOD("self", S_IFLNK|S_IRWXUGO,
-               &proc_self_inode_operations, NULL, {}),
- };
- static struct dentry *proc_base_instantiate(struct inode *dir,
-       struct dentry *dentry, struct task_struct *task, const void *ptr)
- {
-       const struct pid_entry *p = ptr;
-       struct inode *inode;
-       struct proc_inode *ei;
-       struct dentry *error;
-       /* Allocate the inode */
-       error = ERR_PTR(-ENOMEM);
-       inode = new_inode(dir->i_sb);
-       if (!inode)
-               goto out;
-       /* Initialize the inode */
-       ei = PROC_I(inode);
-       inode->i_ino = get_next_ino();
-       inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-       /*
-        * grab the reference to the task.
-        */
-       ei->pid = get_task_pid(task, PIDTYPE_PID);
-       if (!ei->pid)
-               goto out_iput;
-       inode->i_mode = p->mode;
-       if (S_ISDIR(inode->i_mode))
-               set_nlink(inode, 2);
-       if (S_ISLNK(inode->i_mode))
-               inode->i_size = 64;
-       if (p->iop)
-               inode->i_op = p->iop;
-       if (p->fop)
-               inode->i_fop = p->fop;
-       ei->op = p->op;
-       d_add(dentry, inode);
-       error = NULL;
- out:
-       return error;
- out_iput:
-       iput(inode);
-       goto out;
- }
- static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
- {
-       struct dentry *error;
-       struct task_struct *task = get_proc_task(dir);
-       const struct pid_entry *p, *last;
-       error = ERR_PTR(-ENOENT);
-       if (!task)
-               goto out_no_task;
-       /* Lookup the directory entry */
-       last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
-       for (p = proc_base_stuff; p <= last; p++) {
-               if (p->len != dentry->d_name.len)
-                       continue;
-               if (!memcmp(dentry->d_name.name, p->name, p->len))
-                       break;
-       }
-       if (p > last)
-               goto out;
-       error = proc_base_instantiate(dir, dentry, task, p);
- out:
-       put_task_struct(task);
- out_no_task:
-       return error;
- }
- static int proc_base_fill_cache(struct file *filp, void *dirent,
-       filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
- {
-       return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
-                               proc_base_instantiate, task, p);
- }
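
The block removed above is the old open-coded /proc/self machinery (proc_self_*_link plus the proc_base_lookup()/proc_base_fill_cache() plumbing keyed off proc_base_stuff). Only the removal is visible in this hunk; the "self" entry itself is provided elsewhere after this change, so the user-visible behaviour stays the same and can be checked with a few lines of userspace code:

#include <limits.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[PATH_MAX];
	ssize_t n = readlink("/proc/self", buf, sizeof(buf) - 1);

	if (n < 0) {
		perror("readlink(/proc/self)");
		return 1;
	}
	buf[n] = '\0';
	/* expected: the symlink target equals our own tgid */
	printf("/proc/self -> %s (getpid() == %d)\n", buf, (int)getpid());
	return 0;
}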
  #ifdef CONFIG_TASK_IO_ACCOUNTING
  static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
  {
@@@ -2706,7 -2458,6 +2566,7 @@@ static const struct pid_entry tgid_base
        REG("cgroup",  S_IRUGO, proc_cgroup_operations),
  #endif
        INF("oom_score",  S_IRUGO, proc_oom_score),
 +      REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
        REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
  #ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
@@@ -2839,10 -2590,6 +2699,6 @@@ void proc_flush_task(struct task_struc
                proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
                                        tgid->numbers[i].nr);
        }
-       upid = &pid->numbers[pid->level];
-       if (upid->nr == 1)
-               pid_ns_release_proc(upid->ns);
  }
  
  static struct dentry *proc_pid_instantiate(struct inode *dir,
  
  struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
  {
-       struct dentry *result;
+       struct dentry *result = NULL;
        struct task_struct *task;
        unsigned tgid;
        struct pid_namespace *ns;
  
-       result = proc_base_lookup(dir, dentry);
-       if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
-               goto out;
        tgid = name_to_int(dentry);
        if (tgid == ~0U)
                goto out;
@@@ -2947,7 -2690,7 +2799,7 @@@ retry
        return iter;
  }
  
- #define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff))
+ #define TGID_OFFSET (FIRST_PROCESS_ENTRY)
  
  static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
        struct tgid_iter iter)
@@@ -2967,25 -2710,12 +2819,12 @@@ static int fake_filldir(void *buf, cons
  /* for the /proc/ directory itself, after non-process stuff has been done */
  int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
  {
-       unsigned int nr;
-       struct task_struct *reaper;
        struct tgid_iter iter;
        struct pid_namespace *ns;
        filldir_t __filldir;
  
        if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
-               goto out_no_task;
-       nr = filp->f_pos - FIRST_PROCESS_ENTRY;
-       reaper = get_proc_task(filp->f_path.dentry->d_inode);
-       if (!reaper)
-               goto out_no_task;
-       for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
-               const struct pid_entry *p = &proc_base_stuff[nr];
-               if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
-                       goto out;
-       }
+               goto out;
  
        ns = filp->f_dentry->d_sb->s_fs_info;
        iter.task = NULL;
        }
        filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
  out:
-       put_task_struct(reaper);
- out_no_task:
        return 0;
  }
  
@@@ -3073,7 -2801,6 +2910,7 @@@ static const struct pid_entry tid_base_
        REG("cgroup",  S_IRUGO, proc_cgroup_operations),
  #endif
        INF("oom_score", S_IRUGO, proc_oom_score),
 +      REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
        REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
  #ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
diff --combined include/linux/cred.h
index 0142aacb70b7049583a1618c735c7410a6827d65,856d2622d832eeab89abde1de6ce5d07d58b69d3..abb2cd50f6b26ace7da7a554527087d4685d3e76
@@@ -76,6 -76,21 +76,6 @@@ extern int groups_search(const struct g
  extern int in_group_p(kgid_t);
  extern int in_egroup_p(kgid_t);
  
 -/*
 - * The common credentials for a thread group
 - * - shared by CLONE_THREAD
 - */
 -#ifdef CONFIG_KEYS
 -struct thread_group_cred {
 -      atomic_t        usage;
 -      pid_t           tgid;                   /* thread group process ID */
 -      spinlock_t      lock;
 -      struct key __rcu *session_keyring;      /* keyring inherited over fork */
 -      struct key      *process_keyring;       /* keyring private to this process */
 -      struct rcu_head rcu;                    /* RCU deletion hook */
 -};
 -#endif
 -
  /*
   * The security context of a task
   *
@@@ -124,8 -139,6 +124,8 @@@ struct cred 
  #ifdef CONFIG_KEYS
        unsigned char   jit_keyring;    /* default keyring to attach requested
                                         * keys to */
 +      struct key __rcu *session_keyring; /* keyring inherited over fork */
 +      struct key      *process_keyring; /* keyring private to this process */
        struct key      *thread_keyring; /* keyring private to this thread */
        struct key      *request_key_auth; /* assumed request_key authority */
        struct thread_group_cred *tgcred; /* thread-group shared credentials */
@@@ -344,10 -357,8 +344,8 @@@ static inline void put_cred(const struc
  extern struct user_namespace init_user_ns;
  #ifdef CONFIG_USER_NS
  #define current_user_ns()     (current_cred_xxx(user_ns))
- #define task_user_ns(task)    (task_cred_xxx((task), user_ns))
  #else
  #define current_user_ns()     (&init_user_ns)
- #define task_user_ns(task)    (&init_user_ns)
  #endif
  
  
diff --combined include/linux/fs.h
index 408fb1e77a0a36804363d49d804e3ea3d3a802ab,5037aa6817fd2878c2da338de6680563d3bfd956..035521b46528ace428c7f07d4aa49ac4880e145e
@@@ -418,7 -418,7 +418,7 @@@ struct address_space 
        struct backing_dev_info *backing_dev_info; /* device readahead, etc */
        spinlock_t              private_lock;   /* for use by the address_space */
        struct list_head        private_list;   /* ditto */
 -      struct address_space    *assoc_mapping; /* ditto */
 +      void                    *private_data;  /* ditto */
  } __attribute__((aligned(sizeof(long))));
        /*
         * On most architectures that alignment is already the case; but
@@@ -462,6 -462,8 +462,6 @@@ struct block_device 
        int                     bd_fsfreeze_count;
        /* Mutex for freeze */
        struct mutex            bd_fsfreeze_mutex;
 -      /* A semaphore that prevents I/O while block size is being changed */
 -      struct percpu_rw_semaphore      bd_block_size_semaphore;
  };
  
  /*
@@@ -1810,6 -1812,8 +1810,8 @@@ struct file_system_type 
  #define FS_REQUIRES_DEV               1 
  #define FS_BINARY_MOUNTDATA   2
  #define FS_HAS_SUBTYPE                4
+ #define FS_USERNS_MOUNT               8       /* Can be mounted by userns root */
+ #define FS_USERNS_DEV_MOUNT   16 /* A userns mount does not imply MNT_NODEV */
  #define FS_REVAL_DOT          16384   /* Check the paths ".", ".." for staleness */
  #define FS_RENAME_DOES_D_MOVE 32768   /* FS will handle d_move() during rename() internally. */
        struct dentry *(*mount) (struct file_system_type *, int,
@@@ -2047,6 -2051,7 +2049,6 @@@ extern void unregister_blkdev(unsigned 
  extern struct block_device *bdget(dev_t);
  extern struct block_device *bdgrab(struct block_device *bdev);
  extern void bd_set_size(struct block_device *, loff_t size);
 -extern sector_t blkdev_max_block(struct block_device *bdev);
  extern void bd_forget(struct inode *inode);
  extern void bdput(struct block_device *);
  extern void invalidate_bdev(struct block_device *);
@@@ -2376,6 -2381,8 +2378,6 @@@ extern int generic_segment_checks(cons
                unsigned long *nr_segs, size_t *count, int access_flags);
  
  /* fs/block_dev.c */
 -extern ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
 -                             unsigned long nr_segs, loff_t pos);
  extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
                                unsigned long nr_segs, loff_t pos);
  extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
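
The FS_USERNS_MOUNT and FS_USERNS_DEV_MOUNT flags added above are per-filesystem opt-ins: a filesystem sets them in file_system_type.fs_flags to state that a user namespace root may mount it (and, for the DEV variant, that such a mount is not forced to MNT_NODEV). Proc and sysfs are the users elsewhere in this series; a sketch of what the declaration looks like for a hypothetical filesystem (foo_fill_super() is assumed to exist):

static struct dentry *foo_mount(struct file_system_type *fs_type, int flags,
				const char *dev_name, void *data)
{
	return mount_nodev(fs_type, flags, data, foo_fill_super);
}

static struct file_system_type foo_fs_type = {
	.name     = "foofs",
	.mount    = foo_mount,
	.kill_sb  = kill_anon_super,
	.fs_flags = FS_USERNS_MOUNT,	/* mountable by a userns root */
};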
diff --combined init/Kconfig
index 1a207efca5918d8ba97a8f9abffcc65527f3da2d,38c1a1d0bf3879441d162cfdcae667afea99ac43..675d8a2326cf29fc3c758e6a4533e98d40aa6aa1
@@@ -486,35 -486,35 +486,35 @@@ config PREEMPT_RC
          This option enables preemptible-RCU code that is common between
          the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
  
 +config CONTEXT_TRACKING
 +       bool
 +
  config RCU_USER_QS
        bool "Consider userspace as in RCU extended quiescent state"
 -      depends on HAVE_RCU_USER_QS && SMP
 +      depends on HAVE_CONTEXT_TRACKING && SMP
 +      select CONTEXT_TRACKING
        help
          This option sets hooks on kernel / userspace boundaries and
          puts RCU in extended quiescent state when the CPU runs in
          userspace. It means that when a CPU runs in userspace, it is
          excluded from the global RCU state machine and thus doesn't
 -        to keep the timer tick on for RCU.
 +        try to keep the timer tick on for RCU.
  
          Unless you want to hack and help the development of the full
 -        tickless feature, you shouldn't enable this option. It adds
 -        unnecessary overhead.
 +        dynticks mode, you shouldn't enable this option.  It also
 +        adds unnecessary overhead.
  
          If unsure say N
  
 -config RCU_USER_QS_FORCE
 -      bool "Force userspace extended QS by default"
 -      depends on RCU_USER_QS
 +config CONTEXT_TRACKING_FORCE
 +      bool "Force context tracking"
 +      depends on CONTEXT_TRACKING
        help
 -        Set the hooks in user/kernel boundaries by default in order to
 -        test this feature that treats userspace as an extended quiescent
 -        state until we have a real user like a full adaptive nohz option.
 -
 -        Unless you want to hack and help the development of the full
 -        tickless feature, you shouldn't enable this option. It adds
 -        unnecessary overhead.
 -
 -        If unsure say N
 +        Probe on user/kernel boundaries by default in order to
 +        test the features that rely on it such as userspace RCU extended
 +        quiescent states.
 +        This test is there for debugging until we have a real user like the
 +        full dynticks mode.
  
  config RCU_FANOUT
        int "Tree-based hierarchical RCU fanout value"
@@@ -582,13 -582,14 +582,13 @@@ config RCU_FAST_NO_H
        depends on NO_HZ && SMP
        default n
        help
 -        This option causes RCU to attempt to accelerate grace periods
 -        in order to allow CPUs to enter dynticks-idle state more
 -        quickly.  On the other hand, this option increases the overhead
 -        of the dynticks-idle checking, particularly on systems with
 -        large numbers of CPUs.
 +        This option causes RCU to attempt to accelerate grace periods in
 +        order to allow CPUs to enter dynticks-idle state more quickly.
 +        On the other hand, this option increases the overhead of the
 +        dynticks-idle checking, thus degrading scheduling latency.
  
 -        Say Y if energy efficiency is critically important, particularly
 -              if you have relatively few CPUs.
 +        Say Y if energy efficiency is critically important, and you don't
 +              care about real-time response.
  
          Say N if you are unsure.
  
@@@ -654,28 -655,6 +654,28 @@@ config RCU_BOOST_DELA
  
          Accept the default if unsure.
  
 +config RCU_NOCB_CPU
 +      bool "Offload RCU callback processing from boot-selected CPUs"
 +      depends on TREE_RCU || TREE_PREEMPT_RCU
 +      default n
 +      help
 +        Use this option to reduce OS jitter for aggressive HPC or
 +        real-time workloads.  It can also be used to offload RCU
 +        callback invocation to energy-efficient CPUs in battery-powered
 +        asymmetric multiprocessors.
 +
 +        This option offloads callback invocation from the set of
 +        CPUs specified at boot time by the rcu_nocbs parameter.
 +        For each such CPU, a kthread ("rcuoN") will be created to
 +        invoke callbacks, where the "N" is the CPU being offloaded.
 +        Nothing prevents this kthread from running on the specified
 +        CPUs, but (1) the kthreads may be preempted between each
 +        callback, and (2) affinity or cgroups can be used to force
 +        the kthreads to run on whatever set of CPUs is desired.
 +
 +        Say Y here if you want reduced OS jitter on selected CPUs.
 +        Say N here if you are unsure.
 +
  endmenu # "RCU Subsystem"
  
  config IKCONFIG
@@@ -717,50 -696,6 +717,50 @@@ config LOG_BUF_SHIF
  config HAVE_UNSTABLE_SCHED_CLOCK
        bool
  
 +#
 +# For architectures that want to enable the support for NUMA-affine scheduler
 +# balancing logic:
 +#
 +config ARCH_SUPPORTS_NUMA_BALANCING
 +      bool
 +
 +# For architectures that (ab)use NUMA to represent different memory regions
 +# all cpu-local but of different latencies, such as SuperH.
 +#
 +config ARCH_WANT_NUMA_VARIABLE_LOCALITY
 +      bool
 +
 +#
 +# For architectures that are willing to define _PAGE_NUMA as _PAGE_PROTNONE
 +config ARCH_WANTS_PROT_NUMA_PROT_NONE
 +      bool
 +
 +config ARCH_USES_NUMA_PROT_NONE
 +      bool
 +      default y
 +      depends on ARCH_WANTS_PROT_NUMA_PROT_NONE
 +      depends on NUMA_BALANCING
 +
 +config NUMA_BALANCING_DEFAULT_ENABLED
 +      bool "Automatically enable NUMA aware memory/task placement"
 +      default y
 +      depends on NUMA_BALANCING
 +      help
 +        If set, automatic NUMA balancing will be enabled if running on a NUMA
 +        machine.
 +
 +config NUMA_BALANCING
 +      bool "Memory placement aware NUMA scheduler"
 +      depends on ARCH_SUPPORTS_NUMA_BALANCING
 +      depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
 +      depends on SMP && NUMA && MIGRATION
 +      help
 +        This option adds support for automatic NUMA aware memory/task placement.
 +        The mechanism is quite primitive and is based on migrating memory, when
 +        it is referenced, to the node the task is running on.
 +
 +        This system will be inactive on UMA systems.
 +
  menuconfig CGROUPS
        boolean "Control Group support"
        depends on EVENTFD
@@@ -1069,11 -1004,9 +1069,9 @@@ config UIDGID_CONVERTE
        # Filesystems
        depends on 9P_FS = n
        depends on AFS_FS = n
-       depends on AUTOFS4_FS = n
        depends on CEPH_FS = n
        depends on CIFS = n
        depends on CODA_FS = n
-       depends on FUSE_FS = n
        depends on GFS2_FS = n
        depends on NCP_FS = n
        depends on NFSD = n
diff --combined init/main.c
index 63ae904a99a8eb3718f6a57ee515c12f0b60b8dc,317750a18f74c87be9023bcc42bdbc4cb58c1177..baf1f0f5c4611eb08b3f0eae7995c5d789f8e741
@@@ -442,11 -442,9 +442,11 @@@ void __init __weak smp_setup_processor_
  {
  }
  
 +# if THREAD_SIZE >= PAGE_SIZE
  void __init __weak thread_info_cache_init(void)
  {
  }
 +#endif
  
  /*
   * Set up kernel memory allocators
@@@ -812,7 -810,6 +812,6 @@@ static int __ref kernel_init(void *unus
        system_state = SYSTEM_RUNNING;
        numa_default_policy();
  
-       current->signal->flags |= SIGNAL_UNKILLABLE;
        flush_delayed_fput();
  
        if (ramdisk_execute_command) {
@@@ -857,7 -854,7 +856,7 @@@ static void __init kernel_init_freeable
        /*
         * init can allocate pages on any node
         */
 -      set_mems_allowed(node_states[N_HIGH_MEMORY]);
 +      set_mems_allowed(node_states[N_MEMORY]);
        /*
         * init can run on any cpu.
         */
diff --combined kernel/cgroup.c
index f34c41bfaa37daa2b399c6387740d397703a277c,0dbfba2efa770be9cbd79e730be501e12db04728..9915ffe013727d68fbe21013288b282154d576de
@@@ -138,9 -138,6 +138,9 @@@ struct cgroupfs_root 
        /* Hierarchy-specific flags */
        unsigned long flags;
  
 +      /* IDs for cgroups in this hierarchy */
 +      struct ida cgroup_ida;
 +
        /* The path to use for release notifications. */
        char release_agent_path[PATH_MAX];
  
@@@ -174,8 -171,8 +174,8 @@@ struct css_id 
         * The css to which this ID points. This pointer is set to valid value
         * after cgroup is populated. If cgroup is removed, this will be NULL.
         * This pointer is expected to be RCU-safe because destroy()
 -       * is called after synchronize_rcu(). But for safe use, css_is_removed()
 -       * css_tryget() should be used for avoiding race.
 +       * is called after synchronize_rcu(). But for safe use, css_tryget()
 +       * should be used for avoiding race.
         */
        struct cgroup_subsys_state __rcu *css;
        /*
@@@ -245,10 -242,6 +245,10 @@@ static DEFINE_SPINLOCK(hierarchy_id_loc
   */
  static int need_forkexit_callback __read_mostly;
  
 +static int cgroup_destroy_locked(struct cgroup *cgrp);
 +static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
 +                            struct cftype cfts[], bool is_add);
 +
  #ifdef CONFIG_PROVE_LOCKING
  int cgroup_lock_is_held(void)
  {
@@@ -301,6 -294,11 +301,6 @@@ static int notify_on_release(const stru
        return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
  }
  
 -static int clone_children(const struct cgroup *cgrp)
 -{
 -      return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
 -}
 -
  /*
   * for_each_subsys() allows you to iterate on each subsystem attached to
   * an active hierarchy
@@@ -784,12 -782,12 +784,12 @@@ static struct cgroup *task_cgroup_from_
   *    The task_lock() exception
   *
   * The need for this exception arises from the action of
 - * cgroup_attach_task(), which overwrites one tasks cgroup pointer with
 + * cgroup_attach_task(), which overwrites one task's cgroup pointer with
   * another.  It does so using cgroup_mutex, however there are
   * several performance critical places that need to reference
   * task->cgroup without the expense of grabbing a system global
   * mutex.  Therefore except as noted below, when dereferencing or, as
 - * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use
 + * in cgroup_attach_task(), modifying a task's cgroup pointer we use
   * task_lock(), which acts on a spinlock (task->alloc_lock) already in
   * the task_struct routinely used for such matters.
   *
@@@ -856,6 -854,30 +856,6 @@@ static struct inode *cgroup_new_inode(u
        return inode;
  }
  
 -/*
 - * Call subsys's pre_destroy handler.
 - * This is called before css refcnt check.
 - */
 -static int cgroup_call_pre_destroy(struct cgroup *cgrp)
 -{
 -      struct cgroup_subsys *ss;
 -      int ret = 0;
 -
 -      for_each_subsys(cgrp->root, ss) {
 -              if (!ss->pre_destroy)
 -                      continue;
 -
 -              ret = ss->pre_destroy(cgrp);
 -              if (ret) {
 -                      /* ->pre_destroy() failure is being deprecated */
 -                      WARN_ON_ONCE(!ss->__DEPRECATED_clear_css_refs);
 -                      break;
 -              }
 -      }
 -
 -      return ret;
 -}
 -
  static void cgroup_diput(struct dentry *dentry, struct inode *inode)
  {
        /* is dentry a directory ? if so, kfree() associated cgroup */
                 * Release the subsystem state objects.
                 */
                for_each_subsys(cgrp->root, ss)
 -                      ss->destroy(cgrp);
 +                      ss->css_free(cgrp);
  
                cgrp->root->number_of_cgroups--;
                mutex_unlock(&cgroup_mutex);
  
                simple_xattrs_free(&cgrp->xattrs);
  
 +              ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
                kfree_rcu(cgrp, rcu_head);
        } else {
                struct cfent *cfe = __d_cfe(dentry);
@@@ -966,7 -987,7 +966,7 @@@ static void cgroup_clear_directory(stru
                if (!test_bit(ss->subsys_id, &subsys_mask))
                        continue;
                list_for_each_entry(set, &ss->cftsets, node)
 -                      cgroup_rm_file(cgrp, set->cfts);
 +                      cgroup_addrm_files(cgrp, NULL, set->cfts, false);
        }
        if (base_files) {
                while (!list_empty(&cgrp->files))
@@@ -993,6 -1014,33 +993,6 @@@ static void cgroup_d_remove_dir(struct 
        remove_dir(dentry);
  }
  
 -/*
 - * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
 - * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
 - * reference to css->refcnt. In general, this refcnt is expected to goes down
 - * to zero, soon.
 - *
 - * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
 - */
 -static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
 -
 -static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
 -{
 -      if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
 -              wake_up_all(&cgroup_rmdir_waitq);
 -}
 -
 -void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
 -{
 -      css_get(css);
 -}
 -
 -void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
 -{
 -      cgroup_wakeup_rmdir_waiter(css->cgroup);
 -      css_put(css);
 -}
 -
  /*
   * Call with cgroup_mutex held. Drops reference counts on modules, including
   * any duplicate ones that parse_cgroupfs_options took. If this function
@@@ -1102,7 -1150,7 +1102,7 @@@ static int cgroup_show_options(struct s
                seq_puts(seq, ",xattr");
        if (strlen(root->release_agent_path))
                seq_printf(seq, ",release_agent=%s", root->release_agent_path);
 -      if (clone_children(&root->top_cgroup))
 +      if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
                seq_puts(seq, ",clone_children");
        if (strlen(root->name))
                seq_printf(seq, ",name=%s", root->name);
@@@ -1114,7 -1162,7 +1114,7 @@@ struct cgroup_sb_opts 
        unsigned long subsys_mask;
        unsigned long flags;
        char *release_agent;
 -      bool clone_children;
 +      bool cpuset_clone_children;
        char *name;
        /* User explicitly requested empty subsystem */
        bool none;
@@@ -1165,7 -1213,7 +1165,7 @@@ static int parse_cgroupfs_options(char 
                        continue;
                }
                if (!strcmp(token, "clone_children")) {
 -                      opts->clone_children = true;
 +                      opts->cpuset_clone_children = true;
                        continue;
                }
                if (!strcmp(token, "xattr")) {
@@@ -1349,21 -1397,14 +1349,21 @@@ static int cgroup_remount(struct super_
                goto out_unlock;
        }
  
 +      /*
 +       * Clear out the files of subsystems that should be removed, do
 +       * this before rebind_subsystems, since rebind_subsystems may
 +       * change this hierarchy's subsys_list.
 +       */
 +      cgroup_clear_directory(cgrp->dentry, false, removed_mask);
 +
        ret = rebind_subsystems(root, opts.subsys_mask);
        if (ret) {
 +              /* rebind_subsystems failed, re-populate the removed files */
 +              cgroup_populate_dir(cgrp, false, removed_mask);
                drop_parsed_module_refcounts(opts.subsys_mask);
                goto out_unlock;
        }
  
 -      /* clear out any existing files and repopulate subsystem files */
 -      cgroup_clear_directory(cgrp->dentry, false, removed_mask);
        /* re-populate subsystem files */
        cgroup_populate_dir(cgrp, false, added_mask);
  
@@@ -1391,7 -1432,6 +1391,7 @@@ static void init_cgroup_housekeeping(st
        INIT_LIST_HEAD(&cgrp->children);
        INIT_LIST_HEAD(&cgrp->files);
        INIT_LIST_HEAD(&cgrp->css_sets);
 +      INIT_LIST_HEAD(&cgrp->allcg_node);
        INIT_LIST_HEAD(&cgrp->release_list);
        INIT_LIST_HEAD(&cgrp->pidlists);
        mutex_init(&cgrp->pidlist_mutex);
@@@ -1410,8 -1450,8 +1410,8 @@@ static void init_cgroup_root(struct cgr
        root->number_of_cgroups = 1;
        cgrp->root = root;
        cgrp->top_cgroup = cgrp;
 -      list_add_tail(&cgrp->allcg_node, &root->allcg_list);
        init_cgroup_housekeeping(cgrp);
 +      list_add_tail(&cgrp->allcg_node, &root->allcg_list);
  }
  
  static bool init_root_id(struct cgroupfs_root *root)
@@@ -1478,13 -1518,12 +1478,13 @@@ static struct cgroupfs_root *cgroup_roo
  
        root->subsys_mask = opts->subsys_mask;
        root->flags = opts->flags;
 +      ida_init(&root->cgroup_ida);
        if (opts->release_agent)
                strcpy(root->release_agent_path, opts->release_agent);
        if (opts->name)
                strcpy(root->name, opts->name);
 -      if (opts->clone_children)
 -              set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
 +      if (opts->cpuset_clone_children)
 +              set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags);
        return root;
  }
  
@@@ -1497,7 -1536,6 +1497,7 @@@ static void cgroup_drop_root(struct cgr
        spin_lock(&hierarchy_id_lock);
        ida_remove(&hierarchy_ida, root->hierarchy_id);
        spin_unlock(&hierarchy_id_lock);
 +      ida_destroy(&root->cgroup_ida);
        kfree(root);
  }
  
@@@ -1663,6 -1701,7 +1663,6 @@@ static struct dentry *cgroup_mount(stru
  
                free_cg_links(&tmp_cg_links);
  
 -              BUG_ON(!list_empty(&root_cgrp->sibling));
                BUG_ON(!list_empty(&root_cgrp->children));
                BUG_ON(root->number_of_cgroups != 1);
  
@@@ -1711,6 -1750,7 +1711,6 @@@ static void cgroup_kill_sb(struct super
  
        BUG_ON(root->number_of_cgroups != 1);
        BUG_ON(!list_empty(&cgrp->children));
 -      BUG_ON(!list_empty(&cgrp->sibling));
  
        mutex_lock(&cgroup_mutex);
        mutex_lock(&cgroup_root_mutex);
@@@ -1768,11 -1808,9 +1768,11 @@@ static struct kobject *cgroup_kobj
   */
  int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
  {
 +      struct dentry *dentry = cgrp->dentry;
        char *start;
 -      struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
 -                                                    cgroup_lock_is_held());
 +
 +      rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
 +                         "cgroup_path() called without proper locking");
  
        if (!dentry || cgrp == dummytop) {
                /*
                return 0;
        }
  
 -      start = buf + buflen;
 +      start = buf + buflen - 1;
  
 -      *--start = '\0';
 +      *start = '\0';
        for (;;) {
                int len = dentry->d_name.len;
  
                if (!cgrp)
                        break;
  
 -              dentry = rcu_dereference_check(cgrp->dentry,
 -                                             cgroup_lock_is_held());
 +              dentry = cgrp->dentry;
                if (!cgrp->parent)
                        continue;
                if (--start < buf)
@@@ -1891,7 -1930,9 +1891,7 @@@ EXPORT_SYMBOL_GPL(cgroup_taskset_size)
  /*
   * cgroup_task_migrate - move a task from one cgroup to another.
   *
 - * 'guarantee' is set if the caller promises that a new css_set for the task
 - * will already exist. If not set, this function might sleep, and can fail with
 - * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked.
 + * Must be called with cgroup_mutex and threadgroup locked.
   */
  static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
                                struct task_struct *tsk, struct css_set *newcg)
@@@ -1984,6 -2025,12 +1984,6 @@@ int cgroup_attach_task(struct cgroup *c
        }
  
        synchronize_rcu();
 -
 -      /*
 -       * wake up rmdir() waiter. the rmdir should fail since the cgroup
 -       * is no longer empty.
 -       */
 -      cgroup_wakeup_rmdir_waiter(cgrp);
  out:
        if (retval) {
                for_each_subsys(root, ss) {
@@@ -2153,6 -2200,7 +2153,6 @@@ static int cgroup_attach_proc(struct cg
         * step 5: success! and cleanup
         */
        synchronize_rcu();
 -      cgroup_wakeup_rmdir_waiter(cgrp);
        retval = 0;
  out_put_css_set_refs:
        if (retval) {
@@@ -2663,17 -2711,10 +2663,17 @@@ static int cgroup_create_file(struct de
  
                /* start off with i_nlink == 2 (for "." entry) */
                inc_nlink(inode);
 +              inc_nlink(dentry->d_parent->d_inode);
  
 -              /* start with the directory inode held, so that we can
 -               * populate it without racing with another mkdir */
 -              mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
 +              /*
 +               * Control reaches here with cgroup_mutex held.
 +               * @inode->i_mutex should nest outside cgroup_mutex but we
 +               * want to populate it immediately without releasing
 +               * cgroup_mutex.  As @inode isn't visible to anyone else
 +               * yet, trylock will always succeed without affecting
 +               * lockdep checks.
 +               */
 +              WARN_ON_ONCE(!mutex_trylock(&inode->i_mutex));
        } else if (S_ISREG(mode)) {
                inode->i_size = 0;
                inode->i_fop = &cgroup_file_operations;
        return 0;
  }
  
 -/*
 - * cgroup_create_dir - create a directory for an object.
 - * @cgrp: the cgroup we create the directory for. It must have a valid
 - *        ->parent field. And we are going to fill its ->dentry field.
 - * @dentry: dentry of the new cgroup
 - * @mode: mode to set on new directory.
 - */
 -static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
 -                              umode_t mode)
 -{
 -      struct dentry *parent;
 -      int error = 0;
 -
 -      parent = cgrp->parent->dentry;
 -      error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
 -      if (!error) {
 -              dentry->d_fsdata = cgrp;
 -              inc_nlink(parent->d_inode);
 -              rcu_assign_pointer(cgrp->dentry, dentry);
 -              dget(dentry);
 -      }
 -      dput(dentry);
 -
 -      return error;
 -}
 -
  /**
   * cgroup_file_mode - deduce file mode of a control file
   * @cft: the control file in question
@@@ -2724,6 -2791,12 +2724,6 @@@ static int cgroup_add_file(struct cgrou
  
        simple_xattrs_init(&cft->xattrs);
  
 -      /* does @cft->flags tell us to skip creation on @cgrp? */
 -      if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
 -              return 0;
 -      if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
 -              return 0;
 -
        if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
                strcpy(name, subsys->name);
                strcat(name, ".");
@@@ -2764,12 -2837,6 +2764,12 @@@ static int cgroup_addrm_files(struct cg
        int err, ret = 0;
  
        for (cft = cfts; cft->name[0] != '\0'; cft++) {
 +              /* does cft->flags tell us to skip this file on @cgrp? */
 +              if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
 +                      continue;
 +              if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
 +                      continue;
 +
                if (is_add)
                        err = cgroup_add_file(cgrp, subsys, cft);
                else
@@@ -2977,92 -3044,6 +2977,92 @@@ static void cgroup_enable_task_cg_lists
        write_unlock(&css_set_lock);
  }
  
 +/**
 + * cgroup_next_descendant_pre - find the next descendant for pre-order walk
 + * @pos: the current position (%NULL to initiate traversal)
 + * @cgroup: cgroup whose descendants to walk
 + *
 + * To be used by cgroup_for_each_descendant_pre().  Find the next
 + * descendant to visit for pre-order traversal of @cgroup's descendants.
 + */
 +struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 +                                        struct cgroup *cgroup)
 +{
 +      struct cgroup *next;
 +
 +      WARN_ON_ONCE(!rcu_read_lock_held());
 +
 +      /* if first iteration, pretend we just visited @cgroup */
 +      if (!pos) {
 +              if (list_empty(&cgroup->children))
 +                      return NULL;
 +              pos = cgroup;
 +      }
 +
 +      /* visit the first child if one exists */
 +      next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling);
 +      if (next)
 +              return next;
 +
 +      /* no child, visit its own or the closest ancestor's next sibling */
 +      do {
 +              next = list_entry_rcu(pos->sibling.next, struct cgroup,
 +                                    sibling);
 +              if (&next->sibling != &pos->parent->children)
 +                      return next;
 +
 +              pos = pos->parent;
 +      } while (pos != cgroup);
 +
 +      return NULL;
 +}
 +EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
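
The walk above is normally driven through the cgroup_for_each_descendant_pre() helper referenced in its comment rather than by calling cgroup_next_descendant_pre() directly.  A minimal illustrative sketch, not part of this patch (the caller and its pr_debug() output are hypothetical), of a pre-order walk done under RCU as the WARN_ON_ONCE() above expects:

#include <linux/cgroup.h>
#include <linux/printk.h>
#include <linux/rcupdate.h>

/* Hypothetical caller: log the path of every descendant of @root_cgrp. */
static void example_walk_descendants_pre(struct cgroup *root_cgrp)
{
	struct cgroup *pos;
	char buf[256];

	rcu_read_lock();
	cgroup_for_each_descendant_pre(pos, root_cgrp) {
		/* pre-order guarantees a parent is visited before its children */
		if (!cgroup_path(pos, buf, sizeof(buf)))
			pr_debug("cgroup: visiting %s\n", buf);
	}
	rcu_read_unlock();
}
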
 +
 +static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
 +{
 +      struct cgroup *last;
 +
 +      do {
 +              last = pos;
 +              pos = list_first_or_null_rcu(&pos->children, struct cgroup,
 +                                           sibling);
 +      } while (pos);
 +
 +      return last;
 +}
 +
 +/**
 + * cgroup_next_descendant_post - find the next descendant for post-order walk
 + * @pos: the current position (%NULL to initiate traversal)
 + * @cgroup: cgroup whose descendants to walk
 + *
 + * To be used by cgroup_for_each_descendant_post().  Find the next
 + * descendant to visit for post-order traversal of @cgroup's descendants.
 + */
 +struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
 +                                         struct cgroup *cgroup)
 +{
 +      struct cgroup *next;
 +
 +      WARN_ON_ONCE(!rcu_read_lock_held());
 +
 +      /* if first iteration, visit the leftmost descendant */
 +      if (!pos) {
 +              next = cgroup_leftmost_descendant(cgroup);
 +              return next != cgroup ? next : NULL;
 +      }
 +
 +      /* if there's an unvisited sibling, visit its leftmost descendant */
 +      next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
 +      if (&next->sibling != &pos->parent->children)
 +              return cgroup_leftmost_descendant(next);
 +
 +      /* no sibling left, visit parent */
 +      next = pos->parent;
 +      return next != cgroup ? next : NULL;
 +}
 +EXPORT_SYMBOL_GPL(cgroup_next_descendant_post);
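
The post-order variant visits every descendant before its parent, which is the natural order for tearing down per-cgroup state; it is likewise meant to be used through cgroup_for_each_descendant_post() under rcu_read_lock().  A hedged sketch (example_release_state() is a hypothetical per-controller helper, not an existing function):

#include <linux/cgroup.h>
#include <linux/rcupdate.h>

/* Hypothetical helper releasing whatever state a controller keeps per cgroup. */
static void example_release_state(struct cgroup *cgrp);

static void example_walk_descendants_post(struct cgroup *root_cgrp)
{
	struct cgroup *pos;

	rcu_read_lock();
	/* children are always visited before their parent here */
	cgroup_for_each_descendant_post(pos, root_cgrp)
		example_release_state(pos);
	rcu_read_unlock();
}
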
 +
  void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
        __acquires(css_set_lock)
  {
@@@ -3409,7 -3390,7 +3409,7 @@@ static struct cgroup_pidlist *cgroup_pi
  {
        struct cgroup_pidlist *l;
        /* don't need task_nsproxy() if we're looking at ourself */
-       struct pid_namespace *ns = current->nsproxy->pid_ns;
+       struct pid_namespace *ns = task_active_pid_ns(current);
  
        /*
         * We can't drop the pidlist_mutex before taking the l->mutex in case
@@@ -3776,7 -3757,7 +3776,7 @@@ static int cgroup_event_wake(wait_queue
        if (flags & POLLHUP) {
                __remove_wait_queue(event->wqh, &event->wait);
                spin_lock(&cgrp->event_list_lock);
 -              list_del(&event->list);
 +              list_del_init(&event->list);
                spin_unlock(&cgrp->event_list_lock);
                /*
                 * We are in atomic context, but cgroup_event_remove() may
@@@ -3913,7 -3894,7 +3913,7 @@@ fail
  static u64 cgroup_clone_children_read(struct cgroup *cgrp,
                                    struct cftype *cft)
  {
 -      return clone_children(cgrp);
 +      return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
  }
  
  static int cgroup_clone_children_write(struct cgroup *cgrp,
                                     u64 val)
  {
        if (val)
 -              set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
 +              set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
        else
 -              clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
 +              clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
        return 0;
  }
  
@@@ -4036,57 -4017,19 +4036,57 @@@ static void init_cgroup_css(struct cgro
        css->flags = 0;
        css->id = NULL;
        if (cgrp == dummytop)
 -              set_bit(CSS_ROOT, &css->flags);
 +              css->flags |= CSS_ROOT;
        BUG_ON(cgrp->subsys[ss->subsys_id]);
        cgrp->subsys[ss->subsys_id] = css;
  
        /*
 -       * If !clear_css_refs, css holds an extra ref to @cgrp->dentry
 -       * which is put on the last css_put().  dput() requires process
 -       * context, which css_put() may be called without.  @css->dput_work
 -       * will be used to invoke dput() asynchronously from css_put().
 +       * css holds an extra ref to @cgrp->dentry which is put on the last
 +       * css_put().  dput() requires process context, which css_put() may
 +       * be called without.  @css->dput_work will be used to invoke
 +       * dput() asynchronously from css_put().
         */
        INIT_WORK(&css->dput_work, css_dput_fn);
 -      if (ss->__DEPRECATED_clear_css_refs)
 -              set_bit(CSS_CLEAR_CSS_REFS, &css->flags);
 +}
 +
 +/* invoke ->css_online() on a new CSS and mark it online if successful */
 +static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
 +{
 +      int ret = 0;
 +
 +      lockdep_assert_held(&cgroup_mutex);
 +
 +      if (ss->css_online)
 +              ret = ss->css_online(cgrp);
 +      if (!ret)
 +              cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;
 +      return ret;
 +}
 +
 +/* if the CSS is online, invoke ->css_offline() on it and mark it offline */
 +static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
 +      __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
 +{
 +      struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
 +
 +      lockdep_assert_held(&cgroup_mutex);
 +
 +      if (!(css->flags & CSS_ONLINE))
 +              return;
 +
 +      /*
 +       * css_offline() should be called with cgroup_mutex unlocked.  See
 +       * 3fa59dfbc3 ("cgroup: fix potential deadlock in pre_destroy") for
 +       * details.  This temporary unlocking should go away once
 +       * cgroup_mutex is unexported from controllers.
 +       */
 +      if (ss->css_offline) {
 +              mutex_unlock(&cgroup_mutex);
 +              ss->css_offline(cgrp);
 +              mutex_lock(&cgroup_mutex);
 +      }
 +
 +      cgrp->subsys[ss->subsys_id]->flags &= ~CSS_ONLINE;
  }
  
  /*
@@@ -4106,27 -4049,10 +4106,27 @@@ static long cgroup_create(struct cgrou
        struct cgroup_subsys *ss;
        struct super_block *sb = root->sb;
  
 +      /* allocate the cgroup and its ID, 0 is reserved for the root */
        cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
        if (!cgrp)
                return -ENOMEM;
  
 +      cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL);
 +      if (cgrp->id < 0)
 +              goto err_free_cgrp;
 +
 +      /*
 +       * Only live parents can have children.  Note that the liveness
 +       * check isn't strictly necessary because cgroup_mkdir() and
 +       * cgroup_rmdir() are fully synchronized by i_mutex; however, do it
 +       * anyway so that locking is contained inside cgroup proper and we
 +       * don't get nasty surprises if we ever grow another caller.
 +       */
 +      if (!cgroup_lock_live_group(parent)) {
 +              err = -ENODEV;
 +              goto err_free_id;
 +      }
 +
        /* Grab a reference on the superblock so the hierarchy doesn't
         * get deleted on unmount if there are child cgroups.  This
         * can be done outside cgroup_mutex, since the sb can't
         * fs */
        atomic_inc(&sb->s_active);
  
 -      mutex_lock(&cgroup_mutex);
 -
        init_cgroup_housekeeping(cgrp);
  
        cgrp->parent = parent;
        if (notify_on_release(parent))
                set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
  
 -      if (clone_children(parent))
 -              set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
 +      if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
 +              set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
  
        for_each_subsys(root, ss) {
                struct cgroup_subsys_state *css;
  
 -              css = ss->create(cgrp);
 +              css = ss->css_alloc(cgrp);
                if (IS_ERR(css)) {
                        err = PTR_ERR(css);
 -                      goto err_destroy;
 +                      goto err_free_all;
                }
                init_cgroup_css(css, ss, cgrp);
                if (ss->use_id) {
                        err = alloc_css_id(ss, parent, cgrp);
                        if (err)
 -                              goto err_destroy;
 +                              goto err_free_all;
                }
 -              /* At error, ->destroy() callback has to free assigned ID. */
 -              if (clone_children(parent) && ss->post_clone)
 -                      ss->post_clone(cgrp);
 +      }
 +
 +      /*
 +       * Create directory.  cgroup_create_file() returns with the new
 +       * directory locked on success so that it can be populated without
 +       * dropping cgroup_mutex.
 +       */
 +      err = cgroup_create_file(dentry, S_IFDIR | mode, sb);
 +      if (err < 0)
 +              goto err_free_all;
 +      lockdep_assert_held(&dentry->d_inode->i_mutex);
 +
 +      /* allocation complete, commit to creation */
 +      dentry->d_fsdata = cgrp;
 +      cgrp->dentry = dentry;
 +      list_add_tail(&cgrp->allcg_node, &root->allcg_list);
 +      list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
 +      root->number_of_cgroups++;
 +
 +      /* each css holds a ref to the cgroup's dentry */
 +      for_each_subsys(root, ss)
 +              dget(dentry);
 +
 +      /* creation succeeded, notify subsystems */
 +      for_each_subsys(root, ss) {
 +              err = online_css(ss, cgrp);
 +              if (err)
 +                      goto err_destroy;
  
                if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
                    parent->parent) {
                }
        }
  
 -      list_add(&cgrp->sibling, &cgrp->parent->children);
 -      root->number_of_cgroups++;
 -
 -      err = cgroup_create_dir(cgrp, dentry, mode);
 -      if (err < 0)
 -              goto err_remove;
 -
 -      /* If !clear_css_refs, each css holds a ref to the cgroup's dentry */
 -      for_each_subsys(root, ss)
 -              if (!ss->__DEPRECATED_clear_css_refs)
 -                      dget(dentry);
 -
 -      /* The cgroup directory was pre-locked for us */
 -      BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
 -
 -      list_add_tail(&cgrp->allcg_node, &root->allcg_list);
 -
        err = cgroup_populate_dir(cgrp, true, root->subsys_mask);
 -      /* If err < 0, we have a half-filled directory - oh well ;) */
 +      if (err)
 +              goto err_destroy;
  
        mutex_unlock(&cgroup_mutex);
        mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
  
        return 0;
  
 - err_remove:
 -
 -      list_del(&cgrp->sibling);
 -      root->number_of_cgroups--;
 -
 - err_destroy:
 -
 +err_free_all:
        for_each_subsys(root, ss) {
                if (cgrp->subsys[ss->subsys_id])
 -                      ss->destroy(cgrp);
 +                      ss->css_free(cgrp);
        }
 -
        mutex_unlock(&cgroup_mutex);
 -
        /* Release the reference count that we took on the superblock */
        deactivate_super(sb);
 -
 +err_free_id:
 +      ida_simple_remove(&root->cgroup_ida, cgrp->id);
 +err_free_cgrp:
        kfree(cgrp);
        return err;
 +
 +err_destroy:
 +      cgroup_destroy_locked(cgrp);
 +      mutex_unlock(&cgroup_mutex);
 +      mutex_unlock(&dentry->d_inode->i_mutex);
 +      return err;
  }
  
  static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
@@@ -4278,60 -4197,153 +4278,60 @@@ static int cgroup_has_css_refs(struct c
        return 0;
  }
  
 -/*
 - * Atomically mark all (or else none) of the cgroup's CSS objects as
 - * CSS_REMOVED. Return true on success, or false if the cgroup has
 - * busy subsystems. Call with cgroup_mutex held
 - *
 - * Depending on whether a subsys has __DEPRECATED_clear_css_refs set or
 - * not, cgroup removal behaves differently.
 - *
 - * If clear is set, css refcnt for the subsystem should be zero before
 - * cgroup removal can be committed.  This is implemented by
 - * CGRP_WAIT_ON_RMDIR and retry logic around ->pre_destroy(), which may be
 - * called multiple times until all css refcnts reach zero and is allowed to
 - * veto removal on any invocation.  This behavior is deprecated and will be
 - * removed as soon as the existing user (memcg) is updated.
 - *
 - * If clear is not set, each css holds an extra reference to the cgroup's
 - * dentry and cgroup removal proceeds regardless of css refs.
 - * ->pre_destroy() will be called at least once and is not allowed to fail.
 - * On the last put of each css, whenever that may be, the extra dentry ref
 - * is put so that dentry destruction happens only after all css's are
 - * released.
 - */
 -static int cgroup_clear_css_refs(struct cgroup *cgrp)
 +static int cgroup_destroy_locked(struct cgroup *cgrp)
 +      __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
  {
 +      struct dentry *d = cgrp->dentry;
 +      struct cgroup *parent = cgrp->parent;
 +      DEFINE_WAIT(wait);
 +      struct cgroup_event *event, *tmp;
        struct cgroup_subsys *ss;
 -      unsigned long flags;
 -      bool failed = false;
 +      LIST_HEAD(tmp_list);
 +
 +      lockdep_assert_held(&d->d_inode->i_mutex);
 +      lockdep_assert_held(&cgroup_mutex);
  
 -      local_irq_save(flags);
 +      if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children))
 +              return -EBUSY;
  
        /*
 -       * Block new css_tryget() by deactivating refcnt.  If all refcnts
 -       * for subsystems w/ clear_css_refs set were 1 at the moment of
 -       * deactivation, we succeeded.
 +       * Block new css_tryget() by deactivating refcnt and mark @cgrp
 +       * removed.  This makes future css_tryget() and child creation
 +       * attempts fail thus maintaining the removal conditions verified
 +       * above.
         */
        for_each_subsys(cgrp->root, ss) {
                struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
  
                WARN_ON(atomic_read(&css->refcnt) < 0);
                atomic_add(CSS_DEACT_BIAS, &css->refcnt);
 -
 -              if (ss->__DEPRECATED_clear_css_refs)
 -                      failed |= css_refcnt(css) != 1;
 -      }
 -
 -      /*
 -       * If succeeded, set REMOVED and put all the base refs; otherwise,
 -       * restore refcnts to positive values.  Either way, all in-progress
 -       * css_tryget() will be released.
 -       */
 -      for_each_subsys(cgrp->root, ss) {
 -              struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
 -
 -              if (!failed) {
 -                      set_bit(CSS_REMOVED, &css->flags);
 -                      css_put(css);
 -              } else {
 -                      atomic_sub(CSS_DEACT_BIAS, &css->refcnt);
 -              }
        }
 +      set_bit(CGRP_REMOVED, &cgrp->flags);
  
 -      local_irq_restore(flags);
 -      return !failed;
 -}
 -
 -static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 -{
 -      struct cgroup *cgrp = dentry->d_fsdata;
 -      struct dentry *d;
 -      struct cgroup *parent;
 -      DEFINE_WAIT(wait);
 -      struct cgroup_event *event, *tmp;
 -      int ret;
 -
 -      /* the vfs holds both inode->i_mutex already */
 -again:
 -      mutex_lock(&cgroup_mutex);
 -      if (atomic_read(&cgrp->count) != 0) {
 -              mutex_unlock(&cgroup_mutex);
 -              return -EBUSY;
 -      }
 -      if (!list_empty(&cgrp->children)) {
 -              mutex_unlock(&cgroup_mutex);
 -              return -EBUSY;
 -      }
 -      mutex_unlock(&cgroup_mutex);
 -
 -      /*
 -       * In general, subsystem has no css->refcnt after pre_destroy(). But
 -       * in racy cases, subsystem may have to get css->refcnt after
 -       * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
 -       * make rmdir return -EBUSY too often. To avoid that, we use waitqueue
 -       * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
 -       * and subsystem's reference count handling. Please see css_get/put
 -       * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
 -       */
 -      set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 +      /* tell subsystems to initiate destruction */
 +      for_each_subsys(cgrp->root, ss)
 +              offline_css(ss, cgrp);
  
        /*
 -       * Call pre_destroy handlers of subsys. Notify subsystems
 -       * that rmdir() request comes.
 +       * Put all the base refs.  Each css holds an extra reference to the
 +       * cgroup's dentry and cgroup removal proceeds regardless of css
 +       * refs.  On the last put of each css, whenever that may be, the
 +       * extra dentry ref is put so that dentry destruction happens only
 +       * after all css's are released.
         */
 -      ret = cgroup_call_pre_destroy(cgrp);
 -      if (ret) {
 -              clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 -              return ret;
 -      }
 -
 -      mutex_lock(&cgroup_mutex);
 -      parent = cgrp->parent;
 -      if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
 -              clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 -              mutex_unlock(&cgroup_mutex);
 -              return -EBUSY;
 -      }
 -      prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
 -      if (!cgroup_clear_css_refs(cgrp)) {
 -              mutex_unlock(&cgroup_mutex);
 -              /*
 -               * Because someone may call cgroup_wakeup_rmdir_waiter() before
 -               * prepare_to_wait(), we need to check this flag.
 -               */
 -              if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
 -                      schedule();
 -              finish_wait(&cgroup_rmdir_waitq, &wait);
 -              clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 -              if (signal_pending(current))
 -                      return -EINTR;
 -              goto again;
 -      }
 -      /* NO css_tryget() can success after here. */
 -      finish_wait(&cgroup_rmdir_waitq, &wait);
 -      clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 +      for_each_subsys(cgrp->root, ss)
 +              css_put(cgrp->subsys[ss->subsys_id]);
  
        raw_spin_lock(&release_list_lock);
 -      set_bit(CGRP_REMOVED, &cgrp->flags);
        if (!list_empty(&cgrp->release_list))
                list_del_init(&cgrp->release_list);
        raw_spin_unlock(&release_list_lock);
  
        /* delete this cgroup from parent->children */
 -      list_del_init(&cgrp->sibling);
 -
 +      list_del_rcu(&cgrp->sibling);
        list_del_init(&cgrp->allcg_node);
  
 -      d = dget(cgrp->dentry);
 -
 +      dget(d);
        cgroup_d_remove_dir(d);
        dput(d);
  
        /*
         * Unregister events and notify userspace.
         * Notify userspace about cgroup removing only after rmdir of cgroup
 -       * directory to avoid race between userspace and kernelspace
 +       * directory to avoid race between userspace and kernelspace. Use
 +       * a temporary list to avoid a deadlock with cgroup_event_wake(). Since
 +       * cgroup_event_wake() is called with the wait queue head locked,
 +       * remove_wait_queue() cannot be called while holding event_list_lock.
         */
        spin_lock(&cgrp->event_list_lock);
 -      list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
 -              list_del(&event->list);
 +      list_splice_init(&cgrp->event_list, &tmp_list);
 +      spin_unlock(&cgrp->event_list_lock);
 +      list_for_each_entry_safe(event, tmp, &tmp_list, list) {
 +              list_del_init(&event->list);
                remove_wait_queue(event->wqh, &event->wait);
                eventfd_signal(event->eventfd, 1);
                schedule_work(&event->remove);
        }
 -      spin_unlock(&cgrp->event_list_lock);
  
 -      mutex_unlock(&cgroup_mutex);
        return 0;
  }
  
 +static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 +{
 +      int ret;
 +
 +      mutex_lock(&cgroup_mutex);
 +      ret = cgroup_destroy_locked(dentry->d_fsdata);
 +      mutex_unlock(&cgroup_mutex);
 +
 +      return ret;
 +}
 +
  static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
  {
        INIT_LIST_HEAD(&ss->cftsets);
@@@ -4390,15 -4388,13 +4390,15 @@@ static void __init cgroup_init_subsys(s
  
        printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
  
 +      mutex_lock(&cgroup_mutex);
 +
        /* init base cftset */
        cgroup_init_cftsets(ss);
  
        /* Create the top cgroup state for this subsystem */
        list_add(&ss->sibling, &rootnode.subsys_list);
        ss->root = &rootnode;
 -      css = ss->create(dummytop);
 +      css = ss->css_alloc(dummytop);
        /* We don't handle early failures gracefully */
        BUG_ON(IS_ERR(css));
        init_cgroup_css(css, ss, dummytop);
         * pointer to this state - since the subsystem is
         * newly registered, all tasks and hence the
         * init_css_set is in the subsystem's top cgroup. */
 -      init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
 +      init_css_set.subsys[ss->subsys_id] = css;
  
        need_forkexit_callback |= ss->fork || ss->exit;
  
        BUG_ON(!list_empty(&init_task.tasks));
  
        ss->active = 1;
 +      BUG_ON(online_css(ss, dummytop));
 +
 +      mutex_unlock(&cgroup_mutex);
  
        /* this function shouldn't be used with modular subsystems, since they
         * need to register a subsys_id, among other things */
   */
  int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
  {
 -      int i;
        struct cgroup_subsys_state *css;
 +      int i, ret;
  
        /* check name and function validity */
        if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
 -          ss->create == NULL || ss->destroy == NULL)
 +          ss->css_alloc == NULL || ss->css_free == NULL)
                return -EINVAL;
  
        /*
        subsys[ss->subsys_id] = ss;
  
        /*
 -       * no ss->create seems to need anything important in the ss struct, so
 -       * this can happen first (i.e. before the rootnode attachment).
 +       * no ss->css_alloc seems to need anything important in the ss
 +       * struct, so this can happen first (i.e. before the rootnode
 +       * attachment).
         */
 -      css = ss->create(dummytop);
 +      css = ss->css_alloc(dummytop);
        if (IS_ERR(css)) {
                /* failure case - need to deassign the subsys[] slot. */
                subsys[ss->subsys_id] = NULL;
        init_cgroup_css(css, ss, dummytop);
        /* init_idr must be after init_cgroup_css because it sets css->id. */
        if (ss->use_id) {
 -              int ret = cgroup_init_idr(ss, css);
 -              if (ret) {
 -                      dummytop->subsys[ss->subsys_id] = NULL;
 -                      ss->destroy(dummytop);
 -                      subsys[ss->subsys_id] = NULL;
 -                      mutex_unlock(&cgroup_mutex);
 -                      return ret;
 -              }
 +              ret = cgroup_init_idr(ss, css);
 +              if (ret)
 +                      goto err_unload;
        }
  
        /*
        write_unlock(&css_set_lock);
  
        ss->active = 1;
 +      ret = online_css(ss, dummytop);
 +      if (ret)
 +              goto err_unload;
  
        /* success! */
        mutex_unlock(&cgroup_mutex);
        return 0;
 +
 +err_unload:
 +      mutex_unlock(&cgroup_mutex);
 +      /* @ss can't be mounted here as try_module_get() would fail */
 +      cgroup_unload_subsys(ss);
 +      return ret;
  }
  EXPORT_SYMBOL_GPL(cgroup_load_subsys);
  
@@@ -4564,15 -4552,6 +4564,15 @@@ void cgroup_unload_subsys(struct cgroup
        BUG_ON(ss->root != &rootnode);
  
        mutex_lock(&cgroup_mutex);
 +
 +      offline_css(ss, dummytop);
 +      ss->active = 0;
 +
 +      if (ss->use_id) {
 +              idr_remove_all(&ss->idr);
 +              idr_destroy(&ss->idr);
 +      }
 +
        /* deassign the subsys_id */
        subsys[ss->subsys_id] = NULL;
  
                struct css_set *cg = link->cg;
  
                hlist_del(&cg->hlist);
 -              BUG_ON(!cg->subsys[ss->subsys_id]);
                cg->subsys[ss->subsys_id] = NULL;
                hhead = css_set_hash(cg->subsys);
                hlist_add_head(&cg->hlist, hhead);
        write_unlock(&css_set_lock);
  
        /*
 -       * remove subsystem's css from the dummytop and free it - need to free
 -       * before marking as null because ss->destroy needs the cgrp->subsys
 -       * pointer to find their state. note that this also takes care of
 -       * freeing the css_id.
 +       * remove subsystem's css from the dummytop and free it - need to
 +       * free before marking as null because ss->css_free needs the
 +       * cgrp->subsys pointer to find their state. note that this also
 +       * takes care of freeing the css_id.
         */
 -      ss->destroy(dummytop);
 +      ss->css_free(dummytop);
        dummytop->subsys[ss->subsys_id] = NULL;
  
        mutex_unlock(&cgroup_mutex);
@@@ -4644,8 -4624,8 +4644,8 @@@ int __init cgroup_init_early(void
  
                BUG_ON(!ss->name);
                BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
 -              BUG_ON(!ss->create);
 -              BUG_ON(!ss->destroy);
 +              BUG_ON(!ss->css_alloc);
 +              BUG_ON(!ss->css_free);
                if (ss->subsys_id != i) {
                        printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
                               ss->name, ss->subsys_id);
@@@ -4851,20 -4831,45 +4851,20 @@@ void cgroup_fork(struct task_struct *ch
        INIT_LIST_HEAD(&child->cg_list);
  }
  
 -/**
 - * cgroup_fork_callbacks - run fork callbacks
 - * @child: the new task
 - *
 - * Called on a new task very soon before adding it to the
 - * tasklist. No need to take any locks since no-one can
 - * be operating on this task.
 - */
 -void cgroup_fork_callbacks(struct task_struct *child)
 -{
 -      if (need_forkexit_callback) {
 -              int i;
 -              for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 -                      struct cgroup_subsys *ss = subsys[i];
 -
 -                      /*
 -                       * forkexit callbacks are only supported for
 -                       * builtin subsystems.
 -                       */
 -                      if (!ss || ss->module)
 -                              continue;
 -
 -                      if (ss->fork)
 -                              ss->fork(child);
 -              }
 -      }
 -}
 -
  /**
   * cgroup_post_fork - called on a new task after adding it to the task list
   * @child: the task in question
   *
 - * Adds the task to the list running through its css_set if necessary.
 - * Has to be after the task is visible on the task list in case we race
 - * with the first call to cgroup_iter_start() - to guarantee that the
 - * new task ends up on its list.
 + * Adds the task to the list running through its css_set if necessary and
 + * calls the subsystem fork() callbacks.  Has to be after the task is
 + * visible on the task list in case we race with the first call to
 + * cgroup_iter_start() - to guarantee that the new task ends up on its
 + * list.
   */
  void cgroup_post_fork(struct task_struct *child)
  {
 +      int i;
 +
        /*
         * use_task_css_set_links is set to 1 before we walk the tasklist
         * under the tasklist_lock and we read it here after we added the child
                task_unlock(child);
                write_unlock(&css_set_lock);
        }
 +
 +      /*
 +       * Call ss->fork().  This must happen after @child is linked on
 +       * css_set; otherwise, @child might change state between ->fork()
 +       * and addition to css_set.
 +       */
 +      if (need_forkexit_callback) {
 +              for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 +                      struct cgroup_subsys *ss = subsys[i];
 +
 +                      /*
 +                       * fork/exit callbacks are supported only for
 +                       * builtin subsystems and we don't need further
 +                       * synchronization as they never go away.
 +                       */
 +                      if (!ss || ss->module)
 +                              continue;
 +
 +                      if (ss->fork)
 +                              ss->fork(child);
 +              }
 +      }
  }
 +
  /**
   * cgroup_exit - detach cgroup from exiting task
   * @tsk: pointer to task_struct of exiting process
@@@ -5040,17 -5022,15 +5040,17 @@@ static void check_for_release(struct cg
  /* Caller must verify that the css is not for root cgroup */
  bool __css_tryget(struct cgroup_subsys_state *css)
  {
 -      do {
 -              int v = css_refcnt(css);
 +      while (true) {
 +              int t, v;
  
 -              if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v)
 +              v = css_refcnt(css);
 +              t = atomic_cmpxchg(&css->refcnt, v, v + 1);
 +              if (likely(t == v))
                        return true;
 +              else if (t < 0)
 +                      return false;
                cpu_relax();
 -      } while (!test_bit(CSS_REMOVED, &css->flags));
 -
 -      return false;
 +      }
  }
  EXPORT_SYMBOL_GPL(__css_tryget);
  
@@@ -5069,9 -5049,11 +5069,9 @@@ void __css_put(struct cgroup_subsys_sta
                        set_bit(CGRP_RELEASABLE, &cgrp->flags);
                        check_for_release(cgrp);
                }
 -              cgroup_wakeup_rmdir_waiter(cgrp);
                break;
        case 0:
 -              if (!test_bit(CSS_CLEAR_CSS_REFS, &css->flags))
 -                      schedule_work(&css->dput_work);
 +              schedule_work(&css->dput_work);
                break;
        }
        rcu_read_unlock();
@@@ -5457,7 -5439,7 +5457,7 @@@ struct cgroup_subsys_state *cgroup_css_
  }
  
  #ifdef CONFIG_CGROUP_DEBUG
 -static struct cgroup_subsys_state *debug_create(struct cgroup *cont)
 +static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cont)
  {
        struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
  
        return css;
  }
  
 -static void debug_destroy(struct cgroup *cont)
 +static void debug_css_free(struct cgroup *cont)
  {
        kfree(cont->subsys[debug_subsys_id]);
  }
@@@ -5596,8 -5578,8 +5596,8 @@@ static struct cftype debug_files[] =  
  
  struct cgroup_subsys debug_subsys = {
        .name = "debug",
 -      .create = debug_create,
 -      .destroy = debug_destroy,
 +      .css_alloc = debug_css_alloc,
 +      .css_free = debug_css_free,
        .subsys_id = debug_subsys_id,
        .base_cftypes = debug_files,
  };
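
The debug controller above only needs the allocate/free pair; a hypothetical controller using the full renamed callback set from this series could be wired up as sketched below (the "example" subsystem and its example_subsys_id are made up for illustration, real IDs come from the SUBSYS() list in include/linux/cgroup_subsys.h):

#include <linux/cgroup.h>
#include <linux/err.h>
#include <linux/slab.h>

/* example_subsys_id is hypothetical; it does not exist in the tree. */
static struct cgroup_subsys_state *example_css_alloc(struct cgroup *cgrp)
{
	struct cgroup_subsys_state *css;

	css = kzalloc(sizeof(*css), GFP_KERNEL);
	return css ? css : ERR_PTR(-ENOMEM);
}

static int example_css_online(struct cgroup *cgrp)
{
	/* runs under cgroup_mutex; a failure here ends in cgroup_destroy_locked() */
	return 0;
}

static void example_css_offline(struct cgroup *cgrp)
{
	/* invoked with cgroup_mutex temporarily dropped, see offline_css() */
}

static void example_css_free(struct cgroup *cgrp)
{
	kfree(cgrp->subsys[example_subsys_id]);
}

struct cgroup_subsys example_subsys = {
	.name		= "example",
	.subsys_id	= example_subsys_id,
	.css_alloc	= example_css_alloc,
	.css_online	= example_css_online,
	.css_offline	= example_css_offline,
	.css_free	= example_css_free,
};
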
diff --combined kernel/events/core.c
index f9ff5493171d83208b140d19f8276fe3908e670b,738f3564e83bface92dfd487c9187242026e351a..301079d06f24ebe44081a286766436de104a3a91
@@@ -6155,7 -6155,7 +6155,7 @@@ perf_event_alloc(struct perf_event_att
  
        event->parent           = parent_event;
  
-       event->ns               = get_pid_ns(current->nsproxy->pid_ns);
+       event->ns               = get_pid_ns(task_active_pid_ns(current));
        event->id               = atomic64_inc_return(&perf_event_id);
  
        event->state            = PERF_EVENT_STATE_INACTIVE;
@@@ -7434,7 -7434,7 +7434,7 @@@ unlock
  device_initcall(perf_event_sysfs_init);
  
  #ifdef CONFIG_CGROUP_PERF
 -static struct cgroup_subsys_state *perf_cgroup_create(struct cgroup *cont)
 +static struct cgroup_subsys_state *perf_cgroup_css_alloc(struct cgroup *cont)
  {
        struct perf_cgroup *jc;
  
        return &jc->css;
  }
  
 -static void perf_cgroup_destroy(struct cgroup *cont)
 +static void perf_cgroup_css_free(struct cgroup *cont)
  {
        struct perf_cgroup *jc;
        jc = container_of(cgroup_subsys_state(cont, perf_subsys_id),
@@@ -7492,8 -7492,8 +7492,8 @@@ static void perf_cgroup_exit(struct cgr
  struct cgroup_subsys perf_subsys = {
        .name           = "perf_event",
        .subsys_id      = perf_subsys_id,
 -      .create         = perf_cgroup_create,
 -      .destroy        = perf_cgroup_destroy,
 +      .css_alloc      = perf_cgroup_css_alloc,
 +      .css_free       = perf_cgroup_css_free,
        .exit           = perf_cgroup_exit,
        .attach         = perf_cgroup_attach,
  
diff --combined kernel/exit.c
index 50d2e93c36ea6ff421192e7fb0f92a3cb0df6e63,d7fe58db452709444a42e622c964e604ddd42302..b4df21937216e1704670d89e8ef8fe8aa9aee810
@@@ -72,18 -72,6 +72,6 @@@ static void __unhash_process(struct tas
                list_del_rcu(&p->tasks);
                list_del_init(&p->sibling);
                __this_cpu_dec(process_counts);
-               /*
-                * If we are the last child process in a pid namespace to be
-                * reaped, notify the reaper sleeping zap_pid_ns_processes().
-                */
-               if (IS_ENABLED(CONFIG_PID_NS)) {
-                       struct task_struct *parent = p->real_parent;
-                       if ((task_active_pid_ns(parent)->child_reaper == parent) &&
-                           list_empty(&parent->children) &&
-                           (parent->flags & PF_EXITING))
-                               wake_up_process(parent);
-               }
        }
        list_del_rcu(&p->thread_group);
  }
@@@ -322,6 -310,43 +310,6 @@@ kill_orphaned_pgrp(struct task_struct *
        }
  }
  
 -/**
 - * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd
 - *
 - * If a kernel thread is launched as a result of a system call, or if
 - * it ever exits, it should generally reparent itself to kthreadd so it
 - * isn't in the way of other processes and is correctly cleaned up on exit.
 - *
 - * The various task state such as scheduling policy and priority may have
 - * been inherited from a user process, so we reset them to sane values here.
 - *
 - * NOTE that reparent_to_kthreadd() gives the caller full capabilities.
 - */
 -static void reparent_to_kthreadd(void)
 -{
 -      write_lock_irq(&tasklist_lock);
 -
 -      ptrace_unlink(current);
 -      /* Reparent to init */
 -      current->real_parent = current->parent = kthreadd_task;
 -      list_move_tail(&current->sibling, &current->real_parent->children);
 -
 -      /* Set the exit signal to SIGCHLD so we signal init on exit */
 -      current->exit_signal = SIGCHLD;
 -
 -      if (task_nice(current) < 0)
 -              set_user_nice(current, 0);
 -      /* cpus_allowed? */
 -      /* rt_priority? */
 -      /* signals? */
 -      memcpy(current->signal->rlim, init_task.signal->rlim,
 -             sizeof(current->signal->rlim));
 -
 -      atomic_inc(&init_cred.usage);
 -      commit_creds(&init_cred);
 -      write_unlock_irq(&tasklist_lock);
 -}
 -
  void __set_special_pids(struct pid *pid)
  {
        struct task_struct *curr = current->group_leader;
                change_pid(curr, PIDTYPE_PGID, pid);
  }
  
 -static void set_special_pids(struct pid *pid)
 -{
 -      write_lock_irq(&tasklist_lock);
 -      __set_special_pids(pid);
 -      write_unlock_irq(&tasklist_lock);
 -}
 -
  /*
   * Let kernel threads use this to say that they allow a certain signal.
   * Must not be used if kthread was cloned with CLONE_SIGHAND.
@@@ -372,6 -404,54 +360,6 @@@ int disallow_signal(int sig
  
  EXPORT_SYMBOL(disallow_signal);
  
 -/*
 - *    Put all the gunge required to become a kernel thread without
 - *    attached user resources in one place where it belongs.
 - */
 -
 -void daemonize(const char *name, ...)
 -{
 -      va_list args;
 -      sigset_t blocked;
 -
 -      va_start(args, name);
 -      vsnprintf(current->comm, sizeof(current->comm), name, args);
 -      va_end(args);
 -
 -      /*
 -       * If we were started as result of loading a module, close all of the
 -       * user space pages.  We don't need them, and if we didn't close them
 -       * they would be locked into memory.
 -       */
 -      exit_mm(current);
 -      /*
 -       * We don't want to get frozen, in case system-wide hibernation
 -       * or suspend transition begins right now.
 -       */
 -      current->flags |= (PF_NOFREEZE | PF_KTHREAD);
 -
 -      if (current->nsproxy != &init_nsproxy) {
 -              get_nsproxy(&init_nsproxy);
 -              switch_task_namespaces(current, &init_nsproxy);
 -      }
 -      set_special_pids(&init_struct_pid);
 -      proc_clear_tty(current);
 -
 -      /* Block and flush all signals */
 -      sigfillset(&blocked);
 -      sigprocmask(SIG_BLOCK, &blocked, NULL);
 -      flush_signals(current);
 -
 -      /* Become as one with the init task */
 -
 -      daemonize_fs_struct();
 -      daemonize_descriptors();
 -
 -      reparent_to_kthreadd();
 -}
 -
 -EXPORT_SYMBOL(daemonize);
 -
  #ifdef CONFIG_MM_OWNER
  /*
   * A task is exiting.   If it owned this mm, find a new owner for the mm.
@@@ -1094,11 -1174,11 +1082,11 @@@ static int wait_task_zombie(struct wait
                 * as other threads in the parent group can be right
                 * here reaping other children at the same time.
                 *
 -               * We use thread_group_times() to get times for the thread
 +               * We use thread_group_cputime_adjusted() to get times for the thread
                 * group, which consolidates times for all threads in the
                 * group including the group leader.
                 */
 -              thread_group_times(p, &tgutime, &tgstime);
 +              thread_group_cputime_adjusted(p, &tgutime, &tgstime);
                spin_lock_irq(&p->real_parent->sighand->siglock);
                psig = p->real_parent->signal;
                sig = p->signal;
diff --combined kernel/fork.c
index 115d6c2e4cca0dda8601efe7c3b114f3c37859a3,38e53b87402c865e06e9a55e5319c297715b893c..c36c4e301efef7c92a39b35b71a67e72cc0fb365
@@@ -352,7 -352,6 +352,7 @@@ static int dup_mmap(struct mm_struct *m
        unsigned long charge;
        struct mempolicy *pol;
  
 +      uprobe_start_dup_mmap();
        down_write(&oldmm->mmap_sem);
        flush_cache_dup_mm(oldmm);
        uprobe_dup_mmap(oldmm, mm);
@@@ -470,7 -469,6 +470,7 @@@ out
        up_write(&mm->mmap_sem);
        flush_tlb_mm(oldmm);
        up_write(&oldmm->mmap_sem);
 +      uprobe_end_dup_mmap();
        return retval;
  fail_nomem_anon_vma_fork:
        mpol_put(pol);
@@@ -822,9 -820,6 +822,9 @@@ struct mm_struct *dup_mm(struct task_st
  
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
        mm->pmd_huge_pte = NULL;
 +#endif
 +#ifdef CONFIG_NUMA_BALANCING
 +      mm->first_nid = NUMA_PTE_SCAN_INIT;
  #endif
        if (!mm_init(mm, tsk))
                goto fail_nomem;
@@@ -1044,8 -1039,6 +1044,6 @@@ static int copy_signal(unsigned long cl
        atomic_set(&sig->live, 1);
        atomic_set(&sig->sigcnt, 1);
        init_waitqueue_head(&sig->wait_chldexit);
-       if (clone_flags & CLONE_NEWPID)
-               sig->flags |= SIGNAL_UNKILLABLE;
        sig->curr_target = tsk;
        init_sigpending(&sig->shared_pending);
        INIT_LIST_HEAD(&sig->posix_timers);
@@@ -1132,6 -1125,7 +1130,6 @@@ static void posix_cpu_timers_init(struc
   */
  static struct task_struct *copy_process(unsigned long clone_flags,
                                        unsigned long stack_start,
 -                                      struct pt_regs *regs,
                                        unsigned long stack_size,
                                        int __user *child_tidptr,
                                        struct pid *pid,
  {
        int retval;
        struct task_struct *p;
 -      int cgroup_callbacks_done = 0;
  
        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
                return ERR_PTR(-EINVAL);
        p->utime = p->stime = p->gtime = 0;
        p->utimescaled = p->stimescaled = 0;
  #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 -      p->prev_utime = p->prev_stime = 0;
 +      p->prev_cputime.utime = p->prev_cputime.stime = 0;
  #endif
  #if defined(SPLIT_RSS_COUNTING)
        memset(&p->rss_stat, 0, sizeof(p->rss_stat));
        retval = copy_io(clone_flags, p);
        if (retval)
                goto bad_fork_cleanup_namespaces;
 -      retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
 +      retval = copy_thread(clone_flags, stack_start, stack_size, p);
        if (retval)
                goto bad_fork_cleanup_io;
  
        INIT_LIST_HEAD(&p->thread_group);
        p->task_works = NULL;
  
 -      /* Now that the task is set up, run cgroup callbacks if
 -       * necessary. We need to run them before the task is visible
 -       * on the tasklist. */
 -      cgroup_fork_callbacks(p);
 -      cgroup_callbacks_done = 1;
 -
        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);
  
                ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
  
                if (thread_group_leader(p)) {
-                       if (is_child_reaper(pid))
-                               p->nsproxy->pid_ns->child_reaper = p;
+                       if (is_child_reaper(pid)) {
+                               ns_of_pid(pid)->child_reaper = p;
+                               p->signal->flags |= SIGNAL_UNKILLABLE;
+                       }
  
                        p->signal->leader_pid = pid;
                        p->signal->tty = tty_kref_get(current->signal->tty);
@@@ -1473,8 -1476,6 +1473,6 @@@ bad_fork_cleanup_io
        if (p->io_context)
                exit_io_context(p);
  bad_fork_cleanup_namespaces:
-       if (unlikely(clone_flags & CLONE_NEWPID))
-               pid_ns_release_proc(p->nsproxy->pid_ns);
        exit_task_namespaces(p);
  bad_fork_cleanup_mm:
        if (p->mm)
@@@ -1500,7 -1501,7 +1498,7 @@@ bad_fork_cleanup_cgroup
  #endif
        if (clone_flags & CLONE_THREAD)
                threadgroup_change_end(current);
 -      cgroup_exit(p, cgroup_callbacks_done);
 +      cgroup_exit(p, 0);
        delayacct_tsk_free(p);
        module_put(task_thread_info(p)->exec_domain->module);
  bad_fork_cleanup_count:
@@@ -1512,6 -1513,12 +1510,6 @@@ fork_out
        return ERR_PTR(retval);
  }
  
 -noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
 -{
 -      memset(regs, 0, sizeof(struct pt_regs));
 -      return regs;
 -}
 -
  static inline void init_idle_pids(struct pid_link *links)
  {
        enum pid_type type;
  struct task_struct * __cpuinit fork_idle(int cpu)
  {
        struct task_struct *task;
 -      struct pt_regs regs;
 -
 -      task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
 -                          &init_struct_pid, 0);
 +      task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0);
        if (!IS_ERR(task)) {
                init_idle_pids(task->pids);
                init_idle(task, cpu);
   */
  long do_fork(unsigned long clone_flags,
              unsigned long stack_start,
 -            struct pt_regs *regs,
              unsigned long stack_size,
              int __user *parent_tidptr,
              int __user *child_tidptr)
         * Do some preliminary argument and permissions checking before we
         * actually start allocating stuff
         */
-       if (clone_flags & CLONE_NEWUSER) {
-               if (clone_flags & CLONE_THREAD)
+       if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) {
+               if (clone_flags & (CLONE_THREAD|CLONE_PARENT))
                        return -EINVAL;
-               /* hopefully this check will go away when userns support is
-                * complete
-                */
-               if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
-                               !capable(CAP_SETGID))
-                       return -EPERM;
        }
  
        /*
         * requested, no event is reported; otherwise, report if the event
         * for the type of forking is enabled.
         */
 -      if (!(clone_flags & CLONE_UNTRACED) && likely(user_mode(regs))) {
 +      if (!(clone_flags & CLONE_UNTRACED)) {
                if (clone_flags & CLONE_VFORK)
                        trace = PTRACE_EVENT_VFORK;
                else if ((clone_flags & CSIGNAL) != SIGCHLD)
                        trace = 0;
        }
  
 -      p = copy_process(clone_flags, stack_start, regs, stack_size,
 +      p = copy_process(clone_flags, stack_start, stack_size,
                         child_tidptr, NULL, trace);
        /*
         * Do this prior waking up the new thread - the thread pointer
   */
  pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
  {
 -      return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, NULL,
 +      return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
                (unsigned long)arg, NULL, NULL);
  }
  #endif
  
 +#ifdef __ARCH_WANT_SYS_FORK
 +SYSCALL_DEFINE0(fork)
 +{
 +#ifdef CONFIG_MMU
 +      return do_fork(SIGCHLD, 0, 0, NULL, NULL);
 +#else
 +      /* can not support in nommu mode */
 +      return(-EINVAL);
 +#endif
 +}
 +#endif
 +
 +#ifdef __ARCH_WANT_SYS_VFORK
 +SYSCALL_DEFINE0(vfork)
 +{
 +      return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 0, NULL, NULL);
 +}
 +#endif
 +
 +#ifdef __ARCH_WANT_SYS_CLONE
 +#ifdef CONFIG_CLONE_BACKWARDS
 +SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 +               int __user *, parent_tidptr,
 +               int, tls_val,
 +               int __user *, child_tidptr)
 +#elif defined(CONFIG_CLONE_BACKWARDS2)
 +SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
 +               int __user *, parent_tidptr,
 +               int __user *, child_tidptr,
 +               int, tls_val)
 +#else
 +SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 +               int __user *, parent_tidptr,
 +               int __user *, child_tidptr,
 +               int, tls_val)
 +#endif
 +{
 +      return do_fork(clone_flags, newsp, 0,
 +              parent_tidptr, child_tidptr);
 +}
 +#endif
 +
  #ifndef ARCH_MIN_MMSTRUCT_ALIGN
  #define ARCH_MIN_MMSTRUCT_ALIGN 0
  #endif
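
The generic fork/vfork/clone entry points above, together with the relaxed permission check in do_fork(), mean an unprivileged process can now request CLONE_NEWUSER (optionally combined with CLONE_NEWPID) directly from clone(2). The userspace sketch below is illustration only and not part of the patch; it assumes glibc's clone() wrapper, a downward-growing stack, and a kernel built with CONFIG_USER_NS.

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

#define STACK_SIZE (1024 * 1024)

static int child_fn(void *arg)
{
	/* First task in the new pid namespace: it sees itself as pid 1. */
	printf("child: pid inside new namespace = %d\n", (int)getpid());
	return 0;
}

int main(void)
{
	char *stack = malloc(STACK_SIZE);
	pid_t pid;

	if (!stack)
		return 1;
	/* Unprivileged since this series: the new user ns owns the new pid ns. */
	pid = clone(child_fn, stack + STACK_SIZE,
		    CLONE_NEWUSER | CLONE_NEWPID | SIGCHLD, NULL);
	if (pid < 0) {
		perror("clone");
		return 1;
	}
	waitpid(pid, NULL, 0);
	free(stack);
	return 0;
}
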
@@@ -1724,7 -1686,8 +1716,8 @@@ static int check_unshare_flags(unsigne
  {
        if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
                                CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-                               CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+                               CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
+                               CLONE_NEWUSER|CLONE_NEWPID))
                return -EINVAL;
        /*
         * Not implemented, but pretend it works if there is nothing to
@@@ -1791,19 -1754,40 +1784,40 @@@ SYSCALL_DEFINE1(unshare, unsigned long
  {
        struct fs_struct *fs, *new_fs = NULL;
        struct files_struct *fd, *new_fd = NULL;
+       struct cred *new_cred = NULL;
        struct nsproxy *new_nsproxy = NULL;
        int do_sysvsem = 0;
        int err;
  
-       err = check_unshare_flags(unshare_flags);
-       if (err)
-               goto bad_unshare_out;
+       /*
+        * If unsharing a user namespace, must also unshare the thread.
+        */
+       if (unshare_flags & CLONE_NEWUSER)
+               unshare_flags |= CLONE_THREAD;
+       /*
+        * If unsharing a pid namespace, must also unshare the thread.
+        */
+       if (unshare_flags & CLONE_NEWPID)
+               unshare_flags |= CLONE_THREAD;
+       /*
+        * If unsharing a thread from a thread group, must also unshare vm.
+        */
+       if (unshare_flags & CLONE_THREAD)
+               unshare_flags |= CLONE_VM;
+       /*
+        * If unsharing vm, must also unshare signal handlers.
+        */
+       if (unshare_flags & CLONE_VM)
+               unshare_flags |= CLONE_SIGHAND;
        /*
         * If unsharing namespace, must also unshare filesystem information.
         */
        if (unshare_flags & CLONE_NEWNS)
                unshare_flags |= CLONE_FS;
+       err = check_unshare_flags(unshare_flags);
+       if (err)
+               goto bad_unshare_out;
        /*
         * CLONE_NEWIPC must also detach from the undolist: after switching
         * to a new ipc namespace, the semaphore arrays from the old
        err = unshare_fd(unshare_flags, &new_fd);
        if (err)
                goto bad_unshare_cleanup_fs;
-       err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs);
+       err = unshare_userns(unshare_flags, &new_cred);
        if (err)
                goto bad_unshare_cleanup_fd;
+       err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
+                                        new_cred, new_fs);
+       if (err)
+               goto bad_unshare_cleanup_cred;
  
-       if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
+       if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
                if (do_sysvsem) {
                        /*
                         * CLONE_SYSVSEM is equivalent to sys_exit().
                }
  
                task_unlock(current);
+               if (new_cred) {
+                       /* Install the new user namespace */
+                       commit_creds(new_cred);
+                       new_cred = NULL;
+               }
        }
  
        if (new_nsproxy)
                put_nsproxy(new_nsproxy);
  
+ bad_unshare_cleanup_cred:
+       if (new_cred)
+               put_cred(new_cred);
  bad_unshare_cleanup_fd:
        if (new_fd)
                put_files_struct(new_fd);
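
With the flag implications and the unshare_userns()/commit_creds() step added above, a single-threaded process can unshare its user namespace and then establish an identity mapping for itself. A minimal userspace sketch follows (illustration only, error handling trimmed); the one-line "0 <uid> 1" mapping relies on the "map your current uid" rule added in this series.

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	uid_t outer = getuid();	/* capture our uid before leaving the old ns */
	char map[64];
	int fd, n;

	if (unshare(CLONE_NEWUSER) < 0) {
		perror("unshare");
		return 1;
	}
	/* Map uid 0 inside the new namespace to our uid outside it. */
	n = snprintf(map, sizeof(map), "0 %u 1\n", (unsigned)outer);
	fd = open("/proc/self/uid_map", O_WRONLY);
	if (fd < 0 || write(fd, map, n) != n) {
		perror("uid_map");
		return 1;
	}
	close(fd);
	printf("uid inside the namespace: %u\n", (unsigned)getuid());
	return 0;
}
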
diff --combined kernel/pid.c
index fd996c1ed9f891988607812abb95dc8820ab3751,3026ddae0a348fb644c6f4cfb3bc35057601ac1e..3e2cf8100acc84b23b5741603c44fc908e3d5068
@@@ -1,8 -1,8 +1,8 @@@
  /*
   * Generic pidhash and scalable, time-bounded PID allocator
   *
 - * (C) 2002-2003 William Irwin, IBM
 - * (C) 2004 William Irwin, Oracle
 + * (C) 2002-2003 Nadia Yvette Chambers, IBM
 + * (C) 2004 Nadia Yvette Chambers, Oracle
   * (C) 2002-2004 Ingo Molnar, Red Hat
   *
   * pid-structures are backing objects for tasks sharing a given ID to chain
@@@ -36,6 -36,7 +36,7 @@@
  #include <linux/pid_namespace.h>
  #include <linux/init_task.h>
  #include <linux/syscalls.h>
+ #include <linux/proc_fs.h>
  
  #define pid_hashfn(nr, ns)    \
        hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@@ -78,6 -79,8 +79,8 @@@ struct pid_namespace init_pid_ns = 
        .last_pid = 0,
        .level = 0,
        .child_reaper = &init_task,
+       .user_ns = &init_user_ns,
+       .proc_inum = PROC_PID_INIT_INO,
  };
  EXPORT_SYMBOL_GPL(init_pid_ns);
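
init_pid_ns now carries an owning user namespace and a fixed proc inode number (PROC_PID_INIT_INO). Because each pid namespace exposes a stable inode through /proc/<pid>/ns/pid, userspace can compare namespaces by inode number; a small sketch (assumption: both /proc entries are readable by the caller):

#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>

/* Returns 1 if both tasks share a pid namespace, 0 if not, -1 on error. */
static int same_pid_ns(pid_t a, pid_t b)
{
	char pa[64], pb[64];
	struct stat sa, sb;

	snprintf(pa, sizeof(pa), "/proc/%d/ns/pid", (int)a);
	snprintf(pb, sizeof(pb), "/proc/%d/ns/pid", (int)b);
	if (stat(pa, &sa) || stat(pb, &sb))
		return -1;
	return sa.st_ino == sb.st_ino;
}
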
  
@@@ -269,8 -272,24 +272,24 @@@ void free_pid(struct pid *pid
        unsigned long flags;
  
        spin_lock_irqsave(&pidmap_lock, flags);
-       for (i = 0; i <= pid->level; i++)
-               hlist_del_rcu(&pid->numbers[i].pid_chain);
+       for (i = 0; i <= pid->level; i++) {
+               struct upid *upid = pid->numbers + i;
+               struct pid_namespace *ns = upid->ns;
+               hlist_del_rcu(&upid->pid_chain);
+               switch(--ns->nr_hashed) {
+               case 1:
+                       /* When all that is left in the pid namespace
+                        * is the reaper, wake up the reaper.  The reaper
+                        * may be sleeping in zap_pid_ns_processes().
+                        */
+                       wake_up_process(ns->child_reaper);
+                       break;
+               case 0:
+                       ns->nr_hashed = -1;
+                       schedule_work(&ns->proc_work);
+                       break;
+               }
+       }
        spin_unlock_irqrestore(&pidmap_lock, flags);
  
        for (i = 0; i <= pid->level; i++)
@@@ -292,6 -311,7 +311,7 @@@ struct pid *alloc_pid(struct pid_namesp
                goto out;
  
        tmp = ns;
+       pid->level = ns->level;
        for (i = ns->level; i >= 0; i--) {
                nr = alloc_pidmap(tmp);
                if (nr < 0)
                tmp = tmp->parent;
        }
  
+       if (unlikely(is_child_reaper(pid))) {
+               if (pid_ns_prepare_proc(ns))
+                       goto out_free;
+       }
        get_pid_ns(ns);
-       pid->level = ns->level;
        atomic_set(&pid->count, 1);
        for (type = 0; type < PIDTYPE_MAX; ++type)
                INIT_HLIST_HEAD(&pid->tasks[type]);
  
        upid = pid->numbers + ns->level;
        spin_lock_irq(&pidmap_lock);
-       for ( ; upid >= pid->numbers; --upid)
+       if (ns->nr_hashed < 0)
+               goto out_unlock;
+       for ( ; upid >= pid->numbers; --upid) {
                hlist_add_head_rcu(&upid->pid_chain,
                                &pid_hash[pid_hashfn(upid->nr, upid->ns)]);
+               upid->ns->nr_hashed++;
+       }
        spin_unlock_irq(&pidmap_lock);
  
  out:
        return pid;
  
+ out_unlock:
+       spin_unlock_irq(&pidmap_lock);
  out_free:
        while (++i <= ns->level)
                free_pidmap(pid->numbers + i);
@@@ -344,7 -374,7 +374,7 @@@ EXPORT_SYMBOL_GPL(find_pid_ns)
  
  struct pid *find_vpid(int nr)
  {
-       return find_pid_ns(nr, current->nsproxy->pid_ns);
+       return find_pid_ns(nr, task_active_pid_ns(current));
  }
  EXPORT_SYMBOL_GPL(find_vpid);
  
@@@ -428,7 -458,7 +458,7 @@@ struct task_struct *find_task_by_pid_ns
  
  struct task_struct *find_task_by_vpid(pid_t vnr)
  {
-       return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns);
+       return find_task_by_pid_ns(vnr, task_active_pid_ns(current));
  }
  
  struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
@@@ -483,7 -513,7 +513,7 @@@ EXPORT_SYMBOL_GPL(pid_nr_ns)
  
  pid_t pid_vnr(struct pid *pid)
  {
-       return pid_nr_ns(pid, current->nsproxy->pid_ns);
+       return pid_nr_ns(pid, task_active_pid_ns(current));
  }
  EXPORT_SYMBOL_GPL(pid_vnr);
  
@@@ -494,7 -524,7 +524,7 @@@ pid_t __task_pid_nr_ns(struct task_stru
  
        rcu_read_lock();
        if (!ns)
-               ns = current->nsproxy->pid_ns;
+               ns = task_active_pid_ns(current);
        if (likely(pid_alive(task))) {
                if (type != PIDTYPE_PID)
                        task = task->group_leader;
@@@ -569,6 -599,7 +599,7 @@@ void __init pidmap_init(void
        /* Reserve PID 0. We never call free_pidmap(0) */
        set_bit(0, init_pid_ns.pidmap[0].page);
        atomic_dec(&init_pid_ns.pidmap[0].nr_free);
+       init_pid_ns.nr_hashed = 1;
  
        init_pid_ns.pid_cachep = KMEM_CACHE(pid,
                        SLAB_HWCACHE_ALIGN | SLAB_PANIC);
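
Several lookups in this file switch from current->nsproxy->pid_ns to task_active_pid_ns(current). A sketch of that helper, roughly as it is defined elsewhere in this series, shows why: the namespace is derived from the task's attached struct pid rather than from its nsproxy, so it stays meaningful even while the nsproxy is being swapped or torn down.

/* Sketch of the helper relied on above (approximate, for illustration). */
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
	return ns_of_pid(task_pid(tsk));
}
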
diff --combined kernel/sched/core.c
index c1fb82104bfbc9405d0c782799173ecd388ceb9a,2f5eb1838b3eb5c2b1e569e8a0b2ac762203bc93..257002c13bb02acad92c74347e3b38ca3bc881b1
@@@ -72,7 -72,6 +72,7 @@@
  #include <linux/slab.h>
  #include <linux/init_task.h>
  #include <linux/binfmts.h>
 +#include <linux/context_tracking.h>
  
  #include <asm/switch_to.h>
  #include <asm/tlb.h>
@@@ -193,10 -192,23 +193,10 @@@ static void sched_feat_disable(int i) 
  static void sched_feat_enable(int i) { };
  #endif /* HAVE_JUMP_LABEL */
  
 -static ssize_t
 -sched_feat_write(struct file *filp, const char __user *ubuf,
 -              size_t cnt, loff_t *ppos)
 +static int sched_feat_set(char *cmp)
  {
 -      char buf[64];
 -      char *cmp;
 -      int neg = 0;
        int i;
 -
 -      if (cnt > 63)
 -              cnt = 63;
 -
 -      if (copy_from_user(&buf, ubuf, cnt))
 -              return -EFAULT;
 -
 -      buf[cnt] = 0;
 -      cmp = strstrip(buf);
 +      int neg = 0;
  
        if (strncmp(cmp, "NO_", 3) == 0) {
                neg = 1;
                }
        }
  
 +      return i;
 +}
 +
 +static ssize_t
 +sched_feat_write(struct file *filp, const char __user *ubuf,
 +              size_t cnt, loff_t *ppos)
 +{
 +      char buf[64];
 +      char *cmp;
 +      int i;
 +
 +      if (cnt > 63)
 +              cnt = 63;
 +
 +      if (copy_from_user(&buf, ubuf, cnt))
 +              return -EFAULT;
 +
 +      buf[cnt] = 0;
 +      cmp = strstrip(buf);
 +
 +      i = sched_feat_set(cmp);
        if (i == __SCHED_FEAT_NR)
                return -EINVAL;
  
@@@ -931,13 -922,6 +931,13 @@@ void check_preempt_curr(struct rq *rq, 
                rq->skip_clock_update = 1;
  }
  
 +static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
 +
 +void register_task_migration_notifier(struct notifier_block *n)
 +{
 +      atomic_notifier_chain_register(&task_migration_notifier, n);
 +}
 +
  #ifdef CONFIG_SMP
  void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
  {
        trace_sched_migrate_task(p, new_cpu);
  
        if (task_cpu(p) != new_cpu) {
 +              struct task_migration_notifier tmn;
 +
 +              if (p->sched_class->migrate_task_rq)
 +                      p->sched_class->migrate_task_rq(p, new_cpu);
                p->se.nr_migrations++;
                perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
 +
 +              tmn.task = p;
 +              tmn.from_cpu = task_cpu(p);
 +              tmn.to_cpu = new_cpu;
 +
 +              atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
        }
  
        __set_task_cpu(p, new_cpu);
@@@ -1550,15 -1524,6 +1550,15 @@@ static void __sched_fork(struct task_st
        p->se.vruntime                  = 0;
        INIT_LIST_HEAD(&p->se.group_node);
  
 +/*
 + * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
 + * removed when useful for applications beyond shares distribution (e.g.
 + * load-balance).
 + */
 +#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
 +      p->se.avg.runnable_avg_period = 0;
 +      p->se.avg.runnable_avg_sum = 0;
 +#endif
  #ifdef CONFIG_SCHEDSTATS
        memset(&p->se.statistics, 0, sizeof(p->se.statistics));
  #endif
  #ifdef CONFIG_PREEMPT_NOTIFIERS
        INIT_HLIST_HEAD(&p->preempt_notifiers);
  #endif
 +
 +#ifdef CONFIG_NUMA_BALANCING
 +      if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
 +              p->mm->numa_next_scan = jiffies;
 +              p->mm->numa_next_reset = jiffies;
 +              p->mm->numa_scan_seq = 0;
 +      }
 +
 +      p->node_stamp = 0ULL;
 +      p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
 +      p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0;
 +      p->numa_scan_period = sysctl_numa_balancing_scan_delay;
 +      p->numa_work.next = &p->numa_work;
 +#endif /* CONFIG_NUMA_BALANCING */
  }
  
 +#ifdef CONFIG_NUMA_BALANCING
 +#ifdef CONFIG_SCHED_DEBUG
 +void set_numabalancing_state(bool enabled)
 +{
 +      if (enabled)
 +              sched_feat_set("NUMA");
 +      else
 +              sched_feat_set("NO_NUMA");
 +}
 +#else
 +__read_mostly bool numabalancing_enabled;
 +
 +void set_numabalancing_state(bool enabled)
 +{
 +      numabalancing_enabled = enabled;
 +}
 +#endif /* CONFIG_SCHED_DEBUG */
 +#endif /* CONFIG_NUMA_BALANCING */
 +
  /*
   * fork()/clone()-time setup:
   */
@@@ -1954,8 -1886,8 +1954,8 @@@ context_switch(struct rq *rq, struct ta
        spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
  #endif
  
 +      context_tracking_task_switch(prev, next);
        /* Here we just switch the register state and the stack. */
 -      rcu_switch(prev, next);
        switch_to(prev, next, prev);
  
        barrier();
@@@ -2979,7 -2911,7 +2979,7 @@@ asmlinkage void __sched schedule(void
  }
  EXPORT_SYMBOL(schedule);
  
 -#ifdef CONFIG_RCU_USER_QS
 +#ifdef CONFIG_CONTEXT_TRACKING
  asmlinkage void __sched schedule_user(void)
  {
        /*
         * we haven't yet exited the RCU idle mode. Do it here manually until
         * we find a better solution.
         */
 -      rcu_user_exit();
 +      user_exit();
        schedule();
 -      rcu_user_enter();
 +      user_enter();
  }
  #endif
  
@@@ -3095,7 -3027,7 +3095,7 @@@ asmlinkage void __sched preempt_schedul
        /* Catch callers which need to be fixed */
        BUG_ON(ti->preempt_count || !irqs_disabled());
  
 -      rcu_user_exit();
 +      user_exit();
        do {
                add_preempt_count(PREEMPT_ACTIVE);
                local_irq_enable();
@@@ -4097,8 -4029,14 +4097,14 @@@ long sched_setaffinity(pid_t pid, cons
                goto out_free_cpus_allowed;
        }
        retval = -EPERM;
-       if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE))
-               goto out_unlock;
+       if (!check_same_owner(p)) {
+               rcu_read_lock();
+               if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+                       rcu_read_unlock();
+                       goto out_unlock;
+               }
+               rcu_read_unlock();
+       }
  
        retval = security_task_setscheduler(p);
        if (retval)
@@@ -4542,7 -4480,6 +4548,7 @@@ static const char stat_nam[] = TASK_STA
  void sched_show_task(struct task_struct *p)
  {
        unsigned long free = 0;
 +      int ppid;
        unsigned state;
  
        state = p->state ? __ffs(p->state) + 1 : 0;
  #ifdef CONFIG_DEBUG_STACK_USAGE
        free = stack_not_used(p);
  #endif
 +      rcu_read_lock();
 +      ppid = task_pid_nr(rcu_dereference(p->real_parent));
 +      rcu_read_unlock();
        printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
 -              task_pid_nr(p), task_pid_nr(rcu_dereference(p->real_parent)),
 +              task_pid_nr(p), ppid,
                (unsigned long)task_thread_info(p)->flags);
  
        show_stack(p, NULL);
@@@ -7540,7 -7474,7 +7546,7 @@@ static inline struct task_group *cgroup
                            struct task_group, css);
  }
  
 -static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp)
 +static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
  {
        struct task_group *tg, *parent;
  
        return &tg->css;
  }
  
 -static void cpu_cgroup_destroy(struct cgroup *cgrp)
 +static void cpu_cgroup_css_free(struct cgroup *cgrp)
  {
        struct task_group *tg = cgroup_tg(cgrp);
  
@@@ -7917,8 -7851,8 +7923,8 @@@ static struct cftype cpu_files[] = 
  
  struct cgroup_subsys cpu_cgroup_subsys = {
        .name           = "cpu",
 -      .create         = cpu_cgroup_create,
 -      .destroy        = cpu_cgroup_destroy,
 +      .css_alloc      = cpu_cgroup_css_alloc,
 +      .css_free       = cpu_cgroup_css_free,
        .can_attach     = cpu_cgroup_can_attach,
        .attach         = cpu_cgroup_attach,
        .exit           = cpu_cgroup_exit,
  struct cpuacct root_cpuacct;
  
  /* create a new cpu accounting group */
 -static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp)
 +static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
  {
        struct cpuacct *ca;
  
@@@ -7971,7 -7905,7 +7977,7 @@@ out
  }
  
  /* destroy an existing cpu accounting group */
 -static void cpuacct_destroy(struct cgroup *cgrp)
 +static void cpuacct_css_free(struct cgroup *cgrp)
  {
        struct cpuacct *ca = cgroup_ca(cgrp);
  
@@@ -8142,15 -8076,9 +8148,15 @@@ void cpuacct_charge(struct task_struct 
  
  struct cgroup_subsys cpuacct_subsys = {
        .name = "cpuacct",
 -      .create = cpuacct_create,
 -      .destroy = cpuacct_destroy,
 +      .css_alloc = cpuacct_css_alloc,
 +      .css_free = cpuacct_css_free,
        .subsys_id = cpuacct_subsys_id,
        .base_cftypes = files,
  };
  #endif        /* CONFIG_CGROUP_CPUACCT */
 +
 +void dump_cpu_task(int cpu)
 +{
 +      pr_info("Task dump for CPU %d:\n", cpu);
 +      sched_show_task(cpu_curr(cpu));
 +}
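
The scheduler hunks above also add an atomic task-migration notifier chain. Only register_task_migration_notifier() and the task/from_cpu/to_cpu fields of struct task_migration_notifier appear in the patch, so the consumer below is a hypothetical sketch with illustrative names.

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/sched.h>

/* Hypothetical consumer: logs each cross-cpu migration at debug level. */
static int example_migration_notify(struct notifier_block *nb,
				    unsigned long action, void *data)
{
	struct task_migration_notifier *tmn = data;

	pr_debug("task %d migrating from cpu %d to cpu %d\n",
		 task_pid_nr(tmn->task), tmn->from_cpu, tmn->to_cpu);
	return NOTIFY_OK;
}

static struct notifier_block example_migration_nb = {
	.notifier_call = example_migration_notify,
};

static int __init example_migration_init(void)
{
	register_task_migration_notifier(&example_migration_nb);
	return 0;
}
late_initcall(example_migration_init);
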
diff --combined kernel/signal.c
index a49c7f36ceb3e595d98a437f0b3031a51cab305d,b2445d86f22691b34296796ee5f91cfe2c900b8d..580a91e634710b6dbbc75f328c3bbef549b999cb
@@@ -1159,9 -1159,8 +1159,9 @@@ static int send_signal(int sig, struct 
        return __send_signal(sig, info, t, group, from_ancestor_ns);
  }
  
 -static void print_fatal_signal(struct pt_regs *regs, int signr)
 +static void print_fatal_signal(int signr)
  {
 +      struct pt_regs *regs = signal_pt_regs();
        printk("%s/%d: potentially unexpected fatal signal %d.\n",
                current->comm, task_pid_nr(current), signr);
  
@@@ -1753,7 -1752,7 +1753,7 @@@ static void do_notify_parent_cldstop(st
         * see comment in do_notify_parent() about the following 4 lines
         */
        rcu_read_lock();
-       info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns);
+       info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(parent));
        info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk));
        rcu_read_unlock();
  
@@@ -1909,7 -1908,7 +1909,7 @@@ static void ptrace_stop(int exit_code, 
                preempt_disable();
                read_unlock(&tasklist_lock);
                preempt_enable_no_resched();
 -              schedule();
 +              freezable_schedule();
        } else {
                /*
                 * By the time we got the lock, our tracer went away.
                read_unlock(&tasklist_lock);
        }
  
 -      /*
 -       * While in TASK_TRACED, we were considered "frozen enough".
 -       * Now that we woke up, it's crucial if we're supposed to be
 -       * frozen that we freeze now before running anything substantial.
 -       */
 -      try_to_freeze();
 -
        /*
         * We are back.  Now reacquire the siglock before touching
         * last_siginfo, so that we are sure to have synchronized with
@@@ -2086,7 -2092,7 +2086,7 @@@ static bool do_signal_stop(int signr
                }
  
                /* Now we don't run again until woken by SIGCONT or SIGKILL */
 -              schedule();
 +              freezable_schedule();
                return true;
        } else {
                /*
@@@ -2132,9 -2138,10 +2132,9 @@@ static void do_jobctl_trap(void
        }
  }
  
 -static int ptrace_signal(int signr, siginfo_t *info,
 -                       struct pt_regs *regs, void *cookie)
 +static int ptrace_signal(int signr, siginfo_t *info)
  {
 -      ptrace_signal_deliver(regs, cookie);
 +      ptrace_signal_deliver();
        /*
         * We do not check sig_kernel_stop(signr) but set this marker
         * unconditionally because we do not know whether debugger will
@@@ -2193,14 -2200,15 +2193,14 @@@ int get_signal_to_deliver(siginfo_t *in
        if (unlikely(uprobe_deny_signal()))
                return 0;
  
 -relock:
        /*
 -       * We'll jump back here after any time we were stopped in TASK_STOPPED.
 -       * While in TASK_STOPPED, we were considered "frozen enough".
 -       * Now that we woke up, it's crucial if we're supposed to be
 -       * frozen that we freeze now before running anything substantial.
 +       * Do this once, we can't return to user-mode if freezing() == T.
 +       * do_signal_stop() and ptrace_stop() do freezable_schedule() and
 +       * thus do not need another check after return.
         */
        try_to_freeze();
  
 +relock:
        spin_lock_irq(&sighand->siglock);
        /*
         * Every stopped thread goes here after wakeup. Check to see if
                        break; /* will return 0 */
  
                if (unlikely(current->ptrace) && signr != SIGKILL) {
 -                      signr = ptrace_signal(signr, info,
 -                                            regs, cookie);
 +                      signr = ptrace_signal(signr, info);
                        if (!signr)
                                continue;
                }
  
                if (sig_kernel_coredump(signr)) {
                        if (print_fatal_signals)
 -                              print_fatal_signal(regs, info->si_signo);
 +                              print_fatal_signal(info->si_signo);
                        /*
                         * If it was able to dump core, this kills all
                         * other threads in the group and synchronizes with
                         * first and our do_group_exit call below will use
                         * that value and ignore the one we pass it.
                         */
 -                      do_coredump(info, regs);
 +                      do_coredump(info);
                }
  
                /*
diff --combined security/yama/yama_lsm.c
index 2663145d1197a104b71f0e2feca175d21156ee00,0e72239aeb053ddf5b7c589aeeff9563a673321f..23414b93771f30ec82ccf76b6cfb49fbed27edef
@@@ -17,7 -17,6 +17,7 @@@
  #include <linux/ptrace.h>
  #include <linux/prctl.h>
  #include <linux/ratelimit.h>
 +#include <linux/workqueue.h>
  
  #define YAMA_SCOPE_DISABLED   0
  #define YAMA_SCOPE_RELATIONAL 1
@@@ -30,37 -29,12 +30,37 @@@ static int ptrace_scope = YAMA_SCOPE_RE
  struct ptrace_relation {
        struct task_struct *tracer;
        struct task_struct *tracee;
 +      bool invalid;
        struct list_head node;
 +      struct rcu_head rcu;
  };
  
  static LIST_HEAD(ptracer_relations);
  static DEFINE_SPINLOCK(ptracer_relations_lock);
  
 +static void yama_relation_cleanup(struct work_struct *work);
 +static DECLARE_WORK(yama_relation_work, yama_relation_cleanup);
 +
 +/**
 + * yama_relation_cleanup - remove invalid entries from the relation list
 + *
 + */
 +static void yama_relation_cleanup(struct work_struct *work)
 +{
 +      struct ptrace_relation *relation;
 +
 +      spin_lock(&ptracer_relations_lock);
 +      rcu_read_lock();
 +      list_for_each_entry_rcu(relation, &ptracer_relations, node) {
 +              if (relation->invalid) {
 +                      list_del_rcu(&relation->node);
 +                      kfree_rcu(relation, rcu);
 +              }
 +      }
 +      rcu_read_unlock();
 +      spin_unlock(&ptracer_relations_lock);
 +}
 +
  /**
   * yama_ptracer_add - add/replace an exception for this tracer/tracee pair
   * @tracer: the task_struct of the process doing the ptrace
  static int yama_ptracer_add(struct task_struct *tracer,
                            struct task_struct *tracee)
  {
 -      int rc = 0;
 -      struct ptrace_relation *added;
 -      struct ptrace_relation *entry, *relation = NULL;
 +      struct ptrace_relation *relation, *added;
  
        added = kmalloc(sizeof(*added), GFP_KERNEL);
        if (!added)
                return -ENOMEM;
  
 -      spin_lock_bh(&ptracer_relations_lock);
 -      list_for_each_entry(entry, &ptracer_relations, node)
 -              if (entry->tracee == tracee) {
 -                      relation = entry;
 -                      break;
 +      added->tracee = tracee;
 +      added->tracer = tracer;
 +      added->invalid = false;
 +
 +      spin_lock(&ptracer_relations_lock);
 +      rcu_read_lock();
 +      list_for_each_entry_rcu(relation, &ptracer_relations, node) {
 +              if (relation->invalid)
 +                      continue;
 +              if (relation->tracee == tracee) {
 +                      list_replace_rcu(&relation->node, &added->node);
 +                      kfree_rcu(relation, rcu);
 +                      goto out;
                }
 -      if (!relation) {
 -              relation = added;
 -              relation->tracee = tracee;
 -              list_add(&relation->node, &ptracer_relations);
        }
 -      relation->tracer = tracer;
  
 -      spin_unlock_bh(&ptracer_relations_lock);
 -      if (added != relation)
 -              kfree(added);
 +      list_add_rcu(&added->node, &ptracer_relations);
  
 -      return rc;
 +out:
 +      rcu_read_unlock();
 +      spin_unlock(&ptracer_relations_lock);
 +      return 0;
  }
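
The relation entries that yama_ptracer_add() now installs under RCU are created from userspace with prctl(PR_SET_PTRACER). A minimal sketch of the tracee side follows; the debugger_pid value and the crash-handler scenario are just examples, and PR_SET_PTRACER_ANY would allow any process instead.

#include <stdio.h>
#include <sys/prctl.h>
#include <sys/types.h>
#include <linux/prctl.h>

/* Called by the tracee: allow debugger_pid (e.g. a crash handler) to attach
 * even under YAMA_SCOPE_RELATIONAL. */
static int allow_ptracer(pid_t debugger_pid)
{
	if (prctl(PR_SET_PTRACER, (unsigned long)debugger_pid, 0, 0, 0) < 0) {
		perror("prctl(PR_SET_PTRACER)");
		return -1;
	}
	return 0;
}
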
  
  /**
  static void yama_ptracer_del(struct task_struct *tracer,
                             struct task_struct *tracee)
  {
 -      struct ptrace_relation *relation, *safe;
 +      struct ptrace_relation *relation;
 +      bool marked = false;
  
 -      spin_lock_bh(&ptracer_relations_lock);
 -      list_for_each_entry_safe(relation, safe, &ptracer_relations, node)
 +      rcu_read_lock();
 +      list_for_each_entry_rcu(relation, &ptracer_relations, node) {
 +              if (relation->invalid)
 +                      continue;
                if (relation->tracee == tracee ||
                    (tracer && relation->tracer == tracer)) {
 -                      list_del(&relation->node);
 -                      kfree(relation);
 +                      relation->invalid = true;
 +                      marked = true;
                }
 -      spin_unlock_bh(&ptracer_relations_lock);
 +      }
 +      rcu_read_unlock();
 +
 +      if (marked)
 +              schedule_work(&yama_relation_work);
  }
  
  /**
@@@ -252,22 -217,21 +252,22 @@@ static int ptracer_exception_found(stru
        struct task_struct *parent = NULL;
        bool found = false;
  
 -      spin_lock_bh(&ptracer_relations_lock);
        rcu_read_lock();
        if (!thread_group_leader(tracee))
                tracee = rcu_dereference(tracee->group_leader);
 -      list_for_each_entry(relation, &ptracer_relations, node)
 +      list_for_each_entry_rcu(relation, &ptracer_relations, node) {
 +              if (relation->invalid)
 +                      continue;
                if (relation->tracee == tracee) {
                        parent = relation->tracer;
                        found = true;
                        break;
                }
 +      }
  
        if (found && (parent == NULL || task_is_descendant(parent, tracer)))
                rc = 1;
        rcu_read_unlock();
 -      spin_unlock_bh(&ptracer_relations_lock);
  
        return rc;
  }
@@@ -298,14 -262,18 +298,18 @@@ int yama_ptrace_access_check(struct tas
                        /* No additional restrictions. */
                        break;
                case YAMA_SCOPE_RELATIONAL:
+                       rcu_read_lock();
                        if (!task_is_descendant(current, child) &&
                            !ptracer_exception_found(current, child) &&
-                           !ns_capable(task_user_ns(child), CAP_SYS_PTRACE))
+                           !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
                                rc = -EPERM;
+                       rcu_read_unlock();
                        break;
                case YAMA_SCOPE_CAPABILITY:
-                       if (!ns_capable(task_user_ns(child), CAP_SYS_PTRACE))
+                       rcu_read_lock();
+                       if (!ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
                                rc = -EPERM;
+                       rcu_read_unlock();
                        break;
                case YAMA_SCOPE_NO_ATTACH:
                default:
@@@ -343,8 -311,10 +347,10 @@@ int yama_ptrace_traceme(struct task_str
        /* Only disallow PTRACE_TRACEME on more aggressive settings. */
        switch (ptrace_scope) {
        case YAMA_SCOPE_CAPABILITY:
-               if (!ns_capable(task_user_ns(parent), CAP_SYS_PTRACE))
+               rcu_read_lock();
+               if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE))
                        rc = -EPERM;
+               rcu_read_unlock();
                break;
        case YAMA_SCOPE_NO_ATTACH:
                rc = -EPERM;