mm owner: fix race between swapoff and exit
author:    Balbir Singh <balbir@linux.vnet.ibm.com>
           Sun, 5 Oct 2008 16:43:37 +0000 (17:43 +0100)
committer: Greg Kroah-Hartman <gregkh@suse.de>
           Thu, 9 Oct 2008 03:23:12 +0000 (20:23 -0700)
[Here's a backport of 2.6.27-rc8's 31a78f23bac0069004e69f98808b6988baccb6b6
 to 2.6.26 or 2.6.26.5: I wouldn't trouble -stable for the (root only)
 swapoff case which uncovered the bug, but the /proc/<pid>/<mmstats> case
 is open to all, so I think worth plugging in the next 2.6.26-stable.
 - Hugh]

There's a race between mm->owner assignment and swapoff, more easily
seen when task slab poisoning is turned on.  The condition occurs when
try_to_unuse() runs in parallel with an exiting task.  A similar race
can occur with callers of get_task_mm(), such as /proc/<pid>/<mmstats>
or ptrace or page migration.

CPU0                                    CPU1
                                        try_to_unuse
                                        looks at mm = task0->mm
                                        increments mm->mm_users
task 0 exits
mm->owner needs to be updated, but no
new owner is found (mm_users > 1, but
no other task has task->mm = task0->mm)
mm_update_next_owner() leaves
                                        mmput(mm) decrements mm->mm_users
task0 freed
                                        dereferencing mm->owner fails
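
For illustration only, here is a simplified sketch of the racy pattern
(not the actual try_to_unuse() code): the reference taken by
get_task_mm() pins the mm itself, but it does not pin the task that
mm->owner points to, so the owner can exit and be freed underneath us.

	struct mm_struct *mm = get_task_mm(task);	/* mm->mm_users++ */
	if (mm) {
		/*
		 * Window: 'task' may exit here.  With no other task
		 * sharing the mm, no new owner is found, yet mm->owner
		 * still points at the now-freed task_struct.
		 */
		struct task_struct *owner = mm->owner;	/* stale! */
		/* ... dereferencing 'owner' is a use-after-free ... */
		mmput(mm);				/* mm->mm_users-- */
	}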

The fix is to notify the subsystem via the mm_owner_changed() callback
when no new owner is found, passing NULL as the new task.
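
As a sketch of what a subsystem can do with that NULL (the subsystem
name and body here are hypothetical, and the signature is an assumption
based on how the hook is invoked in the kernel/cgroup.c hunk below):

	static void foo_mm_owner_changed(struct cgroup_subsys *ss,
					 struct cgroup *old,
					 struct cgroup *new)
	{
		/* new == NULL: the mm lost its owner task entirely */
		if (!new) {
			/* tear down per-cgroup state tied to this mm */
			return;
		}
		/* otherwise migrate accounting from 'old' to 'new' */
	}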

Jiri Slaby:
mm->owner was set to NULL prior to calling cgroup_mm_owner_callbacks(), but
must be set after that, so that NULL is not passed as the old owner, which
would cause an oops.
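
That is, the ordering in mm_update_next_owner() must be (as in the
kernel/exit.c hunk below):

	/* Wrong: the callback would see NULL as the old owner */
	mm->owner = NULL;
	cgroup_mm_owner_callbacks(mm->owner, NULL);

	/* Right: notify with the old owner still in place, then clear */
	cgroup_mm_owner_callbacks(mm->owner, NULL);
	mm->owner = NULL;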

Daisuke Nishimura:
mm_update_next_owner() may set mm->owner to NULL, so mem_cgroup_from_task()
and its callers must take account of this situation to avoid an oops.
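
Concretely, callers now follow the pattern from the mm/memcontrol.c
hunk below (simplified):

	rcu_read_lock();
	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
	if (unlikely(!mem)) {
		/* mm->owner was cleared by the race: back out quietly */
		rcu_read_unlock();
		return 0;
	}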

Hugh Dickins:
Testing these patches produced a lockdep warning and a hang below exec_mmap().
exit_mm() up_reads mmap_sem before calling mm_update_next_owner(),
so exec_mmap() now needs to do the same.  And with that repositioning,
there's now no point in mm_need_new_owner() allowing for a NULL mm.
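
The constraint, roughly, is that mm_update_next_owner() now takes
mmap_sem for write itself on the no-owner path (see the kernel/exit.c
hunk below), so it must not be called with that semaphore already held.
Simplified, both exit_mm() and exec_mmap() end up doing:

	up_read(&mm->mmap_sem);		/* drop the read lock first */
	mm_update_next_owner(mm);	/* may down_write(&mm->mmap_sem) */
	mmput(mm);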

Reported-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
fs/exec.c
kernel/cgroup.c
kernel/exit.c
mm/memcontrol.c

index fd9234379e8d2816965ff924ace687160ff0c365..85e99483eba0494fedff2b7e937f06313cf098d7 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -740,11 +740,11 @@ static int exec_mmap(struct mm_struct *mm)
        tsk->active_mm = mm;
        activate_mm(active_mm, mm);
        task_unlock(tsk);
-       mm_update_next_owner(old_mm);
        arch_pick_mmap_layout(mm);
        if (old_mm) {
                up_read(&old_mm->mmap_sem);
                BUG_ON(active_mm != old_mm);
+               mm_update_next_owner(old_mm);
                mmput(old_mm);
                return 0;
        }
index 15ac0e1e4f4de9dde14a6fe17e9b3d6e059a4baf..d53caaa49be67f05947afca90797d8e494eb8b21 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2761,14 +2761,15 @@ void cgroup_fork_callbacks(struct task_struct *child)
  */
 void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
 {
-       struct cgroup *oldcgrp, *newcgrp;
+       struct cgroup *oldcgrp, *newcgrp = NULL;
 
        if (need_mm_owner_callback) {
                int i;
                for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                        struct cgroup_subsys *ss = subsys[i];
                        oldcgrp = task_cgroup(old, ss->subsys_id);
-                       newcgrp = task_cgroup(new, ss->subsys_id);
+                       if (new)
+                               newcgrp = task_cgroup(new, ss->subsys_id);
                        if (oldcgrp == newcgrp)
                                continue;
                        if (ss->mm_owner_changed)
index 3bb2b8310ad35bcb9906eb85f1436647e4343306..f68b081124bfa42940f88609c8ebf0e9b011e161 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -577,8 +577,6 @@ mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
         * If there are other users of the mm and the owner (us) is exiting
         * we need to find a new owner to take on the responsibility.
         */
-       if (!mm)
-               return 0;
        if (atomic_read(&mm->mm_users) <= 1)
                return 0;
        if (mm->owner != p)
@@ -621,6 +619,16 @@ retry:
        } while_each_thread(g, c);
 
        read_unlock(&tasklist_lock);
+       /*
+        * We found no owner yet mm_users > 1: this implies that we are
+        * most likely racing with swapoff (try_to_unuse()) or /proc or
+        * ptrace or page migration (get_task_mm()).  Mark owner as NULL,
+        * so that subsystems can understand the callback and take action.
+        */
+       down_write(&mm->mmap_sem);
+       cgroup_mm_owner_callbacks(mm->owner, NULL);
+       mm->owner = NULL;
+       up_write(&mm->mmap_sem);
        return;
 
 assign_new_owner:
index e46451e1d9b793563b08d8ef015927b484b0be1d..ed1cfb12bc95d0f2bb4ec4918b81e136fbc9963f 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -250,6 +250,14 @@ static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 {
+       /*
+        * mm_update_next_owner() may clear mm->owner to NULL
+        * if it races with swapoff, page migration, etc.
+        * So this can be called with p == NULL.
+        */
+       if (unlikely(!p))
+               return NULL;
+
        return container_of(task_subsys_state(p, mem_cgroup_subsys_id),
                                struct mem_cgroup, css);
 }
@@ -574,6 +582,11 @@ retry:
 
        rcu_read_lock();
        mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+       if (unlikely(!mem)) {
+               rcu_read_unlock();
+               kmem_cache_free(page_cgroup_cache, pc);
+               return 0;
+       }
        /*
         * For every charge from the cgroup, increment reference count
         */