* Should the accounting and control be hierarchical, per subtree?
*/
bool use_hierarchy;
- unsigned long kmem_account_flags; /* See KMEM_ACCOUNTED_*, below */
bool oom_lock;
atomic_t under_oom;
/* WARNING: nodeinfo must be the last member here */
};
-/* internal only representation about the status of kmem accounting. */
-enum {
- KMEM_ACCOUNTED_ACTIVE, /* accounted by this cgroup itself */
-};
-
#ifdef CONFIG_MEMCG_KMEM
-static inline void memcg_kmem_set_active(struct mem_cgroup *memcg)
-{
- set_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
-}
-
static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
{
- return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
+ return memcg->kmemcg_id >= 0;
}
-
#endif
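
With kmem_account_flags gone, kmemcg_id is the sole record of kmem activation
state: it starts at -1 (initialized in css_alloc, further down) and becomes a
valid cache-array index in memcg_activate_kmem(). A minimal sketch of the
resulting lifecycle, using only helpers from this file (the assertions are
illustrative, not part of the patch):

    memcg->kmemcg_id = -1;               /* css_alloc: not active yet */
    BUG_ON(memcg_kmem_is_active(memcg));
    memcg->kmemcg_id = memcg_id;         /* memcg_activate_kmem: active */
    BUG_ON(!memcg_kmem_is_active(memcg));
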
/* Stuffs for move charges at task migration. */
* select it. The goal is to allow it to allocate so that it may
* quickly exit and free its memory.
*/
- if (fatal_signal_pending(current) || current->flags & PF_EXITING) {
+ if (fatal_signal_pending(current) || task_will_free_mem(current)) {
set_thread_flag(TIF_MEMDIE);
return;
}
NULL, "Memory cgroup out of memory");
}
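
task_will_free_mem() subsumes the old open-coded PF_EXITING test while also
excluding tasks stuck generating a coredump, which may not exit soon. Around
this kernel version the helper in include/linux/oom.h read roughly as follows
(quoted from memory, so treat it as indicative rather than authoritative):

    static inline bool task_will_free_mem(struct task_struct *task)
    {
            return (task->flags & PF_EXITING) &&
                    !(task->signal->flags & SIGNAL_GROUP_COREDUMP);
    }
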
+#if MAX_NUMNODES > 1
+
/**
* test_mem_cgroup_node_reclaimable
* @memcg: the target memcg
return false;
}
-#if MAX_NUMNODES > 1
/*
* Always updating the nodemask is not very good - even if we have an empty
if (!cachep)
return;
- css_get(&memcg->css);
list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
/*
list_del(&cachep->memcg_params->list);
kmem_cache_destroy(cachep);
-
- /* drop the reference taken in memcg_register_cache */
- css_put(&memcg->css);
-}
-
-/*
- * During the creation a new cache, we need to disable our accounting mechanism
- * altogether. This is true even if we are not creating, but rather just
- * enqueing new caches to be created.
- *
- * This is because that process will trigger allocations; some visible, like
- * explicit kmallocs to auxiliary data structures, name strings and internal
- * cache structures; some well concealed, like INIT_WORK() that can allocate
- * objects during debug.
- *
- * If any allocation happens during memcg_kmem_get_cache, we will recurse back
- * to it. This may not be a bounded recursion: since the first cache creation
- * failed to complete (waiting on the allocation), we'll just try to create the
- * cache again, failing at the same point.
- *
- * memcg_kmem_get_cache is prepared to abort after seeing a positive count of
- * memcg_kmem_skip_account. So we enclose anything that might allocate memory
- * inside the following two functions.
- */
-static inline void memcg_stop_kmem_account(void)
-{
- VM_BUG_ON(!current->mm);
- current->memcg_kmem_skip_account++;
-}
-
-static inline void memcg_resume_kmem_account(void)
-{
- VM_BUG_ON(!current->mm);
- current->memcg_kmem_skip_account--;
}
int __memcg_cleanup_cache_params(struct kmem_cache *s)
mutex_lock(&memcg_slab_mutex);
list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
cachep = memcg_params_to_cache(params);
- kmem_cache_shrink(cachep);
- if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
- memcg_unregister_cache(cachep);
+ memcg_unregister_cache(cachep);
}
mutex_unlock(&memcg_slab_mutex);
}
struct memcg_register_cache_work *cw;
cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
- if (cw == NULL) {
- css_put(&memcg->css);
+ if (!cw)
return;
- }
+
+ css_get(&memcg->css);
cw->memcg = memcg;
cw->cachep = cachep;
* this point we can't allow ourselves back into memcg_kmem_get_cache,
* the safest choice is to do it like this, wrapping the whole function.
*/
- memcg_stop_kmem_account();
+ current->memcg_kmem_skip_account = 1;
__memcg_schedule_register_cache(memcg, cachep);
- memcg_resume_kmem_account();
+ current->memcg_kmem_skip_account = 0;
}
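
Because the window around __memcg_schedule_register_cache() is the only place
that sets the flag and such windows never nest, a plain assignment replaces
the old increment/decrement pair. Illustratively, any allocation made inside
the window is routed to the root cache by the check in
__memcg_kmem_get_cache() below (a sketch, not a hunk from this patch):

    current->memcg_kmem_skip_account = 1;
    cw = kmalloc(sizeof(*cw), GFP_NOWAIT); /* falls back to the root cache */
    current->memcg_kmem_skip_account = 0;
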
int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order)
{
unsigned int nr_pages = 1 << order;
- int res;
- res = memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages);
- if (!res)
- atomic_add(nr_pages, &cachep->memcg_params->nr_pages);
- return res;
+ return memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages);
}
void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
unsigned int nr_pages = 1 << order;
memcg_uncharge_kmem(cachep->memcg_params->memcg, nr_pages);
- atomic_sub(nr_pages, &cachep->memcg_params->nr_pages);
}
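
With the per-cache nr_pages counter dropped, charge and uncharge become
symmetric thin wrappers around the memcg page counter: a slab page of a given
order is charged once when allocated and uncharged with the same order when
freed. A hypothetical slab-side caller, for illustration only:

    /* Hypothetical caller; the real hooks live in the slab allocators. */
    if (__memcg_charge_slab(cachep, gfp, order))
            return NULL;                  /* charge failed, no page */
    /* ... allocate and use the slab page ... */
    __memcg_uncharge_slab(cachep, order); /* at free time, same order */
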
/*
* Can't be called in interrupt context or from kernel threads.
* This function needs to be called with rcu_read_lock() held.
*/
-struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
- gfp_t gfp)
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep)
{
struct mem_cgroup *memcg;
struct kmem_cache *memcg_cachep;
VM_BUG_ON(!cachep->memcg_params);
VM_BUG_ON(!cachep->memcg_params->is_root_cache);
- if (!current->mm || current->memcg_kmem_skip_account)
+ if (current->memcg_kmem_skip_account)
return cachep;
- rcu_read_lock();
- memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
-
+ memcg = get_mem_cgroup_from_mm(current->mm);
if (!memcg_kmem_is_active(memcg))
goto out;
memcg_cachep = cache_from_memcg_idx(cachep, memcg_cache_id(memcg));
- if (likely(memcg_cachep)) {
- cachep = memcg_cachep;
- goto out;
- }
-
- /* The corresponding put will be done in the workqueue. */
- if (!css_tryget_online(&memcg->css))
- goto out;
- rcu_read_unlock();
+ if (likely(memcg_cachep))
+ return memcg_cachep;
/*
* If we are in a safe context (can wait, and not in interrupt
* defer everything.
*/
memcg_schedule_register_cache(memcg, cachep);
- return cachep;
out:
- rcu_read_unlock();
+ css_put(&memcg->css);
return cachep;
}
+void __memcg_kmem_put_cache(struct kmem_cache *cachep)
+{
+ if (!is_root_cache(cachep))
+ css_put(&cachep->memcg_params->memcg->css);
+}
+
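
The new get/put pair makes cache pinning explicit: __memcg_kmem_get_cache()
now returns with the css reference still held whenever it hands back a
per-memcg cache, and the caller drops it once the cache no longer needs to be
kept alive. A hypothetical allocation path (the real call sites are the slab
allocators' pre/post-alloc hooks):

    struct kmem_cache *s = cachep;

    if (memcg_kmem_enabled())
            s = __memcg_kmem_get_cache(cachep); /* may pin a memcg cache */
    obj = kmem_cache_alloc(s, gfp);
    if (memcg_kmem_enabled())
            __memcg_kmem_put_cache(s);          /* no-op for root caches */
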
/*
* We need to verify if the allocation against current->mm->owner's memcg is
* possible for the given order. But the page is not allocated yet, so we'll
*_memcg = NULL;
- /*
- * Disabling accounting is only relevant for some specific memcg
- * internal allocations. Therefore we would initially not have such
- * check here, since direct calls to the page allocator that are
- * accounted to kmemcg (alloc_kmem_pages and friends) only happen
- * outside memcg core. We are mostly concerned with cache allocations,
- * and by having this test at memcg_kmem_get_cache, we are already able
- * to relay the allocation to the root cache and bypass the memcg cache
- * altogether.
- *
- * There is one exception, though: the SLUB allocator does not create
- * large order caches, but rather service large kmallocs directly from
- * the page allocator. Therefore, the following sequence when backed by
- * the SLUB allocator:
- *
- * memcg_stop_kmem_account();
- * kmalloc(<large_number>)
- * memcg_resume_kmem_account();
- *
- * would effectively ignore the fact that we should skip accounting,
- * since it will drive us directly to this function without passing
- * through the cache selector memcg_kmem_get_cache. Such large
- * allocations are extremely rare but can happen, for instance, for the
- * cache arrays. We bring this test here.
- */
- if (!current->mm || current->memcg_kmem_skip_account)
- return true;
-
memcg = get_mem_cgroup_from_mm(current->mm);
if (!memcg_kmem_is_active(memcg)) {
memcg_uncharge_kmem(memcg, 1 << order);
page->mem_cgroup = NULL;
}
-#else
-static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
-{
-}
#endif /* CONFIG_MEMCG_KMEM */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (memcg_kmem_is_active(memcg))
return 0;
- /*
- * We are going to allocate memory for data shared by all memory
- * cgroups so let's stop accounting here.
- */
- memcg_stop_kmem_account();
-
/*
* For simplicity, we won't allow this to be disabled. It also can't
* be changed if the cgroup has children already, or if tasks had
goto out;
}
- memcg->kmemcg_id = memcg_id;
- INIT_LIST_HEAD(&memcg->memcg_slab_caches);
-
/*
- * We couldn't have accounted to this cgroup, because it hasn't got the
- * active bit set yet, so this should succeed.
+ * We couldn't have accounted to this cgroup, because it hasn't been
+ * activated yet, so this should succeed.
*/
err = page_counter_limit(&memcg->kmem, nr_pages);
VM_BUG_ON(err);
static_key_slow_inc(&memcg_kmem_enabled_key);
/*
- * Setting the active bit after enabling static branching will
+ * A memory cgroup is considered kmem-active as soon as it gets a
+ * valid kmemcg_id. Setting the id after enabling static branching will
* guarantee no one starts accounting before all call sites are
* patched.
*/
- memcg_kmem_set_active(memcg);
+ memcg->kmemcg_id = memcg_id;
out:
- memcg_resume_kmem_account();
return err;
}
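
The publication ordering mirrors the old active-bit scheme: readers test the
static key before any per-memcg state, so assigning kmemcg_id only after
static_key_slow_inc() returns guarantees that no call site can observe an
active memcg before it has been patched. Reader side, schematically:

    /* Schematic reader; memcg_kmem_enabled() wraps the static key. */
    if (memcg_kmem_enabled() &&       /* patched by static_key_slow_inc() */
        memcg_kmem_is_active(memcg))  /* now: memcg->kmemcg_id >= 0 */
            /* charging is safe here */;
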
}
#endif /* CONFIG_NUMA */
-static inline void mem_cgroup_lru_names_not_uptodate(void)
-{
- BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
-}
-
static int memcg_stat_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
struct mem_cgroup *mi;
unsigned int i;
+ BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
+
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
{
int ret;
- memcg->kmemcg_id = -1;
ret = memcg_propagate_kmem(memcg);
if (ret)
return ret;
static void memcg_destroy_kmem(struct mem_cgroup *memcg)
{
+ memcg_unregister_all_caches(memcg);
mem_cgroup_sockets_destroy(memcg);
}
#else
free_percpu(memcg->stat);
- /*
- * We need to make sure that (at least for now), the jump label
- * destruction code runs outside of the cgroup lock. This is because
- * get_online_cpus(), which is called from the static_branch update,
- * can't be called inside the cgroup_lock. cpusets are the ones
- * enforcing this dependency, so if they ever change, we might as well.
- *
- * schedule_work() will guarantee this happens. Be careful if you need
- * to move this code around, and make sure it is outside
- * the cgroup_lock.
- */
disarm_static_keys(memcg);
kfree(memcg);
}
vmpressure_init(&memcg->vmpressure);
INIT_LIST_HEAD(&memcg->event_list);
spin_lock_init(&memcg->event_list_lock);
+#ifdef CONFIG_MEMCG_KMEM
+ memcg->kmemcg_id = -1;
+ INIT_LIST_HEAD(&memcg->memcg_slab_caches);
+#endif
return &memcg->css;
}
spin_unlock(&memcg->event_list_lock);
- memcg_unregister_all_caches(memcg);
vmpressure_cleanup(&memcg->vmpressure);
}