* Should the accounting and control be hierarchical, per subtree?
*/
bool use_hierarchy;
- unsigned long kmem_account_flags; /* See KMEM_ACCOUNTED_*, below */
bool oom_lock;
atomic_t under_oom;
/* WARNING: nodeinfo must be the last member here */
};
-/* internal only representation about the status of kmem accounting. */
-enum {
- KMEM_ACCOUNTED_ACTIVE, /* accounted by this cgroup itself */
-};
-
#ifdef CONFIG_MEMCG_KMEM
-static inline void memcg_kmem_set_active(struct mem_cgroup *memcg)
-{
- set_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
-}
-
static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
{
- return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
+ return memcg->kmemcg_id >= 0;
}
-
#endif
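
With kmem_account_flags gone, kmemcg_id is the sole record of kmem activation
state: it starts at -1 (initialized in css_alloc, further down) and becomes a
valid cache-array index in memcg_activate_kmem(). A minimal sketch of the
resulting lifecycle, using only helpers from this file (the assertions are
illustrative, not part of the patch):

    memcg->kmemcg_id = -1;               /* css_alloc: not active yet */
    BUG_ON(memcg_kmem_is_active(memcg));
    memcg->kmemcg_id = memcg_id;         /* memcg_activate_kmem: active */
    BUG_ON(!memcg_kmem_is_active(memcg));
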
/* Stuffs for move charges at task migration. */
* select it. The goal is to allow it to allocate so that it may
* quickly exit and free its memory.
*/
- if (fatal_signal_pending(current) || current->flags & PF_EXITING) {
+ if (fatal_signal_pending(current) || task_will_free_mem(current)) {
set_thread_flag(TIF_MEMDIE);
return;
}
NULL, "Memory cgroup out of memory");
}
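
task_will_free_mem() subsumes the old open-coded PF_EXITING test while also
excluding tasks stuck generating a coredump, which may not exit soon. Around
this kernel version the helper in include/linux/oom.h read roughly as follows
(quoted from memory, so treat it as indicative rather than authoritative):

    static inline bool task_will_free_mem(struct task_struct *task)
    {
            return (task->flags & PF_EXITING) &&
                    !(task->signal->flags & SIGNAL_GROUP_COREDUMP);
    }
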
+#if MAX_NUMNODES > 1
+
/**
* test_mem_cgroup_node_reclaimable
* @memcg: the target memcg
return false;
}
-#if MAX_NUMNODES > 1
/*
* Always updating the nodemask is not very good - even if we have an empty
if (!cachep)
return;
- css_get(&memcg->css);
list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
/*
list_del(&cachep->memcg_params->list);
kmem_cache_destroy(cachep);
-
- /* drop the reference taken in memcg_register_cache */
- css_put(&memcg->css);
-}
-
-/*
- * During the creation a new cache, we need to disable our accounting mechanism
- * altogether. This is true even if we are not creating, but rather just
- * enqueing new caches to be created.
- *
- * This is because that process will trigger allocations; some visible, like
- * explicit kmallocs to auxiliary data structures, name strings and internal
- * cache structures; some well concealed, like INIT_WORK() that can allocate
- * objects during debug.
- *
- * If any allocation happens during memcg_kmem_get_cache, we will recurse back
- * to it. This may not be a bounded recursion: since the first cache creation
- * failed to complete (waiting on the allocation), we'll just try to create the
- * cache again, failing at the same point.
- *
- * memcg_kmem_get_cache is prepared to abort after seeing a positive count of
- * memcg_kmem_skip_account. So we enclose anything that might allocate memory
- * inside the following two functions.
- */
-static inline void memcg_stop_kmem_account(void)
-{
- VM_BUG_ON(!current->mm);
- current->memcg_kmem_skip_account++;
-}
-
-static inline void memcg_resume_kmem_account(void)
-{
- VM_BUG_ON(!current->mm);
- current->memcg_kmem_skip_account--;
}
int __memcg_cleanup_cache_params(struct kmem_cache *s)
mutex_lock(&memcg_slab_mutex);
list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
cachep = memcg_params_to_cache(params);
- kmem_cache_shrink(cachep);
- if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
- memcg_unregister_cache(cachep);
+ memcg_unregister_cache(cachep);
}
mutex_unlock(&memcg_slab_mutex);
}
struct memcg_register_cache_work *cw;
cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
- if (cw == NULL) {
- css_put(&memcg->css);
+ if (!cw)
return;
- }
+
+ css_get(&memcg->css);
cw->memcg = memcg;
cw->cachep = cachep;
* this point we can't allow ourselves back into memcg_kmem_get_cache,
* the safest choice is to do it like this, wrapping the whole function.
*/
- memcg_stop_kmem_account();
+ current->memcg_kmem_skip_account = 1;
__memcg_schedule_register_cache(memcg, cachep);
- memcg_resume_kmem_account();
+ current->memcg_kmem_skip_account = 0;
}
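
Because the window around __memcg_schedule_register_cache() is the only place
that sets the flag and such windows never nest, a plain assignment replaces
the old increment/decrement pair. Illustratively, any allocation made inside
the window is routed to the root cache by the check in
__memcg_kmem_get_cache() below (a sketch, not a hunk from this patch):

    current->memcg_kmem_skip_account = 1;
    cw = kmalloc(sizeof(*cw), GFP_NOWAIT); /* falls back to the root cache */
    current->memcg_kmem_skip_account = 0;
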
int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order)
{
unsigned int nr_pages = 1 << order;
- int res;
- res = memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages);
- if (!res)
- atomic_add(nr_pages, &cachep->memcg_params->nr_pages);
- return res;
+ return memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages);
}
void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
unsigned int nr_pages = 1 << order;
memcg_uncharge_kmem(cachep->memcg_params->memcg, nr_pages);
- atomic_sub(nr_pages, &cachep->memcg_params->nr_pages);
}
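
With the per-cache nr_pages counter dropped, charge and uncharge become
symmetric thin wrappers around the memcg page counter: a slab page of a given
order is charged once when allocated and uncharged with the same order when
freed. A hypothetical slab-side caller, for illustration only:

    /* Hypothetical caller; the real hooks live in the slab allocators. */
    if (__memcg_charge_slab(cachep, gfp, order))
            return NULL;                  /* charge failed, no page */
    /* ... allocate and use the slab page ... */
    __memcg_uncharge_slab(cachep, order); /* at free time, same order */
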
/*
* Can't be called in interrupt context or from kernel threads.
* This function needs to be called with rcu_read_lock() held.
*/
-struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
- gfp_t gfp)
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep)
{
struct mem_cgroup *memcg;
struct kmem_cache *memcg_cachep;
VM_BUG_ON(!cachep->memcg_params);
VM_BUG_ON(!cachep->memcg_params->is_root_cache);
- if (!current->mm || current->memcg_kmem_skip_account)
+ if (current->memcg_kmem_skip_account)
return cachep;
- rcu_read_lock();
- memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
-
+ memcg = get_mem_cgroup_from_mm(current->mm);
if (!memcg_kmem_is_active(memcg))
goto out;
memcg_cachep = cache_from_memcg_idx(cachep, memcg_cache_id(memcg));
- if (likely(memcg_cachep)) {
- cachep = memcg_cachep;
- goto out;
- }
-
- /* The corresponding put will be done in the workqueue. */
- if (!css_tryget_online(&memcg->css))
- goto out;
- rcu_read_unlock();
+ if (likely(memcg_cachep))
+ return memcg_cachep;
/*
* If we are in a safe context (can wait, and not in interrupt
* defer everything.
*/
memcg_schedule_register_cache(memcg, cachep);
- return cachep;
out:
- rcu_read_unlock();
+ css_put(&memcg->css);
return cachep;
}
+void __memcg_kmem_put_cache(struct kmem_cache *cachep)
+{
+ if (!is_root_cache(cachep))
+ css_put(&cachep->memcg_params->memcg->css);
+}
+
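
The new get/put pair makes cache pinning explicit: __memcg_kmem_get_cache()
now returns with the css reference still held whenever it hands back a
per-memcg cache, and the caller drops it once the cache no longer needs to be
kept alive. A hypothetical allocation path (the real call sites are the slab
allocators' pre/post-alloc hooks):

    struct kmem_cache *s = cachep;

    if (memcg_kmem_enabled())
            s = __memcg_kmem_get_cache(cachep); /* may pin a memcg cache */
    obj = kmem_cache_alloc(s, gfp);
    if (memcg_kmem_enabled())
            __memcg_kmem_put_cache(s);          /* no-op for root caches */
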
/*
* We need to verify if the allocation against current->mm->owner's memcg is
* possible for the given order. But the page is not allocated yet, so we'll
*_memcg = NULL;
- /*
- * Disabling accounting is only relevant for some specific memcg
- * internal allocations. Therefore we would initially not have such
- * check here, since direct calls to the page allocator that are
- * accounted to kmemcg (alloc_kmem_pages and friends) only happen
- * outside memcg core. We are mostly concerned with cache allocations,
- * and by having this test at memcg_kmem_get_cache, we are already able
- * to relay the allocation to the root cache and bypass the memcg cache
- * altogether.
- *
- * There is one exception, though: the SLUB allocator does not create
- * large order caches, but rather service large kmallocs directly from
- * the page allocator. Therefore, the following sequence when backed by
- * the SLUB allocator:
- *
- * memcg_stop_kmem_account();
- * kmalloc(<large_number>)
- * memcg_resume_kmem_account();
- *
- * would effectively ignore the fact that we should skip accounting,
- * since it will drive us directly to this function without passing
- * through the cache selector memcg_kmem_get_cache. Such large
- * allocations are extremely rare but can happen, for instance, for the
- * cache arrays. We bring this test here.
- */
- if (!current->mm || current->memcg_kmem_skip_account)
- return true;
-
memcg = get_mem_cgroup_from_mm(current->mm);
if (!memcg_kmem_is_active(memcg)) {
memcg_uncharge_kmem(memcg, 1 << order);
page->mem_cgroup = NULL;
}
-#else
-static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
-{
-}
#endif /* CONFIG_MEMCG_KMEM */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (memcg_kmem_is_active(memcg))
return 0;
- /*
- * We are going to allocate memory for data shared by all memory
- * cgroups so let's stop accounting here.
- */
- memcg_stop_kmem_account();
-
/*
* For simplicity, we won't allow this to be disabled. It also can't
* be changed if the cgroup has children already, or if tasks had
goto out;
}
- memcg->kmemcg_id = memcg_id;
- INIT_LIST_HEAD(&memcg->memcg_slab_caches);
-
/*
- * We couldn't have accounted to this cgroup, because it hasn't got the
- * active bit set yet, so this should succeed.
+ * We couldn't have accounted to this cgroup, because it hasn't been
+ * activated yet, so this should succeed.
*/
err = page_counter_limit(&memcg->kmem, nr_pages);
VM_BUG_ON(err);
static_key_slow_inc(&memcg_kmem_enabled_key);
/*
- * Setting the active bit after enabling static branching will
+ * A memory cgroup is considered kmem-active as soon as it gets a
+ * valid kmemcg_id. Setting the id after enabling static branching will
* guarantee no one starts accounting before all call sites are
* patched.
*/
- memcg_kmem_set_active(memcg);
+ memcg->kmemcg_id = memcg_id;
out:
- memcg_resume_kmem_account();
return err;
}
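
The publication ordering mirrors the old active-bit scheme: readers test the
static key before any per-memcg state, so assigning kmemcg_id only after
static_key_slow_inc() returns guarantees that no call site can observe an
active memcg before it has been patched. Reader side, schematically:

    /* Schematic reader; memcg_kmem_enabled() wraps the static key. */
    if (memcg_kmem_enabled() &&       /* patched by static_key_slow_inc() */
        memcg_kmem_is_active(memcg))  /* now: memcg->kmemcg_id >= 0 */
            /* charging is safe here */;
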
}
#endif /* CONFIG_NUMA */
-static inline void mem_cgroup_lru_names_not_uptodate(void)
-{
- BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
-}
-
static int memcg_stat_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
struct mem_cgroup *mi;
unsigned int i;
+ BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
+
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
{
int ret;
- memcg->kmemcg_id = -1;
ret = memcg_propagate_kmem(memcg);
if (ret)
return ret;
static void memcg_destroy_kmem(struct mem_cgroup *memcg)
{
+ memcg_unregister_all_caches(memcg);
mem_cgroup_sockets_destroy(memcg);
}
#else
free_percpu(memcg->stat);
- /*
- * We need to make sure that (at least for now), the jump label
- * destruction code runs outside of the cgroup lock. This is because
- * get_online_cpus(), which is called from the static_branch update,
- * can't be called inside the cgroup_lock. cpusets are the ones
- * enforcing this dependency, so if they ever change, we might as well.
- *
- * schedule_work() will guarantee this happens. Be careful if you need
- * to move this code around, and make sure it is outside
- * the cgroup_lock.
- */
disarm_static_keys(memcg);
kfree(memcg);
}
vmpressure_init(&memcg->vmpressure);
INIT_LIST_HEAD(&memcg->event_list);
spin_lock_init(&memcg->event_list_lock);
+#ifdef CONFIG_MEMCG_KMEM
+ memcg->kmemcg_id = -1;
+ INIT_LIST_HEAD(&memcg->memcg_slab_caches);
+#endif
return &memcg->css;
}
spin_unlock(&memcg->event_list_lock);
- memcg_unregister_all_caches(memcg);
vmpressure_cleanup(&memcg->vmpressure);
}