* increase it.
*/
static DEFINE_IDA(kmem_limited_groups);
-static int memcg_limited_groups_array_size;
+int memcg_limited_groups_array_size;
+
/*
* MIN_SIZE is different than 1, because we would like to avoid going through
* the alloc/free process all the time. In a small machine, 4 kmem-limited
return cachep->memcg_params->memcg_caches[memcg_cache_id(p->memcg)];
}
+#ifdef CONFIG_SLABINFO
+static int mem_cgroup_slabinfo_read(struct cgroup *cont, struct cftype *cft,
+ struct seq_file *m)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct memcg_cache_params *params;
+
+ if (!memcg_can_account_kmem(memcg))
+ return -EIO;
+
+ print_slabinfo_header(m);
+
+ mutex_lock(&memcg->slab_caches_mutex);
+ list_for_each_entry(params, &memcg->memcg_slab_caches, list)
+ cache_show(memcg_params_to_cache(params), m);
+ mutex_unlock(&memcg->slab_caches_mutex);
+
+ return 0;
+}
+#endif
+
static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
{
struct res_counter *fail_res;
return 0;
}
-int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
+int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
+ struct kmem_cache *root_cache)
{
size_t size = sizeof(struct memcg_cache_params);
if (!s->memcg_params)
return -ENOMEM;
- if (memcg)
+ if (memcg) {
s->memcg_params->memcg = memcg;
+ s->memcg_params->root_cache = root_cache;
+ } else
+ s->memcg_params->is_root_cache = true;
+
return 0;
}
cachep = memcg_params_to_cache(p);
- if (!atomic_read(&cachep->memcg_params->nr_pages))
+ /*
+ * If we get down to 0 after shrink, we could delete right away.
+ * However, memcg_release_pages() already puts us back in the workqueue
+ * in that case. If we proceed deleting, we'll get a dangling
+ * reference, and removing the object from the workqueue in that case
+ * is unnecessary complication. We are not a fast path.
+ *
+ * Note that this case is fundamentally different from racing with
+ * shrink_slab(): if memcg_cgroup_destroy_cache() is called in
+ * kmem_cache_shrink, not only we would be reinserting a dead cache
+ * into the queue, but doing so from inside the worker racing to
+ * destroy it.
+ *
+ * So if we aren't down to zero, we'll just schedule a worker and try
+ * again
+ */
+ if (atomic_read(&cachep->memcg_params->nr_pages) != 0) {
+ kmem_cache_shrink(cachep);
+ if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
+ return;
+ } else
kmem_cache_destroy(cachep);
}
if (!cachep->memcg_params->dead)
return;
+ /*
+ * There are many ways in which we can get here.
+ *
+ * We can get to a memory-pressure situation while the delayed work is
+ * still pending to run. The vmscan shrinkers can then release all
+ * cache memory and get us to destruction. If this is the case, we'll
+ * be executed twice, which is a bug (the second time will execute over
+ * bogus data). In this case, cancelling the work should be fine.
+ *
+ * But we can also get here from the worker itself, if
+ * kmem_cache_shrink is enough to shake all the remaining objects and
+ * get the page count to 0. In this case, we'll deadlock if we try to
+ * cancel the work (the worker runs with an internal lock held, which
+ * is the same lock we would hold for cancel_work_sync().)
+ *
+ * Since we can't possibly know who got us here, just refrain from
+ * running if there is already work pending
+ */
+ if (work_pending(&cachep->memcg_params->destroy))
+ return;
/*
* We have to defer the actual destroying to a workqueue, because
* we might currently be in a context that cannot sleep.
return NULL;
new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
- (s->flags & ~SLAB_PANIC), s->ctor);
+ (s->flags & ~SLAB_PANIC), s->ctor, s);
if (new)
new->allocflags |= __GFP_KMEMCG;
}
mem_cgroup_get(memcg);
- new_cachep->memcg_params->root_cache = cachep;
atomic_set(&new_cachep->memcg_params->nr_pages , 0);
cachep->memcg_params->memcg_caches[idx] = new_cachep;
* set, so flip it down to guarantee we are in control.
*/
c->memcg_params->dead = false;
- cancel_delayed_work_sync(&c->memcg_params->destroy);
+ cancel_work_sync(&c->memcg_params->destroy);
kmem_cache_destroy(c);
}
mutex_unlock(&set_limit_mutex);
cachep = memcg_params_to_cache(params);
cachep->memcg_params->dead = true;
INIT_WORK(&cachep->memcg_params->destroy,
- kmem_cache_destroy_work_func);
+ kmem_cache_destroy_work_func);
schedule_work(&cachep->memcg_params->destroy);
}
mutex_unlock(&memcg->slab_caches_mutex);
.trigger = mem_cgroup_reset,
.read = mem_cgroup_read,
},
+#ifdef CONFIG_SLABINFO
+ {
+ .name = "kmem.slabinfo",
+ .read_seq_string = mem_cgroup_slabinfo_read,
+ },
+#endif
#endif
{ }, /* terminate */
};
&per_cpu(memcg_stock, cpu);
INIT_WORK(&stock->work, drain_local_stock);
}
- hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
} else {
parent = mem_cgroup_from_cont(cont->parent);
memcg->use_hierarchy = parent->use_hierarchy;
.use_id = 1,
};
+/*
+ * The rest of init is performed during ->css_alloc() for root css which
+ * happens before initcalls. hotcpu_notifier() can't be done together as
+ * it would introduce circular locking by adding cgroup_lock -> cpu hotplug
+ * dependency. Do it from a subsys_initcall().
+ */
+static int __init mem_cgroup_init(void)
+{
+ hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
+ return 0;
+}
+subsys_initcall(mem_cgroup_init);
+
#ifdef CONFIG_MEMCG_SWAP
static int __init enable_swap_account(char *s)
{