* Copyright (C) 2012 Parallels Inc. and Google Inc.
* Authors: Glauber Costa and Suleiman Souhlal
*
+ * Native page reclaim
+ * Charge lifetime sanitation
+ * Lockless page tracking & accounting
+ * Unified hierarchy configuration model
+ * Copyright (C) 2015 Red Hat, Inc., Johannes Weiner
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* page cache and RSS per cgroup. We would eventually like to provide
* statistics based on the statistics developed by Rik Van Riel for clock-pro,
* to help the administrator determine what knobs to tune.
- *
- * TODO: Add a water mark for the memory controller. Reclaim will begin when
- * we hit the water mark. May be even add a low water mark, such that
- * no reclaim occurs from a cgroup at it's low water mark, this is
- * a feature that will be implemented much later in the future.
*/
struct mem_cgroup {
struct cgroup_subsys_state css;
return memcg->css.id;
}
+/*
+ * A helper function to get mem_cgroup from ID. Must be called under
+ * rcu_read_lock(). The caller is responsible for calling
+ * css_tryget_online() if the mem_cgroup is used for charging. (Dropping
+ * refcnt from swap can be called against removed memcg.)
+ */
static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
{
struct cgroup_subsys_state *css;
static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
{
unsigned long nr_pages = page_counter_read(&memcg->memory);
- unsigned long soft_limit = ACCESS_ONCE(memcg->soft_limit);
+ unsigned long soft_limit = READ_ONCE(memcg->soft_limit);
unsigned long excess = 0;
if (nr_pages > soft_limit)
goto out_unlock;
do {
- pos = ACCESS_ONCE(iter->position);
+ pos = READ_ONCE(iter->position);
/*
* A racing update may change the position and
* put the last reference, hence css_tryget(),
unsigned long limit;
count = page_counter_read(&memcg->memory);
- limit = ACCESS_ONCE(memcg->memory.limit);
+ limit = READ_ONCE(memcg->memory.limit);
if (count < limit)
margin = limit - count;
if (do_swap_account) {
count = page_counter_read(&memcg->memsw);
- limit = ACCESS_ONCE(memcg->memsw.limit);
+ limit = READ_ONCE(memcg->memsw.limit);
if (count <= limit)
margin = min(margin, limit - count);
}
struct mem_cgroup *iter;
unsigned int i;
- if (!p)
- return;
-
mutex_lock(&oom_info_lock);
rcu_read_lock();
- pr_info("Task in ");
- pr_cont_cgroup_path(task_cgroup(p, memory_cgrp_id));
- pr_cont(" killed as a result of limit of ");
+ if (p) {
+ pr_info("Task in ");
+ pr_cont_cgroup_path(task_cgroup(p, memory_cgrp_id));
+ pr_cont(" killed as a result of limit of ");
+ } else {
+ pr_info("Memory limit reached of cgroup ");
+ }
+
pr_cont_cgroup_path(memcg->css.cgroup);
pr_cont("\n");
return;
}
- check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL);
+ check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
totalpages = mem_cgroup_get_limit(memcg) ? : 1;
for_each_mem_cgroup_tree(iter, memcg) {
struct css_task_iter it;
css_put_many(&memcg->css, nr_pages);
}
-/*
- * A helper function to get mem_cgroup from ID. must be called under
- * rcu_read_lock(). The caller is responsible for calling
- * css_tryget_online() if the mem_cgroup is used for charging. (dropping
- * refcnt from swap can be called against removed memcg.)
- */
-static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
-{
- /* ID 0 is unused ID */
- if (!id)
- return NULL;
- return mem_cgroup_from_id(id);
-}
-
/*
* try_get_mem_cgroup_from_page - look up page's memcg association
* @page: the page
ent.val = page_private(page);
id = lookup_swap_cgroup_id(ent);
rcu_read_lock();
- memcg = mem_cgroup_lookup(id);
+ memcg = mem_cgroup_from_id(id);
if (memcg && !css_tryget_online(&memcg->css))
memcg = NULL;
rcu_read_unlock();
return cachep;
memcg = get_mem_cgroup_from_mm(current->mm);
- kmemcg_id = ACCESS_ONCE(memcg->kmemcg_id);
+ kmemcg_id = READ_ONCE(memcg->kmemcg_id);
if (kmemcg_id < 0)
goto out;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-/**
- * mem_cgroup_move_account - move account of the page
- * @page: the page
- * @nr_pages: number of regular pages (>1 for huge pages)
- * @from: mem_cgroup which the page is moved from.
- * @to: mem_cgroup which the page is moved to. @from != @to.
- *
- * The caller must confirm following.
- * - page is not on LRU (isolate_page() is useful.)
- * - compound_lock is held when nr_pages > 1
- *
- * This function doesn't do "charge" to new cgroup and doesn't do "uncharge"
- * from old cgroup.
- */
-static int mem_cgroup_move_account(struct page *page,
- unsigned int nr_pages,
- struct mem_cgroup *from,
- struct mem_cgroup *to)
-{
- unsigned long flags;
- int ret;
-
- VM_BUG_ON(from == to);
- VM_BUG_ON_PAGE(PageLRU(page), page);
- /*
- * The page is isolated from LRU. So, collapse function
- * will not handle this page. But page splitting can happen.
- * Do this check under compound_page_lock(). The caller should
- * hold it.
- */
- ret = -EBUSY;
- if (nr_pages > 1 && !PageTransHuge(page))
- goto out;
-
- /*
- * Prevent mem_cgroup_migrate() from looking at page->mem_cgroup
- * of its source page while we change it: page migration takes
- * both pages off the LRU, but page cache replacement doesn't.
- */
- if (!trylock_page(page))
- goto out;
-
- ret = -EINVAL;
- if (page->mem_cgroup != from)
- goto out_unlock;
-
- spin_lock_irqsave(&from->move_lock, flags);
-
- if (!PageAnon(page) && page_mapped(page)) {
- __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
- nr_pages);
- __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
- nr_pages);
- }
-
- if (PageWriteback(page)) {
- __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_WRITEBACK],
- nr_pages);
- __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_WRITEBACK],
- nr_pages);
- }
-
- /*
- * It is safe to change page->mem_cgroup here because the page
- * is referenced, charged, and isolated - we can't race with
- * uncharging, charging, migration, or LRU putback.
- */
-
- /* caller should have done css_get */
- page->mem_cgroup = to;
- spin_unlock_irqrestore(&from->move_lock, flags);
-
- ret = 0;
-
- local_irq_disable();
- mem_cgroup_charge_statistics(to, page, nr_pages);
- memcg_check_events(to, page);
- mem_cgroup_charge_statistics(from, page, -nr_pages);
- memcg_check_events(from, page);
- local_irq_enable();
-out_unlock:
- unlock_page(page);
-out:
- return ret;
-}
-
#ifdef CONFIG_MEMCG_SWAP
static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
bool charge)
return page;
}
+/**
+ * mem_cgroup_move_account - move account of the page
+ * @page: the page
+ * @nr_pages: number of regular pages (>1 for huge pages)
+ * @from: mem_cgroup which the page is moved from.
+ * @to: mem_cgroup which the page is moved to. @from != @to.
+ *
+ * The caller must ensure the following:
+ * - page is not on LRU (isolate_page() is useful.)
+ * - compound_lock is held when nr_pages > 1
+ *
+ * This function doesn't do "charge" to new cgroup and doesn't do "uncharge"
+ * from old cgroup. It only re-points page->mem_cgroup from @from to @to,
+ * transfers the per-cpu FILE_MAPPED count (for mapped non-anonymous pages)
+ * and WRITEBACK count (for pages under writeback), and then updates the
+ * charge statistics and event checks of both cgroups.
+ *
+ * Return: 0 on success; -EBUSY if @nr_pages > 1 but PageTransHuge(@page) is
+ * false, or if trylock_page(@page) fails; -EINVAL if page->mem_cgroup is
+ * not @from.
+ */
+static int mem_cgroup_move_account(struct page *page,
+ unsigned int nr_pages,
+ struct mem_cgroup *from,
+ struct mem_cgroup *to)
+{
+ unsigned long flags;
+ int ret;
+
+ VM_BUG_ON(from == to);
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ /*
+ * The page is isolated from LRU. So, collapse function
+ * will not handle this page. But page splitting can happen.
+ * Do this check under compound_page_lock(). The caller should
+ * hold it.
+ *
+ * A multi-page move of something that is no longer a huge page
+ * is refused with -EBUSY.
+ */
+ ret = -EBUSY;
+ if (nr_pages > 1 && !PageTransHuge(page))
+ goto out;
+
+ /*
+ * Prevent mem_cgroup_migrate() from looking at page->mem_cgroup
+ * of its source page while we change it: page migration takes
+ * both pages off the LRU, but page cache replacement doesn't.
+ *
+ * Only a trylock is attempted; if it fails, the -EBUSY set above
+ * is returned.
+ */
+ if (!trylock_page(page))
+ goto out;
+
+ ret = -EINVAL;
+ if (page->mem_cgroup != from)
+ goto out_unlock;
+
+ spin_lock_irqsave(&from->move_lock, flags);
+
+ if (!PageAnon(page) && page_mapped(page)) {
+ __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
+ nr_pages);
+ __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
+ nr_pages);
+ }
+
+ if (PageWriteback(page)) {
+ __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_WRITEBACK],
+ nr_pages);
+ __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_WRITEBACK],
+ nr_pages);
+ }
+
+ /*
+ * It is safe to change page->mem_cgroup here because the page
+ * is referenced, charged, and isolated - we can't race with
+ * uncharging, charging, migration, or LRU putback.
+ *
+ * The per-cpu stat transfer above and the pointer update below
+ * both happen while from->move_lock is held (taken with
+ * spin_lock_irqsave()).
+ */
+
+ /* caller should have done css_get */
+ page->mem_cgroup = to;
+ spin_unlock_irqrestore(&from->move_lock, flags);
+
+ ret = 0;
+
+ local_irq_disable();
+ mem_cgroup_charge_statistics(to, page, nr_pages);
+ memcg_check_events(to, page);
+ mem_cgroup_charge_statistics(from, page, -nr_pages);
+ memcg_check_events(from, page);
+ local_irq_enable();
+out_unlock:
+ unlock_page(page);
+out:
+ return ret;
+}
+
static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent, union mc_target *target)
{
* tunable will only affect upcoming migrations, not the current one.
* So we need to save it, and keep it going.
*/
- move_flags = ACCESS_ONCE(memcg->move_charge_at_immigrate);
+ move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
if (move_flags) {
struct mm_struct *mm;
struct mem_cgroup *from = mem_cgroup_from_task(p);
static int memory_low_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
- unsigned long low = ACCESS_ONCE(memcg->low);
+ unsigned long low = READ_ONCE(memcg->low);
if (low == PAGE_COUNTER_MAX)
seq_puts(m, "max\n");
static int memory_high_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
- unsigned long high = ACCESS_ONCE(memcg->high);
+ unsigned long high = READ_ONCE(memcg->high);
if (high == PAGE_COUNTER_MAX)
seq_puts(m, "max\n");
static int memory_max_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
- unsigned long max = ACCESS_ONCE(memcg->memory.limit);
+ unsigned long max = READ_ONCE(memcg->memory.limit);
if (max == PAGE_COUNTER_MAX)
seq_puts(m, "max\n");
id = swap_cgroup_record(entry, 0);
rcu_read_lock();
- memcg = mem_cgroup_lookup(id);
+ memcg = mem_cgroup_from_id(id);
if (memcg) {
if (!mem_cgroup_is_root(memcg))
page_counter_uncharge(&memcg->memsw, 1);