val = __this_cpu_read(memcg->stat->nr_page_events);
next = __this_cpu_read(memcg->stat->targets[target]);
/* from time_after() in jiffies.h */
- if ((long)next - (long)val < 0) {
+ if ((long)(next - val) < 0) {
switch (target) {
case MEM_CGROUP_TARGET_THRESH:
next = val + THRESHOLDS_EVENTS_TARGET;
#ifdef CONFIG_MEMCG_SWAP
static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
- bool charge)
+ int nr_entries)
{
- int val = (charge) ? 1 : -1;
- this_cpu_add(memcg->stat->count[MEMCG_SWAP], val);
+ this_cpu_add(memcg->stat->count[MEMCG_SWAP], nr_entries);
}
/**
new_id = mem_cgroup_id(to);
if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
- mem_cgroup_swap_statistics(from, false);
- mem_cgroup_swap_statistics(to, true);
+ mem_cgroup_swap_statistics(from, -1);
+ mem_cgroup_swap_statistics(to, 1);
return 0;
}
return -EINVAL;
seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
+ seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
return 0;
}
if (!pn)
return 1;
+ pn->lruvec_stat = alloc_percpu(struct lruvec_stat);
+ if (!pn->lruvec_stat) {
+ kfree(pn);
+ return 1;
+ }
+
lruvec_init(&pn->lruvec);
pn->usage_in_excess = 0;
pn->on_tree = false;
static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
{
- kfree(memcg->nodeinfo[node]);
+ struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
+
+ free_percpu(pn->lruvec_stat);
+ kfree(pn);
}
static void __mem_cgroup_free(struct mem_cgroup *memcg)
seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH));
seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX));
seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM));
+ seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
return 0;
}
seq_printf(m, "kernel_stack %llu\n",
(u64)stat[MEMCG_KERNEL_STACK_KB] * 1024);
seq_printf(m, "slab %llu\n",
- (u64)(stat[MEMCG_SLAB_RECLAIMABLE] +
- stat[MEMCG_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
+ (u64)(stat[NR_SLAB_RECLAIMABLE] +
+ stat[NR_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
seq_printf(m, "sock %llu\n",
(u64)stat[MEMCG_SOCK] * PAGE_SIZE);
}
seq_printf(m, "slab_reclaimable %llu\n",
- (u64)stat[MEMCG_SLAB_RECLAIMABLE] * PAGE_SIZE);
+ (u64)stat[NR_SLAB_RECLAIMABLE] * PAGE_SIZE);
seq_printf(m, "slab_unreclaimable %llu\n",
- (u64)stat[MEMCG_SLAB_UNRECLAIMABLE] * PAGE_SIZE);
+ (u64)stat[NR_SLAB_UNRECLAIMABLE] * PAGE_SIZE);
/* Accumulated memory events */
seq_printf(m, "pgfault %lu\n", events[PGFAULT]);
seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT]);
+ seq_printf(m, "pgrefill %lu\n", events[PGREFILL]);
+ seq_printf(m, "pgscan %lu\n", events[PGSCAN_KSWAPD] +
+ events[PGSCAN_DIRECT]);
+ seq_printf(m, "pgsteal %lu\n", events[PGSTEAL_KSWAPD] +
+ events[PGSTEAL_DIRECT]);
+ seq_printf(m, "pgactivate %lu\n", events[PGACTIVATE]);
+ seq_printf(m, "pgdeactivate %lu\n", events[PGDEACTIVATE]);
+ seq_printf(m, "pglazyfree %lu\n", events[PGLAZYFREE]);
+ seq_printf(m, "pglazyfreed %lu\n", events[PGLAZYFREED]);
+
seq_printf(m, "workingset_refault %lu\n",
stat[WORKINGSET_REFAULT]);
seq_printf(m, "workingset_activate %lu\n",
/**
* mem_cgroup_low - check if memory consumption is below the normal range
- * @root: the highest ancestor to consider
+ * @root: the top ancestor of the sub-tree being checked
* @memcg: the memory cgroup to check
*
* Returns %true if memory consumption of @memcg, and that of all
- * configurable ancestors up to @root, is below the normal range.
+ * ancestors up to (but not including) @root, is below the normal range.
+ *
+ * @root is exclusive; it is never low when looked at directly and isn't
+ * checked when traversing the hierarchy.
+ *
+ * Excluding @root enables using memory.low to prioritize memory usage
+ * between cgroups within a subtree of the hierarchy that is limited by
+ * memory.high or memory.max.
+ *
+ * For example, given cgroup A with children B and C:
+ *
+ * A
+ * / \
+ * B C
+ *
+ * and
+ *
+ * 1. A/memory.current > A/memory.high
+ * 2. A/B/memory.current < A/B/memory.low
+ * 3. A/C/memory.current >= A/C/memory.low
+ *
+ * As 'A' is high, i.e. triggers reclaim from 'A', and 'B' is low, we
+ * should reclaim from 'C' until 'A' is no longer high or until we can
+ * no longer reclaim from 'C'. If 'A', i.e. @root, isn't excluded by
+ * mem_cgroup_low when reclaming from 'A', then 'B' won't be considered
+ * low and we will reclaim indiscriminately from both 'B' and 'C'.
*/
bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
{
if (mem_cgroup_disabled())
return false;
- /*
- * The toplevel group doesn't have a configurable range, so
- * it's never low when looked at directly, and it is not
- * considered an ancestor when assessing the hierarchy.
- */
-
- if (memcg == root_mem_cgroup)
- return false;
-
- if (page_counter_read(&memcg->memory) >= memcg->low)
+ if (!root)
+ root = root_mem_cgroup;
+ if (memcg == root)
return false;
- while (memcg != root) {
- memcg = parent_mem_cgroup(memcg);
-
- if (memcg == root_mem_cgroup)
- break;
-
+ for (; memcg != root; memcg = parent_mem_cgroup(memcg)) {
if (page_counter_read(&memcg->memory) >= memcg->low)
return false;
}
+
return true;
}
* let's not wait for it. The page already received a
* memory+swap charge, drop the swap entry duplicate.
*/
- mem_cgroup_uncharge_swap(entry);
+ mem_cgroup_uncharge_swap(entry, nr_pages);
}
}
* ancestor for the swap instead and transfer the memory+swap charge.
*/
swap_memcg = mem_cgroup_id_get_online(memcg);
- oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg), 1);
VM_BUG_ON_PAGE(oldid, page);
- mem_cgroup_swap_statistics(swap_memcg, true);
+ mem_cgroup_swap_statistics(swap_memcg, 1);
page->mem_cgroup = NULL;
css_put(&memcg->css);
}
-/*
- * mem_cgroup_try_charge_swap - try charging a swap entry
+/**
+ * mem_cgroup_try_charge_swap - try charging swap space for a page
* @page: page being added to swap
* @entry: swap entry to charge
*
- * Try to charge @entry to the memcg that @page belongs to.
+ * Try to charge @page's memcg for the swap space at @entry.
*
* Returns 0 on success, -ENOMEM on failure.
*/
int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
{
- struct mem_cgroup *memcg;
+ unsigned int nr_pages = hpage_nr_pages(page);
struct page_counter *counter;
+ struct mem_cgroup *memcg;
unsigned short oldid;
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) || !do_swap_account)
memcg = mem_cgroup_id_get_online(memcg);
if (!mem_cgroup_is_root(memcg) &&
- !page_counter_try_charge(&memcg->swap, 1, &counter)) {
+ !page_counter_try_charge(&memcg->swap, nr_pages, &counter)) {
mem_cgroup_id_put(memcg);
return -ENOMEM;
}
- oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+ /* Get references for the tail pages, too */
+ if (nr_pages > 1)
+ mem_cgroup_id_get_many(memcg, nr_pages - 1);
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg), nr_pages);
VM_BUG_ON_PAGE(oldid, page);
- mem_cgroup_swap_statistics(memcg, true);
+ mem_cgroup_swap_statistics(memcg, nr_pages);
return 0;
}
/**
- * mem_cgroup_uncharge_swap - uncharge a swap entry
+ * mem_cgroup_uncharge_swap - uncharge swap space
* @entry: swap entry to uncharge
- *
- * Drop the swap charge associated with @entry.
+ * @nr_pages: the amount of swap space to uncharge
*/
-void mem_cgroup_uncharge_swap(swp_entry_t entry)
+void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
{
struct mem_cgroup *memcg;
unsigned short id;
if (!do_swap_account)
return;
- id = swap_cgroup_record(entry, 0);
+ id = swap_cgroup_record(entry, 0, nr_pages);
rcu_read_lock();
memcg = mem_cgroup_from_id(id);
if (memcg) {
if (!mem_cgroup_is_root(memcg)) {
if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
- page_counter_uncharge(&memcg->swap, 1);
+ page_counter_uncharge(&memcg->swap, nr_pages);
else
- page_counter_uncharge(&memcg->memsw, 1);
+ page_counter_uncharge(&memcg->memsw, nr_pages);
}
- mem_cgroup_swap_statistics(memcg, false);
- mem_cgroup_id_put(memcg);
+ mem_cgroup_swap_statistics(memcg, -nr_pages);
+ mem_cgroup_id_put_many(memcg, nr_pages);
}
rcu_read_unlock();
}