cgroup, memcg, cpuset: implement cgroup_taskset_for_each_leader()

[karo-tx-linux.git] / mm / memcontrol.c
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 1af057575ce9e65c862dfc61f574c46d42b7cd5f..33c8dad6830f86b36eb8fca89f734e1b16f139a9 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -111,56 +111,10 @@ static const char * const mem_cgroup_lru_names[] = {
         "unevictable",
  };
  
-/*
- * Per memcg event counter is incremented at every pagein/pageout. With THP,
- * it will be incremated by the number of pages. This counter is used for
- * for trigger some periodic events. This is straightforward and better
- * than using jiffies etc. to handle periodic memcg event.
- */
-enum mem_cgroup_events_target {
-       MEM_CGROUP_TARGET_THRESH,
-       MEM_CGROUP_TARGET_SOFTLIMIT,
-       MEM_CGROUP_TARGET_NUMAINFO,
-       MEM_CGROUP_NTARGETS,
-};
  #define THRESHOLDS_EVENTS_TARGET 128
  #define SOFTLIMIT_EVENTS_TARGET 1024
  #define NUMAINFO_EVENTS_TARGET 1024
  
-struct mem_cgroup_stat_cpu {
-       long count[MEM_CGROUP_STAT_NSTATS];
-       unsigned long events[MEMCG_NR_EVENTS];
-       unsigned long nr_page_events;
-       unsigned long targets[MEM_CGROUP_NTARGETS];
-};
-
-struct reclaim_iter {
-       struct mem_cgroup *position;
-       /* scan generation, increased every round-trip */
-       unsigned int generation;
-};
-
-/*
- * per-zone information in memory controller.
- */
-struct mem_cgroup_per_zone {
-       struct lruvec           lruvec;
-       unsigned long           lru_size[NR_LRU_LISTS];
-
-       struct reclaim_iter     iter[DEF_PRIORITY + 1];
-
-       struct rb_node          tree_node;      /* RB tree node */
-       unsigned long           usage_in_excess;/* Set to the value by which */
-                                               /* the soft limit is exceeded*/
-       bool                    on_tree;
-       struct mem_cgroup       *memcg;         /* Back pointer, we cannot */
-                                               /* use container_of        */
-};
-
-struct mem_cgroup_per_node {
-       struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
-};
-
  /*
   * Cgroups above their limits are maintained in a RB-Tree, independent of
   * their hierarchy representation
@@ -181,32 +135,6 @@ struct mem_cgroup_tree {
  
  static struct mem_cgroup_tree soft_limit_tree __read_mostly;
  
-struct mem_cgroup_threshold {
-       struct eventfd_ctx *eventfd;
-       unsigned long threshold;
-};
-
-/* For threshold */
-struct mem_cgroup_threshold_ary {
-       /* An array index points to threshold just below or equal to usage. */
-       int current_threshold;
-       /* Size of entries[] */
-       unsigned int size;
-       /* Array of thresholds */
-       struct mem_cgroup_threshold entries[0];
-};
-
-struct mem_cgroup_thresholds {
-       /* Primary thresholds array */
-       struct mem_cgroup_threshold_ary *primary;
-       /*
-        * Spare threshold array.
-        * This is needed to make mem_cgroup_unregister_event() "never fail".
-        * It must be able to store at least primary->size - 1 entries.
-        */
-       struct mem_cgroup_threshold_ary *spare;
-};
-
  /* for OOM */
  struct mem_cgroup_eventfd_list {
         struct list_head list;
@@ -256,113 +184,6 @@ struct mem_cgroup_event {
  static void mem_cgroup_threshold(struct mem_cgroup *memcg);
  static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
  
-/*
- * The memory controller data structure. The memory controller controls both
- * page cache and RSS per cgroup. We would eventually like to provide
- * statistics based on the statistics developed by Rik Van Riel for clock-pro,
- * to help the administrator determine what knobs to tune.
- */
-struct mem_cgroup {
-       struct cgroup_subsys_state css;
-
-       /* Accounted resources */
-       struct page_counter memory;
-       struct page_counter memsw;
-       struct page_counter kmem;
-
-       /* Normal memory consumption range */
-       unsigned long low;
-       unsigned long high;
-
-       unsigned long soft_limit;
-
-       /* vmpressure notifications */
-       struct vmpressure vmpressure;
-
-       /* css_online() has been completed */
-       int initialized;
-
-       /*
-        * Should the accounting and control be hierarchical, per subtree?
-        */
-       bool use_hierarchy;
-
-       /* protected by memcg_oom_lock */
-       bool            oom_lock;
-       int             under_oom;
-
-       int     swappiness;
-       /* OOM-Killer disable */
-       int             oom_kill_disable;
-
-       /* protect arrays of thresholds */
-       struct mutex thresholds_lock;
-
-       /* thresholds for memory usage. RCU-protected */
-       struct mem_cgroup_thresholds thresholds;
-
-       /* thresholds for mem+swap usage. RCU-protected */
-       struct mem_cgroup_thresholds memsw_thresholds;
-
-       /* For oom notifier event fd */
-       struct list_head oom_notify;
-
-       /*
-        * Should we move charges of a task when a task is moved into this
-        * mem_cgroup ? And what type of charges should we move ?
-        */
-       unsigned long move_charge_at_immigrate;
-       /*
-        * set > 0 if pages under this cgroup are moving to other cgroup.
-        */
-       atomic_t                moving_account;
-       /* taken only while moving_account > 0 */
-       spinlock_t              move_lock;
-       struct task_struct      *move_lock_task;
-       unsigned long           move_lock_flags;
-       /*
-        * percpu counter.
-        */
-       struct mem_cgroup_stat_cpu __percpu *stat;
-       spinlock_t pcp_counter_lock;
-
-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
-       struct cg_proto tcp_mem;
-#endif
-#if defined(CONFIG_MEMCG_KMEM)
-        /* Index in the kmem_cache->memcg_params.memcg_caches array */
-       int kmemcg_id;
-       bool kmem_acct_activated;
-       bool kmem_acct_active;
-#endif
-
-       int last_scanned_node;
-#if MAX_NUMNODES > 1
-       nodemask_t      scan_nodes;
-       atomic_t        numainfo_events;
-       atomic_t        numainfo_updating;
-#endif
-
-#ifdef CONFIG_CGROUP_WRITEBACK
-       struct list_head cgwb_list;
-       struct wb_domain cgwb_domain;
-#endif
-
-       /* List of events which userspace want to receive */
-       struct list_head event_list;
-       spinlock_t event_list_lock;
-
-       struct mem_cgroup_per_node *nodeinfo[0];
-       /* WARNING: nodeinfo must be the last member here */
-};
-
-#ifdef CONFIG_MEMCG_KMEM
-bool memcg_kmem_is_active(struct mem_cgroup *memcg)
-{
-       return memcg->kmem_acct_active;
-}
-#endif
-
  /* Stuffs for move charges at task migration. */
  /*
   * Types of charges to be moved.
@@ -423,11 +244,6 @@ enum res_type {
   */
  static DEFINE_MUTEX(memcg_create_mutex);
  
-struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s)
-{
-       return s ? container_of(s, struct mem_cgroup, css) : NULL;
-}
-
  /* Some nice accessors for the vmpressure. */
  struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg)
  {
@@ -499,8 +315,7 @@ void sock_update_memcg(struct sock *sk)
                 rcu_read_lock();
                 memcg = mem_cgroup_from_task(current);
                 cg_proto = sk->sk_prot->proto_cgroup(memcg);
-               if (!mem_cgroup_is_root(memcg) &&
-                   memcg_proto_active(cg_proto) &&
+               if (cg_proto && test_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags) &&
                     css_tryget_online(&memcg->css)) {
                         sk->sk_cgrp = cg_proto;
                 }
@@ -593,11 +408,6 @@ mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
         return &memcg->nodeinfo[nid]->zoneinfo[zid];
  }
  
-struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg)
-{
-       return &memcg->css;
-}
-
  /**
   * mem_cgroup_css_from_page - css of the memcg associated with a page
   * @page: page of interest
@@ -624,13 +434,41 @@ struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page)
  
         memcg = page->mem_cgroup;
  
-       if (!memcg || !cgroup_on_dfl(memcg->css.cgroup))
+       if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
                 memcg = root_mem_cgroup;
  
         rcu_read_unlock();
         return &memcg->css;
  }
  
+/**
+ * page_cgroup_ino - return inode number of the memcg a page is charged to
+ * @page: the page
+ *
+ * Look up the closest online ancestor of the memory cgroup @page is charged to
+ * and return its inode number or 0 if @page is not charged to any cgroup. It
+ * is safe to call this function without holding a reference to @page.
+ *
+ * Note, this function is inherently racy, because there is nothing to prevent
+ * the cgroup inode from getting torn down and potentially reallocated a moment
+ * after page_cgroup_ino() returns, so it should only be used by callers that
+ * do not care (such as procfs interfaces).
+ */
+ino_t page_cgroup_ino(struct page *page)
+{
+       struct mem_cgroup *memcg;
+       unsigned long ino = 0;
+
+       rcu_read_lock();
+       memcg = READ_ONCE(page->mem_cgroup);
+       while (memcg && !(memcg->css.flags & CSS_ONLINE))
+               memcg = parent_mem_cgroup(memcg);
+       if (memcg)
+               ino = cgroup_ino(memcg->css.cgroup);
+       rcu_read_unlock();
+       return ino;
+}
+
  static struct mem_cgroup_per_zone *
  mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page)
  {
@@ -876,14 +714,6 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
         __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
  }
  
-unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
-{
-       struct mem_cgroup_per_zone *mz;
-
-       mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
-       return mz->lru_size[lru];
-}
-
  static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
                                                   int nid,
                                                   unsigned int lru_mask)
@@ -986,6 +816,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
  
         return mem_cgroup_from_css(task_css(p, memory_cgrp_id));
  }
+EXPORT_SYMBOL(mem_cgroup_from_task);
  
  static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
  {
@@ -1031,7 +862,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
                                    struct mem_cgroup *prev,
                                    struct mem_cgroup_reclaim_cookie *reclaim)
  {
-       struct reclaim_iter *uninitialized_var(iter);
+       struct mem_cgroup_reclaim_iter *uninitialized_var(iter);
         struct cgroup_subsys_state *css = NULL;
         struct mem_cgroup *memcg = NULL;
         struct mem_cgroup *pos = NULL;
@@ -1173,30 +1004,6 @@ void mem_cgroup_iter_break(struct mem_cgroup *root,
              iter != NULL;                              \
              iter = mem_cgroup_iter(NULL, iter, NULL))
  
-void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
-{
-       struct mem_cgroup *memcg;
-
-       rcu_read_lock();
-       memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
-       if (unlikely(!memcg))
-               goto out;
-
-       switch (idx) {
-       case PGFAULT:
-               this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT]);
-               break;
-       case PGMAJFAULT:
-               this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT]);
-               break;
-       default:
-               BUG();
-       }
-out:
-       rcu_read_unlock();
-}
-EXPORT_SYMBOL(__mem_cgroup_count_vm_event);
-
  /**
   * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
   * @zone: zone of the wanted lruvec
@@ -1295,15 +1102,6 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
         VM_BUG_ON((long)(*lru_size) < 0);
  }
  
-bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, struct mem_cgroup *root)
-{
-       if (root == memcg)
-               return true;
-       if (!root->use_hierarchy)
-               return false;
-       return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
-}
-
  bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
  {
         struct mem_cgroup *task_memcg;
@@ -1330,39 +1128,6 @@ bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
         return ret;
  }
  
-int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
-{
-       unsigned long inactive_ratio;
-       unsigned long inactive;
-       unsigned long active;
-       unsigned long gb;
-
-       inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON);
-       active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON);
-
-       gb = (inactive + active) >> (30 - PAGE_SHIFT);
-       if (gb)
-               inactive_ratio = int_sqrt(10 * gb);
-       else
-               inactive_ratio = 1;
-
-       return inactive * inactive_ratio < active;
-}
-
-bool mem_cgroup_lruvec_online(struct lruvec *lruvec)
-{
-       struct mem_cgroup_per_zone *mz;
-       struct mem_cgroup *memcg;
-
-       if (mem_cgroup_disabled())
-               return true;
-
-       mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
-       memcg = mz->memcg;
-
-       return !!(memcg->css.flags & CSS_ONLINE);
-}
-
  #define mem_cgroup_from_counter(counter, member)       \
         container_of(counter, struct mem_cgroup, member)
  
@@ -1394,15 +1159,6 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
         return margin;
  }
  
-int mem_cgroup_swappiness(struct mem_cgroup *memcg)
-{
-       /* root ? */
-       if (mem_cgroup_disabled() || !memcg->css.parent)
-               return vm_swappiness;
-
-       return memcg->swappiness;
-}
-
  /*
   * A routine for checking "mem" is under move_account() or not.
   *
@@ -1545,6 +1301,12 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
  static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
                                      int order)
  {
+       struct oom_control oc = {
+               .zonelist = NULL,
+               .nodemask = NULL,
+               .gfp_mask = gfp_mask,
+               .order = order,
+       };
         struct mem_cgroup *iter;
         unsigned long chosen_points = 0;
         unsigned long totalpages;
@@ -1563,7 +1325,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
                 goto unlock;
         }
  
-       check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
+       check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg);
         totalpages = mem_cgroup_get_limit(memcg) ? : 1;
         for_each_mem_cgroup_tree(iter, memcg) {
                 struct css_task_iter it;
@@ -1571,8 +1333,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
  
                 css_task_iter_start(&iter->css, &it);
                 while ((task = css_task_iter_next(&it))) {
-                       switch (oom_scan_process_thread(task, totalpages, NULL,
-                                                       false)) {
+                       switch (oom_scan_process_thread(&oc, task, totalpages)) {
                         case OOM_SCAN_SELECT:
                                 if (chosen)
                                         put_task_struct(chosen);
@@ -1610,8 +1371,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
  
         if (chosen) {
                 points = chosen_points * 1000 / totalpages;
-               oom_kill_process(chosen, gfp_mask, order, points, totalpages,
-                                memcg, NULL, "Memory cgroup out of memory");
+               oom_kill_process(&oc, chosen, points, totalpages, memcg,
+                                "Memory cgroup out of memory");
         }
  unlock:
         mutex_unlock(&oom_lock);
@@ -2062,23 +1823,6 @@ void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)
  }
  EXPORT_SYMBOL(mem_cgroup_end_page_stat);
  
-/**
- * mem_cgroup_update_page_stat - update page state statistics
- * @memcg: memcg to account against
- * @idx: page state item to account
- * @val: number of pages (positive or negative)
- *
- * See mem_cgroup_begin_page_stat() for locking requirements.
- */
-void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
-                                enum mem_cgroup_stat_index idx, int val)
-{
-       VM_BUG_ON(!rcu_read_lock_held());
-
-       if (memcg)
-               this_cpu_add(memcg->stat->count[idx], val);
-}
-
  /*
   * size of first charge trial. "32" comes from vmscan.c's magic value.
   * TODO: maybe necessary to use big numbers in big irons.
@@ -2355,40 +2099,6 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
         css_put_many(&memcg->css, nr_pages);
  }
  
-/*
- * try_get_mem_cgroup_from_page - look up page's memcg association
- * @page: the page
- *
- * Look up, get a css reference, and return the memcg that owns @page.
- *
- * The page must be locked to prevent racing with swap-in and page
- * cache charges.  If coming from an unlocked page table, the caller
- * must ensure the page is on the LRU or this can race with charging.
- */
-struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
-{
-       struct mem_cgroup *memcg;
-       unsigned short id;
-       swp_entry_t ent;
-
-       VM_BUG_ON_PAGE(!PageLocked(page), page);
-
-       memcg = page->mem_cgroup;
-       if (memcg) {
-               if (!css_tryget_online(&memcg->css))
-                       memcg = NULL;
-       } else if (PageSwapCache(page)) {
-               ent.val = page_private(page);
-               id = lookup_swap_cgroup_id(ent);
-               rcu_read_lock();
-               memcg = mem_cgroup_from_id(id);
-               if (memcg && !css_tryget_online(&memcg->css))
-                       memcg = NULL;
-               rcu_read_unlock();
-       }
-       return memcg;
-}
-
  static void lock_page_lru(struct page *page, int *isolated)
  {
         struct zone *zone = page_zone(page);
@@ -2504,16 +2214,6 @@ void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages)
         css_put_many(&memcg->css, nr_pages);
  }
  
-/*
- * helper for acessing a memcg's index. It will be used as an index in the
- * child cache array in kmem_cache, and also to derive its name. This function
- * will return -1 when this is not a kmem-limited memcg.
- */
-int memcg_cache_id(struct mem_cgroup *memcg)
-{
-       return memcg ? memcg->kmemcg_id : -1;
-}
-
  static int memcg_alloc_cache_id(void)
  {
         int id, size;
@@ -4360,8 +4060,7 @@ static struct cftype mem_cgroup_legacy_files[] = {
         {
                 .name = "cgroup.event_control",         /* XXX: for compat */
                 .write = memcg_write_event_control,
-               .flags = CFTYPE_NO_PREFIX,
-               .mode = S_IWUGO,
+               .flags = CFTYPE_NO_PREFIX | CFTYPE_WORLD_WRITABLE,
         },
         {
                 .name = "swappiness",
@@ -5127,10 +4826,12 @@ static void mem_cgroup_clear_mc(void)
  static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
                                  struct cgroup_taskset *tset)
  {
-       struct task_struct *p = cgroup_taskset_first(tset);
-       int ret = 0;
         struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+       struct mem_cgroup *from;
+       struct task_struct *leader, *p;
+       struct mm_struct *mm;
         unsigned long move_flags;
+       int ret = 0;
  
         /*
          * We are now commited to this value whatever it is. Changes in this
@@ -5138,36 +4839,50 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
          * So we need to save it, and keep it going.
          */
         move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
-       if (move_flags) {
-               struct mm_struct *mm;
-               struct mem_cgroup *from = mem_cgroup_from_task(p);
+       if (!move_flags)
+               return 0;
+
+       /*
+        * Multi-process migrations only happen on the default hierarchy
+        * where charge immigration is not used.  Perform charge
+        * immigration if @tset contains a leader and whine if there are
+        * multiple.
+        */
+       p = NULL;
+       cgroup_taskset_for_each_leader(leader, tset) {
+               WARN_ON_ONCE(p);
+               p = leader;
+       }
+       if (!p)
+               return 0;
  
-               VM_BUG_ON(from == memcg);
+       from = mem_cgroup_from_task(p);
  
-               mm = get_task_mm(p);
-               if (!mm)
-                       return 0;
-               /* We move charges only when we move a owner of the mm */
-               if (mm->owner == p) {
-                       VM_BUG_ON(mc.from);
-                       VM_BUG_ON(mc.to);
-                       VM_BUG_ON(mc.precharge);
-                       VM_BUG_ON(mc.moved_charge);
-                       VM_BUG_ON(mc.moved_swap);
-
-                       spin_lock(&mc.lock);
-                       mc.from = from;
-                       mc.to = memcg;
-                       mc.flags = move_flags;
-                       spin_unlock(&mc.lock);
-                       /* We set mc.moving_task later */
-
-                       ret = mem_cgroup_precharge_mc(mm);
-                       if (ret)
-                               mem_cgroup_clear_mc();
-               }
-               mmput(mm);
+       VM_BUG_ON(from == memcg);
+
+       mm = get_task_mm(p);
+       if (!mm)
+               return 0;
+       /* We move charges only when we move the owner of the mm */
+       if (mm->owner == p) {
+               VM_BUG_ON(mc.from);
+               VM_BUG_ON(mc.to);
+               VM_BUG_ON(mc.precharge);
+               VM_BUG_ON(mc.moved_charge);
+               VM_BUG_ON(mc.moved_swap);
+
+               spin_lock(&mc.lock);
+               mc.from = from;
+               mc.to = memcg;
+               mc.flags = move_flags;
+               spin_unlock(&mc.lock);
+               /* We set mc.moving_task later */
+
+               ret = mem_cgroup_precharge_mc(mm);
+               if (ret)
+                       mem_cgroup_clear_mc();
         }
+       mmput(mm);
         return ret;
  }
  
@@ -5356,7 +5071,7 @@ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css)
          * guarantees that @root doesn't have any children, so turning it
          * on for the root memcg is enough.
          */
-       if (cgroup_on_dfl(root_css->cgroup))
+       if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
                 root_mem_cgroup->use_hierarchy = true;
         else
                 root_mem_cgroup->use_hierarchy = false;
@@ -5500,6 +5215,7 @@ static struct cftype memory_files[] = {
         {
                 .name = "events",
                 .flags = CFTYPE_NOT_ON_ROOT,
+               .file_offset = offsetof(struct mem_cgroup, events_file),
                 .seq_show = memory_events_show,
         },
         { }     /* terminate */
@@ -5520,19 +5236,6 @@ struct cgroup_subsys memory_cgrp_subsys = {
         .early_init = 0,
  };
  
-/**
- * mem_cgroup_events - count memory events against a cgroup
- * @memcg: the memory cgroup
- * @idx: the event index
- * @nr: the number of events to account for
- */
-void mem_cgroup_events(struct mem_cgroup *memcg,
-                      enum mem_cgroup_events_index idx,
-                      unsigned int nr)
-{
-       this_cpu_add(memcg->stat->events[idx], nr);
-}
-
  /**
   * mem_cgroup_low - check if memory consumption is below the normal range
   * @root: the highest ancestor to consider
@@ -5605,8 +5308,20 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
                  * the page lock, which serializes swap cache removal, which
                  * in turn serializes uncharging.
                  */
+               VM_BUG_ON_PAGE(!PageLocked(page), page);
                 if (page->mem_cgroup)
                         goto out;
+
+               if (do_swap_account) {
+                       swp_entry_t ent = { .val = page_private(page), };
+                       unsigned short id = lookup_swap_cgroup_id(ent);
+
+                       rcu_read_lock();
+                       memcg = mem_cgroup_from_id(id);
+                       if (memcg && !css_tryget_online(&memcg->css))
+                               memcg = NULL;
+                       rcu_read_unlock();
+               }
         }
  
         if (PageTransHuge(page)) {
@@ -5614,8 +5329,6 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
                 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
         }
  
-       if (do_swap_account && PageSwapCache(page))
-               memcg = try_get_mem_cgroup_from_page(page);
         if (!memcg)
                 memcg = get_mem_cgroup_from_mm(mm);