mm: migration: do not lose soft dirty bit if page is in migration state

[karo-tx-linux.git] / mm / memory.c
diff --git a/mm/memory.c b/mm/memory.c

index 2b73dbde2274a535aadf90bb3ca2873347ef66f9..f7b7692c05edee78faf951778bb3909143ac9ac1 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -837,6 +837,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                                          */
                                         make_migration_entry_read(&entry);
                                         pte = swp_entry_to_pte(entry);
+                                       if (pte_swp_soft_dirty(*src_pte))
+                                               pte = pte_swp_mksoft_dirty(pte);
                                         set_pte_at(src_mm, addr, src_pte, pte);
                                 }
                         }
@@ -3695,7 +3697,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
   * but allow concurrent faults), and pte mapped but not yet locked.
   * We return with mmap_sem still held, but pte unmapped and unlocked.
   */
-int handle_pte_fault(struct mm_struct *mm,
+static int handle_pte_fault(struct mm_struct *mm,
                      struct vm_area_struct *vma, unsigned long address,
                      pte_t *pte, pmd_t *pmd, unsigned int flags)
  {
@@ -3754,22 +3756,14 @@ unlock:
  /*
   * By the time we get here, we already hold the mm semaphore
   */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-               unsigned long address, unsigned int flags)
+static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+                            unsigned long address, unsigned int flags)
  {
         pgd_t *pgd;
         pud_t *pud;
         pmd_t *pmd;
         pte_t *pte;
  
-       __set_current_state(TASK_RUNNING);
-
-       count_vm_event(PGFAULT);
-       mem_cgroup_count_vm_event(mm, PGFAULT);
-
-       /* do counter updates before entering really critical section. */
-       check_sync_rss_stat(current);
-
         if (unlikely(is_vm_hugetlb_page(vma)))
                 return hugetlb_fault(mm, vma, address, flags);
  
@@ -3782,9 +3776,12 @@ retry:
         if (!pmd)
                 return VM_FAULT_OOM;
         if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) {
+               int ret = VM_FAULT_FALLBACK;
                 if (!vma->vm_ops)
-                       return do_huge_pmd_anonymous_page(mm, vma, address,
-                                                         pmd, flags);
+                       ret = do_huge_pmd_anonymous_page(mm, vma, address,
+                                       pmd, flags);
+               if (!(ret & VM_FAULT_FALLBACK))
+                       return ret;
         } else {
                 pmd_t orig_pmd = *pmd;
                 int ret;
@@ -3850,6 +3847,37 @@ retry:
         return handle_pte_fault(mm, vma, address, pte, pmd, flags);
  }
  
+int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+                   unsigned long address, unsigned int flags)
+{
+       int ret;        /* result of __handle_mm_fault(), handed back to the caller */
+
+       __set_current_state(TASK_RUNNING);
+       /* Count the PGFAULT event before the fault itself is handled. */
+       count_vm_event(PGFAULT);
+       mem_cgroup_count_vm_event(mm, PGFAULT);
+
+       /* do counter updates before entering really critical section. */
+       check_sync_rss_stat(current);
+
+       /*
+        * Enable the memcg OOM handling for faults triggered in user
+        * space.  Kernel faults are handled more gracefully.
+        */
+       if (flags & FAULT_FLAG_USER)
+               mem_cgroup_enable_oom();
+
+       ret = __handle_mm_fault(mm, vma, address, flags);       /* the actual fault handling */
+
+       if (flags & FAULT_FLAG_USER)
+               mem_cgroup_disable_oom();       /* pairs with the enable above */
+       /* Task marked in memcg OOM although the fault did not report VM_FAULT_OOM: warn. */
+       if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)))
+               mem_cgroup_oom_synchronize();   /* NOTE(review): presumably clears the stale OOM state - confirm */
+
+       return ret;
+}
+
  #ifndef __PAGETABLE_PUD_FOLDED
  /*
   * Allocate page upper directory.