diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 88c6167f194db0ec07d707329569c2d4a9d34876..90731e3b7e589ea9f83c21827916917a27f71b82 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -32,6 +32,7 @@
 #include <linux/userfaultfd_k.h>
 #include <linux/page_idle.h>
 #include <linux/shmem_fs.h>
+#include <linux/oom.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
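The new <linux/oom.h> include is for check_stable_address_space(), used in the two fault paths below: once the OOM reaper starts tearing down an address space it marks the mm MMF_UNSTABLE, and a page fault racing with it must fail with SIGBUS instead of installing a fresh page over a range that was just reaped, which could otherwise surface as silent data corruption. For context, the helper reads roughly as follows in <linux/oom.h> at this point (quoted for reference, not part of this diff):

    static inline int check_stable_address_space(struct mm_struct *mm)
    {
            if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags)))
                    return VM_FAULT_SIGBUS;
            return 0;
    }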
@@ -550,6 +551,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
        struct mem_cgroup *memcg;
        pgtable_t pgtable;
        unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
+       int ret = 0;
 
        VM_BUG_ON_PAGE(!PageCompound(page), page);
 
@@ -561,9 +563,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
        pgtable = pte_alloc_one(vma->vm_mm, haddr);
        if (unlikely(!pgtable)) {
-               mem_cgroup_cancel_charge(page, memcg, true);
-               put_page(page);
-               return VM_FAULT_OOM;
+               ret = VM_FAULT_OOM;
+               goto release;
        }
 
        clear_huge_page(page, haddr, HPAGE_PMD_NR);
@@ -576,13 +577,14 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
        vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
        if (unlikely(!pmd_none(*vmf->pmd))) {
-               spin_unlock(vmf->ptl);
-               mem_cgroup_cancel_charge(page, memcg, true);
-               put_page(page);
-               pte_free(vma->vm_mm, pgtable);
+               goto unlock_release;
        } else {
                pmd_t entry;
 
+               ret = check_stable_address_space(vma->vm_mm);
+               if (ret)
+                       goto unlock_release;
+
                /* Deliver the page fault to userland */
                if (userfaultfd_missing(vma)) {
                        int ret;
@@ -610,6 +612,15 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
        }
 
        return 0;
+unlock_release:
+       spin_unlock(vmf->ptl);
+release:
+       if (pgtable)
+               pte_free(vma->vm_mm, pgtable);
+       mem_cgroup_cancel_charge(page, memcg, true);
+       put_page(page);
+       return ret;
+
 }
 
 /*
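The three duplicated cancel-charge/put-page error sequences in __do_huge_pmd_anonymous_page() collapse into one fall-through unwind. The "if (pgtable)" guard matters: the release label is reached with pgtable == NULL when pte_alloc_one() itself failed, and with a valid table when coming from unlock_release. A minimal standalone sketch of the same idiom, with illustrative stand-ins rather than kernel APIs:

    #include <errno.h>
    #include <stdlib.h>

    static void consume(void *charge, void *pgtable)
    {
            /* success path: ownership is handed off, nothing to unwind */
            free(charge);
            free(pgtable);
    }

    static int fault_path(void)
    {
            void *charge, *pgtable = NULL;
            int ret = 0;

            charge = malloc(16);            /* stands in for the memcg charge */
            if (!charge)
                    return -ENOMEM;

            pgtable = malloc(16);           /* stands in for pte_alloc_one() */
            if (!pgtable) {
                    ret = -ENOMEM;
                    goto release;           /* lands on the label with pgtable == NULL... */
            }

            /* any later failure funnels into the same unwind */

            consume(charge, pgtable);
            return 0;

    release:
            free(pgtable);                  /* ...so the free must tolerate NULL, */
            free(charge);                   /* mirroring the "if (pgtable)" check */
            return ret;
    }

    int main(void)
    {
            return fault_path() ? EXIT_FAILURE : EXIT_SUCCESS;
    }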
@@ -688,7 +699,10 @@ int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
                ret = 0;
                set = false;
                if (pmd_none(*vmf->pmd)) {
-                       if (userfaultfd_missing(vma)) {
+                       ret = check_stable_address_space(vma->vm_mm);
+                       if (ret) {
+                               spin_unlock(vmf->ptl);
+                       } else if (userfaultfd_missing(vma)) {
                                spin_unlock(vmf->ptl);
                                ret = handle_userfault(vmf, VM_UFFD_MISSING);
                                VM_BUG_ON(ret & VM_FAULT_FALLBACK);
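The huge-zero-page path gets the same treatment: with the PMD lock held and pmd_none() confirmed, the fault bails out before either delivering a userfault or installing the zero page. A toy model of the rule being enforced — test the teardown flag under the same lock that publishes the mapping — with illustrative names only, and deliberately simplified (upstream relies on the reaper setting MMF_UNSTABLE before unmapping, not on a single mutex):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stddef.h>

    static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER; /* models the PMD lock */
    static bool unstable;                                   /* models MMF_UNSTABLE */
    static void *pmd_slot;                                  /* models the PMD entry */

    /* fault side: never publish a mapping once teardown has begun */
    static int fault_install(void *page)
    {
            int ret = 0;

            pthread_mutex_lock(&ptl);
            if (unstable)
                    ret = -1;               /* models returning VM_FAULT_SIGBUS */
            else if (pmd_slot == NULL)
                    pmd_slot = page;        /* models set_huge_zero_page() */
            pthread_mutex_unlock(&ptl);
            return ret;
    }

    /* reaper side: flag the mm, then tear it down */
    static void reap(void)
    {
            pthread_mutex_lock(&ptl);
            unstable = true;
            pmd_slot = NULL;
            pthread_mutex_unlock(&ptl);
    }

    int main(void)
    {
            reap();
            return fault_install((void *)1) == -1 ? 0 : 1;
    }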
@@ -1495,6 +1509,13 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
                goto clear_pmdnuma;
        }
 
+       /*
+        * The page_table_lock above provides a memory barrier
+        * with change_protection_range.
+        */
+       if (mm_tlb_flush_pending(vma->vm_mm))
+               flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+
        /*
         * Migrate the THP to the requested node, returns with page unlocked
         * and access rights restored.
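mm_tlb_flush_pending() returns true while change_protection_range() has modified PTEs but not yet flushed the TLB, so another CPU may still write through a stale, writable TLB entry; migrating the page without flushing first could lose those writes. The comment about page_table_lock refers to the barrier pairing: the pending flag is raised before the PTE change, and taking the PTL here orders our read of the flag against that store. A toy model of the protocol (illustrative only; around this release the kernel tracks this with a flag in struct mm_struct, later an atomic counter):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool tlb_flush_pending;

    static void change_protection_begin(void)
    {
            /* raised before any PTE is modified */
            atomic_store(&tlb_flush_pending, true);
    }

    static void change_protection_end(void)
    {
            /* cleared only once the real TLB flush has completed */
            atomic_store(&tlb_flush_pending, false);
    }

    static bool migration_must_flush(void)
    {
            /* pending flush => a stale, possibly writable TLB entry
             * may exist; flush before copying the page away */
            return atomic_load(&tlb_flush_pending);
    }

    int main(void)
    {
            change_protection_begin();
            bool stale = migration_must_flush();    /* true: flush first */
            change_protection_end();
            return stale ? 0 : 1;
    }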
@@ -1575,8 +1596,8 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                get_page(page);
                spin_unlock(ptl);
                split_huge_page(page);
-               put_page(page);
                unlock_page(page);
+               put_page(page);
                goto out_unlocked;
        }
 
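The swap of the two calls fixes an ordering bug: if put_page() drops what has become the last reference after the split, the page is freed while still locked, and the subsequent unlock_page() writes to a freed struct page. The general rule for refcounted objects that embed a lock is to unlock before dropping a potentially-final reference. A minimal model (illustrative names, not kernel APIs):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdlib.h>

    struct obj {
            pthread_mutex_t lock;
            atomic_int refcount;
    };

    static void obj_put(struct obj *o)
    {
            /* atomic_fetch_sub() returns the old value: 1 means ours
             * was the final reference */
            if (atomic_fetch_sub(&o->refcount, 1) == 1) {
                    pthread_mutex_destroy(&o->lock);
                    free(o);
            }
    }

    static void done_with(struct obj *o)
    {
            /* unlock first: obj_put() may free the object, and
             * unlocking freed memory is a use-after-free */
            pthread_mutex_unlock(&o->lock);
            obj_put(o);
    }

    int main(void)
    {
            struct obj *o = malloc(sizeof(*o));

            if (!o)
                    return 1;
            pthread_mutex_init(&o->lock, NULL);
            atomic_init(&o->refcount, 1);
            pthread_mutex_lock(&o->lock);
            done_with(o);           /* last reference: unlock, then free */
            return 0;
    }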
@@ -2203,7 +2224,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
         * atomic_set() here would be safe on all archs (and not only on x86),
         * it's safer to use atomic_inc()/atomic_add().
         */
-       if (PageAnon(head)) {
+       if (PageAnon(head) && !PageSwapCache(head)) {
                page_ref_inc(page_tail);
        } else {
                /* Additional pin to radix tree */
@@ -2214,6 +2235,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
        page_tail->flags |= (head->flags &
                        ((1L << PG_referenced) |
                         (1L << PG_swapbacked) |
+                        (1L << PG_swapcache) |
                         (1L << PG_mlocked) |
                         (1L << PG_uptodate) |
                         (1L << PG_active) |
@@ -2276,7 +2298,11 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        ClearPageCompound(head);
        /* See comment in __split_huge_page_tail() */
        if (PageAnon(head)) {
-               page_ref_inc(head);
+               /* Additional pin to radix tree of swap cache */
+               if (PageSwapCache(head))
+                       page_ref_add(head, 2);
+               else
+                       page_ref_inc(head);
        } else {
                /* Additional pin to radix tree */
                page_ref_add(head, 2);
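Together with the tail-page hunk above, this keeps reference accounting consistent when an anon THP sits in the swap cache: every subpage, head and tails alike, owns one swap-cache radix-tree slot, so each must end up with one more reference after the split than a plain anon subpage, exactly as file-backed pages already did; the copied PG_swapcache bit likewise keeps each tail recognizable as a swap-cache page. The per-subpage increments, as a standalone illustration (not kernel code):

    #include <stdbool.h>
    #include <stdio.h>

    /* references the split code adds per subpage, per the hunks above:
     * one base reference, plus one radix-tree pin when the page has a
     * page-cache or swap-cache slot */
    static int split_ref_gain(bool anon, bool swapcache)
    {
            return 1 + ((!anon || swapcache) ? 1 : 0);
    }

    int main(void)
    {
            printf("anon: %d\n", split_ref_gain(true, false));              /* 1 */
            printf("anon, swap cache: %d\n", split_ref_gain(true, true));   /* 2 */
            printf("file: %d\n", split_ref_gain(false, false));             /* 2 */
            return 0;
    }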
@@ -2385,6 +2411,21 @@ int page_trans_huge_mapcount(struct page *page, int *total_mapcount)
        return ret;
 }
 
+/* Racy check whether the huge page can be split */
+bool can_split_huge_page(struct page *page, int *pextra_pins)
+{
+       int extra_pins;
+
+       /* Additional pins from radix tree */
+       if (PageAnon(page))
+               extra_pins = PageSwapCache(page) ? HPAGE_PMD_NR : 0;
+       else
+               extra_pins = HPAGE_PMD_NR;
+       if (pextra_pins)
+               *pextra_pins = extra_pins;
+       return total_mapcount(page) == page_count(page) - extra_pins - 1;
+}
+
 /*
  * This function splits huge page into normal pages. @page can point to any
  * subpage of huge page to split. Split doesn't change the position of @page.
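can_split_huge_page() centralizes the pin arithmetic: each mapping contributes one reference, the radix tree contributes extra_pins, and the caller of split_huge_page_to_list() holds one more, which is the trailing "- 1". Anything beyond that, such as a GUP pin, means someone else still uses the page and the split refuses with -EBUSY, as the hunks below show; they also drop the open-coded extra_pins assignments now that the helper computes them. A standalone model of the check (HPAGE_PMD_NR fixed at the usual x86-64 value, an assumption here):

    #include <stdbool.h>
    #include <stdio.h>

    #define HPAGE_PMD_NR 512        /* subpages per PMD-sized THP, x86-64 */

    static bool can_split(bool anon, bool swapcache,
                          int total_mapcount, int page_count)
    {
            int extra_pins = anon ? (swapcache ? HPAGE_PMD_NR : 0)
                                  : HPAGE_PMD_NR;

            return total_mapcount == page_count - extra_pins - 1;
    }

    int main(void)
    {
            /* anon THP in swap cache, PMD-mapped once: references are
             * 1 (mapping) + HPAGE_PMD_NR (swap cache) + 1 (caller) */
            printf("%d\n", can_split(true, true, 1, 1 + HPAGE_PMD_NR + 1)); /* 1 */

            /* same page with one extra pin (e.g. GUP): must refuse */
            printf("%d\n", can_split(true, true, 1, 1 + HPAGE_PMD_NR + 2)); /* 0 */
            return 0;
    }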
@@ -2432,7 +2473,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                        ret = -EBUSY;
                        goto out;
                }
-               extra_pins = 0;
                mapping = NULL;
                anon_vma_lock_write(anon_vma);
        } else {
@@ -2444,8 +2484,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                        goto out;
                }
 
-               /* Addidional pins from radix tree */
-               extra_pins = HPAGE_PMD_NR;
                anon_vma = NULL;
                i_mmap_lock_read(mapping);
        }
@@ -2454,7 +2492,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
         * Racy check if we can split the page, before freeze_page() will
         * split PMDs
         */
-       if (total_mapcount(head) != page_count(head) - extra_pins - 1) {
+       if (!can_split_huge_page(head, &extra_pins)) {
                ret = -EBUSY;
                goto out_unlock;
        }