ARM: defconfig: add support for Ka-Ro TXUL modules

[karo-tx-linux.git] / mm / rmap.c
diff --git a/mm/rmap.c b/mm/rmap.c

index f5b5c1f3dcd755ae313bba1404f2c9b079d5c18f..288622f5f34d75258ae9134a1be9cd14e0e3adf8 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -565,27 +565,6 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
         anon_vma_unlock_read(anon_vma);
  }
  
-/*
- * At what user virtual address is page expected in @vma?
- */
-static inline unsigned long
-__vma_address(struct page *page, struct vm_area_struct *vma)
-{
-       pgoff_t pgoff = page_to_pgoff(page);
-       return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
-}
-
-inline unsigned long
-vma_address(struct page *page, struct vm_area_struct *vma)
-{
-       unsigned long address = __vma_address(page, vma);
-
-       /* page should be within @vma mapping range */
-       VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
-
-       return address;
-}
-
  #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
  static void percpu_flush_tlb_batch_pages(void *data)
  {
@@ -841,8 +820,7 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                  * rmap might return false positives; we must filter
                  * these out using page_check_address_pmd().
                  */
-               pmd = page_check_address_pmd(page, mm, address,
-                                            PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
+               pmd = page_check_address_pmd(page, mm, address, &ptl);
                 if (!pmd)
                         return SWAP_AGAIN;
  
@@ -852,9 +830,9 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                         return SWAP_FAIL; /* To break the loop */
                 }
  
-               /* go ahead even if the pmd is pmd_trans_splitting() */
                 if (pmdp_clear_flush_young_notify(vma, address, pmd))
                         referenced++;
+
                 spin_unlock(ptl);
         } else {
                 pte_t *pte;
@@ -884,6 +862,7 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                         if (likely(!(vma->vm_flags & VM_SEQ_READ)))
                                 referenced++;
                 }
+
                 pte_unmap_unlock(pte, ptl);
         }
  
@@ -943,6 +922,7 @@ int page_referenced(struct page *page,
         };
  
         *vm_flags = 0;
+
         if (!page_mapped(page))
                 return 0;
  
@@ -1122,7 +1102,7 @@ static void __page_check_anon_rmap(struct page *page,
          * over the call to page_add_new_anon_rmap.
          */
         BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
-       BUG_ON(page->index != linear_page_index(vma, address));
+       BUG_ON(page_to_pgoff(page) != linear_page_index(vma, address));
  #endif
  }
  
@@ -1131,6 +1111,7 @@ static void __page_check_anon_rmap(struct page *page,
   * @page:      the page to add the mapping to
   * @vma:       the vm area in which the mapping is added
   * @address:   the user virtual address mapped
+ * @compound:  charge the page as compound or small page
   *
   * The caller needs to hold the pte lock, and the page must be locked in
   * the anon_vma case: to serialize mapping,index checking after setting,
@@ -1138,9 +1119,9 @@ static void __page_check_anon_rmap(struct page *page,
   * (but PageKsm is never downgraded to PageAnon).
   */
  void page_add_anon_rmap(struct page *page,
-       struct vm_area_struct *vma, unsigned long address)
+       struct vm_area_struct *vma, unsigned long address, bool compound)
  {
-       do_page_add_anon_rmap(page, vma, address, 0);
+       do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
  }
  
  /*
@@ -1149,29 +1130,54 @@ void page_add_anon_rmap(struct page *page,
   * Everybody else should continue to use page_add_anon_rmap above.
   */
  void do_page_add_anon_rmap(struct page *page,
-       struct vm_area_struct *vma, unsigned long address, int exclusive)
+       struct vm_area_struct *vma, unsigned long address, int flags)
  {
-       int first = atomic_inc_and_test(&page->_mapcount);
+       bool compound = flags & RMAP_COMPOUND;
+       bool first;
+
+       if (PageTransCompound(page)) {
+               VM_BUG_ON_PAGE(!PageLocked(page), page);
+               if (compound) {
+                       atomic_t *mapcount;
+
+                       VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+                       mapcount = compound_mapcount_ptr(page);
+                       first = atomic_inc_and_test(mapcount);
+               } else {
+                       /* Anon THP always mapped first with PMD */
+                       first = 0;
+                       VM_BUG_ON_PAGE(!page_mapcount(page), page);
+                       atomic_inc(&page->_mapcount);
+               }
+       } else {
+               VM_BUG_ON_PAGE(compound, page);
+               first = atomic_inc_and_test(&page->_mapcount);
+       }
+
         if (first) {
+               int nr = compound ? hpage_nr_pages(page) : 1;
                 /*
                  * We use the irq-unsafe __{inc|mod}_zone_page_stat because
                  * these counters are not modified in interrupt context, and
                  * pte lock(a spinlock) is held, which implies preemption
                  * disabled.
                  */
-               if (PageTransHuge(page))
+               if (compound) {
+                       VM_BUG_ON_PAGE(!PageTransHuge(page), page);
                         __inc_zone_page_state(page,
                                               NR_ANON_TRANSPARENT_HUGEPAGES);
-               __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-                               hpage_nr_pages(page));
+               }
+               __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
         }
         if (unlikely(PageKsm(page)))
                 return;
  
         VM_BUG_ON_PAGE(!PageLocked(page), page);
+
         /* address might be in next vma when migration races vma_adjust */
         if (first)
-               __page_set_anon_rmap(page, vma, address, exclusive);
+               __page_set_anon_rmap(page, vma, address,
+                               flags & RMAP_EXCLUSIVE);
         else
                 __page_check_anon_rmap(page, vma, address);
  }
@@ -1181,21 +1187,31 @@ void do_page_add_anon_rmap(struct page *page,
   * @page:      the page to add the mapping to
   * @vma:       the vm area in which the mapping is added
   * @address:   the user virtual address mapped
+ * @compound:  charge the page as compound or small page
   *
   * Same as page_add_anon_rmap but must only be called on *new* pages.
   * This means the inc-and-test can be bypassed.
   * Page does not have to be locked.
   */
  void page_add_new_anon_rmap(struct page *page,
-       struct vm_area_struct *vma, unsigned long address)
+       struct vm_area_struct *vma, unsigned long address, bool compound)
  {
+       int nr = compound ? hpage_nr_pages(page) : 1;
+
         VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
         SetPageSwapBacked(page);
-       atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
-       if (PageTransHuge(page))
+       if (compound) {
+               VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+               /* increment count (starts at -1) */
+               atomic_set(compound_mapcount_ptr(page), 0);
                 __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
-       __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-                       hpage_nr_pages(page));
+       } else {
+               /* Anon THP always mapped first with PMD */
+               VM_BUG_ON_PAGE(PageTransCompound(page), page);
+               /* increment count (starts at -1) */
+               atomic_set(&page->_mapcount, 0);
+       }
+       __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
         __page_set_anon_rmap(page, vma, address, 1);
  }
  
@@ -1223,12 +1239,15 @@ static void page_remove_file_rmap(struct page *page)
  
         memcg = mem_cgroup_begin_page_stat(page);
  
-       /* page still mapped by someone else? */
-       if (!atomic_add_negative(-1, &page->_mapcount))
+       /* Hugepages are not counted in NR_FILE_MAPPED for now. */
+       if (unlikely(PageHuge(page))) {
+               /* hugetlb pages are always mapped with pmds */
+               atomic_dec(compound_mapcount_ptr(page));
                 goto out;
+       }
  
-       /* Hugepages are not counted in NR_FILE_MAPPED for now. */
-       if (unlikely(PageHuge(page)))
+       /* page still mapped by someone else? */
+       if (!atomic_add_negative(-1, &page->_mapcount))
                 goto out;
  
         /*
@@ -1245,41 +1264,76 @@ out:
         mem_cgroup_end_page_stat(memcg);
  }
  
+static void page_remove_anon_compound_rmap(struct page *page)
+{
+       int i, nr;
+
+       if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
+               return;
+
+       /* Hugepages are not counted in NR_ANON_PAGES for now. */
+       if (unlikely(PageHuge(page)))
+               return;
+
+       if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+               return;
+
+       __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+
+       if (TestClearPageDoubleMap(page)) {
+               /*
+                * Subpages can be mapped with PTEs too. Check how many of
+                * them are still mapped.
+                */
+               for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
+                       if (atomic_add_negative(-1, &page[i]._mapcount))
+                               nr++;
+               }
+       } else {
+               nr = HPAGE_PMD_NR;
+       }
+
+       if (nr) {
+               __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, -nr);
+               deferred_split_huge_page(page);
+       }
+}
+
  /**
   * page_remove_rmap - take down pte mapping from a page
- * @page: page to remove mapping from
+ * @page:      page to remove mapping from
+ * @compound:  uncharge the page as compound or small page
   *
   * The caller needs to hold the pte lock.
   */
-void page_remove_rmap(struct page *page)
+void page_remove_rmap(struct page *page, bool compound)
  {
         if (!PageAnon(page)) {
+               VM_BUG_ON_PAGE(compound && !PageHuge(page), page);
                 page_remove_file_rmap(page);
                 return;
         }
  
+       if (compound)
+               return page_remove_anon_compound_rmap(page);
+
         /* page still mapped by someone else? */
         if (!atomic_add_negative(-1, &page->_mapcount))
                 return;
  
-       /* Hugepages are not counted in NR_ANON_PAGES for now. */
-       if (unlikely(PageHuge(page)))
-               return;
-
         /*
          * We use the irq-unsafe __{inc|mod}_zone_page_stat because
          * these counters are not modified in interrupt context, and
          * pte lock(a spinlock) is held, which implies preemption disabled.
          */
-       if (PageTransHuge(page))
-               __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
-
-       __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-                             -hpage_nr_pages(page));
+       __dec_zone_page_state(page, NR_ANON_PAGES);
  
         if (unlikely(PageMlocked(page)))
                 clear_page_mlock(page);
  
+       if (PageTransCompound(page))
+               deferred_split_huge_page(compound_head(page));
+
         /*
          * It would be tidy to reset the PageAnon mapping here,
          * but that might overwrite a racing page_add_anon_rmap
@@ -1304,6 +1358,10 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         int ret = SWAP_AGAIN;
         enum ttu_flags flags = (enum ttu_flags)arg;
  
+       /* munlock has nothing to gain from examining un-locked vmas */
+       if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
+               goto out;
+
         pte = page_check_address(page, mm, address, &ptl, 0);
         if (!pte)
                 goto out;
@@ -1314,9 +1372,12 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
          * skipped over this mm) then we should reactivate it.
          */
         if (!(flags & TTU_IGNORE_MLOCK)) {
-               if (vma->vm_flags & VM_LOCKED)
-                       goto out_mlock;
-
+               if (vma->vm_flags & VM_LOCKED) {
+                       /* Holding pte lock, we do *not* need mmap_sem here */
+                       mlock_vma_page(page);
+                       ret = SWAP_MLOCK;
+                       goto out_unmap;
+               }
                 if (flags & TTU_MUNLOCK)
                         goto out_unmap;
         }
@@ -1352,7 +1413,9 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         update_hiwater_rss(mm);
  
         if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
-               if (!PageHuge(page)) {
+               if (PageHuge(page)) {
+                       hugetlb_count_sub(1 << compound_order(page), mm);
+               } else {
                         if (PageAnon(page))
                                 dec_mm_counter(mm, MM_ANONPAGES);
                         else
@@ -1370,80 +1433,66 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                         dec_mm_counter(mm, MM_ANONPAGES);
                 else
                         dec_mm_counter(mm, MM_FILEPAGES);
+       } else if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION)) {
+               swp_entry_t entry;
+               pte_t swp_pte;
+               /*
+                * Store the pfn of the page in a special migration
+                * pte. do_swap_page() will wait until the migration
+                * pte is removed and then restart fault handling.
+                */
+               entry = make_migration_entry(page, pte_write(pteval));
+               swp_pte = swp_entry_to_pte(entry);
+               if (pte_soft_dirty(pteval))
+                       swp_pte = pte_swp_mksoft_dirty(swp_pte);
+               set_pte_at(mm, address, pte, swp_pte);
         } else if (PageAnon(page)) {
                 swp_entry_t entry = { .val = page_private(page) };
                 pte_t swp_pte;
  
-               if (PageSwapCache(page)) {
-                       /*
-                        * Store the swap location in the pte.
-                        * See handle_pte_fault() ...
-                        */
-                       if (swap_duplicate(entry) < 0) {
-                               set_pte_at(mm, address, pte, pteval);
-                               ret = SWAP_FAIL;
-                               goto out_unmap;
-                       }
-                       if (list_empty(&mm->mmlist)) {
-                               spin_lock(&mmlist_lock);
-                               if (list_empty(&mm->mmlist))
-                                       list_add(&mm->mmlist, &init_mm.mmlist);
-                               spin_unlock(&mmlist_lock);
-                       }
+               if (!PageDirty(page) && (flags & TTU_FREE)) {
+                       /* It's a freeable page by MADV_FREE */
                         dec_mm_counter(mm, MM_ANONPAGES);
-                       inc_mm_counter(mm, MM_SWAPENTS);
-               } else if (IS_ENABLED(CONFIG_MIGRATION)) {
-                       /*
-                        * Store the pfn of the page in a special migration
-                        * pte. do_swap_page() will wait until the migration
-                        * pte is removed and then restart fault handling.
-                        */
-                       BUG_ON(!(flags & TTU_MIGRATION));
-                       entry = make_migration_entry(page, pte_write(pteval));
+                       goto discard;
                 }
+
+               /*
+                * Store the swap location in the pte.
+                * See handle_pte_fault() ...
+                */
+               VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+               if (swap_duplicate(entry) < 0) {
+                       set_pte_at(mm, address, pte, pteval);
+                       ret = SWAP_FAIL;
+                       goto out_unmap;
+               }
+                       if (!PageDirty(page))
+                               SetPageDirty(page);
+               if (list_empty(&mm->mmlist)) {
+                       spin_lock(&mmlist_lock);
+                       if (list_empty(&mm->mmlist))
+                               list_add(&mm->mmlist, &init_mm.mmlist);
+                       spin_unlock(&mmlist_lock);
+               }
+               dec_mm_counter(mm, MM_ANONPAGES);
+               inc_mm_counter(mm, MM_SWAPENTS);
                 swp_pte = swp_entry_to_pte(entry);
                 if (pte_soft_dirty(pteval))
                         swp_pte = pte_swp_mksoft_dirty(swp_pte);
                 set_pte_at(mm, address, pte, swp_pte);
-       } else if (IS_ENABLED(CONFIG_MIGRATION) &&
-                  (flags & TTU_MIGRATION)) {
-               /* Establish migration entry for a file page */
-               swp_entry_t entry;
-               entry = make_migration_entry(page, pte_write(pteval));
-               set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
         } else
                 dec_mm_counter(mm, MM_FILEPAGES);
  
-       page_remove_rmap(page);
+discard:
+       page_remove_rmap(page, false);
         page_cache_release(page);
  
  out_unmap:
         pte_unmap_unlock(pte, ptl);
-       if (ret != SWAP_FAIL && !(flags & TTU_MUNLOCK))
+       if (ret != SWAP_FAIL && ret != SWAP_MLOCK && !(flags & TTU_MUNLOCK))
                 mmu_notifier_invalidate_page(mm, address);
  out:
         return ret;
-
-out_mlock:
-       pte_unmap_unlock(pte, ptl);
-
-
-       /*
-        * We need mmap_sem locking, Otherwise VM_LOCKED check makes
-        * unstable result and race. Plus, We can't wait here because
-        * we now hold anon_vma->rwsem or mapping->i_mmap_rwsem.
-        * if trylock failed, the page remain in evictable lru and later
-        * vmscan could retry to move the page to unevictable lru if the
-        * page is actually mlocked.
-        */
-       if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
-               if (vma->vm_flags & VM_LOCKED) {
-                       mlock_vma_page(page);
-                       ret = SWAP_MLOCK;
-               }
-               up_read(&vma->vm_mm->mmap_sem);
-       }
-       return ret;
  }
  
  bool is_vma_temporary_stack(struct vm_area_struct *vma)
@@ -1607,6 +1656,8 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
                 struct vm_area_struct *vma = avc->vma;
                 unsigned long address = vma_address(page, vma);
  
+               cond_resched();
+
                 if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
                         continue;
  
@@ -1656,6 +1707,8 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
         vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
                 unsigned long address = vma_address(page, vma);
  
+               cond_resched();
+
                 if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
                         continue;
  
@@ -1713,7 +1766,7 @@ void hugepage_add_anon_rmap(struct page *page,
         BUG_ON(!PageLocked(page));
         BUG_ON(!anon_vma);
         /* address might be in next vma when migration races vma_adjust */
-       first = atomic_inc_and_test(&page->_mapcount);
+       first = atomic_inc_and_test(compound_mapcount_ptr(page));
         if (first)
                 __hugepage_set_anon_rmap(page, vma, address, 0);
  }
@@ -1722,7 +1775,7 @@ void hugepage_add_new_anon_rmap(struct page *page,
                         struct vm_area_struct *vma, unsigned long address)
  {
         BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-       atomic_set(&page->_mapcount, 0);
+       atomic_set(compound_mapcount_ptr(page), 0);
         __hugepage_set_anon_rmap(page, vma, address, 1);
  }
  #endif /* CONFIG_HUGETLB_PAGE */