[PATCH] 64bit Resource: finally enable 64bit resource sizes
diff --git a/mm/memory.c b/mm/memory.c
index 85e80a57db29e2412ef766268996ec005c3e16c8..247b5c312b9b073d1c76769a67e73d39ee8bdd39 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -87,7 +87,7 @@ int randomize_va_space __read_mostly = 1;
 static int __init disable_randmaps(char *s)
 {
        randomize_va_space = 0;
-       return 0;
+       return 1;
 }
 __setup("norandmaps", disable_randmaps);
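A __setup() handler's return value tells the boot-option parser whether the option was consumed: returning 0 lets "norandmaps" fall through to init as an unknown boot argument, so the handler now returns 1. A minimal sketch of the pattern (illustrative only, not part of this diff):

static int __init example_setup(char *s)
{
        /* parse s here if the option carries a value */
        return 1;       /* option handled; do not pass it on to init */
}
__setup("example_opt", example_setup);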
 
@@ -277,7 +277,7 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
                anon_vma_unlink(vma);
                unlink_file_vma(vma);
 
-               if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
+               if (is_vm_hugetlb_page(vma)) {
                        hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
                                floor, next? next->vm_start: ceiling);
                } else {
@@ -285,8 +285,7 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
                         * Optimization: gather nearby vmas into one call down
                         */
                        while (next && next->vm_start <= vma->vm_end + PMD_SIZE
-                         && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
-                                                       HPAGE_SIZE)) {
+                              && !is_vm_hugetlb_page(next)) {
                                vma = next;
                                next = vma->vm_next;
                                anon_vma_unlink(vma);
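The two hunks above switch free_pgtables() from the per-range is_hugepage_only_range() test to is_vm_hugetlb_page(), which checks the vma itself. Roughly, per the hugetlb.h of this era (not part of this diff), the helper is just a flag test, with a stub that returns 0 when CONFIG_HUGETLB_PAGE is off:

static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
{
        return vma->vm_flags & VM_HUGETLB;
}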
@@ -388,7 +387,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
 {
        unsigned long pfn = pte_pfn(pte);
 
-       if (vma->vm_flags & VM_PFNMAP) {
+       if (unlikely(vma->vm_flags & VM_PFNMAP)) {
                unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
                if (pfn == vma->vm_pgoff + off)
                        return NULL;
@@ -401,8 +400,6 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
         * we should just do "return pfn_to_page(pfn)", but
         * in the meantime we check that we get a valid pfn,
         * and that the resulting page looks ok.
-        *
-        * Remove this test eventually!
         */
        if (unlikely(!pfn_valid(pfn))) {
                print_bad_pte(vma, pte, addr);
@@ -437,7 +434,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
        /* pte contains position in swap or file, so copy. */
        if (unlikely(!pte_present(pte))) {
                if (!pte_file(pte)) {
-                       swap_duplicate(pte_to_swp_entry(pte));
+                       swp_entry_t entry = pte_to_swp_entry(pte);
+
+                       swap_duplicate(entry);
                        /* make sure dst_mm is on swapoff's mmlist. */
                        if (unlikely(list_empty(&dst_mm->mmlist))) {
                                spin_lock(&mmlist_lock);
@@ -446,6 +445,16 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                                                 &src_mm->mmlist);
                                spin_unlock(&mmlist_lock);
                        }
+                       if (is_write_migration_entry(entry) &&
+                                       is_cow_mapping(vm_flags)) {
+                               /*
+                                * COW mappings require pages in both parent
+                                * and child to be set to read.
+                                */
+                               make_migration_entry_read(&entry);
+                               pte = swp_entry_to_pte(entry);
+                               set_pte_at(src_mm, addr, src_pte, pte);
+                       }
                }
                goto out_set_pte;
        }
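The block added above makes fork()'s pte copy aware of page-migration entries: a write migration entry in a COW mapping is downgraded to a read entry in the parent, so that both parent and child take a write fault (and copy the page) once migration completes. A rough sketch of the helpers this relies on, assumed from the swapops/mm headers of the same patch series rather than shown in this diff:

/* A COW mapping is private but potentially writable. */
static inline int is_cow_mapping(unsigned int flags)
{
        return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
}

/* Migration entries are swap entries with a reserved swap type. */
static inline int is_write_migration_entry(swp_entry_t entry)
{
        return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE);
}

static inline void make_migration_entry_read(swp_entry_t *entry)
{
        *entry = swp_entry(SWP_MIGRATION_READ, swp_offset(*entry));
}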
@@ -1074,6 +1083,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                        }
                        if (pages) {
                                pages[i] = page;
+
+                               flush_anon_page(page, start);
                                flush_dcache_page(page);
                        }
                        if (vmas)
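flush_anon_page() gives architectures with virtually indexed caches a hook to flush an anonymous page by its user virtual address before get_user_pages() hands it out; elsewhere it is expected to compile away. A sketch of the assumed generic fallback (not part of this diff):

#ifndef ARCH_HAS_FLUSH_ANON_PAGE
static inline void flush_anon_page(struct page *page, unsigned long vmaddr)
{
        /* no-op unless the architecture provides its own version */
}
#endif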
@@ -1221,9 +1232,7 @@ out:
  * The page has to be a nice clean _individual_ kernel allocation.
  * If you allocate a compound page, you need to have marked it as
  * such (__GFP_COMP), or manually just split the page up yourself
- * (which is mainly an issue of doing "set_page_count(page, 1)" for
- * each sub-page, and then freeing them one by one when you free
- * them rather than freeing it as a compound page).
+ * (see split_page()).
  *
  * NOTE! Traditionally this was done with "remap_pfn_range()" which
  * took an arbitrary page protection parameter. This doesn't allow
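The shortened comment defers to split_page(), which factors out the manual per-sub-page refcounting the old text described. Roughly, per the mm/page_alloc.c of this era (not part of this diff), it gives each tail page of a non-compound higher-order allocation its own reference so the pieces can be freed individually:

void split_page(struct page *page, unsigned int order)
{
        int i;

        BUG_ON(PageCompound(page));
        BUG_ON(!page_count(page));
        for (i = 1; i < (1 << order); i++)
                set_page_count(page + i, 1);
}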
@@ -1448,25 +1457,60 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct page *old_page, *new_page;
        pte_t entry;
-       int ret = VM_FAULT_MINOR;
+       int reuse, ret = VM_FAULT_MINOR;
 
        old_page = vm_normal_page(vma, address, orig_pte);
        if (!old_page)
                goto gotten;
 
-       if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
-               int reuse = can_share_swap_page(old_page);
-               unlock_page(old_page);
-               if (reuse) {
-                       flush_cache_page(vma, address, pte_pfn(orig_pte));
-                       entry = pte_mkyoung(orig_pte);
-                       entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-                       ptep_set_access_flags(vma, address, page_table, entry, 1);
-                       update_mmu_cache(vma, address, entry);
-                       lazy_mmu_prot_update(entry);
-                       ret |= VM_FAULT_WRITE;
-                       goto unlock;
+       if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) ==
+                               (VM_SHARED|VM_WRITE))) {
+               if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
+                       /*
+                        * Notify the address space that the page is about to
+                        * become writable so that it can prohibit this or wait
+                        * for the page to get into an appropriate state.
+                        *
+                        * We do this without the lock held, so that it can
+                        * sleep if it needs to.
+                        */
+                       page_cache_get(old_page);
+                       pte_unmap_unlock(page_table, ptl);
+
+                       if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
+                               goto unwritable_page;
+
+                       page_cache_release(old_page);
+
+                       /*
+                        * Since we dropped the lock we need to revalidate
+                        * the PTE as someone else may have changed it.  If
+                        * they did, we just return, as we can count on the
+                        * MMU to tell us if they didn't also make it writable.
+                        */
+                       page_table = pte_offset_map_lock(mm, pmd, address,
+                                                        &ptl);
+                       if (!pte_same(*page_table, orig_pte))
+                               goto unlock;
                }
+
+               reuse = 1;
+       } else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
+               reuse = can_share_swap_page(old_page);
+               unlock_page(old_page);
+       } else {
+               reuse = 0;
+       }
+
+       if (reuse) {
+               flush_cache_page(vma, address, pte_pfn(orig_pte));
+               entry = pte_mkyoung(orig_pte);
+               entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+               ptep_set_access_flags(vma, address, page_table, entry, 1);
+               update_mmu_cache(vma, address, entry);
+               lazy_mmu_prot_update(entry);
+               ret |= VM_FAULT_WRITE;
+               goto unlock;
        }
 
        /*
@@ -1526,6 +1570,10 @@ oom:
        if (old_page)
                page_cache_release(old_page);
        return VM_FAULT_OOM;
+
+unwritable_page:
+       page_cache_release(old_page);
+       return VM_FAULT_SIGBUS;
 }
 
 /*
@@ -1882,7 +1930,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                goto out;
 
        entry = pte_to_swp_entry(orig_pte);
-again:
+       if (is_migration_entry(entry)) {
+               migration_entry_wait(mm, pmd, address);
+               goto out;
+       }
        page = lookup_swap_cache(entry);
        if (!page) {
                swapin_readahead(entry, address, vma);
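This check, together with the removal of the "again:" retry loop in the next hunk, changes how do_swap_page() handles a page that is being migrated: instead of re-faulting until the migration finishes, it waits for the migration to complete and then returns. A rough sketch of migration_entry_wait(), assumed from mm/migrate.c of the same patch series and not shown in this diff:

void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
        spinlock_t *ptl;
        pte_t *ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
        swp_entry_t entry = pte_to_swp_entry(*ptep);

        if (!pte_present(*ptep) && !pte_none(*ptep) && is_migration_entry(entry)) {
                struct page *page = migration_entry_to_page(entry);

                get_page(page);                 /* hold the page across the sleep */
                pte_unmap_unlock(ptep, ptl);
                wait_on_page_locked(page);      /* unlocked when migration is done */
                put_page(page);
                return;
        }
        pte_unmap_unlock(ptep, ptl);
}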
@@ -1906,12 +1957,6 @@ again:
 
        mark_page_accessed(page);
        lock_page(page);
-       if (!PageSwapCache(page)) {
-               /* Page migration has occured */
-               unlock_page(page);
-               page_cache_release(page);
-               goto again;
-       }
 
        /*
         * Back out if somebody else already faulted in this pte.
@@ -2077,18 +2122,31 @@ retry:
        /*
         * Should we do an early C-O-W break?
         */
-       if (write_access && !(vma->vm_flags & VM_SHARED)) {
-               struct page *page;
+       if (write_access) {
+               if (!(vma->vm_flags & VM_SHARED)) {
+                       struct page *page;
 
-               if (unlikely(anon_vma_prepare(vma)))
-                       goto oom;
-               page = alloc_page_vma(GFP_HIGHUSER, vma, address);
-               if (!page)
-                       goto oom;
-               copy_user_highpage(page, new_page, address);
-               page_cache_release(new_page);
-               new_page = page;
-               anon = 1;
+                       if (unlikely(anon_vma_prepare(vma)))
+                               goto oom;
+                       page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+                       if (!page)
+                               goto oom;
+                       copy_user_highpage(page, new_page, address);
+                       page_cache_release(new_page);
+                       new_page = page;
+                       anon = 1;
+
+               } else {
+                       /* if the page will be shareable, see if the backing
+                        * address space wants to know that the page is about
+                        * to become writable */
+                       if (vma->vm_ops->page_mkwrite &&
+                           vma->vm_ops->page_mkwrite(vma, new_page) < 0
+                           ) {
+                               page_cache_release(new_page);
+                               return VM_FAULT_SIGBUS;
+                       }
+               }
        }
 
        page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
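Both do_wp_page() and do_no_page() now give the backing address_space a chance to prepare for, or refuse, a shared page becoming writable via the new vm_ops->page_mkwrite() callback; a negative return is turned into VM_FAULT_SIGBUS. A hypothetical user might wire it up roughly as below (the names example_mkwrite, example_vm_ops and example_can_write are made up for illustration; only the callback signature comes from this diff):

static int example_mkwrite(struct vm_area_struct *vma, struct page *page)
{
        /*
         * Called with the page-table lock dropped, so this may sleep:
         * wait for writeback, reserve space, or refuse the write.
         */
        if (!example_can_write(vma->vm_file, page))
                return -EPERM;          /* fault path reports SIGBUS */
        return 0;
}

static struct vm_operations_struct example_vm_ops = {
        .nopage         = filemap_nopage,
        .page_mkwrite   = example_mkwrite,
};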
@@ -2357,10 +2415,8 @@ int make_pages_present(unsigned long addr, unsigned long end)
        if (!vma)
                return -1;
        write = (vma->vm_flags & VM_WRITE) != 0;
-       if (addr >= end)
-               BUG();
-       if (end > vma->vm_end)
-               BUG();
+       BUG_ON(addr >= end);
+       BUG_ON(end > vma->vm_end);
        len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
        ret = get_user_pages(current, current->mm, addr,
                        len, write, 0, NULL, NULL);