mm: follow_hugetlb_page flags

author Hugh Dickins <hugh.dickins@tiscali.co.uk>

Tue, 22 Sep 2009 00:03:27 +0000 (17:03 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 22 Sep 2009 14:17:40 +0000 (07:17 -0700)
author Hugh Dickins <hugh.dickins@tiscali.co.uk>
Tue, 22 Sep 2009 00:03:27 +0000 (17:03 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 22 Sep 2009 14:17:40 +0000 (07:17 -0700)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h

index 16cdb75a543a031f0e5fbb67998104a7ee12ee34..e7f0fabfa1c20c51f33bde41941099e1215ba2c1 100644 (file)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -24,7 +24,9 @@ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *
  int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
  int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
  int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
-int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int);
+int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
+                       struct page **, struct vm_area_struct **,
+                       unsigned long *, int *, int, unsigned int flags);
  void unmap_hugepage_range(struct vm_area_struct *,
                         unsigned long, unsigned long, struct page *);
  void __unmap_hugepage_range(struct vm_area_struct *,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index c001f846f17d1ee1baa6b9036d66ca00490a97a2..6b41f70bbc7fcf7bc6b1825cb6e0e99643e2f4fa 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2016,6 +2016,23 @@ static struct page *hugetlbfs_pagecache_page(struct hstate *h,
         return find_lock_page(mapping, idx);
  }
  
+/* Return true if vma's file mapping holds a pagecache page for address */
+static bool hugetlbfs_backed(struct hstate *h,
+                       struct vm_area_struct *vma, unsigned long address)
+{
+       struct address_space *mapping;
+       pgoff_t idx;
+       struct page *page;
+
+       mapping = vma->vm_file->f_mapping;
+       idx = vma_hugecache_offset(h, vma, address);
+
+       page = find_get_page(mapping, idx);
+       if (page)
+               put_page(page);         /* only its presence is of interest */
+       return page != NULL;            /* pointer compared, not dereferenced */
+}
+
  static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
                         unsigned long address, pte_t *ptep, unsigned int flags)
  {
@@ -2211,54 +2228,52 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
         return NULL;
  }
  
-static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
-{
-       if (!ptep || write || shared)
-               return 0;
-       else
-               return huge_pte_none(huge_ptep_get(ptep));
-}
-
  int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                         struct page **pages, struct vm_area_struct **vmas,
                         unsigned long *position, int *length, int i,
-                       int write)
+                       unsigned int flags)
  {
         unsigned long pfn_offset;
         unsigned long vaddr = *position;
         int remainder = *length;
         struct hstate *h = hstate_vma(vma);
-       int zeropage_ok = 0;
-       int shared = vma->vm_flags & VM_SHARED;
  
         spin_lock(&mm->page_table_lock);
         while (vaddr < vma->vm_end && remainder) {
                 pte_t *pte;
+               int absent;
                 struct page *page;
  
                 /*
                  * Some archs (sparc64, sh*) have multiple pte_ts to
-                * each hugepage.  We have to make * sure we get the
+                * each hugepage.  We have to make sure we get the
                  * first, for the page indexing below to work.
                  */
                 pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
-               if (huge_zeropage_ok(pte, write, shared))
-                       zeropage_ok = 1;
+               absent = !pte || huge_pte_none(huge_ptep_get(pte));
+
+               /*
+                * When coredumping, it suits get_dump_page if we just return
+                * an error if there's a hole and no huge pagecache to back it.
+                */
+               if (absent &&
+                   ((flags & FOLL_DUMP) && !hugetlbfs_backed(h, vma, vaddr))) {
+                       remainder = 0;
+                       break;
+               }
  
-               if (!pte ||
-                   (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
-                   (write && !pte_write(huge_ptep_get(pte)))) {
+               if (absent ||
+                   ((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) {
                         int ret;
  
                         spin_unlock(&mm->page_table_lock);
-                       ret = hugetlb_fault(mm, vma, vaddr, write);
+                       ret = hugetlb_fault(mm, vma, vaddr,
+                               (flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
                         spin_lock(&mm->page_table_lock);
                         if (!(ret & VM_FAULT_ERROR))
                                 continue;
  
                         remainder = 0;
-                       if (!i)
-                               i = -EFAULT;
                         break;
                 }
  
@@ -2266,10 +2281,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 page = pte_page(huge_ptep_get(pte));
  same_page:
                 if (pages) {
-                       if (zeropage_ok)
-                               pages[i] = ZERO_PAGE(0);
-                       else
-                               pages[i] = mem_map_offset(page, pfn_offset);
+                       pages[i] = mem_map_offset(page, pfn_offset);
                         get_page(pages[i]);
                 }
  
@@ -2293,7 +2305,7 @@ same_page:
         *length = remainder;
         *position = vaddr;
  
-       return i;
+       return i ? i : -EFAULT;
  }
  
  void hugetlb_change_protection(struct vm_area_struct *vma,
diff --git a/mm/memory.c b/mm/memory.c

index 532a55bce6a41bfc36b19b517c3d19e4951e8ff6..6359a4f80c4a951dd1eaeb2ebbb007fde4924db6 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1260,17 +1260,19 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                     !(vm_flags & vma->vm_flags))
                         return i ? : -EFAULT;
  
-               if (is_vm_hugetlb_page(vma)) {
-                       i = follow_hugetlb_page(mm, vma, pages, vmas,
-                                               &start, &nr_pages, i, write);
-                       continue;
-               }
-
                 foll_flags = FOLL_TOUCH;
                 if (pages)
                         foll_flags |= FOLL_GET;
                 if (flags & GUP_FLAGS_DUMP)
                         foll_flags |= FOLL_DUMP;
+               if (write)
+                       foll_flags |= FOLL_WRITE;
+
+               if (is_vm_hugetlb_page(vma)) {
+                       i = follow_hugetlb_page(mm, vma, pages, vmas,
+                                       &start, &nr_pages, i, foll_flags);
+                       continue;
+               }
  
                 do {
                         struct page *page;
author	Hugh Dickins <hugh.dickins@tiscali.co.uk>
	Tue, 22 Sep 2009 00:03:27 +0000 (17:03 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 22 Sep 2009 14:17:40 +0000 (07:17 -0700)
include/linux/hugetlb.h		patch \| blob \| history
mm/hugetlb.c		patch \| blob \| history
mm/memory.c		patch \| blob \| history