mm/hugetlb: page faults check for fallocate hole punch in progress and wait
author		Mike Kravetz <mike.kravetz@oracle.com>
		Wed, 21 Oct 2015 22:03:20 +0000 (09:03 +1100)
committer	Stephen Rothwell <sfr@canb.auug.org.au>
		Wed, 21 Oct 2015 22:03:20 +0000 (09:03 +1100)
At page fault time, check i_private, which indicates whether a fallocate
hole punch is in progress.  If the fault falls within the hole being
punched, wait for the hole punch operation to complete before proceeding
with the fault.
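
The fault path below reads a struct hugetlb_falloc through
inode->i_private.  That structure is defined by the companion hugetlbfs
fallocate/hole-punch patch, not by this diff; the sketch here only shows
the layout assumed by the fault path, with field meanings inferred from
how they are used in the hunk below.

	/*
	 * Sketch only (not part of this patch): layout assumed for the
	 * object that the hole-punch path publishes via inode->i_private.
	 */
	struct hugetlb_falloc {
		wait_queue_head_t *waitq;	/* faulters in the hole wait here */
		pgoff_t start;			/* start of the range being punched */
		pgoff_t end;			/* end of the range being punched */
	};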

Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
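
For context, the wait added in the hunk below only resolves if the
hole-punch side publishes the hugetlb_falloc structure under
inode->i_lock and wakes the waitqueue once the punched range has been
dealt with.  That side lives in the companion hugetlbfs patch; the sketch
below is an illustration of the expected sequence, with the function name
and details invented for explanation rather than taken from that patch.

	/*
	 * Illustrative sketch of the hole-punch side assumed by the fault
	 * path in this patch.  Names are for explanation only.
	 */
	static void example_punch_hole(struct inode *inode, pgoff_t start, pgoff_t end)
	{
		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(hugetlb_falloc_waitq);
		struct hugetlb_falloc hugetlb_falloc = {
			.waitq = &hugetlb_falloc_waitq,
			.start = start,
			.end = end,
		};

		/* Publish the in-progress range so faults in the hole wait. */
		spin_lock(&inode->i_lock);
		inode->i_private = &hugetlb_falloc;
		spin_unlock(&inode->i_lock);

		/* ... unmap and remove the hugetlb pages in the range ... */

		/* Tear down i_private and wake any faulters blocked above. */
		spin_lock(&inode->i_lock);
		inode->i_private = NULL;
		wake_up_all(&hugetlb_falloc_waitq);
		spin_unlock(&inode->i_lock);
	}
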
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5d3beae07fffbf2020b57c42d4db2cc0f2833f0c..06580fdf5e306825b961f794b36fcf9544361abc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3678,6 +3678,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        struct page *pagecache_page = NULL;
        struct hstate *h = hstate_vma(vma);
        struct address_space *mapping;
+       struct inode *inode = file_inode(vma->vm_file);
        int need_wait_lock = 0;
 
        address &= huge_page_mask(h);
@@ -3700,6 +3701,44 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        mapping = vma->vm_file->f_mapping;
        idx = vma_hugecache_offset(h, vma, address);
 
+       /*
+        * page faults could race with fallocate hole punch.  If a page
+        * is faulted between unmap and deallocation, it will still remain
+        * in the punched hole.  During hole punch operations, a hugetlb_falloc
+        * structure will be pointed to by i_private.  If this fault is for
+        * a page in a hole being punched, wait for the operation to finish
+        * before proceeding.
+        *
+        * Even with this strategy, it is still possible for a page fault to
+        * race with hole punch.  In this case, remove_inode_hugepages() will
+        * unmap the page and then remove it.  Checking i_private as below should
+        * catch most of these races as we want to minimize unmapping a page
+        * multiple times.
+        */
+       if (unlikely(inode->i_private)) {
+               struct hugetlb_falloc *hugetlb_falloc;
+
+               spin_lock(&inode->i_lock);
+               hugetlb_falloc = inode->i_private;
+               if (hugetlb_falloc && hugetlb_falloc->waitq &&
+                   idx >= hugetlb_falloc->start &&
+                   idx <= hugetlb_falloc->end) {
+                       wait_queue_head_t *hugetlb_falloc_waitq;
+                       DEFINE_WAIT(hugetlb_fault_wait);
+
+                       hugetlb_falloc_waitq = hugetlb_falloc->waitq;
+                       prepare_to_wait(hugetlb_falloc_waitq,
+                                       &hugetlb_fault_wait,
+                                       TASK_UNINTERRUPTIBLE);
+                       spin_unlock(&inode->i_lock);
+                       schedule();
+
+                       spin_lock(&inode->i_lock);
+                       finish_wait(hugetlb_falloc_waitq, &hugetlb_fault_wait);
+               }
+               spin_unlock(&inode->i_lock);
+       }
+
        /*
         * Serialize hugepage allocation and instantiation, so that we don't
         * get spurious allocation failures if two CPUs race to instantiate