]> git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - fs/dax.c
Merge remote-tracking branch 'xfs/for-next'
[karo-tx-linux.git] / fs / dax.c
index bcfb14bfc1e49eb36d507c53ccd1c3f4f23135f9..131fd35ae39d53f8a9ef7bdd1aaa61677beda5f0 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
 #include <linux/uio.h>
 #include <linux/vmstat.h>
 
+/*
+ * dax_clear_blocks() is called from within transaction context from XFS,
+ * and hence this means the stack from this point must follow GFP_NOFS
+ * semantics for all operations.
+ */
 int dax_clear_blocks(struct inode *inode, sector_t block, long size)
 {
        struct block_device *bdev = inode->i_sb->s_bdev;
@@ -285,6 +290,7 @@ static int copy_user_bh(struct page *to, struct buffer_head *bh,
 static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
                        struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+       struct address_space *mapping = inode->i_mapping;
        sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
        unsigned long vaddr = (unsigned long)vmf->virtual_address;
        void __pmem *addr;
@@ -292,6 +298,8 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
        pgoff_t size;
        int error;
 
+       i_mmap_lock_read(mapping);
+
        /*
         * Check truncate didn't happen while we were allocating a block.
         * If it did, this block may or may not be still allocated to the
@@ -321,6 +329,8 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
        error = vm_insert_mixed(vma, vaddr, pfn);
 
  out:
+       i_mmap_unlock_read(mapping);
+
        return error;
 }
 
@@ -382,17 +392,15 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                         * from a read fault and we've raced with a truncate
                         */
                        error = -EIO;
-                       goto unlock;
+                       goto unlock_page;
                }
-       } else {
-               i_mmap_lock_write(mapping);
        }
 
        error = get_block(inode, block, &bh, 0);
        if (!error && (bh.b_size < PAGE_SIZE))
                error = -EIO;           /* fs corruption? */
        if (error)
-               goto unlock;
+               goto unlock_page;
 
        if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
                if (vmf->flags & FAULT_FLAG_WRITE) {
@@ -403,9 +411,8 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                        if (!error && (bh.b_size < PAGE_SIZE))
                                error = -EIO;
                        if (error)
-                               goto unlock;
+                               goto unlock_page;
                } else {
-                       i_mmap_unlock_write(mapping);
                        return dax_load_hole(mapping, page, vmf);
                }
        }
@@ -417,15 +424,17 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                else
                        clear_user_highpage(new_page, vaddr);
                if (error)
-                       goto unlock;
+                       goto unlock_page;
                vmf->page = page;
                if (!page) {
+                       i_mmap_lock_read(mapping);
                        /* Check we didn't race with truncate */
                        size = (i_size_read(inode) + PAGE_SIZE - 1) >>
                                                                PAGE_SHIFT;
                        if (vmf->pgoff >= size) {
+                               i_mmap_unlock_read(mapping);
                                error = -EIO;
-                               goto unlock;
+                               goto out;
                        }
                }
                return VM_FAULT_LOCKED;
@@ -461,8 +470,6 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                        WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
        }
 
-       if (!page)
-               i_mmap_unlock_write(mapping);
  out:
        if (error == -ENOMEM)
                return VM_FAULT_OOM | major;
@@ -471,14 +478,11 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                return VM_FAULT_SIGBUS | major;
        return VM_FAULT_NOPAGE | major;
 
- unlock:
+ unlock_page:
        if (page) {
                unlock_page(page);
                page_cache_release(page);
-       } else {
-               i_mmap_unlock_write(mapping);
        }
-
        goto out;
 }
 EXPORT_SYMBOL(__dax_fault);
@@ -556,10 +560,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
 
        bh.b_size = PMD_SIZE;
-       i_mmap_lock_write(mapping);
        length = get_block(inode, block, &bh, write);
        if (length)
                return VM_FAULT_SIGBUS;
+       i_mmap_lock_read(mapping);
 
        /*
         * If the filesystem isn't willing to tell us the length of a hole,
@@ -569,36 +573,14 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
                goto fallback;
 
-       sector = bh.b_blocknr << (blkbits - 9);
-
-       if (buffer_unwritten(&bh) || buffer_new(&bh)) {
-               int i;
-
-               length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
-                                               bh.b_size);
-               if (length < 0) {
-                       result = VM_FAULT_SIGBUS;
-                       goto out;
-               }
-               if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
-                       goto fallback;
-
-               for (i = 0; i < PTRS_PER_PMD; i++)
-                       clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
-               wmb_pmem();
-               count_vm_event(PGMAJFAULT);
-               mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
-               result |= VM_FAULT_MAJOR;
-       }
-
        /*
         * If we allocated new storage, make sure no process has any
         * zero pages covering this hole
         */
        if (buffer_new(&bh)) {
-               i_mmap_unlock_write(mapping);
+               i_mmap_unlock_read(mapping);
                unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
-               i_mmap_lock_write(mapping);
+               i_mmap_lock_read(mapping);
        }
 
        /*
@@ -635,6 +617,7 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                result = VM_FAULT_NOPAGE;
                spin_unlock(ptl);
        } else {
+               sector = bh.b_blocknr << (blkbits - 9);
                length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
                                                bh.b_size);
                if (length < 0) {
@@ -644,15 +627,25 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
                        goto fallback;
 
+               if (buffer_unwritten(&bh) || buffer_new(&bh)) {
+                       int i;
+                       for (i = 0; i < PTRS_PER_PMD; i++)
+                               clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
+                       wmb_pmem();
+                       count_vm_event(PGMAJFAULT);
+                       mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+                       result |= VM_FAULT_MAJOR;
+               }
+
                result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write);
        }
 
  out:
+       i_mmap_unlock_read(mapping);
+
        if (buffer_unwritten(&bh))
                complete_unwritten(&bh, !(result & VM_FAULT_ERROR));
 
-       i_mmap_unlock_write(mapping);
-
        return result;
 
  fallback: