X-Git-Url: https://git.kernelconcepts.de/?p=karo-tx-linux.git;a=blobdiff_plain;f=fs%2Fdax.c;h=131fd35ae39d53f8a9ef7bdd1aaa61677beda5f0;hp=bcfb14bfc1e49eb36d507c53ccd1c3f4f23135f9;hb=fb952af46586ab123ac006248a7196afcf3323a8;hpb=64ebda3acd6ea592e6f97dcd0682e87aa7cf8d61 diff --git a/fs/dax.c b/fs/dax.c index bcfb14bfc1e4..131fd35ae39d 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -29,6 +29,11 @@ #include #include +/* + * dax_clear_blocks() is called from within transaction context from XFS, + * and hence this means the stack from this point must follow GFP_NOFS + * semantics for all operations. + */ int dax_clear_blocks(struct inode *inode, sector_t block, long size) { struct block_device *bdev = inode->i_sb->s_bdev; @@ -285,6 +290,7 @@ static int copy_user_bh(struct page *to, struct buffer_head *bh, static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, struct vm_area_struct *vma, struct vm_fault *vmf) { + struct address_space *mapping = inode->i_mapping; sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9); unsigned long vaddr = (unsigned long)vmf->virtual_address; void __pmem *addr; @@ -292,6 +298,8 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, pgoff_t size; int error; + i_mmap_lock_read(mapping); + /* * Check truncate didn't happen while we were allocating a block. * If it did, this block may or may not be still allocated to the @@ -321,6 +329,8 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, error = vm_insert_mixed(vma, vaddr, pfn); out: + i_mmap_unlock_read(mapping); + return error; } @@ -382,17 +392,15 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, * from a read fault and we've raced with a truncate */ error = -EIO; - goto unlock; + goto unlock_page; } - } else { - i_mmap_lock_write(mapping); } error = get_block(inode, block, &bh, 0); if (!error && (bh.b_size < PAGE_SIZE)) error = -EIO; /* fs corruption? */ if (error) - goto unlock; + goto unlock_page; if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) { if (vmf->flags & FAULT_FLAG_WRITE) { @@ -403,9 +411,8 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, if (!error && (bh.b_size < PAGE_SIZE)) error = -EIO; if (error) - goto unlock; + goto unlock_page; } else { - i_mmap_unlock_write(mapping); return dax_load_hole(mapping, page, vmf); } } @@ -417,15 +424,17 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, else clear_user_highpage(new_page, vaddr); if (error) - goto unlock; + goto unlock_page; vmf->page = page; if (!page) { + i_mmap_lock_read(mapping); /* Check we didn't race with truncate */ size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; if (vmf->pgoff >= size) { + i_mmap_unlock_read(mapping); error = -EIO; - goto unlock; + goto out; } } return VM_FAULT_LOCKED; @@ -461,8 +470,6 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE)); } - if (!page) - i_mmap_unlock_write(mapping); out: if (error == -ENOMEM) return VM_FAULT_OOM | major; @@ -471,14 +478,11 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, return VM_FAULT_SIGBUS | major; return VM_FAULT_NOPAGE | major; - unlock: + unlock_page: if (page) { unlock_page(page); page_cache_release(page); - } else { - i_mmap_unlock_write(mapping); } - goto out; } EXPORT_SYMBOL(__dax_fault); @@ -556,10 +560,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, block = (sector_t)pgoff << (PAGE_SHIFT - blkbits); bh.b_size = PMD_SIZE; - i_mmap_lock_write(mapping); length = get_block(inode, block, &bh, write); if (length) return VM_FAULT_SIGBUS; + i_mmap_lock_read(mapping); /* * If the filesystem isn't willing to tell us the length of a hole, @@ -569,36 +573,14 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) goto fallback; - sector = bh.b_blocknr << (blkbits - 9); - - if (buffer_unwritten(&bh) || buffer_new(&bh)) { - int i; - - length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn, - bh.b_size); - if (length < 0) { - result = VM_FAULT_SIGBUS; - goto out; - } - if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR)) - goto fallback; - - for (i = 0; i < PTRS_PER_PMD; i++) - clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE); - wmb_pmem(); - count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); - result |= VM_FAULT_MAJOR; - } - /* * If we allocated new storage, make sure no process has any * zero pages covering this hole */ if (buffer_new(&bh)) { - i_mmap_unlock_write(mapping); + i_mmap_unlock_read(mapping); unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0); - i_mmap_lock_write(mapping); + i_mmap_lock_read(mapping); } /* @@ -635,6 +617,7 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, result = VM_FAULT_NOPAGE; spin_unlock(ptl); } else { + sector = bh.b_blocknr << (blkbits - 9); length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn, bh.b_size); if (length < 0) { @@ -644,15 +627,25 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR)) goto fallback; + if (buffer_unwritten(&bh) || buffer_new(&bh)) { + int i; + for (i = 0; i < PTRS_PER_PMD; i++) + clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE); + wmb_pmem(); + count_vm_event(PGMAJFAULT); + mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); + result |= VM_FAULT_MAJOR; + } + result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write); } out: + i_mmap_unlock_read(mapping); + if (buffer_unwritten(&bh)) complete_unwritten(&bh, !(result & VM_FAULT_ERROR)); - i_mmap_unlock_write(mapping); - return result; fallback: