git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - fs/dax.c
dax: for truncate/hole-punch, do zeroing through the driver if possible
[karo-tx-linux.git] / fs / dax.c
index 7c0036dd15706e0e446317bffacee08412bdee0a..0b9a1693401700da996e192a508565ea7c389d9e 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -87,38 +87,6 @@ struct page *read_dax_sector(struct block_device *bdev, sector_t n)
        return page;
 }
 
-/*
- * dax_clear_sectors() is called from within transaction context from XFS,
- * and hence this means the stack from this point must follow GFP_NOFS
- * semantics for all operations.
- */
-int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size)
-{
-       struct blk_dax_ctl dax = {
-               .sector = _sector,
-               .size = _size,
-       };
-
-       might_sleep();
-       do {
-               long count, sz;
-
-               count = dax_map_atomic(bdev, &dax);
-               if (count < 0)
-                       return count;
-               sz = min_t(long, count, SZ_128K);
-               clear_pmem(dax.addr, sz);
-               dax.size -= sz;
-               dax.sector += sz / 512;
-               dax_unmap_atomic(bdev, &dax);
-               cond_resched();
-       } while (dax.size);
-
-       wmb_pmem();
-       return 0;
-}
-EXPORT_SYMBOL_GPL(dax_clear_sectors);
-
 static bool buffer_written(struct buffer_head *bh)
 {
        return buffer_mapped(bh) && !buffer_unwritten(bh);
@@ -268,15 +236,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
        memset(&bh, 0, sizeof(bh));
        bh.b_bdev = inode->i_sb->s_bdev;
 
-       if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
-               struct address_space *mapping = inode->i_mapping;
+       if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
                inode_lock(inode);
-               retval = filemap_write_and_wait_range(mapping, pos, end - 1);
-               if (retval) {
-                       inode_unlock(inode);
-                       goto out;
-               }
-       }
 
        /* Protects against truncate */
        if (!(flags & DIO_SKIP_DIO_COUNT))
@@ -297,7 +258,6 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
 
        if (!(flags & DIO_SKIP_DIO_COUNT))
                inode_dio_end(inode);
- out:
        return retval;
 }
 EXPORT_SYMBOL_GPL(dax_do_io);
@@ -313,20 +273,11 @@ EXPORT_SYMBOL_GPL(dax_do_io);
 static int dax_load_hole(struct address_space *mapping, struct page *page,
                                                        struct vm_fault *vmf)
 {
-       unsigned long size;
-       struct inode *inode = mapping->host;
        if (!page)
                page = find_or_create_page(mapping, vmf->pgoff,
                                                GFP_KERNEL | __GFP_ZERO);
        if (!page)
                return VM_FAULT_OOM;
-       /* Recheck i_size under page lock to avoid truncate race */
-       size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       if (vmf->pgoff >= size) {
-               unlock_page(page);
-               put_page(page);
-               return VM_FAULT_SIGBUS;
-       }
 
        vmf->page = page;
        return VM_FAULT_LOCKED;
@@ -557,24 +508,10 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
                .sector = to_sector(bh, inode),
                .size = bh->b_size,
        };
-       pgoff_t size;
        int error;
 
        i_mmap_lock_read(mapping);
 
-       /*
-        * Check truncate didn't happen while we were allocating a block.
-        * If it did, this block may or may not be still allocated to the
-        * file.  We can't tell the filesystem to free it because we can't
-        * take i_mutex here.  In the worst case, the file still has blocks
-        * allocated past the end of the file.
-        */
-       size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       if (unlikely(vmf->pgoff >= size)) {
-               error = -EIO;
-               goto out;
-       }
-
        if (dax_map_atomic(bdev, &dax) < 0) {
                error = PTR_ERR(dax.addr);
                goto out;
@@ -640,15 +577,6 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                        put_page(page);
                        goto repeat;
                }
-               size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-               if (unlikely(vmf->pgoff >= size)) {
-                       /*
-                        * We have a struct page covering a hole in the file
-                        * from a read fault and we've raced with a truncate
-                        */
-                       error = -EIO;
-                       goto unlock_page;
-               }
        }
 
        error = get_block(inode, block, &bh, 0);
@@ -681,17 +609,8 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                if (error)
                        goto unlock_page;
                vmf->page = page;
-               if (!page) {
+               if (!page)
                        i_mmap_lock_read(mapping);
-                       /* Check we didn't race with truncate */
-                       size = (i_size_read(inode) + PAGE_SIZE - 1) >>
-                                                               PAGE_SHIFT;
-                       if (vmf->pgoff >= size) {
-                               i_mmap_unlock_read(mapping);
-                               error = -EIO;
-                               goto out;
-                       }
-               }
                return VM_FAULT_LOCKED;
        }
 
@@ -869,23 +788,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 
        i_mmap_lock_read(mapping);
 
-       /*
-        * If a truncate happened while we were allocating blocks, we may
-        * leave blocks allocated to the file that are beyond EOF.  We can't
-        * take i_mutex here, so just leave them hanging; they'll be freed
-        * when the file is deleted.
-        */
-       size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       if (pgoff >= size) {
-               result = VM_FAULT_SIGBUS;
-               goto out;
-       }
-       if ((pgoff | PG_PMD_COLOUR) >= size) {
-               dax_pmd_dbg(&bh, address,
-                               "offset + huge page size > file size");
-               goto fallback;
-       }
-
        if (!write && !buffer_mapped(&bh) && buffer_uptodate(&bh)) {
                spinlock_t *ptl;
                pmd_t entry;
@@ -921,8 +823,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                long length = dax_map_atomic(bdev, &dax);
 
                if (length < 0) {
-                       result = VM_FAULT_SIGBUS;
-                       goto out;
+                       dax_pmd_dbg(&bh, address, "dax-error fallback");
+                       goto fallback;
                }
                if (length < PMD_SIZE) {
                        dax_pmd_dbg(&bh, address, "dax-length too small");
@@ -1045,6 +947,43 @@ int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 }
 EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
 
+static bool dax_range_is_aligned(struct block_device *bdev,
+                                unsigned int offset, unsigned int length)
+{ /* Report whether offset and length are both aligned to bdev's logical block size. */
+       unsigned short sector_size = bdev_logical_block_size(bdev); /* alignment unit, queried from the device */
+
+       if (!IS_ALIGNED(offset, sector_size)) /* range does not start on a logical block boundary */
+               return false;
+       if (!IS_ALIGNED(length, sector_size)) /* range does not cover a whole number of logical blocks */
+               return false;
+
+       return true; /* both start and length are block-aligned */
+}
+
+int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
+               unsigned int offset, unsigned int length)
+{ /* Zero length bytes at byte offset within the page that starts at sector on bdev; returns 0 or a negative errno. */
+       struct blk_dax_ctl dax = {
+               .sector         = sector,
+               .size           = PAGE_SIZE, /* request a one-page mapping for the direct-clear path below */
+       };
+
+       if (dax_range_is_aligned(bdev, offset, length)) { /* block-aligned range: zero it through the block layer */
+               sector_t start_sector = dax.sector + (offset >> 9); /* byte offset converted to 512-byte sectors */
+
+               return blkdev_issue_zeroout(bdev, start_sector, /* result of the zeroout is returned unchanged */
+                               length >> 9, GFP_NOFS, true); /* length in 512-byte sectors; GFP_NOFS allocation context */
+       } else { /* unaligned range: map the page and clear the bytes directly */
+               if (dax_map_atomic(bdev, &dax) < 0)
+                       return PTR_ERR(dax.addr); /* presumably dax_map_atomic leaves an ERR_PTR in dax.addr on failure - confirm */
+               clear_pmem(dax.addr + offset, length); /* zero only the requested sub-range of the mapped page */
+               wmb_pmem(); /* NOTE(review): presumably orders/flushes the pmem stores before unmapping - confirm */
+               dax_unmap_atomic(bdev, &dax); /* release the mapping taken by dax_map_atomic */
+       }
+       return 0; /* reached only after the direct-clear path succeeds */
+}
+EXPORT_SYMBOL_GPL(__dax_zero_page_range);
+
 /**
  * dax_zero_page_range - zero a range within a page of a DAX file
  * @inode: The file being truncated
@@ -1080,23 +1019,11 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
        bh.b_bdev = inode->i_sb->s_bdev;
        bh.b_size = PAGE_SIZE;
        err = get_block(inode, index, &bh, 0);
-       if (err < 0)
+       if (err < 0 || !buffer_written(&bh))
                return err;
-       if (buffer_written(&bh)) {
-               struct block_device *bdev = bh.b_bdev;
-               struct blk_dax_ctl dax = {
-                       .sector = to_sector(&bh, inode),
-                       .size = PAGE_SIZE,
-               };
 
-               if (dax_map_atomic(bdev, &dax) < 0)
-                       return PTR_ERR(dax.addr);
-               clear_pmem(dax.addr + offset, length);
-               wmb_pmem();
-               dax_unmap_atomic(bdev, &dax);
-       }
-
-       return 0;
+       return __dax_zero_page_range(bh.b_bdev, to_sector(&bh, inode),
+                       offset, length);
 }
 EXPORT_SYMBOL_GPL(dax_zero_page_range);