diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 1f12ad0a8585b3d0f0788cfe5b88b6ab662a1547..8121e75352ee9bddd4726ca685d6d3e855256bdd 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -559,7 +559,7 @@ restart:
        if (error <= 0)
                return error;
 
-       error = xfs_break_layouts(inode, iolock);
+       error = xfs_break_layouts(inode, iolock, true);
        if (error)
                return error;
 
@@ -569,21 +569,42 @@ restart:
         * write.  If zeroing is needed and we are currently holding the
         * iolock shared, we need to update it to exclusive which implies
         * having to redo all checks before.
+        *
+        * We need to serialise against EOF updates that occur on IO
+        * completion here. We want to make sure that nobody changes the
+        * size while we do this check, until we have placed an IO barrier
+        * (i.e. hold the XFS_IOLOCK_EXCL) that prevents new IO from being
+        * dispatched. The spinlock effectively forms a memory barrier once
+        * we hold the XFS_IOLOCK_EXCL, so we are guaranteed to see the
+        * latest EOF value and hence correctly decide if zeroing is needed.
         */
+       spin_lock(&ip->i_flags_lock);
        if (iocb->ki_pos > i_size_read(inode)) {
                bool    zero = false;
 
+               spin_unlock(&ip->i_flags_lock);
                if (*iolock == XFS_IOLOCK_SHARED) {
                        xfs_rw_iunlock(ip, *iolock);
                        *iolock = XFS_IOLOCK_EXCL;
                        xfs_rw_ilock(ip, *iolock);
                        iov_iter_reexpand(from, count);
+
+                       /*
+                        * We now have an IO submission barrier in place, but
+                        * AIO can do EOF updates during IO completion and hence
+                        * we need to wait for all of them to drain. Non-AIO
+                        * DIO will have drained before we are given the
+                        * XFS_IOLOCK_EXCL, and so for most cases this wait is a
+                        * no-op.
+                        */
+                       inode_dio_wait(inode);
                        goto restart;
                }
                error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
                if (error)
                        return error;
-       }
+       } else
+               spin_unlock(&ip->i_flags_lock);
 
        /*
         * Updating the timestamps will grab the ilock again from
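
A note on the pattern in the hunk above: the EOF check runs under
ip->i_flags_lock, but that lock must be dropped before the iolock can be
upgraded, so after the upgrade (and after inode_dio_wait() has drained AIO
completions) every check is redone from restart. Below is a minimal
user-space sketch of that check/upgrade/retry shape using pthreads; the
names (file_state, eof_zero_needed) are invented for illustration, and
this is an analogy, not XFS code.

/*
 * Sketch of the check/upgrade/retry pattern, assuming invented names:
 * file_state stands in for xfs_inode, iolock for XFS_IOLOCK, flags_lock
 * for ip->i_flags_lock. Build: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct file_state {
	pthread_rwlock_t	iolock;		/* XFS_IOLOCK analogue */
	pthread_spinlock_t	flags_lock;	/* i_flags_lock analogue */
	long long		size;		/* i_size analogue */
};

/* Call with iolock held; returns true iff zeroing beyond EOF is needed. */
static bool eof_zero_needed(struct file_state *fs, long long pos, bool *excl)
{
restart:
	pthread_spin_lock(&fs->flags_lock);
	if (pos <= fs->size) {
		/* The spinlock makes this size sample stable vs. updaters. */
		pthread_spin_unlock(&fs->flags_lock);
		return false;
	}
	pthread_spin_unlock(&fs->flags_lock);

	if (!*excl) {
		/*
		 * Upgrade shared -> exclusive. The size may change while no
		 * lock is held (XFS also runs inode_dio_wait() here to drain
		 * AIO completions), so redo all checks from the top.
		 */
		pthread_rwlock_unlock(&fs->iolock);
		pthread_rwlock_wrlock(&fs->iolock);
		*excl = true;
		goto restart;
	}
	return true;
}

int main(void)
{
	struct file_state fs = { .size = 4096 };
	bool excl = false;

	pthread_rwlock_init(&fs.iolock, NULL);
	pthread_spin_init(&fs.flags_lock, 0);

	pthread_rwlock_rdlock(&fs.iolock);
	printf("zeroing needed: %d, excl: %d\n",
	       (int)eof_zero_needed(&fs, 8192, &excl), (int)excl);
	pthread_rwlock_unlock(&fs.iolock);
	return 0;
}
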
@@ -645,6 +666,8 @@ xfs_file_dio_aio_write(
        int                     iolock;
        size_t                  count = iov_iter_count(from);
        loff_t                  pos = iocb->ki_pos;
+       loff_t                  end;
+       struct iov_iter         data;
        struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
                                        mp->m_rtdev_targp : mp->m_ddev_targp;
 
@@ -685,10 +708,11 @@ xfs_file_dio_aio_write(
                goto out;
        count = iov_iter_count(from);
        pos = iocb->ki_pos;
+       end = pos + count - 1;
 
        if (mapping->nrpages) {
                ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-                                                   pos, pos + count - 1);
+                                                  pos, end);
                if (ret)
                        goto out;
                /*
@@ -698,7 +722,7 @@ xfs_file_dio_aio_write(
                 */
                ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
                                        pos >> PAGE_CACHE_SHIFT,
-                                       (pos + count - 1) >> PAGE_CACHE_SHIFT);
+                                       end >> PAGE_CACHE_SHIFT);
                WARN_ON_ONCE(ret);
                ret = 0;
        }
@@ -715,8 +739,22 @@ xfs_file_dio_aio_write(
        }
 
        trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-       ret = generic_file_direct_write(iocb, from, pos);
 
+       data = *from;
+       ret = mapping->a_ops->direct_IO(iocb, &data, pos);
+
+       /* see generic_file_direct_write() for why this is necessary */
+       if (mapping->nrpages) {
+               invalidate_inode_pages2_range(mapping,
+                                             pos >> PAGE_CACHE_SHIFT,
+                                             end >> PAGE_CACHE_SHIFT);
+       }
+
+       if (ret > 0) {
+               pos += ret;
+               iov_iter_advance(from, ret);
+               iocb->ki_pos = pos;
+       }
 out:
        xfs_rw_iunlock(ip, iolock);
 
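The hunk above open-codes the tail of generic_file_direct_write():
->direct_IO is handed a copy of the iterator (data = *from), and the
caller's iterator and ki_pos are advanced only by the bytes actually
written, and only on success. A toy user-space sketch of that
copy-then-advance idiom, with an invented struct iter standing in for
struct iov_iter:

#include <stdio.h>

struct iter {			/* invented stand-in for struct iov_iter */
	const char	*buf;
	size_t		count;
};

/* Consumes its iterator unconditionally, like a low-level DIO path. */
static long toy_direct_io(struct iter *it)
{
	long done = (long)it->count;

	it->buf += it->count;
	it->count = 0;
	return done;
}

static long dio_write(struct iter *from, long long *pos)
{
	struct iter	data = *from;	/* work on a copy, as XFS does */
	long		ret = toy_direct_io(&data);

	if (ret > 0) {
		/* Only now advance the caller's iterator and position. */
		from->buf += ret;
		from->count -= (size_t)ret;
		*pos += ret;
	}
	return ret;
}

int main(void)
{
	struct iter it = { .buf = "hello", .count = 5 };
	long long pos = 0;
	long ret = dio_write(&it, &pos);

	printf("ret=%ld pos=%lld left=%zu\n", ret, pos, it.count);
	return 0;
}
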
@@ -822,6 +860,11 @@ xfs_file_write_iter(
        return ret;
 }
 
+#define        XFS_FALLOC_FL_SUPPORTED                                         \
+               (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
+                FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |      \
+                FALLOC_FL_INSERT_RANGE)
+
 STATIC long
 xfs_file_fallocate(
        struct file             *file,
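
XFS_FALLOC_FL_SUPPORTED collects every supported mode bit into one mask,
so the EOPNOTSUPP test below stays a single branch as new flags get added.
A short user-space sketch of the same mask test, assuming a uapi
<linux/falloc.h> new enough to define FALLOC_FL_INSERT_RANGE:

#include <errno.h>
#include <linux/falloc.h>
#include <stdio.h>

#define SUPPORTED_MODES	(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |	\
			 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
			 FALLOC_FL_INSERT_RANGE)

static int check_mode(int mode)
{
	/* Any bit outside the supported set rejects the whole request. */
	if (mode & ~SUPPORTED_MODES)
		return -EOPNOTSUPP;
	return 0;
}

int main(void)
{
	printf("%d\n", check_mode(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE));
	printf("%d\n", check_mode(1 << 30));	/* unknown bit: -EOPNOTSUPP */
	return 0;
}
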
@@ -835,18 +878,21 @@ xfs_file_fallocate(
        enum xfs_prealloc_flags flags = 0;
        uint                    iolock = XFS_IOLOCK_EXCL;
        loff_t                  new_size = 0;
+       bool                    do_file_insert = false;
 
        if (!S_ISREG(inode->i_mode))
                return -EINVAL;
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-                    FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+       if (mode & ~XFS_FALLOC_FL_SUPPORTED)
                return -EOPNOTSUPP;
 
        xfs_ilock(ip, iolock);
-       error = xfs_break_layouts(inode, &iolock);
+       error = xfs_break_layouts(inode, &iolock, false);
        if (error)
                goto out_unlock;
 
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+       iolock |= XFS_MMAPLOCK_EXCL;
+
        if (mode & FALLOC_FL_PUNCH_HOLE) {
                error = xfs_free_file_space(ip, offset, len);
                if (error)
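
Note how the freshly taken XFS_MMAPLOCK_EXCL is ORed into iolock above, so
the single xfs_iunlock() at out_unlock releases exactly the locks held at
whatever point an error path jumps there. A minimal sketch of that
accumulation idiom; take_lock, drop_locks and the LOCK_* bits are invented
for illustration, not XFS interfaces:

#include <stdbool.h>
#include <stdio.h>

#define LOCK_IO		0x1	/* stands in for XFS_IOLOCK_EXCL */
#define LOCK_MMAP	0x2	/* stands in for XFS_MMAPLOCK_EXCL */

static void take_lock(unsigned *held, unsigned which)
{
	/* Take `which` here, then record it so the unlock path sees it. */
	*held |= which;
}

static void drop_locks(unsigned held)
{
	printf("releasing locks 0x%x\n", held);
}

static int do_fallocate(bool fail_early)
{
	unsigned iolock = 0;
	int error = 0;

	take_lock(&iolock, LOCK_IO);
	if (fail_early) {		/* e.g. breaking layouts failed */
		error = -1;
		goto out_unlock;	/* only LOCK_IO gets released */
	}
	take_lock(&iolock, LOCK_MMAP);
	/* ... the actual space manipulation would go here ... */
out_unlock:
	drop_locks(iolock);
	return error;
}

int main(void)
{
	do_fallocate(true);	/* releases 0x1 */
	do_fallocate(false);	/* releases 0x3 */
	return 0;
}
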
@@ -873,6 +919,27 @@ xfs_file_fallocate(
                error = xfs_collapse_file_space(ip, offset, len);
                if (error)
                        goto out_unlock;
+       } else if (mode & FALLOC_FL_INSERT_RANGE) {
+               unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+
+               new_size = i_size_read(inode) + len;
+               if (offset & blksize_mask || len & blksize_mask) {
+                       error = -EINVAL;
+                       goto out_unlock;
+               }
+
+               /* check the new inode size does not exceed the max file size */
+               if (new_size > inode->i_sb->s_maxbytes) {
+                       error = -EFBIG;
+                       goto out_unlock;
+               }
+
+               /* Offset should be less than i_size */
+               if (offset >= i_size_read(inode)) {
+                       error = -EINVAL;
+                       goto out_unlock;
+               }
+               do_file_insert = true;
        } else {
                flags |= XFS_PREALLOC_SET;
 
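The three FALLOC_FL_INSERT_RANGE checks above reject unaligned ranges,
growth past the maximum file size, and insertion at or beyond EOF. A
hypothetical user-space sketch of the same checks, with blkbits and
max_bytes standing in for i_blkbits and s_maxbytes:

#include <errno.h>
#include <stdio.h>

static int check_insert_range(long long offset, long long len,
			      long long isize, unsigned blkbits,
			      long long max_bytes)
{
	long long blksize_mask = (1LL << blkbits) - 1;
	long long new_size = isize + len;

	if ((offset & blksize_mask) || (len & blksize_mask))
		return -EINVAL;		/* both must be block aligned */
	if (new_size > max_bytes)
		return -EFBIG;		/* grown size exceeds the maximum */
	if (offset >= isize)
		return -EINVAL;		/* must insert inside the file */
	return 0;
}

int main(void)
{
	/* 4k blocks, 1MiB file, insert 8k at offset 4k: OK */
	printf("%d\n", check_insert_range(4096, 8192, 1 << 20, 12, 1LL << 40));
	/* unaligned offset: -EINVAL */
	printf("%d\n", check_insert_range(100, 8192, 1 << 20, 12, 1LL << 40));
	return 0;
}
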
@@ -907,8 +974,19 @@ xfs_file_fallocate(
                iattr.ia_valid = ATTR_SIZE;
                iattr.ia_size = new_size;
                error = xfs_setattr_size(ip, &iattr);
+               if (error)
+                       goto out_unlock;
        }
 
+       /*
+        * Perform hole insertion now that the file size has been
+        * updated, so that if we crash during the operation we don't
+        * leave shifted extents past EOF and hence lose access to the
+        * data contained within them.
+        */
+       if (do_file_insert)
+               error = xfs_insert_file_space(ip, offset, len);
+
 out_unlock:
        xfs_iunlock(ip, iolock);
        return error;
@@ -996,20 +1074,6 @@ xfs_file_mmap(
        return 0;
 }
 
-/*
- * mmap()d file has taken write protection fault and is being made
- * writable. We can set the page state up correctly for a writable
- * page, which means we can do correct delalloc accounting (ENOSPC
- * checking!) and unwritten extent mapping.
- */
-STATIC int
-xfs_vm_page_mkwrite(
-       struct vm_area_struct   *vma,
-       struct vm_fault         *vmf)
-{
-       return block_page_mkwrite(vma, vmf, xfs_get_blocks);
-}
-
 /*
  * This type is designed to indicate the type of offset we would like
  * to search from page cache for xfs_seek_hole_data().
@@ -1385,6 +1449,55 @@ xfs_file_llseek(
        }
 }
 
+/*
+ * Locking for serialisation of IO during page faults. This results in a lock
+ * ordering of:
+ *
+ * mmap_sem (MM)
+ *   i_mmap_lock (XFS - truncate serialisation)
+ *     page_lock (MM)
+ *       i_lock (XFS - extent map serialisation)
+ */
+STATIC int
+xfs_filemap_fault(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+{
+       struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
+       int                     error;
+
+       trace_xfs_filemap_fault(ip);
+
+       xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+       error = filemap_fault(vma, vmf);
+       xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+       return error;
+}
+
+/*
+ * mmap()d file has taken write protection fault and is being made writable. We
+ * can set the page state up correctly for a writable page, which means we can
+ * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
+ * mapping.
+ */
+STATIC int
+xfs_filemap_page_mkwrite(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+{
+       struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
+       int                     error;
+
+       trace_xfs_filemap_page_mkwrite(ip);
+
+       xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+       error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+       xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+       return error;
+}
+
 const struct file_operations xfs_file_operations = {
        .llseek         = xfs_file_llseek,
        .read_iter      = xfs_file_read_iter,
@@ -1415,7 +1528,7 @@ const struct file_operations xfs_dir_file_operations = {
 };
 
 static const struct vm_operations_struct xfs_file_vm_ops = {
-       .fault          = filemap_fault,
+       .fault          = xfs_filemap_fault,
        .map_pages      = filemap_map_pages,
-       .page_mkwrite   = xfs_vm_page_mkwrite,
+       .page_mkwrite   = xfs_filemap_page_mkwrite,
 };
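
Both new handlers share one wrapper shape: take the filesystem's own mmap
lock shared around the generic handler, so a truncate that takes the same
lock exclusive can never run concurrently with a page fault. A user-space
sketch of that pattern with a pthread rwlock; generic_fault() and
truncate_file() are invented stand-ins, not kernel interfaces:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t mmaplock = PTHREAD_RWLOCK_INITIALIZER;

static int generic_fault(void)		/* stands in for filemap_fault() */
{
	return 0;
}

static int wrapped_fault(void)
{
	int error;

	pthread_rwlock_rdlock(&mmaplock);	/* XFS_MMAPLOCK_SHARED */
	error = generic_fault();
	pthread_rwlock_unlock(&mmaplock);
	return error;
}

static void truncate_file(void)
{
	pthread_rwlock_wrlock(&mmaplock);	/* XFS_MMAPLOCK_EXCL */
	/* ... remove pages and extents with no fault racing in ... */
	pthread_rwlock_unlock(&mmaplock);
}

int main(void)
{
	truncate_file();
	printf("fault returned %d\n", wrapped_fault());
	return 0;
}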