git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - fs/xfs/xfs_file.c
Merge branch 'xfs-dax-updates' into for-next
[karo-tx-linux.git] / fs / xfs / xfs_file.c
index ce208e3896aa4996c3f1745906d31f81eb5e214b..39743efae79501f3d590b05722ebd0c30deb50f4 100644 (file)
@@ -242,19 +242,30 @@ xfs_file_fsync(
        }
 
        /*
-        * All metadata updates are logged, which means that we just have
-        * to flush the log up to the latest LSN that touched the inode.
+        * All metadata updates are logged, which means that we just have to
+        * flush the log up to the latest LSN that touched the inode. If we have
+        * concurrent fsync/fdatasync() calls, we need them to all block on the
+        * log force before we clear the ili_fsync_fields field. This ensures
+        * that we don't get a racing sync operation that does not wait for the
+        * metadata to hit the journal before returning. If we race with
+        * clearing the ili_fsync_fields, then all that will happen is the log
+        * force will do nothing as the lsn will already be on disk. We can't
+        * race with setting ili_fsync_fields because that is done under
+        * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
+        * until after the ili_fsync_fields is cleared.
         */
        xfs_ilock(ip, XFS_ILOCK_SHARED);
        if (xfs_ipincount(ip)) {
                if (!datasync ||
-                   (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
+                   (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
                        lsn = ip->i_itemp->ili_last_lsn;
        }
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
-       if (lsn)
+       if (lsn) {
                error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
+               ip->i_itemp->ili_fsync_fields = 0;
+       }
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
        /*
         * If we only have a single device, and the log force about was
@@ -287,7 +298,7 @@ xfs_file_read_iter(
        xfs_fsize_t             n;
        loff_t                  pos = iocb->ki_pos;
 
-       XFS_STATS_INC(xs_read_calls);
+       XFS_STATS_INC(mp, xs_read_calls);
 
        if (unlikely(iocb->ki_flags & IOCB_DIRECT))
                ioflags |= XFS_IO_ISDIRECT;
@@ -365,7 +376,7 @@ xfs_file_read_iter(
 
        ret = generic_file_read_iter(iocb, to);
        if (ret > 0)
-               XFS_STATS_ADD(xs_read_bytes, ret);
+               XFS_STATS_ADD(mp, xs_read_bytes, ret);
 
        xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
        return ret;
@@ -383,7 +394,7 @@ xfs_file_splice_read(
        int                     ioflags = 0;
        ssize_t                 ret;
 
-       XFS_STATS_INC(xs_read_calls);
+       XFS_STATS_INC(ip->i_mount, xs_read_calls);
 
        if (infilp->f_mode & FMODE_NOCMTIME)
                ioflags |= XFS_IO_INVIS;
@@ -401,7 +412,7 @@ xfs_file_splice_read(
        else
                ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
        if (ret > 0)
-               XFS_STATS_ADD(xs_read_bytes, ret);
+               XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
 
        xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
        return ret;
@@ -482,6 +493,8 @@ xfs_zero_eof(
        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
        ASSERT(offset > isize);
 
+       trace_xfs_zero_eof(ip, isize, offset - isize);
+
        /*
         * First handle zeroing the block on which isize resides.
         *
@@ -574,6 +587,7 @@ xfs_file_aio_write_checks(
        struct xfs_inode        *ip = XFS_I(inode);
        ssize_t                 error = 0;
        size_t                  count = iov_iter_count(from);
+       bool                    drained_dio = false;
 
 restart:
        error = generic_write_checks(iocb, from);
@@ -611,12 +625,13 @@ restart:
                bool    zero = false;
 
                spin_unlock(&ip->i_flags_lock);
-               if (*iolock == XFS_IOLOCK_SHARED) {
-                       xfs_rw_iunlock(ip, *iolock);
-                       *iolock = XFS_IOLOCK_EXCL;
-                       xfs_rw_ilock(ip, *iolock);
-                       iov_iter_reexpand(from, count);
-
+               if (!drained_dio) {
+                       if (*iolock == XFS_IOLOCK_SHARED) {
+                               xfs_rw_iunlock(ip, *iolock);
+                               *iolock = XFS_IOLOCK_EXCL;
+                               xfs_rw_ilock(ip, *iolock);
+                               iov_iter_reexpand(from, count);
+                       }
                        /*
                         * We now have an IO submission barrier in place, but
                         * AIO can do EOF updates during IO completion and hence
@@ -626,6 +641,7 @@ restart:
                         * no-op.
                         */
                        inode_dio_wait(inode);
+                       drained_dio = true;
                        goto restart;
                }
                error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
@@ -867,7 +883,7 @@ xfs_file_write_iter(
        ssize_t                 ret;
        size_t                  ocount = iov_iter_count(from);
 
-       XFS_STATS_INC(xs_write_calls);
+       XFS_STATS_INC(ip->i_mount, xs_write_calls);
 
        if (ocount == 0)
                return 0;
@@ -883,7 +899,7 @@ xfs_file_write_iter(
        if (ret > 0) {
                ssize_t err;
 
-               XFS_STATS_ADD(xs_write_bytes, ret);
+               XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
 
                /* Handle various SYNC-type writes */
                err = generic_write_sync(file, iocb->ki_pos - ret, ret);