]> git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - fs/xfs/linux-2.6/xfs_lrw.c
Merge branch 'splice-2.6.23' of git://git.kernel.dk/data/git/linux-2.6-block
[karo-tx-linux.git] / fs / xfs / linux-2.6 / xfs_lrw.c
index 65e79b471d49570f825033d31843327e2d7c50f2..765ec16a6e392d3406d1891c6a58bfc456f1d23a 100644 (file)
@@ -43,8 +43,6 @@
 #include "xfs_itable.h"
 #include "xfs_rw.h"
 #include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
 #include "xfs_attr.h"
 #include "xfs_inode_item.h"
 #include "xfs_buf_item.h"
@@ -134,13 +132,11 @@ STATIC int
 xfs_iozero(
        struct inode            *ip,    /* inode                        */
        loff_t                  pos,    /* offset in file               */
-       size_t                  count,  /* size of data to zero         */
-       loff_t                  end_size)       /* max file size to set */
+       size_t                  count)  /* size of data to zero         */
 {
        unsigned                bytes;
        struct page             *page;
        struct address_space    *mapping;
-       char                    *kaddr;
        int                     status;
 
        mapping = ip->i_mapping;
@@ -158,26 +154,21 @@ xfs_iozero(
                if (!page)
                        break;
 
-               kaddr = kmap(page);
                status = mapping->a_ops->prepare_write(NULL, page, offset,
                                                        offset + bytes);
-               if (status) {
+               if (status)
                        goto unlock;
-               }
 
-               memset((void *) (kaddr + offset), 0, bytes);
-               flush_dcache_page(page);
+               zero_user_page(page, offset, bytes, KM_USER0);
+
                status = mapping->a_ops->commit_write(NULL, page, offset,
                                                        offset + bytes);
                if (!status) {
                        pos += bytes;
                        count -= bytes;
-                       if (pos > i_size_read(ip))
-                               i_size_write(ip, pos < end_size ? pos : end_size);
                }
 
 unlock:
-               kunmap(page);
                unlock_page(page);
                page_cache_release(page);
                if (status)
@@ -200,7 +191,7 @@ xfs_read(
        struct file             *file = iocb->ki_filp;
        struct inode            *inode = file->f_mapping->host;
        size_t                  size = 0;
-       ssize_t                 ret;
+       ssize_t                 ret = 0;
        xfs_fsize_t             n;
        xfs_inode_t             *ip;
        xfs_mount_t             *mp;
@@ -233,7 +224,7 @@ xfs_read(
                                mp->m_rtdev_targp : mp->m_ddev_targp;
                if ((*offset & target->bt_smask) ||
                    (size & target->bt_smask)) {
-                       if (*offset == ip->i_d.di_size) {
+                       if (*offset == ip->i_size) {
                                return (0);
                        }
                        return -XFS_ERROR(EINVAL);
@@ -272,9 +263,13 @@ xfs_read(
 
        if (unlikely(ioflags & IO_ISDIRECT)) {
                if (VN_CACHED(vp))
-                       bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)),
+                       ret = bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)),
                                                 -1, FI_REMAPF_LOCKED);
                mutex_unlock(&inode->i_mutex);
+               if (ret) {
+                       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+                       return ret;
+               }
        }
 
        xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
@@ -291,50 +286,6 @@ xfs_read(
        return ret;
 }
 
-ssize_t
-xfs_sendfile(
-       bhv_desc_t              *bdp,
-       struct file             *filp,
-       loff_t                  *offset,
-       int                     ioflags,
-       size_t                  count,
-       read_actor_t            actor,
-       void                    *target,
-       cred_t                  *credp)
-{
-       xfs_inode_t             *ip = XFS_BHVTOI(bdp);
-       xfs_mount_t             *mp = ip->i_mount;
-       ssize_t                 ret;
-
-       XFS_STATS_INC(xs_read_calls);
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -EIO;
-
-       xfs_ilock(ip, XFS_IOLOCK_SHARED);
-
-       if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
-           (!(ioflags & IO_INVIS))) {
-               bhv_vrwlock_t locktype = VRWLOCK_READ;
-               int error;
-
-               error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
-                                     *offset, count,
-                                     FILP_DELAY_FLAG(filp), &locktype);
-               if (error) {
-                       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-                       return -error;
-               }
-       }
-       xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
-                  (void *)(unsigned long)target, count, *offset, ioflags);
-       ret = generic_file_sendfile(filp, offset, count, actor, target);
-       if (ret > 0)
-               XFS_STATS_ADD(xs_read_bytes, ret);
-
-       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-       return ret;
-}
-
 ssize_t
 xfs_splice_read(
        bhv_desc_t              *bdp,
@@ -392,9 +343,10 @@ xfs_splice_write(
 {
        xfs_inode_t             *ip = XFS_BHVTOI(bdp);
        xfs_mount_t             *mp = ip->i_mount;
+       xfs_iocore_t            *io = &ip->i_iocore;
        ssize_t                 ret;
        struct inode            *inode = outfilp->f_mapping->host;
-       xfs_fsize_t             isize;
+       xfs_fsize_t             isize, new_size;
 
        XFS_STATS_INC(xs_write_calls);
        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -415,6 +367,14 @@ xfs_splice_write(
                        return -error;
                }
        }
+
+       new_size = *ppos + count;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       if (new_size > ip->i_size)
+               io->io_new_size = new_size;
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
        xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore,
                           pipe, count, *ppos, ioflags);
        ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
@@ -425,14 +385,18 @@ xfs_splice_write(
        if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
                *ppos = isize;
 
-       if (*ppos > ip->i_d.di_size) {
+       if (*ppos > ip->i_size) {
                xfs_ilock(ip, XFS_ILOCK_EXCL);
-               if (*ppos > ip->i_d.di_size) {
-                       ip->i_d.di_size = *ppos;
-                       i_size_write(inode, *ppos);
-                       ip->i_update_core = 1;
-                       ip->i_update_size = 1;
-               }
+               if (*ppos > ip->i_size)
+                       ip->i_size = *ppos;
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+
+       if (io->io_new_size) {
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+               io->io_new_size = 0;
+               if (ip->i_d.di_size > ip->i_size)
+                       ip->i_d.di_size = ip->i_size;
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
        }
        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -449,8 +413,8 @@ STATIC int                          /* error (positive) */
 xfs_zero_last_block(
        struct inode    *ip,
        xfs_iocore_t    *io,
-       xfs_fsize_t     isize,
-       xfs_fsize_t     end_size)
+       xfs_fsize_t     offset,
+       xfs_fsize_t     isize)
 {
        xfs_fileoff_t   last_fsb;
        xfs_mount_t     *mp = io->io_mount;
@@ -459,7 +423,6 @@ xfs_zero_last_block(
        int             zero_len;
        int             error = 0;
        xfs_bmbt_irec_t imap;
-       loff_t          loff;
 
        ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
 
@@ -494,9 +457,10 @@ xfs_zero_last_block(
         */
        XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
 
-       loff = XFS_FSB_TO_B(mp, last_fsb);
        zero_len = mp->m_sb.sb_blocksize - zero_offset;
-       error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
+       if (isize + zero_len > offset)
+               zero_len = offset - isize;
+       error = xfs_iozero(ip, isize, zero_len);
 
        XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
        ASSERT(error >= 0);
@@ -519,14 +483,15 @@ xfs_zero_eof(
        bhv_vnode_t     *vp,
        xfs_iocore_t    *io,
        xfs_off_t       offset,         /* starting I/O offset */
-       xfs_fsize_t     isize,          /* current inode size */
-       xfs_fsize_t     end_size)       /* terminal inode size */
+       xfs_fsize_t     isize)          /* current inode size */
 {
        struct inode    *ip = vn_to_inode(vp);
        xfs_fileoff_t   start_zero_fsb;
        xfs_fileoff_t   end_zero_fsb;
        xfs_fileoff_t   zero_count_fsb;
        xfs_fileoff_t   last_fsb;
+       xfs_fileoff_t   zero_off;
+       xfs_fsize_t     zero_len;
        xfs_mount_t     *mp = io->io_mount;
        int             nimaps;
        int             error = 0;
@@ -540,7 +505,7 @@ xfs_zero_eof(
         * First handle zeroing the block on which isize resides.
         * We only zero a part of that block so it is handled specially.
         */
-       error = xfs_zero_last_block(ip, io, isize, end_size);
+       error = xfs_zero_last_block(ip, io, offset, isize);
        if (error) {
                ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
                ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
@@ -601,10 +566,13 @@ xfs_zero_eof(
                 */
                XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
 
-               error = xfs_iozero(ip,
-                                  XFS_FSB_TO_B(mp, start_zero_fsb),
-                                  XFS_FSB_TO_B(mp, imap.br_blockcount),
-                                  end_size);
+               zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
+               zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
+
+               if ((zero_off + zero_len) > offset)
+                       zero_len = offset - zero_off;
+
+               error = xfs_iozero(ip, zero_off, zero_len);
                if (error) {
                        goto out_lock;
                }
@@ -644,37 +612,21 @@ xfs_write(
        xfs_fsize_t             isize, new_size;
        xfs_iocore_t            *io;
        bhv_vnode_t             *vp;
-       unsigned long           seg;
        int                     iolock;
        int                     eventsent = 0;
        bhv_vrwlock_t           locktype;
        size_t                  ocount = 0, count;
        loff_t                  pos;
-       int                     need_i_mutex = 1, need_flush = 0;
+       int                     need_i_mutex;
 
        XFS_STATS_INC(xs_write_calls);
 
        vp = BHV_TO_VNODE(bdp);
        xip = XFS_BHVTOI(bdp);
 
-       for (seg = 0; seg < segs; seg++) {
-               const struct iovec *iv = &iovp[seg];
-
-               /*
-                * If any segment has a negative length, or the cumulative
-                * length ever wraps negative then return -EINVAL.
-                */
-               ocount += iv->iov_len;
-               if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
-                       return -EINVAL;
-               if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
-                       continue;
-               if (seg == 0)
-                       return -EFAULT;
-               segs = seg;
-               ocount -= iv->iov_len;  /* This segment is no good */
-               break;
-       }
+       error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ);
+       if (error)
+               return error;
 
        count = ocount;
        pos = *offset;
@@ -690,39 +642,20 @@ xfs_write(
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
 
-       if (ioflags & IO_ISDIRECT) {
-               xfs_buftarg_t   *target =
-                       (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
-                               mp->m_rtdev_targp : mp->m_ddev_targp;
-
-               if ((pos & target->bt_smask) || (count & target->bt_smask))
-                       return XFS_ERROR(-EINVAL);
-
-               if (!VN_CACHED(vp) && pos < i_size_read(inode))
-                       need_i_mutex = 0;
-
-               if (VN_CACHED(vp))
-                       need_flush = 1;
-       }
-
 relock:
-       if (need_i_mutex) {
+       if (ioflags & IO_ISDIRECT) {
+               iolock = XFS_IOLOCK_SHARED;
+               locktype = VRWLOCK_WRITE_DIRECT;
+               need_i_mutex = 0;
+       } else {
                iolock = XFS_IOLOCK_EXCL;
                locktype = VRWLOCK_WRITE;
-
+               need_i_mutex = 1;
                mutex_lock(&inode->i_mutex);
-       } else {
-               iolock = XFS_IOLOCK_SHARED;
-               locktype = VRWLOCK_WRITE_DIRECT;
        }
 
        xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
 
-       isize = i_size_read(inode);
-
-       if (file->f_flags & O_APPEND)
-               *offset = isize;
-
 start:
        error = -generic_write_checks(file, &pos, &count,
                                        S_ISBLK(inode->i_mode));
@@ -731,13 +664,8 @@ start:
                goto out_unlock_mutex;
        }
 
-       new_size = pos + count;
-       if (new_size > isize)
-               io->io_new_size = new_size;
-
        if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
            !(ioflags & IO_INVIS) && !eventsent)) {
-               loff_t          savedsize = pos;
                int             dmflags = FILP_DELAY_FLAG(file);
 
                if (need_i_mutex)
@@ -748,8 +676,7 @@ start:
                                      pos, count,
                                      dmflags, &locktype);
                if (error) {
-                       xfs_iunlock(xip, iolock);
-                       goto out_unlock_mutex;
+                       goto out_unlock_internal;
                }
                xfs_ilock(xip, XFS_ILOCK_EXCL);
                eventsent = 1;
@@ -761,12 +688,35 @@ start:
                 * event prevents another call to XFS_SEND_DATA, which is
                 * what allows the size to change in the first place.
                 */
-               if ((file->f_flags & O_APPEND) && savedsize != isize) {
-                       pos = isize = xip->i_d.di_size;
+               if ((file->f_flags & O_APPEND) && pos != xip->i_size)
+                       goto start;
+       }
+
+       if (ioflags & IO_ISDIRECT) {
+               xfs_buftarg_t   *target =
+                       (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
+                               mp->m_rtdev_targp : mp->m_ddev_targp;
+
+               if ((pos & target->bt_smask) || (count & target->bt_smask)) {
+                       xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
+                       return XFS_ERROR(-EINVAL);
+               }
+
+               if (!need_i_mutex && (VN_CACHED(vp) || pos > xip->i_size)) {
+                       xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
+                       iolock = XFS_IOLOCK_EXCL;
+                       locktype = VRWLOCK_WRITE;
+                       need_i_mutex = 1;
+                       mutex_lock(&inode->i_mutex);
+                       xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
                        goto start;
                }
        }
 
+       new_size = pos + count;
+       if (new_size > xip->i_size)
+               io->io_new_size = new_size;
+
        if (likely(!(ioflags & IO_INVIS))) {
                file_update_time(file);
                xfs_ichgtime_fast(xip, inode,
@@ -782,12 +732,11 @@ start:
         * to zero it out up to the new size.
         */
 
-       if (pos > isize) {
-               error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos,
-                                       isize, pos + count);
+       if (pos > xip->i_size) {
+               error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, xip->i_size);
                if (error) {
-                       xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
-                       goto out_unlock_mutex;
+                       xfs_iunlock(xip, XFS_ILOCK_EXCL);
+                       goto out_unlock_internal;
                }
        }
        xfs_iunlock(xip, XFS_ILOCK_EXCL);
@@ -807,8 +756,7 @@ start:
                if (likely(!error))
                        error = -remove_suid(file->f_path.dentry);
                if (unlikely(error)) {
-                       xfs_iunlock(xip, iolock);
-                       goto out_unlock_mutex;
+                       goto out_unlock_internal;
                }
        }
 
@@ -817,11 +765,14 @@ retry:
        current->backing_dev_info = mapping->backing_dev_info;
 
        if ((ioflags & IO_ISDIRECT)) {
-               if (need_flush) {
+               if (VN_CACHED(vp)) {
+                       WARN_ON(need_i_mutex == 0);
                        xfs_inval_cached_trace(io, pos, -1,
                                        ctooff(offtoct(pos)), -1);
-                       bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)),
+                       error = bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)),
                                        -1, FI_REMAPF_LOCKED);
+                       if (error)
+                               goto out_unlock_internal;
                }
 
                if (need_i_mutex) {
@@ -849,7 +800,6 @@ retry:
                        pos += ret;
                        count -= ret;
 
-                       need_i_mutex = 1;
                        ioflags &= ~IO_ISDIRECT;
                        xfs_iunlock(xip, iolock);
                        goto relock;
@@ -876,12 +826,12 @@ retry:
                error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
                                DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
                                0, 0, 0); /* Delay flag intentionally  unused */
-               if (error)
-                       goto out_nounlocks;
                if (need_i_mutex)
                        mutex_lock(&inode->i_mutex);
                xfs_rwlock(bdp, locktype);
-               pos = xip->i_d.di_size;
+               if (error)
+                       goto out_unlock_internal;
+               pos = xip->i_size;
                ret = 0;
                goto retry;
        }
@@ -890,14 +840,10 @@ retry:
        if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
                *offset = isize;
 
-       if (*offset > xip->i_d.di_size) {
+       if (*offset > xip->i_size) {
                xfs_ilock(xip, XFS_ILOCK_EXCL);
-               if (*offset > xip->i_d.di_size) {
-                       xip->i_d.di_size = *offset;
-                       i_size_write(inode, *offset);
-                       xip->i_update_core = 1;
-                       xip->i_update_size = 1;
-               }
+               if (*offset > xip->i_size)
+                       xip->i_size = *offset;
                xfs_iunlock(xip, XFS_ILOCK_EXCL);
        }
 
@@ -919,16 +865,31 @@ retry:
 
                error = sync_page_range(inode, mapping, pos, ret);
                if (!error)
-                       error = ret;
-               return error;
+                       error = -ret;
+               if (need_i_mutex)
+                       mutex_lock(&inode->i_mutex);
+               xfs_rwlock(bdp, locktype);
        }
 
  out_unlock_internal:
+       if (io->io_new_size) {
+               xfs_ilock(xip, XFS_ILOCK_EXCL);
+               io->io_new_size = 0;
+               /*
+                * If this was a direct or synchronous I/O that failed (such
+                * as ENOSPC) then part of the I/O may have been written to
+                * disk before the error occured.  In this case the on-disk
+                * file size may have been adjusted beyond the in-memory file
+                * size and now needs to be truncated back.
+                */
+               if (xip->i_d.di_size > xip->i_size)
+                       xip->i_d.di_size = xip->i_size;
+               xfs_iunlock(xip, XFS_ILOCK_EXCL);
+       }
        xfs_rwunlock(bdp, locktype);
  out_unlock_mutex:
        if (need_i_mutex)
                mutex_unlock(&inode->i_mutex);
- out_nounlocks:
        return -error;
 }