xfs: struct xfs_buf_log_format isn't variable sized.
fs/xfs/xfs_buf_item.c
index eac97ef81e2a156a58d9b74b54607c6a0d2e76c3..52cd8f89ee72b4ba403def680f2ac99585c48aee 100644 (file)
@@ -20,7 +20,6 @@
 #include "xfs_types.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -123,11 +122,11 @@ xfs_buf_item_log_check(
        ASSERT(bip->bli_logged != NULL);
 
        bp = bip->bli_buf;
-       ASSERT(XFS_BUF_COUNT(bp) > 0);
+       ASSERT(bp->b_length > 0);
        ASSERT(bp->b_addr != NULL);
        orig = bip->bli_orig;
        buffer = bp->b_addr;
-       for (x = 0; x < XFS_BUF_COUNT(bp); x++) {
+       for (x = 0; x < BBTOB(bp->b_length); x++) {
                if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
                        xfs_emerg(bp->b_mount,
                                "%s: bip %x buffer %x orig %x index %d",
@@ -241,15 +240,13 @@ xfs_buf_item_format(
               (bip->bli_flags & XFS_BLI_STALE));
 
        /*
-        * The size of the base structure is the size of the
-        * declared structure plus the space for the extra words
-        * of the bitmap.  We subtract one from the map size, because
-        * the first element of the bitmap is accounted for in the
-        * size of the base structure.
+        * Base size is the actual size of the ondisk structure - it reflects
+        * the actual size of the dirty bitmap rather than the size of the in
+        * memory structure.
         */
-       base_size =
-               (uint)(sizeof(xfs_buf_log_format_t) +
-                      ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
+       base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
+                       (bip->bli_format.blf_map_size *
+                               sizeof(bip->bli_format.blf_data_map[0]));
        vecp->i_addr = &bip->bli_format;
        vecp->i_len = base_size;
        vecp->i_type = XLOG_REG_TYPE_BFORMAT;
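
The new base_size computes the log region as "header up to the bitmap, plus the bitmap words actually in use" with offsetof(), rather than deriving it from sizeof() the declared structure. A standalone sketch of the idiom, using a simplified stand-in for struct xfs_buf_log_format (field layout illustrative only):

#include <stddef.h>

struct blf_like {
        unsigned short  blf_type;
        unsigned short  blf_size;
        unsigned short  blf_flags;
        unsigned short  blf_len;
        long long       blf_blkno;
        unsigned int    blf_map_size;           /* bitmap words in use */
        unsigned int    blf_data_map[1];        /* dirty bitmap follows */
};

static size_t
blf_log_size(const struct blf_like *blf)
{
        /* header up to the bitmap, plus the words actually used */
        return offsetof(struct blf_like, blf_data_map) +
                blf->blf_map_size * sizeof(blf->blf_data_map[0]);
}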
@@ -418,7 +415,6 @@ xfs_buf_item_unpin(
        if (freed && stale) {
                ASSERT(bip->bli_flags & XFS_BLI_STALE);
                ASSERT(xfs_buf_islocked(bp));
-               ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
                ASSERT(XFS_BUF_ISSTALE(bp));
                ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
 
@@ -455,42 +451,42 @@ xfs_buf_item_unpin(
                        bp->b_iodone = NULL;
                } else {
                        spin_lock(&ailp->xa_lock);
-                       xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
+                       xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR);
                        xfs_buf_item_relse(bp);
                        ASSERT(bp->b_fspriv == NULL);
                }
                xfs_buf_relse(bp);
+       } else if (freed && remove) {
+               xfs_buf_lock(bp);
+               xfs_buf_ioerror(bp, EIO);
+               XFS_BUF_UNDONE(bp);
+               xfs_buf_stale(bp);
+               xfs_buf_ioend(bp, 0);
        }
 }
 
-/*
- * This is called to attempt to lock the buffer associated with this
- * buf log item.  Don't sleep on the buffer lock.  If we can't get
- * the lock right away, return 0.  If we can get the lock, take a
- * reference to the buffer. If this is a delayed write buffer that
- * needs AIL help to be written back, invoke the pushbuf routine
- * rather than the normal success path.
- */
 STATIC uint
-xfs_buf_item_trylock(
-       struct xfs_log_item     *lip)
+xfs_buf_item_push(
+       struct xfs_log_item     *lip,
+       struct list_head        *buffer_list)
 {
        struct xfs_buf_log_item *bip = BUF_ITEM(lip);
        struct xfs_buf          *bp = bip->bli_buf;
+       uint                    rval = XFS_ITEM_SUCCESS;
 
        if (xfs_buf_ispinned(bp))
                return XFS_ITEM_PINNED;
        if (!xfs_buf_trylock(bp))
                return XFS_ITEM_LOCKED;
 
-       /* take a reference to the buffer.  */
-       xfs_buf_hold(bp);
-
        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
-       trace_xfs_buf_item_trylock(bip);
-       if (XFS_BUF_ISDELAYWRITE(bp))
-               return XFS_ITEM_PUSHBUF;
-       return XFS_ITEM_SUCCESS;
+
+       trace_xfs_buf_item_push(bip);
+
+       if (!xfs_buf_delwri_queue(bp, buffer_list))
+               rval = XFS_ITEM_FLUSHING;
+       xfs_buf_unlock(bp);
+       return rval;
 }
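
xfs_buf_item_trylock() and xfs_buf_item_pushbuf() (removed here and further down) collapse into this single ->iop_push method, which takes a caller-supplied buffer list instead of relying on a global delayed-write queue. A hedged sketch of the caller side this implies, in the spirit of the AIL pusher (xfs_buf_delwri_submit_nowait() and the XFS_ITEM_* return codes come from the same patch series; example_push_one is a made-up helper name):

static void
example_push_one(
        struct xfs_log_item     *lip)
{
        LIST_HEAD(buffer_list);         /* on-stack delayed-write list */
        uint                    rval;

        /*
         * ->iop_push tries to lock the item's buffer and queue it on our
         * list; it never blocks and never writes the buffer itself.
         */
        rval = lip->li_ops->iop_push(lip, &buffer_list);
        if (rval == XFS_ITEM_PINNED) {
                /* a real pusher would force the log and retry later */
                return;
        }

        /* issue asynchronous writeback for whatever was queued */
        xfs_buf_delwri_submit_nowait(&buffer_list);
}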
 
 /*
@@ -603,49 +599,6 @@ xfs_buf_item_committed(
        return lsn;
 }
 
-/*
- * The buffer is locked, but is not a delayed write buffer. This happens
- * if we race with IO completion and hence we don't want to try to write it
- * again. Just release the buffer.
- */
-STATIC void
-xfs_buf_item_push(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_buf_log_item *bip = BUF_ITEM(lip);
-       struct xfs_buf          *bp = bip->bli_buf;
-
-       ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
-       ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
-
-       trace_xfs_buf_item_push(bip);
-
-       xfs_buf_relse(bp);
-}
-
-/*
- * The buffer is locked and is a delayed write buffer. Promote the buffer
- * in the delayed write queue as the caller knows that they must invoke
- * the xfsbufd to get this buffer written. We have to unlock the buffer
- * to allow the xfsbufd to write it, too.
- */
-STATIC bool
-xfs_buf_item_pushbuf(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_buf_log_item *bip = BUF_ITEM(lip);
-       struct xfs_buf          *bp = bip->bli_buf;
-
-       ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
-       ASSERT(XFS_BUF_ISDELAYWRITE(bp));
-
-       trace_xfs_buf_item_pushbuf(bip);
-
-       xfs_buf_delwri_promote(bp);
-       xfs_buf_relse(bp);
-       return true;
-}
-
 STATIC void
 xfs_buf_item_committing(
        struct xfs_log_item     *lip,
@@ -661,11 +614,9 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
        .iop_format     = xfs_buf_item_format,
        .iop_pin        = xfs_buf_item_pin,
        .iop_unpin      = xfs_buf_item_unpin,
-       .iop_trylock    = xfs_buf_item_trylock,
        .iop_unlock     = xfs_buf_item_unlock,
        .iop_committed  = xfs_buf_item_committed,
        .iop_push       = xfs_buf_item_push,
-       .iop_pushbuf    = xfs_buf_item_pushbuf,
        .iop_committing = xfs_buf_item_committing
 };
 
@@ -703,7 +654,8 @@ xfs_buf_item_init(
         * truncate any pieces.  map_size is the size of the
         * bitmap needed to describe the chunks of the buffer.
         */
-       chunks = (int)((XFS_BUF_COUNT(bp) + (XFS_BLF_CHUNK - 1)) >> XFS_BLF_SHIFT);
+       chunks = (int)((BBTOB(bp->b_length) + (XFS_BLF_CHUNK - 1)) >>
+                                                               XFS_BLF_SHIFT);
        map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT);
 
        bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone,
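
As a worked example of the sizing above, assuming the usual values of the constants involved (XFS_BLF_CHUNK = 128, XFS_BLF_SHIFT = 7, NBWORD = 32, BIT_TO_WORD_SHIFT = 5):

/*
 * A 4096-byte buffer has bp->b_length == 8 basic blocks, so:
 *
 *      chunks   = (4096 + 127) >> 7 = 32       128-byte logging chunks
 *      map_size = (32 + 32) >> 5    = 2        32-bit bitmap words
 *
 * Two bitmap words track every chunk of the buffer; adding NBWORD rather
 * than NBWORD - 1 occasionally over-allocates a spare word.
 */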
@@ -713,7 +665,7 @@ xfs_buf_item_init(
        xfs_buf_hold(bp);
        bip->bli_format.blf_type = XFS_LI_BUF;
        bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
-       bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
+       bip->bli_format.blf_len = (ushort)bp->b_length;
        bip->bli_format.blf_map_size = map_size;
 
 #ifdef XFS_TRANS_DEBUG
@@ -725,9 +677,9 @@ xfs_buf_item_init(
         * the buffer to indicate which bytes the callers have asked
         * to have logged.
         */
-       bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP);
-       memcpy(bip->bli_orig, bp->b_addr, XFS_BUF_COUNT(bp));
-       bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP);
+       bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP);
+       memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length));
+       bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP);
 #endif
 
        /*
@@ -984,20 +936,27 @@ xfs_buf_iodone_callbacks(
         * If the write was asynchronous then no one will be looking for the
         * error.  Clear the error state and write the buffer out again.
         *
-        * During sync or umount we'll write all pending buffers again
-        * synchronous, which will catch these errors if they keep hanging
-        * around.
+        * XXX: This helps against transient write errors, but we need to find
+        * a way to shut the filesystem down if the writes keep failing.
+        *
+        * In practice we'll shut the filesystem down soon, as non-transient
+        * errors tend to affect the whole device and a failing log write
+        * will make us give up.  But we really ought to do better here.
         */
        if (XFS_BUF_ISASYNC(bp)) {
+               ASSERT(bp->b_iodone != NULL);
+
+               trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
+
                xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
 
                if (!XFS_BUF_ISSTALE(bp)) {
-                       xfs_buf_delwri_queue(bp);
-                       XFS_BUF_DONE(bp);
+                       bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
+                       xfs_bdstrat_cb(bp);
+               } else {
+                       xfs_buf_relse(bp);
                }
-               ASSERT(bp->b_iodone != NULL);
-               trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
-               xfs_buf_relse(bp);
+
                return;
        }
 
@@ -1045,6 +1004,6 @@ xfs_buf_iodone(
         * Either way, AIL is useless if we're forcing a shutdown.
         */
        spin_lock(&ailp->xa_lock);
-       xfs_trans_ail_delete(ailp, lip);
+       xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
        xfs_buf_item_free(BUF_ITEM(lip));
 }
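
Both xfs_trans_ail_delete() call sites in this diff now pass a shutdown reason (SHUTDOWN_LOG_IO_ERROR above, SHUTDOWN_CORRUPT_INCORE here). A sketch of the interface this assumes, modelled on the single-item wrapper around the bulk-removal API from the same series (prototype paraphrased): if the item is no longer in the AIL, the removal path uses the supplied reason to shut the filesystem down rather than ignoring the inconsistency.

static inline void
xfs_trans_ail_delete(
        struct xfs_ail          *ailp,
        struct xfs_log_item     *lip,
        int                     shutdown_type)
{
        /* single-item convenience wrapper around the bulk removal */
        xfs_trans_ail_delete_bulk(ailp, &lip, 1, shutdown_type);
}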