git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
ocfs2: fix write() performance regression
author: Mark Fasheh <mark.fasheh@oracle.com>
Wed, 14 Nov 2007 21:33:27 +0000 (13:33 -0800)
committer: Greg Kroah-Hartman <gregkh@suse.de>
Wed, 21 Nov 2007 17:25:56 +0000 (09:25 -0800)
ocfs2: fix write() performance regression

patch 4e9563fd55ff4479f2b118d0757d121dd0cfc39c in mainline.

On file systems which don't support sparse files, ocfs2_map_page_blocks()
was reading blocks on appending writes. This caused write performance to
suffer dramatically. Fix this by detecting an appending write on a nonsparse
fs and skipping the read.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
fs/ocfs2/aops.c

index a480b09c79b916de88252129919bedbbfb10850a..3175288a10caee711cd4242325a8a66dab755840 100644 (file)
@@ -660,6 +660,27 @@ static void ocfs2_clear_page_regions(struct page *page,
        kunmap_atomic(kaddr, KM_USER0);
 }
 
+/*
+ * Nonsparse file systems fully allocate before reaching the write code,
+ * so ocfs2_write() does not tag the write as an allocating one and
+ * ocfs2_map_page_blocks() might read in blocks at the tail of the file.
+ * Return 1 if the block at block_start must be read: always on sparse
+ * file systems, otherwise only when it starts below i_size.
+ */
+static int ocfs2_should_read_blk(struct inode *inode, struct page *page,
+                                unsigned int block_start)
+{
+       u64 offset = page_offset(page) + block_start;
+
+       if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+               return 1;
+
+       if (i_size_read(inode) > offset)
+               return 1;
+
+       return 0;
+}
+
 /*
  * Some of this taken from block_prepare_write(). We already have our
  * mapping by now though, and the entire write will be allocating or
@@ -711,7 +732,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
                        if (!buffer_uptodate(bh))
                                set_buffer_uptodate(bh);
                } else if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
-                    (block_start < from || block_end > to)) {
+                          ocfs2_should_read_blk(inode, page, block_start) &&
+                          (block_start < from || block_end > to)) {
                        ll_rw_block(READ, 1, &bh);
                        *wait_bh++=bh;
                }