Btrfs: Support reading/writing on disk free ino cache
author     Li Zefan <lizf@cn.fujitsu.com>
           Wed, 20 Apr 2011 02:33:24 +0000 (10:33 +0800)
committer  Li Zefan <lizf@cn.fujitsu.com>
           Mon, 25 Apr 2011 08:46:11 +0000 (16:46 +0800)
This is similar to block group caching.

We dedicate a special inode in fs tree to save free ino cache.

The first time we create/delete a file after mount, the free ino
cache will be loaded from disk into memory. When the fs tree is committed,
the cache will be written back to disk.

To keep compatibility, we check the root generation against the generation
of the special inode when loading the cache, so the load will fail (and the
cache will be rebuilt by the caching kthread) if the btrfs filesystem was
previously mounted with an older kernel.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/free-space-cache.c
fs/btrfs/free-space-cache.h
fs/btrfs/inode-map.c
fs/btrfs/inode-map.h
fs/btrfs/inode.c
fs/btrfs/transaction.c

index c96a4e4c556613a4a55924c1ca87d2690e844acd..b20082e27a9fb4ee2e1fe95b4ef65f9cf3d04736 100644 (file)
@@ -105,6 +105,12 @@ struct btrfs_ordered_sum;
 /* For storing free space cache */
 #define BTRFS_FREE_SPACE_OBJECTID -11ULL
 
+/*
+ * The inode number assigned to the special inode for storing
+ * free ino cache
+ */
+#define BTRFS_FREE_INO_OBJECTID -12ULL
+
 /* dummy objectid represents multiple objectids */
 #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
 
@@ -1110,6 +1116,7 @@ struct btrfs_root {
        wait_queue_head_t cache_wait;
        struct btrfs_free_space_ctl *free_ino_pinned;
        u64 cache_progress;
+       struct inode *cache_inode;
 
        struct mutex log_mutex;
        wait_queue_head_t log_writer_wait;
index d02683b1ee16383e88fc2e6718d15a94297ca028..4f12c30a5470a8c081228886bd771d6e5363256c 100644 (file)
@@ -2505,6 +2505,7 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
 
 static void free_fs_root(struct btrfs_root *root)
 {
+       iput(root->cache_inode);
        WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
        if (root->anon_super.s_dev) {
                down_write(&root->anon_super.s_umount);
index a0e818cb0401d36fc44b319ff6d50614a0bedaae..95ce8da63b280d57e337f2115801df763497155a 100644 (file)
@@ -3145,7 +3145,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
        /* make sure bytes are sectorsize aligned */
        bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
-       if (root == root->fs_info->tree_root) {
+       if (root == root->fs_info->tree_root ||
+           BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
                alloc_chunk = 0;
                committed = 1;
        }
index fcbdcef6ca28386389ccf59e61e3a979db4b3d8e..7d8b6b6434034354965b3b5cc725e200dbd3ed5f 100644 (file)
@@ -209,7 +209,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
                return ret;
        }
 
-       return btrfs_update_inode(trans, root, inode);
+       ret = btrfs_update_inode(trans, root, inode);
+       return ret;
 }
 
 static int readahead_cache(struct inode *inode)
@@ -525,6 +526,7 @@ out:
                spin_lock(&block_group->lock);
                block_group->disk_cache_state = BTRFS_DC_CLEAR;
                spin_unlock(&block_group->lock);
+               ret = 0;
 
                printk(KERN_ERR "btrfs: failed to load free space cache "
                       "for block group %llu\n", block_group->key.objectid);
@@ -893,6 +895,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                spin_lock(&block_group->lock);
                block_group->disk_cache_state = BTRFS_DC_ERROR;
                spin_unlock(&block_group->lock);
+               ret = 0;
 
                printk(KERN_ERR "btrfs: failed to write free space cace "
                       "for block group %llu\n", block_group->key.objectid);
@@ -2458,3 +2461,95 @@ out:
 
        return ino;
 }
+
+struct inode *lookup_free_ino_inode(struct btrfs_root *root,
+                                   struct btrfs_path *path)
+{
+       struct inode *inode = NULL;
+
+       spin_lock(&root->cache_lock);
+       if (root->cache_inode)
+               inode = igrab(root->cache_inode);
+       spin_unlock(&root->cache_lock);
+       if (inode)
+               return inode;
+
+       inode = __lookup_free_space_inode(root, path, 0);
+       if (IS_ERR(inode))
+               return inode;
+
+       spin_lock(&root->cache_lock);
+       if (!root->fs_info->closing)
+               root->cache_inode = igrab(inode);
+       spin_unlock(&root->cache_lock);
+
+       return inode;
+}
+
+int create_free_ino_inode(struct btrfs_root *root,
+                         struct btrfs_trans_handle *trans,
+                         struct btrfs_path *path)
+{
+       return __create_free_space_inode(root, trans, path,
+                                        BTRFS_FREE_INO_OBJECTID, 0);
+}
+
+int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
+{
+       struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
+       struct btrfs_path *path;
+       struct inode *inode;
+       int ret = 0;
+       u64 root_gen = btrfs_root_generation(&root->root_item);
+
+       /*
+        * If we're unmounting then just return, since this does a search on the
+        * normal root and not the commit root and we could deadlock.
+        */
+       smp_mb();
+       if (fs_info->closing)
+               return 0;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return 0;
+
+       inode = lookup_free_ino_inode(root, path);
+       if (IS_ERR(inode))
+               goto out;
+
+       if (root_gen != BTRFS_I(inode)->generation)
+               goto out_put;
+
+       ret = __load_free_space_cache(root, inode, ctl, path, 0);
+
+       if (ret < 0)
+               printk(KERN_ERR "btrfs: failed to load free ino cache for "
+                      "root %llu\n", root->root_key.objectid);
+out_put:
+       iput(inode);
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+int btrfs_write_out_ino_cache(struct btrfs_root *root,
+                             struct btrfs_trans_handle *trans,
+                             struct btrfs_path *path)
+{
+       struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
+       struct inode *inode;
+       int ret;
+
+       inode = lookup_free_ino_inode(root, path);
+       if (IS_ERR(inode))
+               return 0;
+
+       ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
+       if (ret < 0)
+               printk(KERN_ERR "btrfs: failed to write free ino cache "
+                      "for root %llu\n", root->root_key.objectid);
+
+       iput(inode);
+       return ret;
+}
index af06e6b6ceaa041f2615bd4bc0189c3b98260578..8f2613f779edc6bfb2dfcac1e6884f2d7ad156b7 100644 (file)
@@ -65,6 +65,17 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                          struct btrfs_block_group_cache *block_group,
                          struct btrfs_path *path);
 
+struct inode *lookup_free_ino_inode(struct btrfs_root *root,
+                                   struct btrfs_path *path);
+int create_free_ino_inode(struct btrfs_root *root,
+                         struct btrfs_trans_handle *trans,
+                         struct btrfs_path *path);
+int load_free_ino_cache(struct btrfs_fs_info *fs_info,
+                       struct btrfs_root *root);
+int btrfs_write_out_ino_cache(struct btrfs_root *root,
+                             struct btrfs_trans_handle *trans,
+                             struct btrfs_path *path);
+
 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
                           u64 bytenr, u64 size);
index 5be62df90c4f6bd0e52e4ee104f0767ee8aa8137..7967e85c72f52a3af491b6b7f5be65036f1c1d51 100644 (file)
@@ -137,6 +137,7 @@ out:
 static void start_caching(struct btrfs_root *root)
 {
        struct task_struct *tsk;
+       int ret;
 
        spin_lock(&root->cache_lock);
        if (root->cached != BTRFS_CACHE_NO) {
@@ -147,6 +148,14 @@ static void start_caching(struct btrfs_root *root)
        root->cached = BTRFS_CACHE_STARTED;
        spin_unlock(&root->cache_lock);
 
+       ret = load_free_ino_cache(root->fs_info, root);
+       if (ret == 1) {
+               spin_lock(&root->cache_lock);
+               root->cached = BTRFS_CACHE_FINISHED;
+               spin_unlock(&root->cache_lock);
+               return;
+       }
+
        tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
                          root->root_key.objectid);
        BUG_ON(IS_ERR(tsk));
@@ -352,6 +361,84 @@ void btrfs_init_free_ino_ctl(struct btrfs_root *root)
        pinned->op = &pinned_free_ino_op;
 }
 
+int btrfs_save_ino_cache(struct btrfs_root *root,
+                        struct btrfs_trans_handle *trans)
+{
+       struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
+       struct btrfs_path *path;
+       struct inode *inode;
+       u64 alloc_hint = 0;
+       int ret;
+       int prealloc;
+       bool retry = false;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+again:
+       inode = lookup_free_ino_inode(root, path);
+       if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
+               ret = PTR_ERR(inode);
+               goto out;
+       }
+
+       if (IS_ERR(inode)) {
+               BUG_ON(retry);
+               retry = true;
+
+               ret = create_free_ino_inode(root, trans, path);
+               if (ret)
+                       goto out;
+               goto again;
+       }
+
+       BTRFS_I(inode)->generation = 0;
+       ret = btrfs_update_inode(trans, root, inode);
+       WARN_ON(ret);
+
+       if (i_size_read(inode) > 0) {
+               ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
+               if (ret)
+                       goto out_put;
+       }
+
+       spin_lock(&root->cache_lock);
+       if (root->cached != BTRFS_CACHE_FINISHED) {
+               ret = -1;
+               spin_unlock(&root->cache_lock);
+               goto out_put;
+       }
+       spin_unlock(&root->cache_lock);
+
+       spin_lock(&ctl->tree_lock);
+       prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
+       prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE);
+       prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE;
+       spin_unlock(&ctl->tree_lock);
+
+       /* Just to make sure we have enough space */
+       prealloc += 8 * PAGE_CACHE_SIZE;
+
+       ret = btrfs_check_data_free_space(inode, prealloc);
+       if (ret)
+               goto out_put;
+
+       ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
+                                             prealloc, prealloc, &alloc_hint);
+       if (ret)
+               goto out_put;
+       btrfs_free_reserved_data_space(inode, prealloc);
+
+out_put:
+       iput(inode);
+out:
+       if (ret == 0)
+               ret = btrfs_write_out_ino_cache(root, trans, path);
+
+       btrfs_free_path(path);
+       return ret;
+}
+
 static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
 {
        struct btrfs_path *path;
index eb918451b492c410bfc2433f44d2da29ece28f5e..ddb347bfee232ec26543055fbf408bfb92f8028f 100644 (file)
@@ -5,6 +5,8 @@ void btrfs_init_free_ino_ctl(struct btrfs_root *root);
 void btrfs_unpin_free_ino(struct btrfs_root *root);
 void btrfs_return_ino(struct btrfs_root *root, u64 objectid);
 int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid);
+int btrfs_save_ino_cache(struct btrfs_root *root,
+                        struct btrfs_trans_handle *trans);
 
 int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
 
index adec22884a3ec3b794e23cf74ba6f1ce86201c7e..b78d3ab789ca2fc174b6cea6e8be6ad8ee6ab6a0 100644 (file)
@@ -745,6 +745,15 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
        return alloc_hint;
 }
 
+static inline bool is_free_space_inode(struct btrfs_root *root,
+                                      struct inode *inode)
+{
+       if (root == root->fs_info->tree_root ||
+           BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
+               return true;
+       return false;
+}
+
 /*
  * when extent_io.c finds a delayed allocation range in the file,
  * the call backs end up in this code.  The basic idea is to
@@ -777,7 +786,7 @@ static noinline int cow_file_range(struct inode *inode,
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        int ret = 0;
 
-       BUG_ON(root == root->fs_info->tree_root);
+       BUG_ON(is_free_space_inode(root, inode));
        trans = btrfs_join_transaction(root, 1);
        BUG_ON(IS_ERR(trans));
        btrfs_set_trans_block_group(trans, inode);
@@ -1048,17 +1057,18 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        int type;
        int nocow;
        int check_prev = 1;
-       bool nolock = false;
+       bool nolock;
        u64 ino = btrfs_ino(inode);
 
        path = btrfs_alloc_path();
        BUG_ON(!path);
-       if (root == root->fs_info->tree_root) {
-               nolock = true;
+
+       nolock = is_free_space_inode(root, inode);
+
+       if (nolock)
                trans = btrfs_join_transaction_nolock(root, 1);
-       } else {
+       else
                trans = btrfs_join_transaction(root, 1);
-       }
        BUG_ON(IS_ERR(trans));
 
        cow_start = (u64)-1;
@@ -1316,8 +1326,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
        if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                u64 len = state->end + 1 - state->start;
-               int do_list = (root->root_key.objectid !=
-                              BTRFS_ROOT_TREE_OBJECTID);
+               bool do_list = !is_free_space_inode(root, inode);
 
                if (*bits & EXTENT_FIRST_DELALLOC)
                        *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1350,8 +1359,7 @@ static int btrfs_clear_bit_hook(struct inode *inode,
        if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                u64 len = state->end + 1 - state->start;
-               int do_list = (root->root_key.objectid !=
-                              BTRFS_ROOT_TREE_OBJECTID);
+               bool do_list = !is_free_space_inode(root, inode);
 
                if (*bits & EXTENT_FIRST_DELALLOC)
                        *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1458,7 +1466,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
-       if (root == root->fs_info->tree_root)
+       if (is_free_space_inode(root, inode))
                ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
        else
                ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
@@ -1701,7 +1709,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        struct extent_state *cached_state = NULL;
        int compress_type = 0;
        int ret;
-       bool nolock = false;
+       bool nolock;
 
        ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
                                             end - start + 1);
@@ -1709,7 +1717,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                return 0;
        BUG_ON(!ordered_extent);
 
-       nolock = (root == root->fs_info->tree_root);
+       nolock = is_free_space_inode(root, inode);
 
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                BUG_ON(!list_empty(&ordered_extent->list));
@@ -3473,7 +3481,9 @@ delete:
 
                if (path->slots[0] == 0 ||
                    path->slots[0] != pending_del_slot) {
-                       if (root->ref_cows) {
+                       if (root->ref_cows &&
+                           BTRFS_I(inode)->location.objectid !=
+                                               BTRFS_FREE_INO_OBJECTID) {
                                err = -EAGAIN;
                                goto out;
                        }
@@ -3765,7 +3775,7 @@ void btrfs_evict_inode(struct inode *inode)
 
        truncate_inode_pages(&inode->i_data, 0);
        if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
-                              root == root->fs_info->tree_root))
+                              is_free_space_inode(root, inode)))
                goto no_delete;
 
        if (is_bad_inode(inode)) {
@@ -4382,7 +4392,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
                return 0;
 
        smp_mb();
-       nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
+       if (root->fs_info->closing && is_free_space_inode(root, inode))
+               nolock = true;
 
        if (wbc->sync_mode == WB_SYNC_ALL) {
                if (nolock)
@@ -6900,7 +6911,7 @@ int btrfs_drop_inode(struct inode *inode)
        struct btrfs_root *root = BTRFS_I(inode)->root;
 
        if (btrfs_root_refs(&root->root_item) == 0 &&
-           root != root->fs_info->tree_root)
+           !is_free_space_inode(root, inode))
                return 1;
        else
                return generic_drop_inode(inode);
index f4c1184b7f1a9601bc64baf57364ebda4a38023a..4d1dbcbbaf41e5f3f166852473be5c5507868796 100644 (file)
@@ -761,6 +761,8 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
                        btrfs_update_reloc_root(trans, root);
                        btrfs_orphan_commit_root(trans, root);
 
+                       btrfs_save_ino_cache(root, trans);
+
                        if (root->commit_root != root->node) {
                                mutex_lock(&root->fs_commit_mutex);
                                switch_commit_root(root);