Merge branch 'for-3.18/core' of git://git.kernel.dk/linux-block

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a1d36e62179c528041f292675e7452102863de45..1ad0f47ac850bce13c5bc369416eff171604768c 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -72,21 +72,41 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root);
 static void btrfs_error_commit_super(struct btrfs_root *root);
 
 /*
- * end_io_wq structs are used to do processing in task context when an IO is
- * complete.  This is used during reads to verify checksums, and it is used
+ * btrfs_end_io_wq structs are used to do processing in task context when an IO
+ * is complete.  This is used during reads to verify checksums, and it is used
  * by writes to insert metadata for new file extents after IO is complete.
  */
-struct end_io_wq {
+struct btrfs_end_io_wq {
        struct bio *bio;
        bio_end_io_t *end_io;
        void *private;
        struct btrfs_fs_info *info;
        int error;
-       int metadata;
+       enum btrfs_wq_endio_type metadata;
        struct list_head list;
        struct btrfs_work work;
 };
 
+static struct kmem_cache *btrfs_end_io_wq_cache;
+
+int __init btrfs_end_io_wq_init(void)
+{
+       btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
+                                       sizeof(struct btrfs_end_io_wq),
+                                       0,
+                                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+                                       NULL);
+       if (!btrfs_end_io_wq_cache)
+               return -ENOMEM;
+       return 0;
+}
+
+void btrfs_end_io_wq_exit(void)
+{
+       if (btrfs_end_io_wq_cache)
+               kmem_cache_destroy(btrfs_end_io_wq_cache);
+}
+
 /*
  * async submit bios are used to offload expensive checksumming
  * onto the worker threads.  They checksum file and metadata bios
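The new cache comes with explicit init/exit hooks rather than being created lazily, so it has to be wired into module setup and teardown. A minimal sketch of the expected call sites, assuming the usual init_btrfs_fs()/exit_btrfs_fs() entry points in fs/btrfs/super.c (everything besides the two new calls is illustrative, not part of this hunk):

/*
 * Sketch only: where btrfs_end_io_wq_init()/btrfs_end_io_wq_exit() would
 * typically be called from.  The function names and surrounding steps are
 * assumptions about fs/btrfs/super.c.
 */
static int __init init_btrfs_fs(void)
{
	int err;

	err = btrfs_end_io_wq_init();
	if (err)
		return err;

	/* ... set up the remaining caches and register the filesystem ... */
	return 0;
}

static void __exit exit_btrfs_fs(void)
{
	/* ... unregister the filesystem and free the other caches ... */
	btrfs_end_io_wq_exit();
}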
@@ -327,8 +347,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 {
        struct extent_state *cached_state = NULL;
        int ret;
-       bool need_lock = (current->journal_info ==
-                         (void *)BTRFS_SEND_TRANS_STUB);
+       bool need_lock = (current->journal_info == BTRFS_SEND_TRANS_STUB);
 
        if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
                return 0;
@@ -348,9 +367,9 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
                ret = 0;
                goto out;
        }
-       printk_ratelimited("parent transid verify failed on %llu wanted %llu "
-                      "found %llu\n",
-                      eb->start, parent_transid, btrfs_header_generation(eb));
+       printk_ratelimited(KERN_INFO "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
+                       eb->fs_info->sb->s_id, eb->start,
+                       parent_transid, btrfs_header_generation(eb));
        ret = 1;
 
        /*
@@ -607,22 +626,22 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
                goto err;
 
        eb->read_mirror = mirror;
-       if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
+       if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) {
                ret = -EIO;
                goto err;
        }
 
        found_start = btrfs_header_bytenr(eb);
        if (found_start != eb->start) {
-               printk_ratelimited(KERN_INFO "BTRFS: bad tree block start "
+               printk_ratelimited(KERN_INFO "BTRFS (device %s): bad tree block start "
                               "%llu %llu\n",
-                              found_start, eb->start);
+                              eb->fs_info->sb->s_id, found_start, eb->start);
                ret = -EIO;
                goto err;
        }
        if (check_tree_block_fsid(root, eb)) {
-               printk_ratelimited(KERN_INFO "BTRFS: bad fsid on block %llu\n",
-                              eb->start);
+               printk_ratelimited(KERN_INFO "BTRFS (device %s): bad fsid on block %llu\n",
+                              eb->fs_info->sb->s_id, eb->start);
                ret = -EIO;
                goto err;
        }
@@ -680,7 +699,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
        struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
 
        eb = (struct extent_buffer *)page->private;
-       set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
+       set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
        eb->read_mirror = failed_mirror;
        atomic_dec(&eb->io_pages);
        if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
@@ -690,7 +709,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
 
 static void end_workqueue_bio(struct bio *bio, int err)
 {
-       struct end_io_wq *end_io_wq = bio->bi_private;
+       struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
        struct btrfs_fs_info *fs_info;
        struct btrfs_workqueue *wq;
        btrfs_work_func_t func;
@@ -713,7 +732,11 @@ static void end_workqueue_bio(struct bio *bio, int err)
                        func = btrfs_endio_write_helper;
                }
        } else {
-               if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
+               if (unlikely(end_io_wq->metadata ==
+                            BTRFS_WQ_ENDIO_DIO_REPAIR)) {
+                       wq = fs_info->endio_repair_workers;
+                       func = btrfs_endio_repair_helper;
+               } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
                        wq = fs_info->endio_raid56_workers;
                        func = btrfs_endio_raid56_helper;
                } else if (end_io_wq->metadata) {
@@ -729,19 +752,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
        btrfs_queue_work(wq, &end_io_wq->work);
 }
 
-/*
- * For the metadata arg you want
- *
- * 0 - if data
- * 1 - if normal metadta
- * 2 - if writing to the free space cache area
- * 3 - raid parity work
- */
 int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
-                       int metadata)
+                       enum btrfs_wq_endio_type metadata)
 {
-       struct end_io_wq *end_io_wq;
-       end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
+       struct btrfs_end_io_wq *end_io_wq;
+
+       end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
        if (!end_io_wq)
                return -ENOMEM;
 
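The metadata argument is now a proper type instead of a bare int. Based on the numeric comment removed above (0 data, 1 normal metadata, 2 free space cache, 3 raid parity) plus the new direct-IO repair case, the enum in fs/btrfs/disk-io.h presumably looks roughly like this (the DATA and FREE_SPACE names are assumed; only METADATA, RAID56 and DIO_REPAIR appear in this diff):

/* Assumed definition, fs/btrfs/disk-io.h */
enum btrfs_wq_endio_type {
	BTRFS_WQ_ENDIO_DATA = 0,
	BTRFS_WQ_ENDIO_METADATA = 1,
	BTRFS_WQ_ENDIO_FREE_SPACE = 2,
	BTRFS_WQ_ENDIO_RAID56 = 3,
	BTRFS_WQ_ENDIO_DIO_REPAIR = 4,
};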
@@ -925,7 +941,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                 * can happen in the async kernel threads
                 */
                ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
-                                         bio, 1);
+                                         bio, BTRFS_WQ_ENDIO_METADATA);
                if (ret)
                        goto out_w_error;
                ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
@@ -1057,20 +1073,17 @@ static const struct address_space_operations btree_aops = {
        .set_page_dirty = btree_set_page_dirty,
 };
 
-int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
-                        u64 parent_transid)
+void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
 {
        struct extent_buffer *buf = NULL;
        struct inode *btree_inode = root->fs_info->btree_inode;
-       int ret = 0;
 
        buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
        if (!buf)
-               return 0;
+               return;
        read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
                                 buf, 0, WAIT_NONE, btree_get_extent, 0);
        free_extent_buffer(buf);
-       return ret;
 }
 
 int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
@@ -1106,7 +1119,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
 }
 
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
-                                           u64 bytenr, u32 blocksize)
+                                           u64 bytenr)
 {
        return find_extent_buffer(root->fs_info, bytenr);
 }
@@ -1114,11 +1127,9 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
                                                 u64 bytenr, u32 blocksize)
 {
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-       if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+       if (btrfs_test_is_dummy_root(root))
                return alloc_test_extent_buffer(root->fs_info, bytenr,
                                                blocksize);
-#endif
        return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
 }
 
@@ -1136,12 +1147,12 @@ int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
 }
 
 struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
-                                     u32 blocksize, u64 parent_transid)
+                                     u64 parent_transid)
 {
        struct extent_buffer *buf = NULL;
        int ret;
 
-       buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+       buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize);
        if (!buf)
                return NULL;
 
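read_tree_block() now takes the block size from root->nodesize instead of a caller-supplied value. A hedged before/after for a typical call site (variable names are illustrative; the same pattern shows up in the btrfs_read_tree_root() and open_ctree() hunks below):

/* Before: every caller derived the size from the tree level. */
blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
eb = read_tree_block(root, bytenr, blocksize, generation);

/* After: the size is implied by root->nodesize. */
eb = read_tree_block(root, bytenr, generation);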
@@ -1183,7 +1194,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void)
        if (!writers)
                return ERR_PTR(-ENOMEM);
 
-       ret = percpu_counter_init(&writers->counter, 0);
+       ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL);
        if (ret < 0) {
                kfree(writers);
                return ERR_PTR(ret);
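The extra GFP_KERNEL argument matches the percpu_counter API change pulled in through the block-core branch this merge is based on; the signature the new call sites assume is roughly the following, with the gfp flag used for the internal per-CPU allocation:

/* include/linux/percpu_counter.h, as assumed by the calls in this diff */
int percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp);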
@@ -1200,16 +1211,14 @@ btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers)
        kfree(writers);
 }
 
-static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
-                        u32 stripesize, struct btrfs_root *root,
-                        struct btrfs_fs_info *fs_info,
+static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
+                        struct btrfs_root *root, struct btrfs_fs_info *fs_info,
                         u64 objectid)
 {
        root->node = NULL;
        root->commit_root = NULL;
        root->sectorsize = sectorsize;
        root->nodesize = nodesize;
-       root->leafsize = leafsize;
        root->stripesize = stripesize;
        root->state = 0;
        root->orphan_cleanup_state = 0;
@@ -1295,7 +1304,7 @@ struct btrfs_root *btrfs_alloc_dummy_root(void)
        root = btrfs_alloc_root(NULL);
        if (!root)
                return ERR_PTR(-ENOMEM);
-       __setup_root(4096, 4096, 4096, 4096, root, NULL, 1);
+       __setup_root(4096, 4096, 4096, root, NULL, 1);
        set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state);
        root->alloc_bytenr = 0;
 
@@ -1318,15 +1327,13 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
        if (!root)
                return ERR_PTR(-ENOMEM);
 
-       __setup_root(tree_root->nodesize, tree_root->leafsize,
-                    tree_root->sectorsize, tree_root->stripesize,
-                    root, fs_info, objectid);
+       __setup_root(tree_root->nodesize, tree_root->sectorsize,
+               tree_root->stripesize, root, fs_info, objectid);
        root->root_key.objectid = objectid;
        root->root_key.type = BTRFS_ROOT_ITEM_KEY;
        root->root_key.offset = 0;
 
-       leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
-                                     0, objectid, NULL, 0, 0, 0);
+       leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0);
        if (IS_ERR(leaf)) {
                ret = PTR_ERR(leaf);
                leaf = NULL;
@@ -1396,9 +1403,9 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
        if (!root)
                return ERR_PTR(-ENOMEM);
 
-       __setup_root(tree_root->nodesize, tree_root->leafsize,
-                    tree_root->sectorsize, tree_root->stripesize,
-                    root, fs_info, BTRFS_TREE_LOG_OBJECTID);
+       __setup_root(tree_root->nodesize, tree_root->sectorsize,
+                    tree_root->stripesize, root, fs_info,
+                    BTRFS_TREE_LOG_OBJECTID);
 
        root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
        root->root_key.type = BTRFS_ROOT_ITEM_KEY;
@@ -1413,9 +1420,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
         * updated (along with back refs to the log tree).
         */
 
-       leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
-                                     BTRFS_TREE_LOG_OBJECTID, NULL,
-                                     0, 0, 0);
+       leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID,
+                       NULL, 0, 0, 0);
        if (IS_ERR(leaf)) {
                kfree(root);
                return ERR_CAST(leaf);
@@ -1465,7 +1471,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
        btrfs_set_stack_inode_generation(inode_item, 1);
        btrfs_set_stack_inode_size(inode_item, 3);
        btrfs_set_stack_inode_nlink(inode_item, 1);
-       btrfs_set_stack_inode_nbytes(inode_item, root->leafsize);
+       btrfs_set_stack_inode_nbytes(inode_item, root->nodesize);
        btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
 
        btrfs_set_root_node(&log_root->root_item, log_root->node);
@@ -1485,7 +1491,6 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
        struct btrfs_fs_info *fs_info = tree_root->fs_info;
        struct btrfs_path *path;
        u64 generation;
-       u32 blocksize;
        int ret;
 
        path = btrfs_alloc_path();
@@ -1498,9 +1503,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
                goto alloc_fail;
        }
 
-       __setup_root(tree_root->nodesize, tree_root->leafsize,
-                    tree_root->sectorsize, tree_root->stripesize,
-                    root, fs_info, key->objectid);
+       __setup_root(tree_root->nodesize, tree_root->sectorsize,
+               tree_root->stripesize, root, fs_info, key->objectid);
 
        ret = btrfs_find_root(tree_root, key, path,
                              &root->root_item, &root->root_key);
@@ -1511,9 +1515,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
        }
 
        generation = btrfs_root_generation(&root->root_item);
-       blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
        root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
-                                    blocksize, generation);
+                                    generation);
        if (!root->node) {
                ret = -ENOMEM;
                goto find_fail;
@@ -1573,8 +1576,8 @@ int btrfs_init_fs_root(struct btrfs_root *root)
        root->subv_writers = writers;
 
        btrfs_init_free_ino_ctl(root);
-       spin_lock_init(&root->cache_lock);
-       init_waitqueue_head(&root->cache_wait);
+       spin_lock_init(&root->ino_cache_lock);
+       init_waitqueue_head(&root->ino_cache_wait);
 
        ret = get_anon_bdev(&root->anon_dev);
        if (ret)
@@ -1699,7 +1702,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
                if (!device->bdev)
                        continue;
                bdi = blk_get_backing_dev_info(device->bdev);
-               if (bdi && bdi_congested(bdi, bdi_bits)) {
+               if (bdi_congested(bdi, bdi_bits)) {
                        ret = 1;
                        break;
                }
@@ -1708,10 +1711,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
        return ret;
 }
 
-/*
- * If this fails, caller must call bdi_destroy() to get rid of the
- * bdi again.
- */
 static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 {
        int err;
@@ -1734,16 +1733,16 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 static void end_workqueue_fn(struct btrfs_work *work)
 {
        struct bio *bio;
-       struct end_io_wq *end_io_wq;
+       struct btrfs_end_io_wq *end_io_wq;
        int error;
 
-       end_io_wq = container_of(work, struct end_io_wq, work);
+       end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
        bio = end_io_wq->bio;
 
        error = end_io_wq->error;
        bio->bi_private = end_io_wq->private;
        bio->bi_end_io = end_io_wq->end_io;
-       kfree(end_io_wq);
+       kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
        bio_endio_nodec(bio, error);
 }
 
@@ -1772,6 +1771,7 @@ static int cleaner_kthread(void *arg)
                }
 
                btrfs_run_delayed_iputs(root);
+               btrfs_delete_unused_bgs(root->fs_info);
                again = btrfs_clean_one_deleted_snapshot(root);
                mutex_unlock(&root->fs_info->cleaner_mutex);
 
@@ -2063,6 +2063,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
        btrfs_destroy_workqueue(fs_info->endio_workers);
        btrfs_destroy_workqueue(fs_info->endio_meta_workers);
        btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
+       btrfs_destroy_workqueue(fs_info->endio_repair_workers);
        btrfs_destroy_workqueue(fs_info->rmw_workers);
        btrfs_destroy_workqueue(fs_info->endio_meta_write_workers);
        btrfs_destroy_workqueue(fs_info->endio_write_workers);
@@ -2143,8 +2144,6 @@ int open_ctree(struct super_block *sb,
 {
        u32 sectorsize;
        u32 nodesize;
-       u32 leafsize;
-       u32 blocksize;
        u32 stripesize;
        u64 generation;
        u64 features;
@@ -2188,7 +2187,7 @@ int open_ctree(struct super_block *sb,
                goto fail_srcu;
        }
 
-       ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0);
+       ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
        if (ret) {
                err = ret;
                goto fail_bdi;
@@ -2196,13 +2195,13 @@ int open_ctree(struct super_block *sb,
        fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
                                        (1 + ilog2(nr_cpu_ids));
 
-       ret = percpu_counter_init(&fs_info->delalloc_bytes, 0);
+       ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL);
        if (ret) {
                err = ret;
                goto fail_dirty_metadata_bytes;
        }
 
-       ret = percpu_counter_init(&fs_info->bio_counter, 0);
+       ret = percpu_counter_init(&fs_info->bio_counter, 0, GFP_KERNEL);
        if (ret) {
                err = ret;
                goto fail_delalloc_bytes;
@@ -2233,6 +2232,7 @@ int open_ctree(struct super_block *sb,
        spin_lock_init(&fs_info->super_lock);
        spin_lock_init(&fs_info->qgroup_op_lock);
        spin_lock_init(&fs_info->buffer_lock);
+       spin_lock_init(&fs_info->unused_bgs_lock);
        rwlock_init(&fs_info->tree_mod_log_lock);
        mutex_init(&fs_info->reloc_mutex);
        mutex_init(&fs_info->delalloc_root_mutex);
@@ -2242,6 +2242,7 @@ int open_ctree(struct super_block *sb,
        INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
        INIT_LIST_HEAD(&fs_info->space_info);
        INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
+       INIT_LIST_HEAD(&fs_info->unused_bgs);
        btrfs_mapping_init(&fs_info->mapping_tree);
        btrfs_init_block_rsv(&fs_info->global_block_rsv,
                             BTRFS_BLOCK_RSV_GLOBAL);
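The unused_bgs list and lock initialized in these open_ctree() hunks are what the cleaner-thread hunk above starts draining via btrfs_delete_unused_bgs(). The members added to struct btrfs_fs_info are presumably along these lines (comment wording assumed):

/* Assumed new members in struct btrfs_fs_info (fs/btrfs/ctree.h) */
spinlock_t unused_bgs_lock;
struct list_head unused_bgs;	/* block groups that went empty, queued for removal by the cleaner */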
@@ -2260,7 +2261,7 @@ int open_ctree(struct super_block *sb,
        atomic_set(&fs_info->qgroup_op_seq, 0);
        atomic64_set(&fs_info->tree_mod_seq, 0);
        fs_info->sb = sb;
-       fs_info->max_inline = 8192 * 1024;
+       fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
        fs_info->metadata_ratio = 0;
        fs_info->defrag_inodes = RB_ROOT;
        fs_info->free_chunk_space = 0;
@@ -2389,7 +2390,7 @@ int open_ctree(struct super_block *sb,
                goto fail_alloc;
        }
 
-       __setup_root(4096, 4096, 4096, 4096, tree_root,
+       __setup_root(4096, 4096, 4096, tree_root,
                     fs_info, BTRFS_ROOT_TREE_OBJECTID);
 
        invalidate_bdev(fs_devices->latest_bdev);
@@ -2469,19 +2470,22 @@ int open_ctree(struct super_block *sb,
                goto fail_alloc;
        }
 
-       if (btrfs_super_leafsize(disk_super) !=
+       /*
+        * Leafsize and nodesize were always equal; this is only a sanity check.
+        */
+       if (le32_to_cpu(disk_super->__unused_leafsize) !=
            btrfs_super_nodesize(disk_super)) {
                printk(KERN_ERR "BTRFS: couldn't mount because metadata "
                       "blocksizes don't match.  node %d leaf %d\n",
                       btrfs_super_nodesize(disk_super),
-                      btrfs_super_leafsize(disk_super));
+                      le32_to_cpu(disk_super->__unused_leafsize));
                err = -EINVAL;
                goto fail_alloc;
        }
-       if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
+       if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
                printk(KERN_ERR "BTRFS: couldn't mount because metadata "
                       "blocksize (%d) was too large\n",
-                      btrfs_super_leafsize(disk_super));
+                      btrfs_super_nodesize(disk_super));
                err = -EINVAL;
                goto fail_alloc;
        }
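With the btrfs_super_leafsize() accessor gone, the sanity check reads the raw little-endian field directly, which implies the superblock member itself was renamed to mark it as dead, roughly:

/* Assumed change in struct btrfs_super_block (fs/btrfs/ctree.h) */
__le32 __unused_leafsize;	/* formerly 'leafsize'; still written equal to nodesize on disk */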
@@ -2498,17 +2502,16 @@ int open_ctree(struct super_block *sb,
         * flag our filesystem as having big metadata blocks if
         * they are bigger than the page size
         */
-       if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) {
+       if (btrfs_super_nodesize(disk_super) > PAGE_CACHE_SIZE) {
                if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
                        printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n");
                features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
        }
 
        nodesize = btrfs_super_nodesize(disk_super);
-       leafsize = btrfs_super_leafsize(disk_super);
        sectorsize = btrfs_super_sectorsize(disk_super);
        stripesize = btrfs_super_stripesize(disk_super);
-       fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids));
+       fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
        fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
 
        /*
@@ -2516,7 +2519,7 @@ int open_ctree(struct super_block *sb,
         * extent buffers for the same range.  It leads to corruptions
         */
        if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
-           (sectorsize != leafsize)) {
+           (sectorsize != nodesize)) {
                printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes "
                                "are not allowed for mixed block groups on %s\n",
                                sb->s_id);
@@ -2579,6 +2582,8 @@ int open_ctree(struct super_block *sb,
                btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
        fs_info->endio_raid56_workers =
                btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
+       fs_info->endio_repair_workers =
+               btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
        fs_info->rmw_workers =
                btrfs_alloc_workqueue("rmw", flags, max_active, 2);
        fs_info->endio_write_workers =
@@ -2600,11 +2605,12 @@ int open_ctree(struct super_block *sb,
              fs_info->submit_workers && fs_info->flush_workers &&
              fs_info->endio_workers && fs_info->endio_meta_workers &&
              fs_info->endio_meta_write_workers &&
+             fs_info->endio_repair_workers &&
              fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
              fs_info->endio_freespace_worker && fs_info->rmw_workers &&
              fs_info->caching_workers && fs_info->readahead_workers &&
              fs_info->fixup_workers && fs_info->delayed_workers &&
-             fs_info->fixup_workers && fs_info->extent_workers &&
+             fs_info->extent_workers &&
              fs_info->qgroup_rescan_workers)) {
                err = -ENOMEM;
                goto fail_sb_buffer;
@@ -2615,7 +2621,6 @@ int open_ctree(struct super_block *sb,
                                    4 * 1024 * 1024 / PAGE_CACHE_SIZE);
 
        tree_root->nodesize = nodesize;
-       tree_root->leafsize = leafsize;
        tree_root->sectorsize = sectorsize;
        tree_root->stripesize = stripesize;
 
@@ -2642,16 +2647,14 @@ int open_ctree(struct super_block *sb,
                goto fail_sb_buffer;
        }
 
-       blocksize = btrfs_level_size(tree_root,
-                                    btrfs_super_chunk_root_level(disk_super));
        generation = btrfs_super_chunk_root_generation(disk_super);
 
-       __setup_root(nodesize, leafsize, sectorsize, stripesize,
-                    chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
+       __setup_root(nodesize, sectorsize, stripesize, chunk_root,
+                    fs_info, BTRFS_CHUNK_TREE_OBJECTID);
 
        chunk_root->node = read_tree_block(chunk_root,
                                           btrfs_super_chunk_root(disk_super),
-                                          blocksize, generation);
+                                          generation);
        if (!chunk_root->node ||
            !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
                printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
@@ -2684,13 +2687,11 @@ int open_ctree(struct super_block *sb,
        }
 
 retry_root_backup:
-       blocksize = btrfs_level_size(tree_root,
-                                    btrfs_super_root_level(disk_super));
        generation = btrfs_super_generation(disk_super);
 
        tree_root->node = read_tree_block(tree_root,
                                          btrfs_super_root(disk_super),
-                                         blocksize, generation);
+                                         generation);
        if (!tree_root->node ||
            !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
                printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
@@ -2859,9 +2860,6 @@ retry_root_backup:
                        err = -EIO;
                        goto fail_qgroup;
                }
-               blocksize =
-                    btrfs_level_size(tree_root,
-                                     btrfs_super_log_root_level(disk_super));
 
                log_tree_root = btrfs_alloc_root(fs_info);
                if (!log_tree_root) {
@@ -2869,11 +2867,10 @@ retry_root_backup:
                        goto fail_qgroup;
                }
 
-               __setup_root(nodesize, leafsize, sectorsize, stripesize,
+               __setup_root(nodesize, sectorsize, stripesize,
                             log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
 
                log_tree_root->node = read_tree_block(tree_root, bytenr,
-                                                     blocksize,
                                                      generation + 1);
                if (!log_tree_root->node ||
                    !extent_buffer_uptodate(log_tree_root->node)) {
@@ -2980,6 +2977,8 @@ retry_root_backup:
                fs_info->update_uuid_tree_gen = 1;
        }
 
+       fs_info->open = 1;
+
        return 0;
 
 fail_qgroup:
@@ -3139,7 +3138,8 @@ static int write_dev_supers(struct btrfs_device *device,
 
        for (i = 0; i < max_mirrors; i++) {
                bytenr = btrfs_sb_offset(i);
-               if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
+               if (bytenr + BTRFS_SUPER_INFO_SIZE >=
+                   device->commit_total_bytes)
                        break;
 
                if (wait) {
@@ -3456,8 +3456,9 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
                btrfs_set_stack_device_type(dev_item, dev->type);
                btrfs_set_stack_device_id(dev_item, dev->devid);
                btrfs_set_stack_device_total_bytes(dev_item,
-                                                  dev->disk_total_bytes);
-               btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
+                                                  dev->commit_total_bytes);
+               btrfs_set_stack_device_bytes_used(dev_item,
+                                                 dev->commit_bytes_used);
                btrfs_set_stack_device_io_align(dev_item, dev->io_align);
                btrfs_set_stack_device_io_width(dev_item, dev->io_width);
                btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
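Both superblock-writing hunks above switch the device sizes copied into the super block from the live fields to commit_* variants, the point presumably being that the written super block should reflect the state of the last committed transaction rather than values still changing in the current one. The assumed new members of struct btrfs_device are roughly:

/* Assumed new members in struct btrfs_device (fs/btrfs/volumes.h) */
u64 commit_total_bytes;	/* device size as seen by the committed transaction */
u64 commit_bytes_used;	/* bytes used as seen by the committed transaction */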
@@ -3532,7 +3533,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
 
 static void free_fs_root(struct btrfs_root *root)
 {
-       iput(root->cache_inode);
+       iput(root->ino_cache_inode);
        WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
        btrfs_free_block_rsv(root, root->orphan_block_rsv);
        root->orphan_block_rsv = NULL;
@@ -3623,7 +3624,7 @@ int btrfs_commit_super(struct btrfs_root *root)
        return btrfs_commit_transaction(trans, root);
 }
 
-int close_ctree(struct btrfs_root *root)
+void close_ctree(struct btrfs_root *root)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        int ret;
@@ -3689,6 +3690,7 @@ int close_ctree(struct btrfs_root *root)
        invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
        btrfs_stop_all_workers(fs_info);
 
+       fs_info->open = 0;
        free_root_pointers(fs_info, 1);
 
        iput(fs_info->btree_inode);
@@ -3711,8 +3713,6 @@ int close_ctree(struct btrfs_root *root)
 
        btrfs_free_block_rsv(root, root->orphan_block_rsv);
        root->orphan_block_rsv = NULL;
-
-       return 0;
 }
 
 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
@@ -3814,10 +3814,73 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
                              int read_only)
 {
+       struct btrfs_super_block *sb = fs_info->super_copy;
+       int ret = 0;
+
+       if (sb->root_level > BTRFS_MAX_LEVEL) {
+               printk(KERN_ERR "BTRFS: tree_root level too big: %d > %d\n",
+                               sb->root_level, BTRFS_MAX_LEVEL);
+               ret = -EINVAL;
+       }
+       if (sb->chunk_root_level > BTRFS_MAX_LEVEL) {
+               printk(KERN_ERR "BTRFS: chunk_root level too big: %d > %d\n",
+                               sb->chunk_root_level, BTRFS_MAX_LEVEL);
+               ret = -EINVAL;
+       }
+       if (sb->log_root_level > BTRFS_MAX_LEVEL) {
+               printk(KERN_ERR "BTRFS: log_root level too big: %d > %d\n",
+                               sb->log_root_level, BTRFS_MAX_LEVEL);
+               ret = -EINVAL;
+       }
+
        /*
-        * Placeholder for checks
+        * The common minimum; we don't know if we can trust the nodesize/sectorsize
+        * items yet, as they'll be verified later. Issue just a warning.
         */
-       return 0;
+       if (!IS_ALIGNED(sb->root, 4096))
+               printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
+                               sb->root);
+       if (!IS_ALIGNED(sb->chunk_root, 4096))
+               printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n",
+                               sb->chunk_root);
+       if (!IS_ALIGNED(sb->log_root, 4096))
+               printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
+                               sb->log_root);
+
+       if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
+               printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n",
+                               fs_info->fsid, sb->dev_item.fsid);
+               ret = -EINVAL;
+       }
+
+       /*
+        * A hint to catch really bogus numbers or bitflips; more exact checks
+        * are done later.
+        */
+       if (sb->num_devices > (1UL << 31))
+               printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
+                               sb->num_devices);
+
+       if (sb->bytenr != BTRFS_SUPER_INFO_OFFSET) {
+               printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n",
+                               sb->bytenr, BTRFS_SUPER_INFO_OFFSET);
+               ret = -EINVAL;
+       }
+
+       /*
+        * The generation is a global counter; we'll trust it more than the others,
+        * but it's still possible that it's the one that's wrong.
+        */
+       if (sb->generation < sb->chunk_root_generation)
+               printk(KERN_WARNING
+                       "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n",
+                       sb->generation, sb->chunk_root_generation);
+       if (sb->generation < sb->cache_generation && sb->cache_generation != (u64)-1)
+               printk(KERN_WARNING
+                       "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n",
+                       sb->generation, sb->cache_generation);
+
+       return ret;
 }
 
 static void btrfs_error_commit_super(struct btrfs_root *root)
@@ -4009,9 +4072,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
 
                clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
                while (start <= end) {
-                       eb = btrfs_find_tree_block(root, start,
-                                                  root->leafsize);
-                       start += root->leafsize;
+                       eb = btrfs_find_tree_block(root, start);
+                       start += root->nodesize;
                        if (!eb)
                                continue;
                        wait_on_extent_buffer_writeback(eb);