git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - fs/btrfs/volumes.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
[karo-tx-linux.git] / fs / btrfs / volumes.c
index 67a08538184557486ec802840d610828696edf46..0052ca8264d9b37cc171e52e0954b165fd6dad46 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/ratelimit.h>
 #include <linux/kthread.h>
 #include <linux/raid/pq.h>
+#include <linux/semaphore.h>
 #include <asm/div64.h>
 #include "compat.h"
 #include "ctree.h"
@@ -62,6 +63,48 @@ static void unlock_chunks(struct btrfs_root *root)
        mutex_unlock(&root->fs_info->chunk_mutex);
 }
 
+/*
+ * Allocate a zero-filled btrfs_fs_devices and initialise its device list
+ * mutex and its three list heads.  The fsid is left zeroed.
+ *
+ * Returns the new structure, or ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+static struct btrfs_fs_devices *__alloc_fs_devices(void)
+{
+       struct btrfs_fs_devices *new_devs;
+
+       new_devs = kzalloc(sizeof(*new_devs), GFP_NOFS);
+       if (new_devs == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       INIT_LIST_HEAD(&new_devs->list);
+       INIT_LIST_HEAD(&new_devs->alloc_list);
+       INIT_LIST_HEAD(&new_devs->devices);
+       mutex_init(&new_devs->device_list_mutex);
+
+       return new_devs;
+}
+
+/**
+ * alloc_fs_devices - allocate struct btrfs_fs_devices
+ * @fsid:      UUID to copy into the new structure, or NULL to have a
+ *             random UUID generated instead.
+ *
+ * Return: the newly allocated &struct btrfs_fs_devices, or an ERR_PTR()
+ * value on failure.  The result is not linked onto any list, so a plain
+ * kfree() is enough to dispose of it.
+ */
+static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
+{
+       struct btrfs_fs_devices *new_devs = __alloc_fs_devices();
+
+       if (IS_ERR(new_devs))
+               return new_devs;
+
+       if (!fsid) {
+               generate_random_uuid(new_devs->fsid);
+               return new_devs;
+       }
+
+       memcpy(new_devs->fsid, fsid, BTRFS_FSID_SIZE);
+       return new_devs;
+}
+
 static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
 {
        struct btrfs_device *device;
@@ -101,6 +144,27 @@ void btrfs_cleanup_fs_uuids(void)
        }
 }
 
+/*
+ * Allocate a zero-filled btrfs_device and initialise its list heads, its
+ * io_lock and its readahead state (lock, in-flight counter, radix trees).
+ *
+ * Returns the new device, or ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+static struct btrfs_device *__alloc_device(void)
+{
+       struct btrfs_device *device;
+
+       device = kzalloc(sizeof(*device), GFP_NOFS);
+       if (device == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       spin_lock_init(&device->io_lock);
+       INIT_LIST_HEAD(&device->dev_alloc_list);
+       INIT_LIST_HEAD(&device->dev_list);
+
+       /* readahead state */
+       atomic_set(&device->reada_in_flight, 0);
+       spin_lock_init(&device->reada_lock);
+       INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT);
+       INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT);
+
+       return device;
+}
+
 static noinline struct btrfs_device *__find_device(struct list_head *head,
                                                   u64 devid, u8 *uuid)
 {
@@ -395,16 +459,14 @@ static noinline int device_list_add(const char *path,
 
        fs_devices = find_fsid(disk_super->fsid);
        if (!fs_devices) {
-               fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
-               if (!fs_devices)
-                       return -ENOMEM;
-               INIT_LIST_HEAD(&fs_devices->devices);
-               INIT_LIST_HEAD(&fs_devices->alloc_list);
+               fs_devices = alloc_fs_devices(disk_super->fsid);
+               if (IS_ERR(fs_devices))
+                       return PTR_ERR(fs_devices);
+
                list_add(&fs_devices->list, &fs_uuids);
-               memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
                fs_devices->latest_devid = devid;
                fs_devices->latest_trans = found_transid;
-               mutex_init(&fs_devices->device_list_mutex);
+
                device = NULL;
        } else {
                device = __find_device(&fs_devices->devices, devid,
@@ -414,17 +476,12 @@ static noinline int device_list_add(const char *path,
                if (fs_devices->opened)
                        return -EBUSY;
 
-               device = kzalloc(sizeof(*device), GFP_NOFS);
-               if (!device) {
+               device = btrfs_alloc_device(NULL, &devid,
+                                           disk_super->dev_item.uuid);
+               if (IS_ERR(device)) {
                        /* we can safely leave the fs_devices entry around */
-                       return -ENOMEM;
+                       return PTR_ERR(device);
                }
-               device->devid = devid;
-               device->dev_stats_valid = 0;
-               device->work.func = pending_bios_fn;
-               memcpy(device->uuid, disk_super->dev_item.uuid,
-                      BTRFS_UUID_SIZE);
-               spin_lock_init(&device->io_lock);
 
                name = rcu_string_strdup(path, GFP_NOFS);
                if (!name) {
@@ -432,22 +489,13 @@ static noinline int device_list_add(const char *path,
                        return -ENOMEM;
                }
                rcu_assign_pointer(device->name, name);
-               INIT_LIST_HEAD(&device->dev_alloc_list);
-
-               /* init readahead state */
-               spin_lock_init(&device->reada_lock);
-               device->reada_curr_zone = NULL;
-               atomic_set(&device->reada_in_flight, 0);
-               device->reada_next = 0;
-               INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT);
-               INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT);
 
                mutex_lock(&fs_devices->device_list_mutex);
                list_add_rcu(&device->dev_list, &fs_devices->devices);
+               fs_devices->num_devices++;
                mutex_unlock(&fs_devices->device_list_mutex);
 
                device->fs_devices = fs_devices;
-               fs_devices->num_devices++;
        } else if (!device->name || strcmp(device->name->str, path)) {
                name = rcu_string_strdup(path, GFP_NOFS);
                if (!name)
@@ -474,25 +522,21 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
        struct btrfs_device *device;
        struct btrfs_device *orig_dev;
 
-       fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
-       if (!fs_devices)
-               return ERR_PTR(-ENOMEM);
+       fs_devices = alloc_fs_devices(orig->fsid);
+       if (IS_ERR(fs_devices))
+               return fs_devices;
 
-       INIT_LIST_HEAD(&fs_devices->devices);
-       INIT_LIST_HEAD(&fs_devices->alloc_list);
-       INIT_LIST_HEAD(&fs_devices->list);
-       mutex_init(&fs_devices->device_list_mutex);
        fs_devices->latest_devid = orig->latest_devid;
        fs_devices->latest_trans = orig->latest_trans;
        fs_devices->total_devices = orig->total_devices;
-       memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
 
        /* We have held the volume lock, it is safe to get the devices. */
        list_for_each_entry(orig_dev, &orig->devices, dev_list) {
                struct rcu_string *name;
 
-               device = kzalloc(sizeof(*device), GFP_NOFS);
-               if (!device)
+               device = btrfs_alloc_device(NULL, &orig_dev->devid,
+                                           orig_dev->uuid);
+               if (IS_ERR(device))
                        goto error;
 
                /*
@@ -506,13 +550,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
                }
                rcu_assign_pointer(device->name, name);
 
-               device->devid = orig_dev->devid;
-               device->work.func = pending_bios_fn;
-               memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
-               spin_lock_init(&device->io_lock);
-               INIT_LIST_HEAD(&device->dev_list);
-               INIT_LIST_HEAD(&device->dev_alloc_list);
-
                list_add(&device->dev_list, &fs_devices->devices);
                device->fs_devices = fs_devices;
                fs_devices->num_devices++;
@@ -636,23 +673,22 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 
                if (device->can_discard)
                        fs_devices->num_can_discard--;
+               if (device->missing)
+                       fs_devices->missing_devices--;
 
-               new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
-               BUG_ON(!new_device); /* -ENOMEM */
-               memcpy(new_device, device, sizeof(*new_device));
+               new_device = btrfs_alloc_device(NULL, &device->devid,
+                                               device->uuid);
+               BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
 
                /* Safe because we are under uuid_mutex */
                if (device->name) {
                        name = rcu_string_strdup(device->name->str, GFP_NOFS);
-                       BUG_ON(device->name && !name); /* -ENOMEM */
+                       BUG_ON(!name); /* -ENOMEM */
                        rcu_assign_pointer(new_device->name, name);
                }
-               new_device->bdev = NULL;
-               new_device->writeable = 0;
-               new_device->in_fs_metadata = 0;
-               new_device->can_discard = 0;
-               spin_lock_init(&new_device->io_lock);
+
                list_replace_rcu(&device->dev_list, &new_device->dev_list);
+               new_device->fs_devices = device->fs_devices;
 
                call_rcu(&device->rcu, free_device);
        }
@@ -865,7 +901,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
        disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
 
        if (btrfs_super_bytenr(disk_super) != bytenr ||
-           disk_super->magic != cpu_to_le64(BTRFS_MAGIC))
+           btrfs_super_magic(disk_super) != BTRFS_MAGIC)
                goto error_unmap;
 
        devid = btrfs_stack_device_id(&disk_super->dev_item);
@@ -880,8 +916,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
                printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
        }
 
-       printk(KERN_CONT "devid %llu transid %llu %s\n",
-              (unsigned long long)devid, (unsigned long long)transid, path);
+       printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
 
        ret = device_list_add(path, disk_super, devid, fs_devices_ret);
        if (!ret && fs_devices_ret)
@@ -1278,8 +1313,7 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
        btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
 
        write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
-                   (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
-                   BTRFS_UUID_SIZE);
+                   btrfs_dev_extent_chunk_tree_uuid(extent), BTRFS_UUID_SIZE);
 
        btrfs_set_dev_extent_length(leaf, extent, num_bytes);
        btrfs_mark_buffer_dirty(leaf);
@@ -1307,15 +1341,14 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
        return ret;
 }
 
-static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
+static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
+                                   u64 *devid_ret)
 {
        int ret;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_path *path;
 
-       root = root->fs_info->chunk_root;
-
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
@@ -1324,20 +1357,21 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
        key.type = BTRFS_DEV_ITEM_KEY;
        key.offset = (u64)-1;
 
-       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
        if (ret < 0)
                goto error;
 
        BUG_ON(ret == 0); /* Corruption */
 
-       ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
+       ret = btrfs_previous_item(fs_info->chunk_root, path,
+                                 BTRFS_DEV_ITEMS_OBJECTID,
                                  BTRFS_DEV_ITEM_KEY);
        if (ret) {
-               *objectid = 1;
+               *devid_ret = 1;
        } else {
                btrfs_item_key_to_cpu(path->nodes[0], &found_key,
                                      path->slots[0]);
-               *objectid = found_key.offset + 1;
+               *devid_ret = found_key.offset + 1;
        }
        ret = 0;
 error:
@@ -1391,9 +1425,9 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans,
        btrfs_set_device_bandwidth(leaf, dev_item, 0);
        btrfs_set_device_start_offset(leaf, dev_item, 0);
 
-       ptr = (unsigned long)btrfs_device_uuid(dev_item);
+       ptr = btrfs_device_uuid(dev_item);
        write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
-       ptr = (unsigned long)btrfs_device_fsid(dev_item);
+       ptr = btrfs_device_fsid(dev_item);
        write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
        btrfs_mark_buffer_dirty(leaf);
 
@@ -1562,7 +1596,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
                clear_super = true;
        }
 
+       mutex_unlock(&uuid_mutex);
        ret = btrfs_shrink_device(device, 0);
+       mutex_lock(&uuid_mutex);
        if (ret)
                goto error_undo;
 
@@ -1586,7 +1622,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        /*
         * the device list mutex makes sure that we don't change
         * the device list while someone else is writing out all
-        * the device supers.
+        * the device supers. Whoever is writing all supers, should
+        * lock the device list mutex before getting the number of
+        * devices in the super block (super_copy). Conversely,
+        * whoever updates the number of devices in the super block
+        * (super_copy) should hold the device list mutex.
         */
 
        cur_devices = device->fs_devices;
@@ -1610,10 +1650,10 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
                device->fs_devices->open_devices--;
 
        call_rcu(&device->rcu, free_device);
-       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
        num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
        btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices);
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
        if (cur_devices->open_devices == 0) {
                struct btrfs_fs_devices *fs_devices;
@@ -1793,9 +1833,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
        if (!fs_devices->seeding)
                return -EINVAL;
 
-       seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
-       if (!seed_devices)
-               return -ENOMEM;
+       seed_devices = __alloc_fs_devices();
+       if (IS_ERR(seed_devices))
+               return PTR_ERR(seed_devices);
 
        old_devices = clone_fs_devices(fs_devices);
        if (IS_ERR(old_devices)) {
@@ -1814,7 +1854,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
                              synchronize_rcu);
-       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
        list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
        list_for_each_entry(device, &seed_devices->devices, dev_list) {
@@ -1830,6 +1869,8 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
        generate_random_uuid(fs_devices->fsid);
        memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
        memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+
        super_flags = btrfs_super_flags(disk_super) &
                      ~BTRFS_SUPER_FLAG_SEEDING;
        btrfs_set_super_flags(disk_super, super_flags);
@@ -1889,11 +1930,9 @@ next_slot:
                dev_item = btrfs_item_ptr(leaf, path->slots[0],
                                          struct btrfs_dev_item);
                devid = btrfs_device_id(leaf, dev_item);
-               read_extent_buffer(leaf, dev_uuid,
-                                  (unsigned long)btrfs_device_uuid(dev_item),
+               read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
                                   BTRFS_UUID_SIZE);
-               read_extent_buffer(leaf, fs_uuid,
-                                  (unsigned long)btrfs_device_fsid(dev_item),
+               read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
                                   BTRFS_UUID_SIZE);
                device = btrfs_find_device(root->fs_info, devid, dev_uuid,
                                           fs_uuid);
@@ -1956,10 +1995,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        }
        mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
-       device = kzalloc(sizeof(*device), GFP_NOFS);
-       if (!device) {
+       device = btrfs_alloc_device(root->fs_info, NULL, NULL);
+       if (IS_ERR(device)) {
                /* we can safely leave the fs_devices entry around */
-               ret = -ENOMEM;
+               ret = PTR_ERR(device);
                goto error;
        }
 
@@ -1971,13 +2010,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        }
        rcu_assign_pointer(device->name, name);
 
-       ret = find_next_devid(root, &device->devid);
-       if (ret) {
-               rcu_string_free(device->name);
-               kfree(device);
-               goto error;
-       }
-
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                rcu_string_free(device->name);
@@ -1992,9 +2024,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        if (blk_queue_discard(q))
                device->can_discard = 1;
        device->writeable = 1;
-       device->work.func = pending_bios_fn;
-       generate_random_uuid(device->uuid);
-       spin_lock_init(&device->io_lock);
        device->generation = trans->transid;
        device->io_width = root->sectorsize;
        device->io_align = root->sectorsize;
@@ -2121,6 +2150,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct list_head *devices;
        struct rcu_string *name;
+       u64 devid = BTRFS_DEV_REPLACE_DEVID;
        int ret = 0;
 
        *device_out = NULL;
@@ -2142,9 +2172,9 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
                }
        }
 
-       device = kzalloc(sizeof(*device), GFP_NOFS);
-       if (!device) {
-               ret = -ENOMEM;
+       device = btrfs_alloc_device(NULL, &devid, NULL);
+       if (IS_ERR(device)) {
+               ret = PTR_ERR(device);
                goto error;
        }
 
@@ -2161,10 +2191,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
                device->can_discard = 1;
        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        device->writeable = 1;
-       device->work.func = pending_bios_fn;
-       generate_random_uuid(device->uuid);
-       device->devid = BTRFS_DEV_REPLACE_DEVID;
-       spin_lock_init(&device->io_lock);
        device->generation = 0;
        device->io_width = root->sectorsize;
        device->io_align = root->sectorsize;
@@ -2971,10 +2997,6 @@ again:
                if (found_key.objectid != key.objectid)
                        break;
 
-               /* chunk zero is special */
-               if (found_key.offset == 0)
-                       break;
-
                chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
 
                if (!counting) {
@@ -3010,6 +3032,8 @@ again:
                        spin_unlock(&fs_info->balance_lock);
                }
 loop:
+               if (found_key.offset == 0)
+                       break;
                key.offset = found_key.offset - 1;
        }
 
@@ -3074,9 +3098,6 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
        atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
 }
 
-void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
-                              struct btrfs_ioctl_balance_args *bargs);
-
 /*
  * Should be called with both balance and volume mutexes held
  */
@@ -3139,7 +3160,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
             (bctl->data.target & ~allowed))) {
                printk(KERN_ERR "btrfs: unable to start balance with target "
                       "data profile %llu\n",
-                      (unsigned long long)bctl->data.target);
+                      bctl->data.target);
                ret = -EINVAL;
                goto out;
        }
@@ -3148,7 +3169,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
             (bctl->meta.target & ~allowed))) {
                printk(KERN_ERR "btrfs: unable to start balance with target "
                       "metadata profile %llu\n",
-                      (unsigned long long)bctl->meta.target);
+                      bctl->meta.target);
                ret = -EINVAL;
                goto out;
        }
@@ -3157,7 +3178,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
             (bctl->sys.target & ~allowed))) {
                printk(KERN_ERR "btrfs: unable to start balance with target "
                       "system profile %llu\n",
-                      (unsigned long long)bctl->sys.target);
+                      bctl->sys.target);
                ret = -EINVAL;
                goto out;
        }
@@ -3430,6 +3451,264 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
        return 0;
 }
 
+/*
+ * Thread function that populates the UUID tree from the root tree.
+ *
+ * Walks fs_info->tree_root with btrfs_search_forward() and, for every
+ * qualifying root item, adds its uuid and/or received_uuid to
+ * fs_info->uuid_root via btrfs_uuid_tree_add().
+ *
+ * @data: the struct btrfs_fs_info to scan.
+ *
+ * On success fs_info->update_uuid_tree_gen is set to 1; on failure a
+ * warning is printed instead.  In both cases uuid_tree_rescan_sem is
+ * released with up() before returning, and the return value is always 0.
+ */
+static int btrfs_uuid_scan_kthread(void *data)
+{
+       struct btrfs_fs_info *fs_info = data;
+       struct btrfs_root *root = fs_info->tree_root;
+       struct btrfs_key key;
+       struct btrfs_key max_key;
+       struct btrfs_path *path = NULL;
+       int ret = 0;
+       struct extent_buffer *eb;
+       int slot;
+       struct btrfs_root_item root_item;
+       u32 item_size;
+       struct btrfs_trans_handle *trans = NULL;
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       /* search range: from the smallest to the largest root item key */
+       key.objectid = 0;
+       key.type = BTRFS_ROOT_ITEM_KEY;
+       key.offset = 0;
+
+       max_key.objectid = (u64)-1;
+       max_key.type = BTRFS_ROOT_ITEM_KEY;
+       max_key.offset = (u64)-1;
+
+       path->keep_locks = 1;
+
+       while (1) {
+               ret = btrfs_search_forward(root, &key, &max_key, path, 0);
+               if (ret) {
+                       /* a positive return ends the scan without error */
+                       if (ret > 0)
+                               ret = 0;
+                       break;
+               }
+
+               /*
+                * Only root items are of interest, and only those whose
+                * objectid is BTRFS_FS_TREE_OBJECTID or lies within
+                * [BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID].
+                */
+               if (key.type != BTRFS_ROOT_ITEM_KEY ||
+                   (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
+                    key.objectid != BTRFS_FS_TREE_OBJECTID) ||
+                   key.objectid > BTRFS_LAST_FREE_OBJECTID)
+                       goto skip;
+
+               eb = path->nodes[0];
+               slot = path->slots[0];
+               item_size = btrfs_item_size_nr(eb, slot);
+               /* items too small to hold a full root_item are skipped */
+               if (item_size < sizeof(root_item))
+                       goto skip;
+
+               read_extent_buffer(eb, &root_item,
+                                  btrfs_item_ptr_offset(eb, slot),
+                                  (int)sizeof(root_item));
+               /* roots with a zero reference count are skipped */
+               if (btrfs_root_refs(&root_item) == 0)
+                       goto skip;
+
+               if (!btrfs_is_empty_uuid(root_item.uuid) ||
+                   !btrfs_is_empty_uuid(root_item.received_uuid)) {
+                       /* second pass for this item: transaction is open */
+                       if (trans)
+                               goto update_tree;
+
+                       /*
+                        * First pass: release the path, open a transaction
+                        * and redo the search with the transaction held.
+                        */
+                       btrfs_release_path(path);
+                       /*
+                        * 1 - subvol uuid item
+                        * 1 - received_subvol uuid item
+                        */
+                       trans = btrfs_start_transaction(fs_info->uuid_root, 2);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               break;
+                       }
+                       continue;
+               } else {
+                       goto skip;
+               }
+update_tree:
+               if (!btrfs_is_empty_uuid(root_item.uuid)) {
+                       ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
+                                                 root_item.uuid,
+                                                 BTRFS_UUID_KEY_SUBVOL,
+                                                 key.objectid);
+                       if (ret < 0) {
+                               pr_warn("btrfs: uuid_tree_add failed %d\n",
+                                       ret);
+                               break;
+                       }
+               }
+
+               if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
+                       ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
+                                                 root_item.received_uuid,
+                                                BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+                                                 key.objectid);
+                       if (ret < 0) {
+                               pr_warn("btrfs: uuid_tree_add failed %d\n",
+                                       ret);
+                               break;
+                       }
+               }
+
+skip:
+               /* close the per-item transaction, if one was opened */
+               if (trans) {
+                       ret = btrfs_end_transaction(trans, fs_info->uuid_root);
+                       trans = NULL;
+                       if (ret)
+                               break;
+               }
+
+               /*
+                * Advance the search key to the next possible key: bump the
+                * offset, or roll over to the next type/objectid; stop once
+                * every field is at its maximum.
+                */
+               btrfs_release_path(path);
+               if (key.offset < (u64)-1) {
+                       key.offset++;
+               } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
+                       key.offset = 0;
+                       key.type = BTRFS_ROOT_ITEM_KEY;
+               } else if (key.objectid < (u64)-1) {
+                       key.offset = 0;
+                       key.type = BTRFS_ROOT_ITEM_KEY;
+                       key.objectid++;
+               } else {
+                       break;
+               }
+               cond_resched();
+       }
+
+out:
+       btrfs_free_path(path);
+       /* trans may be NULL, an ERR_PTR, or a still-open handle here */
+       if (trans && !IS_ERR(trans))
+               btrfs_end_transaction(trans, fs_info->uuid_root);
+       if (ret)
+               pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret);
+       else
+               fs_info->update_uuid_tree_gen = 1;
+       up(&fs_info->uuid_tree_rescan_sem);
+       return 0;
+}
+
+/*
+ * Callback for btrfs_uuid_tree_iterate().
+ *
+ * Looks up the subvolume root identified by @subid and compares @uuid with
+ * that root's uuid (BTRFS_UUID_KEY_SUBVOL) or received_uuid
+ * (BTRFS_UUID_KEY_RECEIVED_SUBVOL).  Entries of any other @type are
+ * accepted without a lookup.
+ *
+ * returns:
+ * 0   check succeeded, the entry is not outdated.
+ * < 0 if an error occurred.
+ * > 0 if the check failed, which means the caller shall remove the entry.
+ */
+static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
+                                      u8 *uuid, u8 type, u64 subid)
+{
+       struct btrfs_root *subvol_root;
+       struct btrfs_key key;
+       const u8 *expected;
+
+       if (type != BTRFS_UUID_KEY_SUBVOL &&
+           type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
+               return 0;
+
+       key.objectid = subid;
+       key.type = BTRFS_ROOT_ITEM_KEY;
+       key.offset = (u64)-1;
+       subvol_root = btrfs_read_fs_root_no_name(fs_info, &key);
+       if (IS_ERR(subvol_root)) {
+               int err = PTR_ERR(subvol_root);
+
+               /* a vanished subvolume means the entry is stale */
+               return err == -ENOENT ? 1 : err;
+       }
+
+       if (type == BTRFS_UUID_KEY_SUBVOL)
+               expected = subvol_root->root_item.uuid;
+       else
+               expected = subvol_root->root_item.received_uuid;
+
+       return memcmp(uuid, expected, BTRFS_UUID_SIZE) ? 1 : 0;
+}
+
+/*
+ * Thread function that revalidates an existing UUID tree and then refills
+ * it.  If the validation pass fails, the rescan semaphore is released here
+ * and the error is returned; otherwise btrfs_uuid_scan_kthread() takes over.
+ */
+static int btrfs_uuid_rescan_kthread(void *data)
+{
+       struct btrfs_fs_info *fs_info = data;
+       int err;
+
+       /*
+        * Pass 1: walk the existing UUID tree and drop every entry that
+        * holds outdated data.
+        * Pass 2: add all entries that are missing from the UUID tree.
+        */
+       err = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
+       if (err >= 0)
+               return btrfs_uuid_scan_kthread(data);
+
+       pr_warn("btrfs: iterating uuid_tree failed %d\n", err);
+       up(&fs_info->uuid_tree_rescan_sem);
+       return err;
+}
+
+/*
+ * Create the UUID tree and start the background scan that populates it.
+ *
+ * A transaction on the tree root is used to create a tree with objectid
+ * BTRFS_UUID_TREE_OBJECTID; the new root is stored in fs_info->uuid_root
+ * and the transaction is committed.  Afterwards uuid_tree_rescan_sem is
+ * taken and btrfs_uuid_scan_kthread() is launched in a kthread, which
+ * releases the semaphore when it finishes.
+ *
+ * Returns 0 if the tree was created and the scan thread was started, or a
+ * negative errno if starting/committing the transaction, creating the tree
+ * or starting the thread failed.
+ */
+int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_trans_handle *trans;
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_root *uuid_root;
+       struct task_struct *task;
+       int ret;
+
+       /*
+        * 1 - root node
+        * 1 - root item
+        */
+       trans = btrfs_start_transaction(tree_root, 2);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       uuid_root = btrfs_create_tree(trans, fs_info,
+                                     BTRFS_UUID_TREE_OBJECTID);
+       if (IS_ERR(uuid_root)) {
+               ret = PTR_ERR(uuid_root);
+               btrfs_abort_transaction(trans, tree_root, ret);
+               /*
+                * Aborting marks the transaction as failed but does not
+                * release our handle; end it so it is not leaked.
+                */
+               btrfs_end_transaction(trans, tree_root);
+               return ret;
+       }
+
+       fs_info->uuid_root = uuid_root;
+
+       ret = btrfs_commit_transaction(trans, tree_root);
+       if (ret)
+               return ret;
+
+       /* released by the scan thread, or below if it cannot be started */
+       down(&fs_info->uuid_tree_rescan_sem);
+       task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
+       if (IS_ERR(task)) {
+               /* fs_info->update_uuid_tree_gen remains 0 in all error cases */
+               pr_warn("btrfs: failed to start uuid_scan task\n");
+               up(&fs_info->uuid_tree_rescan_sem);
+               return PTR_ERR(task);
+       }
+
+       return 0;
+}
+
+/*
+ * Start a background recheck of the UUID tree.
+ *
+ * Takes uuid_tree_rescan_sem and launches btrfs_uuid_rescan_kthread() in a
+ * kthread.  If the thread cannot be started the semaphore is released
+ * again and the kthread_run() error is returned; otherwise returns 0.
+ */
+int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
+{
+       struct task_struct *rescan_task;
+
+       down(&fs_info->uuid_tree_rescan_sem);
+       rescan_task = kthread_run(btrfs_uuid_rescan_kthread, fs_info,
+                                 "btrfs-uuid");
+       if (!IS_ERR(rescan_task))
+               return 0;
+
+       /* fs_info->update_uuid_tree_gen stays 0 on every error path */
+       pr_warn("btrfs: failed to start uuid_rescan task\n");
+       up(&fs_info->uuid_tree_rescan_sem);
+       return PTR_ERR(rescan_task);
+}
+
 /*
  * shrinking a device means finding all of the device extents past
  * the new size, and then following the back refs to the chunks.
@@ -4194,13 +4473,13 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
         * and exit, so return 1 so the callers don't try to use other copies.
         */
        if (!em) {
-               btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical,
+               btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical,
                            logical+len);
                return 1;
        }
 
        if (em->start > logical || em->start + em->len < logical) {
-               btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got "
+               btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got "
                            "%Lu-%Lu\n", logical, logical+len, em->start,
                            em->start + em->len);
                return 1;
@@ -4375,8 +4654,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 
        if (!em) {
                btrfs_crit(fs_info, "unable to find logical %llu len %llu",
-                       (unsigned long long)logical,
-                       (unsigned long long)*length);
+                       logical, *length);
                return -EINVAL;
        }
 
@@ -4671,6 +4949,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
        }
        bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS);
        if (!bbio) {
+               kfree(raid_map);
                ret = -ENOMEM;
                goto out;
        }
@@ -5246,9 +5525,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 
        if (map_length < length) {
                btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu",
-                       (unsigned long long)logical,
-                       (unsigned long long)length,
-                       (unsigned long long)map_length);
+                       logical, length, map_length);
                BUG();
        }
 
@@ -5314,23 +5591,72 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
        struct btrfs_device *device;
        struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
 
-       device = kzalloc(sizeof(*device), GFP_NOFS);
-       if (!device)
+       device = btrfs_alloc_device(NULL, &devid, dev_uuid);
+       if (IS_ERR(device))
                return NULL;
-       list_add(&device->dev_list,
-                &fs_devices->devices);
-       device->devid = devid;
-       device->work.func = pending_bios_fn;
+
+       list_add(&device->dev_list, &fs_devices->devices);
        device->fs_devices = fs_devices;
-       device->missing = 1;
        fs_devices->num_devices++;
+
+       device->missing = 1;
        fs_devices->missing_devices++;
-       spin_lock_init(&device->io_lock);
-       INIT_LIST_HEAD(&device->dev_alloc_list);
-       memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
+
        return device;
 }
 
+/**
+ * btrfs_alloc_device - allocate struct btrfs_device
+ * @fs_info:   only passed to find_next_devid() when @devid is NULL; may
+ *             be NULL if @devid is provided.
+ * @devid:     a pointer to the devid for this device.  If NULL, a new
+ *             devid is obtained from find_next_devid().
+ * @uuid:      a pointer to BTRFS_UUID_SIZE bytes of UUID for this device.
+ *             If NULL, a random UUID is generated.
+ *
+ * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
+ * on error (-EINVAL, with a warning, if both @fs_info and @devid are NULL).
+ * Returned struct is not linked onto any lists and can be freed with kfree().
+ */
+struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
+                                       const u64 *devid,
+                                       const u8 *uuid)
+{
+       struct btrfs_device *dev;
+       u64 tmp;
+
+       if (!devid && !fs_info) {
+               WARN_ON(1);     /* no devid given and no fs_info to pick one */
+               return ERR_PTR(-EINVAL);
+       }
+
+       dev = __alloc_device();
+       if (IS_ERR(dev))
+               return dev;
+
+       if (devid)
+               tmp = *devid;
+       else {
+               int ret;
+
+               ret = find_next_devid(fs_info, &tmp);
+               if (ret) {
+                       kfree(dev);
+                       return ERR_PTR(ret);
+               }
+       }
+       dev->devid = tmp;
+
+       if (uuid)
+               memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
+       else
+               generate_random_uuid(dev->uuid);
+
+       dev->work.func = pending_bios_fn;
+
+       return dev;
+}
+
+
 static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                          struct extent_buffer *leaf,
                          struct btrfs_chunk *chunk)
@@ -5437,7 +5763,7 @@ static void fill_device_from_item(struct extent_buffer *leaf,
        WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
        device->is_tgtdev_for_dev_replace = 0;
 
-       ptr = (unsigned long)btrfs_device_uuid(dev_item);
+       ptr = btrfs_device_uuid(dev_item);
        read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
 }
 
@@ -5500,11 +5826,9 @@ static int read_one_dev(struct btrfs_root *root,
        u8 dev_uuid[BTRFS_UUID_SIZE];
 
        devid = btrfs_device_id(leaf, dev_item);
-       read_extent_buffer(leaf, dev_uuid,
-                          (unsigned long)btrfs_device_uuid(dev_item),
+       read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
                           BTRFS_UUID_SIZE);
-       read_extent_buffer(leaf, fs_uuid,
-                          (unsigned long)btrfs_device_fsid(dev_item),
+       read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
                           BTRFS_UUID_SIZE);
 
        if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
@@ -5519,8 +5843,7 @@ static int read_one_dev(struct btrfs_root *root,
                        return -EIO;
 
                if (!device) {
-                       btrfs_warn(root->fs_info, "devid %llu missing",
-                               (unsigned long long)devid);
+                       btrfs_warn(root->fs_info, "devid %llu missing", devid);
                        device = add_missing_dev(root, devid, dev_uuid);
                        if (!device)
                                return -ENOMEM;
@@ -5644,14 +5967,15 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
        mutex_lock(&uuid_mutex);
        lock_chunks(root);
 
-       /* first we search for all of the device items, and then we
-        * read in all of the chunk items.  This way we can create chunk
-        * mappings that reference all of the devices that are afound
+       /*
+        * Read all device items, and then all the chunk items. All
+        * device items are found before any chunk item (their object id
+        * is smaller than the lowest possible object id for a chunk
+        * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
         */
        key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
        key.offset = 0;
        key.type = 0;
-again:
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto error;
@@ -5667,17 +5991,13 @@ again:
                        break;
                }
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
-               if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
-                       if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
-                               break;
-                       if (found_key.type == BTRFS_DEV_ITEM_KEY) {
-                               struct btrfs_dev_item *dev_item;
-                               dev_item = btrfs_item_ptr(leaf, slot,
+               if (found_key.type == BTRFS_DEV_ITEM_KEY) {
+                       struct btrfs_dev_item *dev_item;
+                       dev_item = btrfs_item_ptr(leaf, slot,
                                                  struct btrfs_dev_item);
-                               ret = read_one_dev(root, leaf, dev_item);
-                               if (ret)
-                                       goto error;
-                       }
+                       ret = read_one_dev(root, leaf, dev_item);
+                       if (ret)
+                               goto error;
                } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
                        struct btrfs_chunk *chunk;
                        chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -5687,11 +6007,6 @@ again:
                }
                path->slots[0]++;
        }
-       if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
-               key.objectid = 0;
-               btrfs_release_path(path);
-               goto again;
-       }
        ret = 0;
 error:
        unlock_chunks(root);