Merge remote-tracking branch 'btrfs/next'
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2a028a58619cdaca85ff21b4b74bc3324bf6bfea..76c2c02815c274841ed5d6dedfa977e9c8b6cefb 100644
@@ -54,6 +54,7 @@
 #include "locking.h"
 #include "free-space-cache.h"
 #include "inode-map.h"
+#include "backref.h"
 
 struct btrfs_iget_args {
        u64 ino;
@@ -230,7 +231,6 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
        u64 inline_len = actual_end - start;
        u64 aligned_end = (end + root->sectorsize - 1) &
                        ~((u64)root->sectorsize - 1);
-       u64 hint_byte;
        u64 data_len = inline_len;
        int ret;
 
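A note on the arithmetic in this hunk: aligned_end rounds end up to the next sectorsize boundary, which relies on sectorsize being a power of two. A standalone userspace sketch of the same expression, assuming a 4096-byte sector size and a hypothetical end offset:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint64_t sectorsize = 4096;     /* assumed sector size */
        uint64_t end = 6000;            /* hypothetical unaligned end offset */

        /* same expression as aligned_end above: round up to the next boundary */
        uint64_t aligned_end = (end + sectorsize - 1) & ~(sectorsize - 1);

        assert(aligned_end == 8192);
        return 0;
}

The same mask idiom shows up later in the patch, e.g. where btrfs_truncate_inode_items switches from rounding new_size down (new_size & ~mask) to rounding it up ((new_size + mask) & ~mask).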
@@ -247,8 +247,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
                return 1;
        }
 
-       ret = btrfs_drop_extents(trans, inode, start, aligned_end,
-                                &hint_byte, 1);
+       ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1);
        if (ret)
                return ret;
 
@@ -664,7 +663,7 @@ retry:
                                           async_extent->compressed_size,
                                           async_extent->compressed_size,
                                           0, alloc_hint, &ins, 1);
-                       if (ret)
+                       if (ret && ret != -ENOSPC)
                                btrfs_abort_transaction(trans, root, ret);
                        btrfs_end_transaction(trans, root);
                }
@@ -1308,6 +1307,7 @@ out_check:
                        em->block_start = disk_bytenr;
                        em->bdev = root->fs_info->fs_devices->latest_bdev;
                        set_bit(EXTENT_FLAG_PINNED, &em->flags);
+                       set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
                        while (1) {
                                write_lock(&em_tree->lock);
                                ret = add_extent_mapping(em_tree, em);
@@ -1364,11 +1364,7 @@ out_check:
        }
 
 error:
-       if (nolock) {
-               err = btrfs_end_transaction_nolock(trans, root);
-       } else {
-               err = btrfs_end_transaction(trans, root);
-       }
+       err = btrfs_end_transaction(trans, root);
        if (!ret)
                ret = err;
 
@@ -1785,7 +1781,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key ins;
-       u64 hint;
        int ret;
 
        path = btrfs_alloc_path();
@@ -1803,8 +1798,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
         * the caller is expected to unpin it and allow it to be merged
         * with the others.
         */
-       ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
-                                &hint, 0);
+       ret = btrfs_drop_extents(trans, root, inode, file_pos,
+                                file_pos + num_bytes, 0);
        if (ret)
                goto out;
 
@@ -1828,10 +1823,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        btrfs_set_file_extent_encryption(leaf, fi, encryption);
        btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
 
-       btrfs_unlock_up_safe(path, 1);
-       btrfs_set_lock_blocking(leaf);
-
        btrfs_mark_buffer_dirty(leaf);
+       btrfs_release_path(path);
 
        inode_add_bytes(inode, num_bytes);
 
@@ -1847,6 +1840,607 @@ out:
        return ret;
 }
 
+/* snapshot-aware defrag */
+struct sa_defrag_extent_backref {
+       struct rb_node node;
+       struct old_sa_defrag_extent *old;
+       u64 root_id;
+       u64 inum;
+       u64 file_pos;
+       u64 extent_offset;
+       u64 num_bytes;
+       u64 generation;
+};
+
+struct old_sa_defrag_extent {
+       struct list_head list;
+       struct new_sa_defrag_extent *new;
+
+       u64 extent_offset;
+       u64 bytenr;
+       u64 offset;
+       u64 len;
+       int count;
+};
+
+struct new_sa_defrag_extent {
+       struct rb_root root;
+       struct list_head head;
+       struct btrfs_path *path;
+       struct inode *inode;
+       u64 file_pos;
+       u64 len;
+       u64 bytenr;
+       u64 disk_len;
+       u8 compress_type;
+};
+
+static int backref_comp(struct sa_defrag_extent_backref *b1,
+                       struct sa_defrag_extent_backref *b2)
+{
+       if (b1->root_id < b2->root_id)
+               return -1;
+       else if (b1->root_id > b2->root_id)
+               return 1;
+
+       if (b1->inum < b2->inum)
+               return -1;
+       else if (b1->inum > b2->inum)
+               return 1;
+
+       if (b1->file_pos < b2->file_pos)
+               return -1;
+       else if (b1->file_pos > b2->file_pos)
+               return 1;
+
+       WARN_ON(1);
+       return 0;
+}
+
+static void backref_insert(struct rb_root *root,
+                          struct sa_defrag_extent_backref *backref)
+{
+       struct rb_node **p = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct sa_defrag_extent_backref *entry;
+       int ret;
+
+       while (*p) {
+               parent = *p;
+               entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
+
+               ret = backref_comp(backref, entry);
+               if (ret < 0)
+                       p = &(*p)->rb_left;
+               else if (ret > 0)
+                       p = &(*p)->rb_right;
+               else
+                       BUG_ON(1);
+       }
+
+       rb_link_node(&backref->node, parent, p);
+       rb_insert_color(&backref->node, root);
+}
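backref_insert keeps the collected backrefs ordered by (root_id, inum, file_pos); that ordering is what later lets relink_extent_backref merge a backref with its predecessor when both come from the same root and inode and are contiguous in the file. A standalone sketch of the comparison order, using hypothetical root and inode numbers:

#include <stdint.h>
#include <stdio.h>

/* standalone copy of the sort key used by backref_comp */
struct key3 {
        uint64_t root_id;
        uint64_t inum;
        uint64_t file_pos;
};

static int comp(const struct key3 *a, const struct key3 *b)
{
        if (a->root_id != b->root_id)
                return a->root_id < b->root_id ? -1 : 1;
        if (a->inum != b->inum)
                return a->inum < b->inum ? -1 : 1;
        if (a->file_pos != b->file_pos)
                return a->file_pos < b->file_pos ? -1 : 1;
        return 0;       /* duplicate key: the kernel code treats this as a bug */
}

int main(void)
{
        /* hypothetical: the same extent referenced from two snapshot roots */
        struct key3 snap_a = { .root_id = 257, .inum = 300, .file_pos = 0 };
        struct key3 snap_b = { .root_id = 258, .inum = 300, .file_pos = 0 };

        printf("%d\n", comp(&snap_a, &snap_b)); /* -1: root 257 sorts first */
        return 0;
}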
+
+/*
+ * Note that the backref might have changed, in which case we just return 0.
+ */
+static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
+                                      void *ctx)
+{
+       struct btrfs_file_extent_item *extent;
+       struct btrfs_fs_info *fs_info;
+       struct old_sa_defrag_extent *old = ctx;
+       struct new_sa_defrag_extent *new = old->new;
+       struct btrfs_path *path = new->path;
+       struct btrfs_key key;
+       struct btrfs_root *root;
+       struct sa_defrag_extent_backref *backref;
+       struct extent_buffer *leaf;
+       struct inode *inode = new->inode;
+       int slot;
+       int ret;
+       u64 extent_offset;
+       u64 num_bytes;
+
+       if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
+           inum == btrfs_ino(inode))
+               return 0;
+
+       key.objectid = root_id;
+       key.type = BTRFS_ROOT_ITEM_KEY;
+       key.offset = (u64)-1;
+
+       fs_info = BTRFS_I(inode)->root->fs_info;
+       root = btrfs_read_fs_root_no_name(fs_info, &key);
+       if (IS_ERR(root)) {
+               if (PTR_ERR(root) == -ENOENT)
+                       return 0;
+               WARN_ON(1);
+               pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
+                        inum, offset, root_id);
+               return PTR_ERR(root);
+       }
+
+       key.objectid = inum;
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       if (offset > (u64)-1 << 32)
+               key.offset = 0;
+       else
+               key.offset = offset;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0) {
+               WARN_ON(1);
+               return ret;
+       }
+
+       while (1) {
+               cond_resched();
+
+               leaf = path->nodes[0];
+               slot = path->slots[0];
+
+               if (slot >= btrfs_header_nritems(leaf)) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0) {
+                               goto out;
+                       } else if (ret > 0) {
+                               ret = 0;
+                               goto out;
+                       }
+                       continue;
+               }
+
+               path->slots[0]++;
+
+               btrfs_item_key_to_cpu(leaf, &key, slot);
+
+               if (key.objectid != inum || key.type != BTRFS_EXTENT_DATA_KEY)
+                       continue;
+
+               extent = btrfs_item_ptr(leaf, slot,
+                                       struct btrfs_file_extent_item);
+
+               if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
+                       continue;
+
+               if (key.offset - btrfs_file_extent_offset(leaf, extent) !=
+                   offset)
+                       continue;
+
+               break;
+       }
+
+       extent_offset = btrfs_file_extent_offset(leaf, extent);
+       num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
+
+       if (extent_offset >= old->extent_offset + old->offset + old->len ||
+           extent_offset + num_bytes < old->extent_offset + old->offset)
+               goto out;
+
+       backref = kmalloc(sizeof(*backref), GFP_NOFS);
+       if (!backref) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       backref->root_id = root_id;
+       backref->inum = inum;
+       backref->file_pos = offset + extent_offset;
+       backref->num_bytes = num_bytes;
+       backref->extent_offset = extent_offset;
+       backref->generation = btrfs_file_extent_generation(leaf, extent);
+       backref->old = old;
+       backref_insert(&new->root, backref);
+       old->count++;
+out:
+       btrfs_release_path(path);
+       WARN_ON(ret);
+       return ret;
+}
+
+static noinline bool record_extent_backrefs(struct btrfs_path *path,
+                                  struct new_sa_defrag_extent *new)
+{
+       struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
+       struct old_sa_defrag_extent *old, *tmp;
+       int ret;
+
+       new->path = path;
+
+       list_for_each_entry_safe(old, tmp, &new->head, list) {
+               ret = iterate_inodes_from_logical(old->bytenr, fs_info,
+                                                 path, record_one_backref,
+                                                 old);
+               WARN_ON(ret < 0);
+
+               /* no backref to be processed for this extent */
+               if (!old->count) {
+                       list_del(&old->list);
+                       kfree(old);
+               }
+       }
+
+       if (list_empty(&new->head))
+               return false;
+
+       return true;
+}
+
+/*
+ * Note that the backref might have changed, in which case we just return 0.
+ */
+static noinline int relink_extent_backref(struct btrfs_path *path,
+                                struct sa_defrag_extent_backref *prev,
+                                struct sa_defrag_extent_backref *backref)
+{
+       struct btrfs_file_extent_item *extent;
+       struct btrfs_file_extent_item *item;
+       struct btrfs_ordered_extent *ordered;
+       struct btrfs_trans_handle *trans;
+       struct btrfs_fs_info *fs_info;
+       struct btrfs_root *root;
+       struct btrfs_key key;
+       struct extent_buffer *leaf;
+       struct old_sa_defrag_extent *old = backref->old;
+       struct new_sa_defrag_extent *new = old->new;
+       struct inode *src_inode = new->inode;
+       struct inode *inode;
+       struct extent_state *cached = NULL;
+       int ret = 0;
+       u64 start;
+       u64 len;
+       bool merge = false;
+
+       if (prev && prev->root_id == backref->root_id &&
+           prev->inum == backref->inum &&
+           prev->extent_offset == backref->extent_offset &&
+           prev->file_pos + prev->num_bytes == backref->file_pos)
+               merge = true;
+
+       key.objectid = backref->root_id;
+       key.type = BTRFS_ROOT_ITEM_KEY;
+       key.offset = (u64)-1;
+
+       fs_info = BTRFS_I(src_inode)->root->fs_info;
+       root = btrfs_read_fs_root_no_name(fs_info, &key);
+       if (IS_ERR(root)) {
+               if (PTR_ERR(root) == -ENOENT)
+                       return 0;
+               return PTR_ERR(root);
+       }
+
+       key.objectid = backref->inum;
+       key.type = BTRFS_INODE_ITEM_KEY;
+       key.offset = 0;
+
+       inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+       if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
+               if (inode && !IS_ERR(inode))
+                       iput(inode);
+               return 0;
+       }
+
+       lock_extent_bits(&BTRFS_I(inode)->io_tree, backref->file_pos,
+                        backref->file_pos + backref->num_bytes, 0, &cached);
+
+       ordered = btrfs_lookup_first_ordered_extent(inode,
+                                                   backref->file_pos +
+                                                   backref->num_bytes);
+       if (ordered) {
+               btrfs_put_ordered_extent(ordered);
+               goto out_unlock;
+       }
+
+       /*
+        * 1 for drop_extents
+        * 1 for merge clause's search_slot
+        * 1 for insert items
+        */
+       trans = btrfs_start_transaction(root, 3);
+       if (IS_ERR(trans)) {
+               ret = PTR_ERR(trans);
+               goto out_unlock;
+       }
+
+       key.objectid = backref->inum;
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = backref->file_pos;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0) {
+               goto out_free_path;
+       } else if (ret > 0) {
+               ret = 0;
+               goto out_free_path;
+       }
+
+       extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                               struct btrfs_file_extent_item);
+
+       if (btrfs_file_extent_generation(path->nodes[0], extent) !=
+           backref->generation)
+               goto out_free_path;
+
+       btrfs_release_path(path);
+
+       start = backref->file_pos;
+       if (backref->extent_offset < old->extent_offset + old->offset)
+               start += old->extent_offset + old->offset -
+                        backref->extent_offset;
+
+       len = min(backref->extent_offset + backref->num_bytes,
+                 old->extent_offset + old->offset + old->len);
+       len -= max(backref->extent_offset, old->extent_offset + old->offset);
+
+       ret = btrfs_drop_extents(trans, BTRFS_I(inode)->root, inode, start,
+                                start + len, 1);
+       if (ret)
+               goto out_free_path;
+again:
+       key.objectid = btrfs_ino(inode);
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = start;
+
+       if (merge) {
+               struct btrfs_file_extent_item *fi;
+               u64 extent_len;
+               struct btrfs_key found_key;
+
+               ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+               if (ret < 0)
+                       goto out_free_path;
+
+               path->slots[0]--;
+               leaf = path->nodes[0];
+               btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+               fi = btrfs_item_ptr(leaf, path->slots[0],
+                                   struct btrfs_file_extent_item);
+               extent_len = btrfs_file_extent_num_bytes(leaf, fi);
+
+               if (btrfs_file_extent_disk_bytenr(leaf, fi) == new->bytenr &&
+                   btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_REG &&
+                   !btrfs_file_extent_compression(leaf, fi) &&
+                   !btrfs_file_extent_encryption(leaf, fi) &&
+                   !btrfs_file_extent_other_encoding(leaf, fi) &&
+                   extent_len + found_key.offset == start) {
+                       btrfs_set_file_extent_num_bytes(leaf, fi,
+                                                       extent_len + len);
+                       btrfs_mark_buffer_dirty(leaf);
+                       inode_add_bytes(inode, len);
+
+                       ret = 1;
+                       goto out_free_path;
+               } else {
+                       merge = false;
+                       btrfs_release_path(path);
+                       goto again;
+               }
+       }
+
+       ret = btrfs_insert_empty_item(trans, root, path, &key,
+                                       sizeof(*extent));
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto out_free_path;
+       }
+
+       leaf = path->nodes[0];
+       item = btrfs_item_ptr(leaf, path->slots[0],
+                               struct btrfs_file_extent_item);
+       btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
+       btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
+       btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
+       btrfs_set_file_extent_num_bytes(leaf, item, len);
+       btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
+       btrfs_set_file_extent_generation(leaf, item, trans->transid);
+       btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
+       btrfs_set_file_extent_compression(leaf, item, new->compress_type);
+       btrfs_set_file_extent_encryption(leaf, item, 0);
+       btrfs_set_file_extent_other_encoding(leaf, item, 0);
+
+       btrfs_mark_buffer_dirty(leaf);
+       inode_add_bytes(inode, len);
+
+       ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
+                       new->disk_len, 0,
+                       backref->root_id, backref->inum,
+                       start, 0);
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto out_free_path;
+       }
+
+       ret = 1;
+out_free_path:
+       btrfs_release_path(path);
+       btrfs_end_transaction(trans, root);
+out_unlock:
+       unlock_extent_cached(&BTRFS_I(inode)->io_tree, backref->file_pos,
+                            backref->file_pos + backref->num_bytes,
+                            &cached, GFP_NOFS);
+       iput(inode);
+       return ret;
+}
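The start/len computation in relink_extent_backref intersects the backref's range with the defragged range, both measured as offsets into the old on-disk extent, and then maps the start of that intersection back to a file position in the referencing inode. A standalone sketch of the arithmetic with hypothetical numbers:

#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
        /* hypothetical values; the ranges are offsets into the old extent */
        uint64_t backref_extent_offset = 0;
        uint64_t backref_num_bytes = 65536;
        uint64_t backref_file_pos = 131072;
        uint64_t old_start = 16384;     /* old->extent_offset + old->offset */
        uint64_t old_len = 32768;       /* old->len */

        /* clamp the start of the relinked range to the defragged region */
        uint64_t start = backref_file_pos;
        if (backref_extent_offset < old_start)
                start += old_start - backref_extent_offset;

        /* length of the overlap between the backref and the defragged region */
        uint64_t len = MIN(backref_extent_offset + backref_num_bytes,
                           old_start + old_len) -
                       MAX(backref_extent_offset, old_start);

        printf("start=%llu len=%llu\n",
               (unsigned long long)start, (unsigned long long)len);
        /* prints start=147456 len=32768 */
        return 0;
}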
+
+static void relink_file_extents(struct new_sa_defrag_extent *new)
+{
+       struct btrfs_path *path;
+       struct old_sa_defrag_extent *old, *tmp;
+       struct sa_defrag_extent_backref *backref;
+       struct sa_defrag_extent_backref *prev = NULL;
+       struct inode *inode;
+       struct btrfs_root *root;
+       struct rb_node *node;
+       struct extent_state *cached = NULL;
+       int ret;
+
+       inode = new->inode;
+       root = BTRFS_I(inode)->root;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return;
+
+       if (!record_extent_backrefs(path, new)) {
+               btrfs_free_path(path);
+               goto out;
+       }
+       btrfs_release_path(path);
+
+       lock_extent_bits(&BTRFS_I(inode)->io_tree, new->file_pos,
+                        new->file_pos + new->len, 0, &cached);
+
+       while (1) {
+               node = rb_first(&new->root);
+               if (!node)
+                       break;
+               rb_erase(node, &new->root);
+
+               backref = rb_entry(node, struct sa_defrag_extent_backref, node);
+
+               ret = relink_extent_backref(path, prev, backref);
+               WARN_ON(ret < 0);
+
+               kfree(prev);
+
+               if (ret == 1)
+                       prev = backref;
+               else
+                       prev = NULL;
+               cond_resched();
+       }
+
+       kfree(prev);
+
+       unlock_extent_cached(&BTRFS_I(inode)->io_tree, new->file_pos,
+                            new->file_pos + new->len, &cached, GFP_NOFS);
+
+       btrfs_free_path(path);
+
+       list_for_each_entry_safe(old, tmp, &new->head, list) {
+               list_del(&old->list);
+               kfree(old);
+       }
+out:
+       atomic_dec(&root->fs_info->defrag_running);
+       wake_up(&root->fs_info->transaction_wait);
+
+       kfree(new);
+}
+
+static struct new_sa_defrag_extent *
+record_old_file_extents(struct inode *inode,
+                       struct btrfs_ordered_extent *ordered)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       struct old_sa_defrag_extent *old, *tmp;
+       struct new_sa_defrag_extent *new;
+       int ret;
+
+       new = kmalloc(sizeof(*new), GFP_NOFS);
+       if (!new)
+               return NULL;
+
+       new->inode = inode;
+       new->file_pos = ordered->file_offset;
+       new->len = ordered->len;
+       new->bytenr = ordered->start;
+       new->disk_len = ordered->disk_len;
+       new->compress_type = ordered->compress_type;
+       new->root = RB_ROOT;
+       INIT_LIST_HEAD(&new->head);
+
+       path = btrfs_alloc_path();
+       if (!path)
+               goto out_kfree;
+
+       key.objectid = btrfs_ino(inode);
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = new->file_pos;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out_free_path;
+       if (ret > 0 && path->slots[0] > 0)
+               path->slots[0]--;
+
+       /* find out all the old extents for the file range */
+       while (1) {
+               struct btrfs_file_extent_item *extent;
+               struct extent_buffer *l;
+               int slot;
+               u64 num_bytes;
+               u64 offset;
+               u64 end;
+
+               l = path->nodes[0];
+               slot = path->slots[0];
+
+               if (slot >= btrfs_header_nritems(l)) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               goto out_free_list;
+                       else if (ret > 0)
+                               break;
+                       continue;
+               }
+
+               btrfs_item_key_to_cpu(l, &key, slot);
+
+               if (key.objectid != btrfs_ino(inode))
+                       break;
+               if (key.type != BTRFS_EXTENT_DATA_KEY)
+                       break;
+               if (key.offset >= new->file_pos + new->len)
+                       break;
+
+               extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);
+
+               num_bytes = btrfs_file_extent_num_bytes(l, extent);
+               if (key.offset + num_bytes < new->file_pos)
+                       goto next;
+
+               old = kmalloc(sizeof(*old), GFP_NOFS);
+               if (!old)
+                       goto out_free_list;
+
+               offset = max(new->file_pos, key.offset);
+               end = min(new->file_pos + new->len, key.offset + num_bytes);
+
+               old->bytenr = btrfs_file_extent_disk_bytenr(l, extent);
+               old->extent_offset = btrfs_file_extent_offset(l, extent);
+               old->offset = offset - key.offset;
+               old->len = end - offset;
+               old->new = new;
+               old->count = 0;
+               list_add_tail(&old->list, &new->head);
+next:
+               path->slots[0]++;
+               cond_resched();
+       }
+
+       btrfs_free_path(path);
+       atomic_inc(&root->fs_info->defrag_running);
+
+       return new;
+
+out_free_list:
+       list_for_each_entry_safe(old, tmp, &new->head, list) {
+               list_del(&old->list);
+               kfree(old);
+       }
+out_free_path:
+       btrfs_free_path(path);
+out_kfree:
+       kfree(new);
+       return NULL;
+}
+
 /*
  * helper function for btrfs_finish_ordered_io, this
  * just reads in some of the csum leaves to prime them into ram
@@ -1864,6 +2458,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
        struct btrfs_trans_handle *trans = NULL;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct extent_state *cached_state = NULL;
+       struct new_sa_defrag_extent *new = NULL;
        int compress_type = 0;
        int ret;
        bool nolock;
@@ -1900,6 +2495,15 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                         ordered_extent->file_offset + ordered_extent->len - 1,
                         0, &cached_state);
 
+       ret = test_range_bit(io_tree, ordered_extent->file_offset,
+                       ordered_extent->file_offset + ordered_extent->len - 1,
+                       EXTENT_DEFRAG, 1, cached_state);
+       if (ret && btrfs_root_last_snapshot(&root->root_item) >=
+                                               BTRFS_I(inode)->generation) {
+               /* the inode is shared */
+               new = record_old_file_extents(inode, ordered_extent);
+       }
+
        if (nolock)
                trans = btrfs_join_transaction_nolock(root);
        else
@@ -1929,11 +2533,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                                                ordered_extent->len,
                                                compress_type, 0, 0,
                                                BTRFS_FILE_EXTENT_REG);
-               unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
-                                  ordered_extent->file_offset,
-                                  ordered_extent->len);
        }
-
+       unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
+                          ordered_extent->file_offset, ordered_extent->len,
+                          trans->transid);
        if (ret < 0) {
                btrfs_abort_transaction(trans, root, ret);
                goto out_unlock;
@@ -1949,6 +2552,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                        btrfs_abort_transaction(trans, root, ret);
                        goto out_unlock;
                }
+       } else {
+               btrfs_set_inode_last_trans(trans, inode);
        }
        ret = 0;
 out_unlock:
@@ -1958,12 +2563,8 @@ out_unlock:
 out:
        if (root != root->fs_info->tree_root)
                btrfs_delalloc_release_metadata(inode, ordered_extent->len);
-       if (trans) {
-               if (nolock)
-                       btrfs_end_transaction_nolock(trans, root);
-               else
-                       btrfs_end_transaction(trans, root);
-       }
+       if (trans)
+               btrfs_end_transaction(trans, root);
 
        if (ret)
                clear_extent_uptodate(io_tree, ordered_extent->file_offset,
@@ -1976,6 +2577,10 @@ out:
         */
        btrfs_remove_ordered_extent(inode, ordered_extent);
 
+       /* for snapshot-aware defrag */
+       if (new)
+               relink_file_extents(new);
+
        /* once for us */
        btrfs_put_ordered_extent(ordered_extent);
        /* once for the tree */
@@ -2119,7 +2724,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
        if (empty)
                return;
 
-       down_read(&root->fs_info->cleanup_work_sem);
        spin_lock(&fs_info->delayed_iput_lock);
        list_splice_init(&fs_info->delayed_iputs, &list);
        spin_unlock(&fs_info->delayed_iput_lock);
@@ -2130,7 +2734,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
                iput(delayed->inode);
                kfree(delayed);
        }
-       up_read(&root->fs_info->cleanup_work_sem);
 }
 
 enum btrfs_orphan_cleanup_state {
@@ -2198,7 +2801,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
        int ret;
 
        if (!root->orphan_block_rsv) {
-               block_rsv = btrfs_alloc_block_rsv(root);
+               block_rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
                if (!block_rsv)
                        return -ENOMEM;
        }
@@ -2225,7 +2828,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
                        insert = 1;
 #endif
                insert = 1;
-               atomic_dec(&root->orphan_inodes);
+               atomic_inc(&root->orphan_inodes);
        }
 
        if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
@@ -2590,6 +3193,18 @@ static void btrfs_read_locked_inode(struct inode *inode)
 
        inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
        BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
+       BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
+
+       /*
+        * If we were modified in the current generation and evicted from memory
+        * and then re-read we need to do a full sync since we don't have any
+        * idea about which extents were modified before we were evicted from
+        * cache.
+        */
+       if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
+               set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                       &BTRFS_I(inode)->runtime_flags);
+
        inode->i_version = btrfs_inode_sequence(leaf, inode_item);
        inode->i_generation = BTRFS_I(inode)->generation;
        inode->i_rdev = 0;
@@ -2894,7 +3509,6 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_path *path;
-       struct btrfs_inode_ref *ref;
        struct btrfs_dir_item *di;
        struct inode *inode = dentry->d_inode;
        u64 index;
@@ -3008,17 +3622,17 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
        }
        btrfs_release_path(path);
 
-       ref = btrfs_lookup_inode_ref(trans, root, path,
-                               dentry->d_name.name, dentry->d_name.len,
-                               ino, dir_ino, 0);
-       if (IS_ERR(ref)) {
-               err = PTR_ERR(ref);
+       ret = btrfs_get_inode_ref_index(trans, root, path, dentry->d_name.name,
+                                       dentry->d_name.len, ino, dir_ino, 0,
+                                       &index);
+       if (ret) {
+               err = ret;
                goto out;
        }
-       BUG_ON(!ref); /* Logic error */
+
        if (check_path_shared(root, path))
                goto out;
-       index = btrfs_inode_ref_index(path->nodes[0], ref);
+
        btrfs_release_path(path);
 
        /*
@@ -3061,7 +3675,7 @@ out:
 static void __unlink_end_trans(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
 {
-       if (trans->block_rsv == &root->fs_info->global_block_rsv) {
+       if (trans->block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL) {
                btrfs_block_rsv_release(root, trans->block_rsv,
                                        trans->bytes_reserved);
                trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -3191,9 +3805,10 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        struct btrfs_trans_handle *trans;
        unsigned long nr = 0;
 
-       if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
-           btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
+       if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
                return -ENOTEMPTY;
+       if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
+               return -EPERM;
 
        trans = __unlink_start_trans(dir, dentry);
        if (IS_ERR(trans))
@@ -3267,8 +3882,13 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        path->reada = -1;
 
+       /*
+        * We want to drop from the next block forward in case this new size is
+        * not block aligned since we will be keeping the last block of the
+        * extent just the way it is.
+        */
        if (root->ref_cows || root == root->fs_info->tree_root)
-               btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
+               btrfs_drop_extent_cache(inode, (new_size + mask) & (~mask), (u64)-1, 0);
 
        /*
         * This function is also used to drop the items in the log tree before
@@ -3429,12 +4049,6 @@ delete:
 
                if (path->slots[0] == 0 ||
                    path->slots[0] != pending_del_slot) {
-                       if (root->ref_cows &&
-                           BTRFS_I(inode)->location.objectid !=
-                                               BTRFS_FREE_INO_OBJECTID) {
-                               err = -EAGAIN;
-                               goto out;
-                       }
                        if (pending_del_nr) {
                                ret = btrfs_del_items(trans, root, path,
                                                pending_del_slot,
@@ -3465,12 +4079,20 @@ error:
 }
 
 /*
- * taken from block_truncate_page, but does cow as it zeros out
- * any bytes left in the last page in the file.
+ * btrfs_truncate_page - read, zero a chunk and write a page
+ * @inode - inode that we're zeroing
+ * @from - the offset to start zeroing
+ * @len - the length to zero, 0 to zero the entire range relative to the
+ *     offset
+ * @front - zero up to the offset instead of from the offset on
+ *
+ * This will find the page for the "from" offset, cow it and zero the
+ * part we want to zero.  This is used with truncate and hole punching.
  */
-static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
+int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
+                       int front)
 {
-       struct inode *inode = mapping->host;
+       struct address_space *mapping = inode->i_mapping;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct btrfs_ordered_extent *ordered;
@@ -3485,7 +4107,8 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
        u64 page_start;
        u64 page_end;
 
-       if ((offset & (blocksize - 1)) == 0)
+       if ((offset & (blocksize - 1)) == 0 &&
+           (!len || ((len & (blocksize - 1)) == 0)))
                goto out;
        ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
        if (ret)
@@ -3532,7 +4155,8 @@ again:
        }
 
        clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
-                         EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
+                         EXTENT_DIRTY | EXTENT_DELALLOC |
+                         EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
                          0, 0, &cached_state, GFP_NOFS);
 
        ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
@@ -3545,8 +4169,13 @@ again:
 
        ret = 0;
        if (offset != PAGE_CACHE_SIZE) {
+               if (!len)
+                       len = PAGE_CACHE_SIZE - offset;
                kaddr = kmap(page);
-               memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
+               if (front)
+                       memset(kaddr, 0, offset);
+               else
+                       memset(kaddr + offset, 0, len);
                flush_dcache_page(page);
                kunmap(page);
        }
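The new front/len handling above decides which part of the page gets wiped: front == 1 zeroes everything before offset, front == 0 zeroes len bytes starting at offset, with len == 0 meaning "to the end of the page". A standalone sketch of just that selection, assuming a 4096-byte page and a hypothetical offset:

#include <stdio.h>
#include <string.h>

#define PAGE_CACHE_SIZE 4096    /* assumed page size for this sketch */

/* mirrors the memset selection in btrfs_truncate_page */
static void zero_partial_page(char *page, size_t offset, size_t len, int front)
{
        if (!len)
                len = PAGE_CACHE_SIZE - offset;
        if (front)
                memset(page, 0, offset);
        else
                memset(page + offset, 0, len);
}

int main(void)
{
        char page[PAGE_CACHE_SIZE];

        memset(page, 0xff, sizeof(page));
        /* truncate-style call: zero from offset 1000 to the end of the page */
        zero_partial_page(page, 1000, 0, 0);
        printf("%d %d\n", page[999] & 0xff, page[1000] & 0xff);  /* 255 0 */
        return 0;
}

The truncate path later in this patch uses the non-front variant with len == 0, i.e. btrfs_truncate_page(inode, inode->i_size, 0, 0).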
@@ -3577,6 +4206,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct extent_map *em = NULL;
        struct extent_state *cached_state = NULL;
+       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        u64 mask = root->sectorsize - 1;
        u64 hole_start = (oldsize + mask) & ~mask;
        u64 block_end = (size + mask) & ~mask;
@@ -3613,7 +4243,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                last_byte = min(extent_map_end(em), block_end);
                last_byte = (last_byte + mask) & ~mask;
                if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
-                       u64 hint_byte = 0;
+                       struct extent_map *hole_em;
                        hole_size = last_byte - cur_offset;
 
                        trans = btrfs_start_transaction(root, 3);
@@ -3622,9 +4252,9 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                                break;
                        }
 
-                       err = btrfs_drop_extents(trans, inode, cur_offset,
-                                                cur_offset + hole_size,
-                                                &hint_byte, 1);
+                       err = btrfs_drop_extents(trans, root, inode,
+                                                cur_offset,
+                                                cur_offset + hole_size, 1);
                        if (err) {
                                btrfs_abort_transaction(trans, root, err);
                                btrfs_end_transaction(trans, root);
@@ -3641,9 +4271,39 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                                break;
                        }
 
-                       btrfs_drop_extent_cache(inode, hole_start,
-                                       last_byte - 1, 0);
+                       btrfs_drop_extent_cache(inode, cur_offset,
+                                               cur_offset + hole_size - 1, 0);
+                       hole_em = alloc_extent_map();
+                       if (!hole_em) {
+                               set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                                       &BTRFS_I(inode)->runtime_flags);
+                               goto next;
+                       }
+                       hole_em->start = cur_offset;
+                       hole_em->len = hole_size;
+                       hole_em->orig_start = cur_offset;
+
+                       hole_em->block_start = EXTENT_MAP_HOLE;
+                       hole_em->block_len = 0;
+                       hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
+                       hole_em->compress_type = BTRFS_COMPRESS_NONE;
+                       hole_em->generation = trans->transid;
 
+                       while (1) {
+                               write_lock(&em_tree->lock);
+                               err = add_extent_mapping(em_tree, hole_em);
+                               if (!err)
+                                       list_move(&hole_em->list,
+                                                 &em_tree->modified_extents);
+                               write_unlock(&em_tree->lock);
+                               if (err != -EEXIST)
+                                       break;
+                               btrfs_drop_extent_cache(inode, cur_offset,
+                                                       cur_offset +
+                                                       hole_size - 1, 0);
+                       }
+                       free_extent_map(hole_em);
+next:
                        btrfs_update_inode(trans, root, inode);
                        btrfs_end_transaction(trans, root);
                }
@@ -3768,26 +4428,22 @@ void btrfs_evict_inode(struct inode *inode)
                goto no_delete;
        }
 
-       rsv = btrfs_alloc_block_rsv(root);
+       rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
        if (!rsv) {
                btrfs_orphan_del(NULL, inode);
                goto no_delete;
        }
        rsv->size = min_size;
+       rsv->failfast = 1;
        global_rsv = &root->fs_info->global_block_rsv;
 
        btrfs_i_size_write(inode, 0);
 
        /*
-        * This is a bit simpler than btrfs_truncate since
-        *
-        * 1) We've already reserved our space for our orphan item in the
-        *    unlink.
-        * 2) We're going to delete the inode item, so we don't need to update
-        *    it at all.
-        *
-        * So we just need to reserve some slack space in case we add bytes when
-        * doing the truncate.
+        * This is a bit simpler than btrfs_truncate since we've already
+        * reserved our space for our orphan item in the unlink, so we just
+        * need to reserve some slack space in case we add bytes and update
+        * inode item when doing the truncate.
         */
        while (1) {
                ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size);
@@ -3808,7 +4464,7 @@ void btrfs_evict_inode(struct inode *inode)
                        goto no_delete;
                }
 
-               trans = btrfs_start_transaction(root, 0);
+               trans = btrfs_start_transaction_noflush(root, 1);
                if (IS_ERR(trans)) {
                        btrfs_orphan_del(NULL, inode);
                        btrfs_free_block_rsv(root, rsv);
@@ -3818,9 +4474,13 @@ void btrfs_evict_inode(struct inode *inode)
                trans->block_rsv = rsv;
 
                ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
-               if (ret != -EAGAIN)
+               if (ret != -ENOSPC)
                        break;
 
+               trans->block_rsv = &root->fs_info->trans_block_rsv;
+               ret = btrfs_update_inode(trans, root, inode);
+               BUG_ON(ret);
+
                nr = trans->blocks_used;
                btrfs_end_transaction(trans, root);
                trans = NULL;
@@ -4470,10 +5130,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
                        trans = btrfs_join_transaction(root);
                if (IS_ERR(trans))
                        return PTR_ERR(trans);
-               if (nolock)
-                       ret = btrfs_end_transaction_nolock(trans, root);
-               else
-                       ret = btrfs_commit_transaction(trans, root);
+               ret = btrfs_commit_transaction(trans, root);
        }
        return ret;
 }
@@ -4671,6 +5328,14 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        BTRFS_I(inode)->generation = trans->transid;
        inode->i_generation = BTRFS_I(inode)->generation;
 
+       /*
+        * We could have gotten an inode number from somebody who was fsynced
+        * and then removed in this same transaction, so let's just set full
+        * sync since it will be a full sync anyway and this will blow away the
+        * old info in the log.
+        */
+       set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
+
        if (S_ISDIR(mode))
                owner = 0;
        else
@@ -4680,6 +5345,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
        key[0].offset = 0;
 
+       /*
+        * Start new inodes with an inode_ref. This is slightly more
+        * efficient for small numbers of hard links since they will
+        * be packed into one item. Extended refs will kick in if we
+        * add more hard links than can fit in the ref item.
+        */
        key[1].objectid = objectid;
        btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
        key[1].offset = ref_objectid;
@@ -4986,7 +5657,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
        if (root->objectid != BTRFS_I(inode)->root->objectid)
                return -EXDEV;
 
-       if (inode->i_nlink == ~0U)
+       if (inode->i_nlink >= BTRFS_LINK_MAX)
                return -EMLINK;
 
        err = btrfs_set_inode_index(dir, &index);
@@ -5450,7 +6121,8 @@ insert:
        write_unlock(&em_tree->lock);
 out:
 
-       trace_btrfs_get_extent(root, em);
+       if (em)
+               trace_btrfs_get_extent(root, em);
 
        if (path)
                btrfs_free_path(path);
@@ -5836,6 +6508,48 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
        return ret;
 }
 
+static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
+                                          u64 len, u64 orig_start,
+                                          u64 block_start, u64 block_len,
+                                          int type)
+{
+       struct extent_map_tree *em_tree;
+       struct extent_map *em;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       int ret;
+
+       em_tree = &BTRFS_I(inode)->extent_tree;
+       em = alloc_extent_map();
+       if (!em)
+               return ERR_PTR(-ENOMEM);
+
+       em->start = start;
+       em->orig_start = orig_start;
+       em->len = len;
+       em->block_len = block_len;
+       em->block_start = block_start;
+       em->bdev = root->fs_info->fs_devices->latest_bdev;
+       set_bit(EXTENT_FLAG_PINNED, &em->flags);
+       if (type == BTRFS_ORDERED_PREALLOC)
+               set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+
+       do {
+               btrfs_drop_extent_cache(inode, em->start,
+                               em->start + em->len - 1, 0);
+               write_lock(&em_tree->lock);
+               ret = add_extent_mapping(em_tree, em);
+               write_unlock(&em_tree->lock);
+       } while (ret == -EEXIST);
+
+       if (ret) {
+               free_extent_map(em);
+               return ERR_PTR(ret);
+       }
+
+       return em;
+}
+
+
 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                                   struct buffer_head *bh_result, int create)
 {
@@ -5950,6 +6664,19 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                        goto must_cow;
 
                if (can_nocow_odirect(trans, inode, start, len) == 1) {
+                       u64 orig_start = em->start;
+
+                       if (type == BTRFS_ORDERED_PREALLOC) {
+                               free_extent_map(em);
+                               em = create_pinned_em(inode, start, len,
+                                                      orig_start,
+                                                      block_start, len, type);
+                               if (IS_ERR(em)) {
+                                       btrfs_end_transaction(trans, root);
+                                       goto unlock_err;
+                               }
+                       }
+
                        ret = btrfs_add_ordered_extent_dio(inode, start,
                                           block_start, len, len, type);
                        btrfs_end_transaction(trans, root);
@@ -5999,7 +6726,8 @@ unlock:
        if (lockstart < lockend) {
                if (create && len < lockend - lockstart) {
                        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-                                        lockstart + len - 1, unlock_bits, 1, 0,
+                                        lockstart + len - 1,
+                                        unlock_bits | EXTENT_DEFRAG, 1, 0,
                                         &cached_state, GFP_NOFS);
                        /*
                         * Beside unlock, we also need to cleanup reserved space
@@ -6007,8 +6735,8 @@ unlock:
                         */
                        clear_extent_bit(&BTRFS_I(inode)->io_tree,
                                         lockstart + len, lockend,
-                                        unlock_bits | EXTENT_DO_ACCOUNTING,
-                                        1, 0, NULL, GFP_NOFS);
+                                        unlock_bits | EXTENT_DO_ACCOUNTING |
+                                        EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS);
                } else {
                        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
                                         lockend, unlock_bits, 1, 0,
@@ -6573,8 +7301,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
                 */
                clear_extent_bit(tree, page_start, page_end,
                                 EXTENT_DIRTY | EXTENT_DELALLOC |
-                                EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0,
-                                &cached_state, GFP_NOFS);
+                                EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+                                EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS);
                /*
                 * whoever cleared the private bit is responsible
                 * for the finish_ordered_io
@@ -6590,7 +7318,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
        }
        clear_extent_bit(tree, page_start, page_end,
                 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
-                EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS);
+                EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
+                &cached_state, GFP_NOFS);
        __btrfs_releasepage(page, GFP_NOFS);
 
        ClearPageChecked(page);
@@ -6687,7 +7416,8 @@ again:
         * prepare_pages in the normal write path.
         */
        clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
-                         EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
+                         EXTENT_DIRTY | EXTENT_DELALLOC |
+                         EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
                          0, 0, &cached_state, GFP_NOFS);
 
        ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
@@ -6718,6 +7448,7 @@ again:
 
        BTRFS_I(inode)->last_trans = root->fs_info->generation;
        BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
+       BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
 
        unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
 
@@ -6745,7 +7476,7 @@ static int btrfs_truncate(struct inode *inode)
        u64 mask = root->sectorsize - 1;
        u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
 
-       ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
+       ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
        if (ret)
                return ret;
 
@@ -6788,10 +7519,11 @@ static int btrfs_truncate(struct inode *inode)
         * 3) fs_info->trans_block_rsv - this will have 1 items worth left for
         * updating the inode.
         */
-       rsv = btrfs_alloc_block_rsv(root);
+       rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
        if (!rsv)
                return -ENOMEM;
        rsv->size = min_size;
+       rsv->failfast = 1;
 
        /*
         * 1 for the truncate slack space
@@ -6837,36 +7569,21 @@ static int btrfs_truncate(struct inode *inode)
                                           &BTRFS_I(inode)->runtime_flags))
                btrfs_add_ordered_operation(trans, root, inode);
 
-       while (1) {
-               ret = btrfs_block_rsv_refill(root, rsv, min_size);
-               if (ret) {
-                       /*
-                        * This can only happen with the original transaction we
-                        * started above, every other time we shouldn't have a
-                        * transaction started yet.
-                        */
-                       if (ret == -EAGAIN)
-                               goto end_trans;
-                       err = ret;
-                       break;
-               }
-
-               if (!trans) {
-                       /* Just need the 1 for updating the inode */
-                       trans = btrfs_start_transaction(root, 1);
-                       if (IS_ERR(trans)) {
-                               ret = err = PTR_ERR(trans);
-                               trans = NULL;
-                               break;
-                       }
-               }
-
-               trans->block_rsv = rsv;
+       /*
+        * So if we truncate and then write and fsync we normally would just
+        * write the extents that changed, which is a problem if we need to
+        * first truncate that entire inode.  So set this flag so we write out
+        * all of the extents in the inode to the sync log so we're completely
+        * safe.
+        */
+       set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
+       trans->block_rsv = rsv;
 
+       while (1) {
                ret = btrfs_truncate_inode_items(trans, root, inode,
                                                 inode->i_size,
                                                 BTRFS_EXTENT_DATA_KEY);
-               if (ret != -EAGAIN) {
+               if (ret != -ENOSPC) {
                        err = ret;
                        break;
                }
@@ -6877,11 +7594,22 @@ static int btrfs_truncate(struct inode *inode)
                        err = ret;
                        break;
                }
-end_trans:
+
                nr = trans->blocks_used;
                btrfs_end_transaction(trans, root);
-               trans = NULL;
                btrfs_btree_balance_dirty(root, nr);
+
+               trans = btrfs_start_transaction(root, 2);
+               if (IS_ERR(trans)) {
+                       ret = err = PTR_ERR(trans);
+                       trans = NULL;
+                       break;
+               }
+
+               ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
+                                             rsv, min_size);
+               BUG_ON(ret);    /* shouldn't happen */
+               trans->block_rsv = rsv;
        }
 
        if (ret == 0 && inode->i_nlink > 0) {
@@ -6965,6 +7693,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->csum_bytes = 0;
        ei->index_cnt = (u64)-1;
        ei->last_unlink_trans = 0;
+       ei->last_log_commit = 0;
 
        spin_lock_init(&ei->lock);
        ei->outstanding_extents = 0;
@@ -7090,31 +7819,31 @@ void btrfs_destroy_cachep(void)
 
 int btrfs_init_cachep(void)
 {
-       btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
+       btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
                        sizeof(struct btrfs_inode), 0,
                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
        if (!btrfs_inode_cachep)
                goto fail;
 
-       btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
+       btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
                        sizeof(struct btrfs_trans_handle), 0,
                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_trans_handle_cachep)
                goto fail;
 
-       btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
+       btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction",
                        sizeof(struct btrfs_transaction), 0,
                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_transaction_cachep)
                goto fail;
 
-       btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
+       btrfs_path_cachep = kmem_cache_create("btrfs_path",
                        sizeof(struct btrfs_path), 0,
                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_path_cachep)
                goto fail;
 
-       btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache",
+       btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
                        sizeof(struct btrfs_free_space), 0,
                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_free_space_cachep)
@@ -7508,6 +8237,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                                       loff_t actual_len, u64 *alloc_hint,
                                       struct btrfs_trans_handle *trans)
 {
+       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_map *em;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_key ins;
        u64 cur_offset = start;
@@ -7548,6 +8279,37 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                btrfs_drop_extent_cache(inode, cur_offset,
                                        cur_offset + ins.offset -1, 0);
 
+               em = alloc_extent_map();
+               if (!em) {
+                       set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                               &BTRFS_I(inode)->runtime_flags);
+                       goto next;
+               }
+
+               em->start = cur_offset;
+               em->orig_start = cur_offset;
+               em->len = ins.offset;
+               em->block_start = ins.objectid;
+               em->block_len = ins.offset;
+               em->bdev = root->fs_info->fs_devices->latest_bdev;
+               set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+               em->generation = trans->transid;
+
+               while (1) {
+                       write_lock(&em_tree->lock);
+                       ret = add_extent_mapping(em_tree, em);
+                       if (!ret)
+                               list_move(&em->list,
+                                         &em_tree->modified_extents);
+                       write_unlock(&em_tree->lock);
+                       if (ret != -EEXIST)
+                               break;
+                       btrfs_drop_extent_cache(inode, cur_offset,
+                                               cur_offset + ins.offset - 1,
+                                               0);
+               }
+               free_extent_map(em);
+next:
                num_bytes -= ins.offset;
                cur_offset += ins.offset;
                *alloc_hint = ins.objectid + ins.offset;