#include "locking.h"
#include "free-space-cache.h"
#include "inode-map.h"
+#include "backref.h"
struct btrfs_iget_args {
u64 ino;
u64 inline_len = actual_end - start;
u64 aligned_end = (end + root->sectorsize - 1) &
~((u64)root->sectorsize - 1);
- u64 hint_byte;
u64 data_len = inline_len;
int ret;
return 1;
}
- ret = btrfs_drop_extents(trans, inode, start, aligned_end,
- &hint_byte, 1);
+ ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1);
if (ret)
return ret;
async_extent->compressed_size,
async_extent->compressed_size,
0, alloc_hint, &ins, 1);
- if (ret)
+ if (ret && ret != -ENOSPC)
btrfs_abort_transaction(trans, root, ret);
btrfs_end_transaction(trans, root);
}
em->block_start = disk_bytenr;
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
+ set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
while (1) {
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
}
error:
- if (nolock) {
- err = btrfs_end_transaction_nolock(trans, root);
- } else {
- err = btrfs_end_transaction(trans, root);
- }
+ err = btrfs_end_transaction(trans, root);
if (!ret)
ret = err;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key ins;
- u64 hint;
int ret;
path = btrfs_alloc_path();
* the caller is expected to unpin it and allow it to be merged
* with the others.
*/
- ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
- &hint, 0);
+ ret = btrfs_drop_extents(trans, root, inode, file_pos,
+ file_pos + num_bytes, 0);
if (ret)
goto out;
btrfs_set_file_extent_encryption(leaf, fi, encryption);
btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
- btrfs_unlock_up_safe(path, 1);
- btrfs_set_lock_blocking(leaf);
-
btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(path);
inode_add_bytes(inode, num_bytes);
return ret;
}
+/* snapshot-aware defrag */
+struct sa_defrag_extent_backref {
+ struct rb_node node;
+ struct old_sa_defrag_extent *old;
+ u64 root_id;
+ u64 inum;
+ u64 file_pos;
+ u64 extent_offset;
+ u64 num_bytes;
+ u64 generation;
+};
+
+struct old_sa_defrag_extent {
+ struct list_head list;
+ struct new_sa_defrag_extent *new;
+
+ u64 extent_offset;
+ u64 bytenr;
+ u64 offset;
+ u64 len;
+ int count;
+};
+
+struct new_sa_defrag_extent {
+ struct rb_root root;
+ struct list_head head;
+ struct btrfs_path *path;
+ struct inode *inode;
+ u64 file_pos;
+ u64 len;
+ u64 bytenr;
+ u64 disk_len;
+ u8 compress_type;
+};
+
+static int backref_comp(struct sa_defrag_extent_backref *b1,
+ struct sa_defrag_extent_backref *b2)
+{
+ if (b1->root_id < b2->root_id)
+ return -1;
+ else if (b1->root_id > b2->root_id)
+ return 1;
+
+ if (b1->inum < b2->inum)
+ return -1;
+ else if (b1->inum > b2->inum)
+ return 1;
+
+ if (b1->file_pos < b2->file_pos)
+ return -1;
+ else if (b1->file_pos > b2->file_pos)
+ return 1;
+
+ WARN_ON(1);
+ return 0;
+}
+
+static void backref_insert(struct rb_root *root,
+ struct sa_defrag_extent_backref *backref)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct sa_defrag_extent_backref *entry;
+ int ret;
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
+
+ ret = backref_comp(backref, entry);
+ if (ret < 0)
+ p = &(*p)->rb_left;
+ else if (ret > 0)
+ p = &(*p)->rb_right;
+ else
+ BUG_ON(1);
+ }
+
+ rb_link_node(&backref->node, parent, p);
+ rb_insert_color(&backref->node, root);
+}
+
+/*
+ * Note the backref might has changed, and in this case we just return 0.
+ */
+static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
+ void *ctx)
+{
+ struct btrfs_file_extent_item *extent;
+ struct btrfs_fs_info *fs_info;
+ struct old_sa_defrag_extent *old = ctx;
+ struct new_sa_defrag_extent *new = old->new;
+ struct btrfs_path *path = new->path;
+ struct btrfs_key key;
+ struct btrfs_root *root;
+ struct sa_defrag_extent_backref *backref;
+ struct extent_buffer *leaf;
+ struct inode *inode = new->inode;
+ int slot;
+ int ret;
+ u64 extent_offset;
+ u64 num_bytes;
+
+ if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
+ inum == btrfs_ino(inode))
+ return 0;
+
+ key.objectid = root_id;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ fs_info = BTRFS_I(inode)->root->fs_info;
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ if (PTR_ERR(root) == -ENOENT)
+ return 0;
+ WARN_ON(1);
+ pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
+ inum, offset, root_id);
+ return PTR_ERR(root);
+ }
+
+ key.objectid = inum;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ if (offset > (u64)-1 << 32)
+ key.offset = 0;
+ else
+ key.offset = offset;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ WARN_ON(1);
+ return ret;
+ }
+
+ while (1) {
+ cond_resched();
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ continue;
+ }
+
+ path->slots[0]++;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ if (key.objectid != inum || key.type != BTRFS_EXTENT_DATA_KEY)
+ continue;
+
+ extent = btrfs_item_ptr(leaf, slot,
+ struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
+ continue;
+
+ if (key.offset - btrfs_file_extent_offset(leaf, extent) !=
+ offset)
+ continue;
+
+ break;
+ }
+
+ extent_offset = btrfs_file_extent_offset(leaf, extent);
+ num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
+
+ if (extent_offset >= old->extent_offset + old->offset + old->len ||
+ extent_offset + num_bytes < old->extent_offset + old->offset)
+ goto out;
+
+ backref = kmalloc(sizeof(*backref), GFP_NOFS);
+ if (!backref) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ backref->root_id = root_id;
+ backref->inum = inum;
+ backref->file_pos = offset + extent_offset;
+ backref->num_bytes = num_bytes;
+ backref->extent_offset = extent_offset;
+ backref->generation = btrfs_file_extent_generation(leaf, extent);
+ backref->old = old;
+ backref_insert(&new->root, backref);
+ old->count++;
+out:
+ btrfs_release_path(path);
+ WARN_ON(ret);
+ return ret;
+}
+
+static noinline bool record_extent_backrefs(struct btrfs_path *path,
+ struct new_sa_defrag_extent *new)
+{
+ struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
+ struct old_sa_defrag_extent *old, *tmp;
+ int ret;
+
+ new->path = path;
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ ret = iterate_inodes_from_logical(old->bytenr, fs_info,
+ path, record_one_backref,
+ old);
+ WARN_ON(ret < 0);
+
+ /* no backref to be processed for this extent */
+ if (!old->count) {
+ list_del(&old->list);
+ kfree(old);
+ }
+ }
+
+ if (list_empty(&new->head))
+ return false;
+
+ return true;
+}
+
+/*
+ * Note the backref might has changed, and in this case we just return 0.
+ */
+static noinline int relink_extent_backref(struct btrfs_path *path,
+ struct sa_defrag_extent_backref *prev,
+ struct sa_defrag_extent_backref *backref)
+{
+ struct btrfs_file_extent_item *extent;
+ struct btrfs_file_extent_item *item;
+ struct btrfs_ordered_extent *ordered;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct old_sa_defrag_extent *old = backref->old;
+ struct new_sa_defrag_extent *new = old->new;
+ struct inode *src_inode = new->inode;
+ struct inode *inode;
+ struct extent_state *cached = NULL;
+ int ret = 0;
+ u64 start;
+ u64 len;
+ bool merge = false;
+
+ if (prev && prev->root_id == backref->root_id &&
+ prev->inum == backref->inum &&
+ prev->extent_offset == backref->extent_offset &&
+ prev->file_pos + prev->num_bytes == backref->file_pos)
+ merge = true;
+
+ key.objectid = backref->root_id;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ fs_info = BTRFS_I(src_inode)->root->fs_info;
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ if (PTR_ERR(root) == -ENOENT)
+ return 0;
+ return PTR_ERR(root);
+ }
+
+ key.objectid = backref->inum;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
+ if (inode && !IS_ERR(inode))
+ iput(inode);
+ return 0;
+ }
+
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, backref->file_pos,
+ backref->file_pos + backref->num_bytes, 0, &cached);
+
+ ordered = btrfs_lookup_first_ordered_extent(inode,
+ backref->file_pos +
+ backref->num_bytes);
+ if (ordered) {
+ btrfs_put_ordered_extent(ordered);
+ goto out_unlock;
+ }
+
+ /*
+ * 1 for drop_extents
+ * 1 for merge clause's search_slot
+ * 1 for insert items
+ */
+ trans = btrfs_start_transaction(root, 3);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out_unlock;
+ }
+
+ key.objectid = backref->inum;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = backref->file_pos;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out_free_path;
+ } else if (ret > 0) {
+ ret = 0;
+ goto out_free_path;
+ }
+
+ extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_generation(path->nodes[0], extent) !=
+ backref->generation)
+ goto out_free_path;
+
+ btrfs_release_path(path);
+
+ start = backref->file_pos;
+ if (backref->extent_offset < old->extent_offset + old->offset)
+ start += old->extent_offset + old->offset -
+ backref->extent_offset;
+
+ len = min(backref->extent_offset + backref->num_bytes,
+ old->extent_offset + old->offset + old->len);
+ len -= max(backref->extent_offset, old->extent_offset + old->offset);
+
+ ret = btrfs_drop_extents(trans, BTRFS_I(inode)->root, inode, start,
+ start + len, 1);
+ if (ret)
+ goto out_free_path;
+again:
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = start;
+
+ if (merge) {
+ struct btrfs_file_extent_item *fi;
+ u64 extent_len;
+ struct btrfs_key found_key;
+
+ ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+ if (ret < 0)
+ goto out_free_path;
+
+ path->slots[0]--;
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_len = btrfs_file_extent_num_bytes(leaf, fi);
+
+ if (btrfs_file_extent_disk_bytenr(leaf, fi) == new->bytenr &&
+ btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_REG &&
+ !btrfs_file_extent_compression(leaf, fi) &&
+ !btrfs_file_extent_encryption(leaf, fi) &&
+ !btrfs_file_extent_other_encoding(leaf, fi) &&
+ extent_len + found_key.offset == start) {
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_len + len);
+ btrfs_mark_buffer_dirty(leaf);
+ inode_add_bytes(inode, len);
+
+ ret = 1;
+ goto out_free_path;
+ } else {
+ merge = false;
+ btrfs_release_path(path);
+ goto again;
+ }
+ }
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ sizeof(*extent));
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_free_path;
+ }
+
+ leaf = path->nodes[0];
+ item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
+ btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
+ btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
+ btrfs_set_file_extent_num_bytes(leaf, item, len);
+ btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
+ btrfs_set_file_extent_generation(leaf, item, trans->transid);
+ btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
+ btrfs_set_file_extent_compression(leaf, item, new->compress_type);
+ btrfs_set_file_extent_encryption(leaf, item, 0);
+ btrfs_set_file_extent_other_encoding(leaf, item, 0);
+
+ btrfs_mark_buffer_dirty(leaf);
+ inode_add_bytes(inode, len);
+
+ ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
+ new->disk_len, 0,
+ backref->root_id, backref->inum,
+ start, 0);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_free_path;
+ }
+
+ ret = 1;
+out_free_path:
+ btrfs_release_path(path);
+ btrfs_end_transaction(trans, root);
+out_unlock:
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, backref->file_pos,
+ backref->file_pos + backref->num_bytes,
+ &cached, GFP_NOFS);
+ iput(inode);
+ return ret;
+}
+
+static void relink_file_extents(struct new_sa_defrag_extent *new)
+{
+ struct btrfs_path *path;
+ struct old_sa_defrag_extent *old, *tmp;
+ struct sa_defrag_extent_backref *backref;
+ struct sa_defrag_extent_backref *prev = NULL;
+ struct inode *inode;
+ struct btrfs_root *root;
+ struct rb_node *node;
+ struct extent_state *cached = NULL;
+ int ret;
+
+ inode = new->inode;
+ root = BTRFS_I(inode)->root;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return;
+
+ if (!record_extent_backrefs(path, new)) {
+ btrfs_free_path(path);
+ goto out;
+ }
+ btrfs_release_path(path);
+
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, new->file_pos,
+ new->file_pos + new->len, 0, &cached);
+
+ while (1) {
+ node = rb_first(&new->root);
+ if (!node)
+ break;
+ rb_erase(node, &new->root);
+
+ backref = rb_entry(node, struct sa_defrag_extent_backref, node);
+
+ ret = relink_extent_backref(path, prev, backref);
+ WARN_ON(ret < 0);
+
+ kfree(prev);
+
+ if (ret == 1)
+ prev = backref;
+ else
+ prev = NULL;
+ cond_resched();
+ }
+
+ kfree(prev);
+
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, new->file_pos,
+ new->file_pos + new->len, &cached, GFP_NOFS);
+
+ btrfs_free_path(path);
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+out:
+ atomic_dec(&root->fs_info->defrag_running);
+ wake_up(&root->fs_info->transaction_wait);
+
+ kfree(new);
+}
+
+static struct new_sa_defrag_extent *
+record_old_file_extents(struct inode *inode,
+ struct btrfs_ordered_extent *ordered)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct old_sa_defrag_extent *old, *tmp;
+ struct new_sa_defrag_extent *new;
+ int ret;
+
+ new = kmalloc(sizeof(*new), GFP_NOFS);
+ if (!new)
+ return NULL;
+
+ new->inode = inode;
+ new->file_pos = ordered->file_offset;
+ new->len = ordered->len;
+ new->bytenr = ordered->start;
+ new->disk_len = ordered->disk_len;
+ new->compress_type = ordered->compress_type;
+ new->root = RB_ROOT;
+ INIT_LIST_HEAD(&new->head);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ goto out_kfree;
+
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = new->file_pos;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out_free_path;
+ if (ret > 0 && path->slots[0] > 0)
+ path->slots[0]--;
+
+ /* find out all the old extents for the file range */
+ while (1) {
+ struct btrfs_file_extent_item *extent;
+ struct extent_buffer *l;
+ int slot;
+ u64 num_bytes;
+ u64 offset;
+ u64 end;
+
+ l = path->nodes[0];
+ slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(l)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out_free_list;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(l, &key, slot);
+
+ if (key.objectid != btrfs_ino(inode))
+ break;
+ if (key.type != BTRFS_EXTENT_DATA_KEY)
+ break;
+ if (key.offset >= new->file_pos + new->len)
+ break;
+
+ extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);
+
+ num_bytes = btrfs_file_extent_num_bytes(l, extent);
+ if (key.offset + num_bytes < new->file_pos)
+ goto next;
+
+ old = kmalloc(sizeof(*old), GFP_NOFS);
+ if (!old)
+ goto out_free_list;
+
+ offset = max(new->file_pos, key.offset);
+ end = min(new->file_pos + new->len, key.offset + num_bytes);
+
+ old->bytenr = btrfs_file_extent_disk_bytenr(l, extent);
+ old->extent_offset = btrfs_file_extent_offset(l, extent);
+ old->offset = offset - key.offset;
+ old->len = end - offset;
+ old->new = new;
+ old->count = 0;
+ list_add_tail(&old->list, &new->head);
+next:
+ path->slots[0]++;
+ cond_resched();
+ }
+
+ btrfs_free_path(path);
+ atomic_inc(&root->fs_info->defrag_running);
+
+ return new;
+
+out_free_list:
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+out_free_path:
+ btrfs_free_path(path);
+out_kfree:
+ kfree(new);
+ return NULL;
+}
+
/*
* helper function for btrfs_finish_ordered_io, this
* just reads in some of the csum leaves to prime them into ram
struct btrfs_trans_handle *trans = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_state *cached_state = NULL;
+ struct new_sa_defrag_extent *new = NULL;
int compress_type = 0;
int ret;
bool nolock;
ordered_extent->file_offset + ordered_extent->len - 1,
0, &cached_state);
+ ret = test_range_bit(io_tree, ordered_extent->file_offset,
+ ordered_extent->file_offset + ordered_extent->len - 1,
+ EXTENT_DEFRAG, 1, cached_state);
+ if (ret && btrfs_root_last_snapshot(&root->root_item) >=
+ BTRFS_I(inode)->generation) {
+ /* the inode is shared */
+ new = record_old_file_extents(inode, ordered_extent);
+ }
+
if (nolock)
trans = btrfs_join_transaction_nolock(root);
else
ordered_extent->len,
compress_type, 0, 0,
BTRFS_FILE_EXTENT_REG);
- unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
- ordered_extent->file_offset,
- ordered_extent->len);
}
-
+ unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
+ ordered_extent->file_offset, ordered_extent->len,
+ trans->transid);
if (ret < 0) {
btrfs_abort_transaction(trans, root, ret);
goto out_unlock;
btrfs_abort_transaction(trans, root, ret);
goto out_unlock;
}
+ } else {
+ btrfs_set_inode_last_trans(trans, inode);
}
ret = 0;
out_unlock:
out:
if (root != root->fs_info->tree_root)
btrfs_delalloc_release_metadata(inode, ordered_extent->len);
- if (trans) {
- if (nolock)
- btrfs_end_transaction_nolock(trans, root);
- else
- btrfs_end_transaction(trans, root);
- }
+ if (trans)
+ btrfs_end_transaction(trans, root);
if (ret)
clear_extent_uptodate(io_tree, ordered_extent->file_offset,
*/
btrfs_remove_ordered_extent(inode, ordered_extent);
+ /* for snapshot-aware defrag */
+ if (new)
+ relink_file_extents(new);
+
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
if (empty)
return;
- down_read(&root->fs_info->cleanup_work_sem);
spin_lock(&fs_info->delayed_iput_lock);
list_splice_init(&fs_info->delayed_iputs, &list);
spin_unlock(&fs_info->delayed_iput_lock);
iput(delayed->inode);
kfree(delayed);
}
- up_read(&root->fs_info->cleanup_work_sem);
}
enum btrfs_orphan_cleanup_state {
int ret;
if (!root->orphan_block_rsv) {
- block_rsv = btrfs_alloc_block_rsv(root);
+ block_rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
if (!block_rsv)
return -ENOMEM;
}
insert = 1;
#endif
insert = 1;
- atomic_dec(&root->orphan_inodes);
+ atomic_inc(&root->orphan_inodes);
}
if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
+ BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
+
+ /*
+ * If we were modified in the current generation and evicted from memory
+ * and then re-read we need to do a full sync since we don't have any
+ * idea about which extents were modified before we were evicted from
+ * cache.
+ */
+ if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ &BTRFS_I(inode)->runtime_flags);
+
inode->i_version = btrfs_inode_sequence(leaf, inode_item);
inode->i_generation = BTRFS_I(inode)->generation;
inode->i_rdev = 0;
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_path *path;
- struct btrfs_inode_ref *ref;
struct btrfs_dir_item *di;
struct inode *inode = dentry->d_inode;
u64 index;
}
btrfs_release_path(path);
- ref = btrfs_lookup_inode_ref(trans, root, path,
- dentry->d_name.name, dentry->d_name.len,
- ino, dir_ino, 0);
- if (IS_ERR(ref)) {
- err = PTR_ERR(ref);
+ ret = btrfs_get_inode_ref_index(trans, root, path, dentry->d_name.name,
+ dentry->d_name.len, ino, dir_ino, 0,
+ &index);
+ if (ret) {
+ err = ret;
goto out;
}
- BUG_ON(!ref); /* Logic error */
+
if (check_path_shared(root, path))
goto out;
- index = btrfs_inode_ref_index(path->nodes[0], ref);
+
btrfs_release_path(path);
/*
static void __unlink_end_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- if (trans->block_rsv == &root->fs_info->global_block_rsv) {
+ if (trans->block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL) {
btrfs_block_rsv_release(root, trans->block_rsv,
trans->bytes_reserved);
trans->block_rsv = &root->fs_info->trans_block_rsv;
struct btrfs_trans_handle *trans;
unsigned long nr = 0;
- if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
- btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
+ if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
+ if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
+ return -EPERM;
trans = __unlink_start_trans(dir, dentry);
if (IS_ERR(trans))
return -ENOMEM;
path->reada = -1;
+ /*
+ * We want to drop from the next block forward in case this new size is
+ * not block aligned since we will be keeping the last block of the
+ * extent just the way it is.
+ */
if (root->ref_cows || root == root->fs_info->tree_root)
- btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
+ btrfs_drop_extent_cache(inode, (new_size + mask) & (~mask), (u64)-1, 0);
/*
* This function is also used to drop the items in the log tree before
if (path->slots[0] == 0 ||
path->slots[0] != pending_del_slot) {
- if (root->ref_cows &&
- BTRFS_I(inode)->location.objectid !=
- BTRFS_FREE_INO_OBJECTID) {
- err = -EAGAIN;
- goto out;
- }
if (pending_del_nr) {
ret = btrfs_del_items(trans, root, path,
pending_del_slot,
}
/*
- * taken from block_truncate_page, but does cow as it zeros out
- * any bytes left in the last page in the file.
+ * btrfs_truncate_page - read, zero a chunk and write a page
+ * @inode - inode that we're zeroing
+ * @from - the offset to start zeroing
+ * @len - the length to zero, 0 to zero the entire range respective to the
+ * offset
+ * @front - zero up to the offset instead of from the offset on
+ *
+ * This will find the page for the "from" offset and cow the page and zero the
+ * part we want to zero. This is used with truncate and hole punching.
*/
-static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
+int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
+ int front)
{
- struct inode *inode = mapping->host;
+ struct address_space *mapping = inode->i_mapping;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_ordered_extent *ordered;
u64 page_start;
u64 page_end;
- if ((offset & (blocksize - 1)) == 0)
+ if ((offset & (blocksize - 1)) == 0 &&
+ (!len || ((len & (blocksize - 1)) == 0)))
goto out;
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
if (ret)
}
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
- EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state, GFP_NOFS);
ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
ret = 0;
if (offset != PAGE_CACHE_SIZE) {
+ if (!len)
+ len = PAGE_CACHE_SIZE - offset;
kaddr = kmap(page);
- memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
+ if (front)
+ memset(kaddr, 0, offset);
+ else
+ memset(kaddr + offset, 0, len);
flush_dcache_page(page);
kunmap(page);
}
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
+ struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
u64 mask = root->sectorsize - 1;
u64 hole_start = (oldsize + mask) & ~mask;
u64 block_end = (size + mask) & ~mask;
last_byte = min(extent_map_end(em), block_end);
last_byte = (last_byte + mask) & ~mask;
if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
- u64 hint_byte = 0;
+ struct extent_map *hole_em;
hole_size = last_byte - cur_offset;
trans = btrfs_start_transaction(root, 3);
break;
}
- err = btrfs_drop_extents(trans, inode, cur_offset,
- cur_offset + hole_size,
- &hint_byte, 1);
+ err = btrfs_drop_extents(trans, root, inode,
+ cur_offset,
+ cur_offset + hole_size, 1);
if (err) {
btrfs_abort_transaction(trans, root, err);
btrfs_end_transaction(trans, root);
break;
}
- btrfs_drop_extent_cache(inode, hole_start,
- last_byte - 1, 0);
+ btrfs_drop_extent_cache(inode, cur_offset,
+ cur_offset + hole_size - 1, 0);
+ hole_em = alloc_extent_map();
+ if (!hole_em) {
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ &BTRFS_I(inode)->runtime_flags);
+ goto next;
+ }
+ hole_em->start = cur_offset;
+ hole_em->len = hole_size;
+ hole_em->orig_start = cur_offset;
+
+ hole_em->block_start = EXTENT_MAP_HOLE;
+ hole_em->block_len = 0;
+ hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
+ hole_em->compress_type = BTRFS_COMPRESS_NONE;
+ hole_em->generation = trans->transid;
+ while (1) {
+ write_lock(&em_tree->lock);
+ err = add_extent_mapping(em_tree, hole_em);
+ if (!err)
+ list_move(&hole_em->list,
+ &em_tree->modified_extents);
+ write_unlock(&em_tree->lock);
+ if (err != -EEXIST)
+ break;
+ btrfs_drop_extent_cache(inode, cur_offset,
+ cur_offset +
+ hole_size - 1, 0);
+ }
+ free_extent_map(hole_em);
+next:
btrfs_update_inode(trans, root, inode);
btrfs_end_transaction(trans, root);
}
goto no_delete;
}
- rsv = btrfs_alloc_block_rsv(root);
+ rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
if (!rsv) {
btrfs_orphan_del(NULL, inode);
goto no_delete;
}
rsv->size = min_size;
+ rsv->failfast = 1;
global_rsv = &root->fs_info->global_block_rsv;
btrfs_i_size_write(inode, 0);
/*
- * This is a bit simpler than btrfs_truncate since
- *
- * 1) We've already reserved our space for our orphan item in the
- * unlink.
- * 2) We're going to delete the inode item, so we don't need to update
- * it at all.
- *
- * So we just need to reserve some slack space in case we add bytes when
- * doing the truncate.
+ * This is a bit simpler than btrfs_truncate since we've already
+ * reserved our space for our orphan item in the unlink, so we just
+ * need to reserve some slack space in case we add bytes and update
+ * inode item when doing the truncate.
*/
while (1) {
ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size);
goto no_delete;
}
- trans = btrfs_start_transaction(root, 0);
+ trans = btrfs_start_transaction_noflush(root, 1);
if (IS_ERR(trans)) {
btrfs_orphan_del(NULL, inode);
btrfs_free_block_rsv(root, rsv);
trans->block_rsv = rsv;
ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
- if (ret != -EAGAIN)
+ if (ret != -ENOSPC)
break;
+ trans->block_rsv = &root->fs_info->trans_block_rsv;
+ ret = btrfs_update_inode(trans, root, inode);
+ BUG_ON(ret);
+
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
trans = NULL;
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
- if (nolock)
- ret = btrfs_end_transaction_nolock(trans, root);
- else
- ret = btrfs_commit_transaction(trans, root);
+ ret = btrfs_commit_transaction(trans, root);
}
return ret;
}
BTRFS_I(inode)->generation = trans->transid;
inode->i_generation = BTRFS_I(inode)->generation;
+ /*
+ * We could have gotten an inode number from somebody who was fsynced
+ * and then removed in this same transaction, so let's just set full
+ * sync since it will be a full sync anyway and this will blow away the
+ * old info in the log.
+ */
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
+
if (S_ISDIR(mode))
owner = 0;
else
btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
key[0].offset = 0;
+ /*
+ * Start new inodes with an inode_ref. This is slightly more
+ * efficient for small numbers of hard links since they will
+ * be packed into one item. Extended refs will kick in if we
+ * add more hard links than can fit in the ref item.
+ */
key[1].objectid = objectid;
btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
key[1].offset = ref_objectid;
if (root->objectid != BTRFS_I(inode)->root->objectid)
return -EXDEV;
- if (inode->i_nlink == ~0U)
+ if (inode->i_nlink >= BTRFS_LINK_MAX)
return -EMLINK;
err = btrfs_set_inode_index(dir, &index);
write_unlock(&em_tree->lock);
out:
- trace_btrfs_get_extent(root, em);
+ if (em)
+ trace_btrfs_get_extent(root, em);
if (path)
btrfs_free_path(path);
return ret;
}
+static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
+ u64 len, u64 orig_start,
+ u64 block_start, u64 block_len,
+ int type)
+{
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret;
+
+ em_tree = &BTRFS_I(inode)->extent_tree;
+ em = alloc_extent_map();
+ if (!em)
+ return ERR_PTR(-ENOMEM);
+
+ em->start = start;
+ em->orig_start = orig_start;
+ em->len = len;
+ em->block_len = block_len;
+ em->block_start = block_start;
+ em->bdev = root->fs_info->fs_devices->latest_bdev;
+ set_bit(EXTENT_FLAG_PINNED, &em->flags);
+ if (type == BTRFS_ORDERED_PREALLOC)
+ set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+
+ do {
+ btrfs_drop_extent_cache(inode, em->start,
+ em->start + em->len - 1, 0);
+ write_lock(&em_tree->lock);
+ ret = add_extent_mapping(em_tree, em);
+ write_unlock(&em_tree->lock);
+ } while (ret == -EEXIST);
+
+ if (ret) {
+ free_extent_map(em);
+ return ERR_PTR(ret);
+ }
+
+ return em;
+}
+
+
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
goto must_cow;
if (can_nocow_odirect(trans, inode, start, len) == 1) {
+ u64 orig_start = em->start;
+
+ if (type == BTRFS_ORDERED_PREALLOC) {
+ free_extent_map(em);
+ em = create_pinned_em(inode, start, len,
+ orig_start,
+ block_start, len, type);
+ if (IS_ERR(em)) {
+ btrfs_end_transaction(trans, root);
+ goto unlock_err;
+ }
+ }
+
ret = btrfs_add_ordered_extent_dio(inode, start,
block_start, len, len, type);
btrfs_end_transaction(trans, root);
if (lockstart < lockend) {
if (create && len < lockend - lockstart) {
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
- lockstart + len - 1, unlock_bits, 1, 0,
+ lockstart + len - 1,
+ unlock_bits | EXTENT_DEFRAG, 1, 0,
&cached_state, GFP_NOFS);
/*
* Beside unlock, we also need to cleanup reserved space
*/
clear_extent_bit(&BTRFS_I(inode)->io_tree,
lockstart + len, lockend,
- unlock_bits | EXTENT_DO_ACCOUNTING,
- 1, 0, NULL, GFP_NOFS);
+ unlock_bits | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS);
} else {
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, unlock_bits, 1, 0,
*/
clear_extent_bit(tree, page_start, page_end,
EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0,
- &cached_state, GFP_NOFS);
+ EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS);
/*
* whoever cleared the private bit is responsible
* for the finish_ordered_io
}
clear_extent_bit(tree, page_start, page_end,
EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS);
+ EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
+ &cached_state, GFP_NOFS);
__btrfs_releasepage(page, GFP_NOFS);
ClearPageChecked(page);
* prepare_pages in the normal write path.
*/
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
- EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state, GFP_NOFS);
ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
BTRFS_I(inode)->last_trans = root->fs_info->generation;
BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
+ BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
u64 mask = root->sectorsize - 1;
u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
- ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
+ ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
if (ret)
return ret;
* 3) fs_info->trans_block_rsv - this will have 1 items worth left for
* updating the inode.
*/
- rsv = btrfs_alloc_block_rsv(root);
+ rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
if (!rsv)
return -ENOMEM;
rsv->size = min_size;
+ rsv->failfast = 1;
/*
* 1 for the truncate slack space
&BTRFS_I(inode)->runtime_flags))
btrfs_add_ordered_operation(trans, root, inode);
- while (1) {
- ret = btrfs_block_rsv_refill(root, rsv, min_size);
- if (ret) {
- /*
- * This can only happen with the original transaction we
- * started above, every other time we shouldn't have a
- * transaction started yet.
- */
- if (ret == -EAGAIN)
- goto end_trans;
- err = ret;
- break;
- }
-
- if (!trans) {
- /* Just need the 1 for updating the inode */
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- ret = err = PTR_ERR(trans);
- trans = NULL;
- break;
- }
- }
-
- trans->block_rsv = rsv;
+ /*
+ * So if we truncate and then write and fsync we normally would just
+ * write the extents that changed, which is a problem if we need to
+ * first truncate that entire inode. So set this flag so we write out
+ * all of the extents in the inode to the sync log so we're completely
+ * safe.
+ */
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
+ trans->block_rsv = rsv;
+ while (1) {
ret = btrfs_truncate_inode_items(trans, root, inode,
inode->i_size,
BTRFS_EXTENT_DATA_KEY);
- if (ret != -EAGAIN) {
+ if (ret != -ENOSPC) {
err = ret;
break;
}
err = ret;
break;
}
-end_trans:
+
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
- trans = NULL;
btrfs_btree_balance_dirty(root, nr);
+
+ trans = btrfs_start_transaction(root, 2);
+ if (IS_ERR(trans)) {
+ ret = err = PTR_ERR(trans);
+ trans = NULL;
+ break;
+ }
+
+ ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
+ rsv, min_size);
+ BUG_ON(ret); /* shouldn't happen */
+ trans->block_rsv = rsv;
}
if (ret == 0 && inode->i_nlink > 0) {
ei->csum_bytes = 0;
ei->index_cnt = (u64)-1;
ei->last_unlink_trans = 0;
+ ei->last_log_commit = 0;
spin_lock_init(&ei->lock);
ei->outstanding_extents = 0;
int btrfs_init_cachep(void)
{
- btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
+ btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
sizeof(struct btrfs_inode), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
if (!btrfs_inode_cachep)
goto fail;
- btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
+ btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
sizeof(struct btrfs_trans_handle), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_trans_handle_cachep)
goto fail;
- btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
+ btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction",
sizeof(struct btrfs_transaction), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_transaction_cachep)
goto fail;
- btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
+ btrfs_path_cachep = kmem_cache_create("btrfs_path",
sizeof(struct btrfs_path), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_path_cachep)
goto fail;
- btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache",
+ btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
sizeof(struct btrfs_free_space), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_free_space_cachep)
loff_t actual_len, u64 *alloc_hint,
struct btrfs_trans_handle *trans)
{
+ struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map *em;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins;
u64 cur_offset = start;
btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + ins.offset -1, 0);
+ em = alloc_extent_map();
+ if (!em) {
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ &BTRFS_I(inode)->runtime_flags);
+ goto next;
+ }
+
+ em->start = cur_offset;
+ em->orig_start = cur_offset;
+ em->len = ins.offset;
+ em->block_start = ins.objectid;
+ em->block_len = ins.offset;
+ em->bdev = root->fs_info->fs_devices->latest_bdev;
+ set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+ em->generation = trans->transid;
+
+ while (1) {
+ write_lock(&em_tree->lock);
+ ret = add_extent_mapping(em_tree, em);
+ if (!ret)
+ list_move(&em->list,
+ &em_tree->modified_extents);
+ write_unlock(&em_tree->lock);
+ if (ret != -EEXIST)
+ break;
+ btrfs_drop_extent_cache(inode, cur_offset,
+ cur_offset + ins.offset - 1,
+ 0);
+ }
+ free_extent_map(em);
+next:
num_bytes -= ins.offset;
cur_offset += ins.offset;
*alloc_hint = ins.objectid + ins.offset;