unlock_extent(io_tree, async_extent->start,
async_extent->start +
async_extent->ram_size - 1);
+
+ /*
+ * we need to redirty the pages if we decide to
+ * fallback to uncompressed IO, otherwise we
+ * will not submit these pages down to lower
+ * layers.
+ */
+ extent_range_redirty_for_io(inode,
+ async_extent->start,
+ async_extent->start +
+ async_extent->ram_size - 1);
+
goto retry;
}
goto out_free;
min_size);
BUG_ON(ret);
- /*
- * setattr is responsible for setting the ordered_data_close flag,
- * but that is only tested during the last file release. That
- * could happen well after the next commit, leaving a great big
- * window where new writes may get lost if someone chooses to write
- * to this file after truncating to zero
- *
- * The inode doesn't have any dirty data here, and so if we commit
- * this is a noop. If someone immediately starts writing to the inode
- * it is very likely we'll catch some of their writes in this
- * transaction, and the commit will find this file on the ordered
- * data list with good things to send down.
- *
- * This is a best effort solution, there is still a window where
- * using truncate to replace the contents of the file will
- * end up with a zero length file after a crash.
- */
- if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
- &BTRFS_I(inode)->runtime_flags))
- btrfs_add_ordered_operation(trans, root, inode);
-
/*
* So if we truncate and then write and fsync we normally would just
* write the extents that changed, which is a problem if we need to
mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->delalloc_inodes);
- INIT_LIST_HEAD(&ei->ordered_operations);
RB_CLEAR_NODE(&ei->rb_node);
return inode;
if (!root)
goto free;
- /*
- * Make sure we're properly removed from the ordered operation
- * lists.
- */
- smp_mb();
- if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
- spin_lock(&root->fs_info->ordered_root_lock);
- list_del_init(&BTRFS_I(inode)->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
- }
-
if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags)) {
btrfs_info(root->fs_info, "inode %llu still on the orphan list",
ret = 0;
/*
- * we're using rename to replace one file with another.
- * and the replacement file is large. Start IO on it now so
- * we don't add too much work to the end of the transaction
+ * we're using rename to replace one file with another. Start IO on it
+ * now so we don't add too much work to the end of the transaction
*/
- if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size &&
- old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+ if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
filemap_flush(old_inode->i_mapping);
/* close the racy window with snapshot create/destroy ioctl */
*/
btrfs_pin_log_trans(root);
}
- /*
- * make sure the inode gets flushed if it is replacing
- * something.
- */
- if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
- btrfs_add_ordered_operation(trans, root, old_inode);
inode_inc_iversion(old_dir);
inode_inc_iversion(new_dir);
return ret;
}
+/*
+ * rename2 entry point that wraps btrfs_rename().
+ *
+ * Any bit set in @flags other than RENAME_NOREPLACE causes -EINVAL to be
+ * returned; otherwise the call is forwarded to btrfs_rename() with the
+ * same directory/dentry arguments and its result is returned unchanged.
+ *
+ * NOTE(review): RENAME_NOREPLACE itself is not inspected here; presumably
+ * the caller has already enforced it before this is reached -- confirm.
+ */
+static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ /* Reject every flag bit except RENAME_NOREPLACE. */
+ if (flags & ~RENAME_NOREPLACE)
+ return -EINVAL;
+
+ return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+
static void btrfs_run_delalloc_work(struct btrfs_work *work)
{
struct btrfs_delalloc_work *delalloc_work;
.link = btrfs_link,
.mkdir = btrfs_mkdir,
.rmdir = btrfs_rmdir,
- .rename = btrfs_rename,
+ .rename2 = btrfs_rename2,
.symlink = btrfs_symlink,
.setattr = btrfs_setattr,
.mknod = btrfs_mknod,
struct btrfs_path *path;
struct btrfs_key location;
struct inode *inode;
- struct dentry *dentry;
u64 dir_id;
int new = 0;
return dget(sb->s_root);
}
- dentry = d_obtain_alias(inode);
- if (!IS_ERR(dentry)) {
- spin_lock(&dentry->d_lock);
- dentry->d_flags &= ~DCACHE_DISCONNECTED;
- spin_unlock(&dentry->d_lock);
- }
- return dentry;
+ return d_obtain_root(inode);
}
static int btrfs_fill_super(struct super_block *sb,
return 0;
}
+ /*
+ * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
+ *
+ * If there's a redundant raid level at DATA block groups, use the respective
+ * multiplier to scale the sizes.
+ *
+ * Unused device space usage is based on simulating the chunk allocator
+ * algorithm that respects the device sizes, order of allocations and the
+ * 'alloc_start' value, this is a close approximation of the actual use but
+ * there are other factors that may change the result (like a new metadata
+ * chunk).
+ *
+ * FIXME: not accurate for mixed block groups, total and free/used are ok,
+ * available appears slightly larger.
+ */
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
u64 total_free_data = 0;
int bits = dentry->d_sb->s_blocksize_bits;
__be32 *fsid = (__be32 *)fs_info->fsid;
+ unsigned factor = 1;
+ struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
int ret;
/* holding chunk_mutex to avoid allocating new chunks */
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
+ int i;
+
total_free_data += found->disk_total - found->disk_used;
total_free_data -=
btrfs_account_ro_block_groups_free_space(found);
+
+ for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+ if (!list_empty(&found->block_groups[i])) {
+ switch (i) {
+ case BTRFS_RAID_DUP:
+ case BTRFS_RAID_RAID1:
+ case BTRFS_RAID_RAID10:
+ factor = 2;
+ }
+ }
+ }
}
total_used += found->disk_used;
}
+
rcu_read_unlock();
- buf->f_namelen = BTRFS_NAME_LEN;
- buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
- buf->f_bfree = buf->f_blocks - (total_used >> bits);
- buf->f_bsize = dentry->d_sb->s_blocksize;
- buf->f_type = BTRFS_SUPER_MAGIC;
+ buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
+ buf->f_blocks >>= bits;
+ buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
+
+ /* Account global block reserve as used, it's in logical size already */
+ spin_lock(&block_rsv->lock);
+ buf->f_bfree -= block_rsv->size >> bits;
+ spin_unlock(&block_rsv->lock);
+
buf->f_bavail = total_free_data;
ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
if (ret) {
mutex_unlock(&fs_info->chunk_mutex);
return ret;
}
- buf->f_bavail += total_free_data;
+ buf->f_bavail += div_u64(total_free_data, factor);
buf->f_bavail = buf->f_bavail >> bits;
mutex_unlock(&fs_info->chunk_mutex);
+ buf->f_type = BTRFS_SUPER_MAGIC;
+ buf->f_bsize = dentry->d_sb->s_blocksize;
+ buf->f_namelen = BTRFS_NAME_LEN;
+
/* We treat it as constant endianness (it doesn't matter _which_)
because we want the fsid to come out the same whether mounted
on a big-endian or little-endian host */