git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge branch 'for-linus2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason...
author: Linus Torvalds <torvalds@linux-foundation.org>
Sat, 16 Aug 2014 15:06:55 +0000 (09:06 -0600)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Sat, 16 Aug 2014 15:06:55 +0000 (09:06 -0600)
Pull btrfs updates from Chris Mason:
 "These are all fixes I'd like to get out to a broader audience.

  The biggest of the bunch is Mark's quota fix, which is also in the
  SUSE kernel, and makes our subvolume quotas dramatically more
  accurate.

  I've been running xfstests with these against your current git
  overnight, but I'm queueing up longer tests as well"

* 'for-linus2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  btrfs: disable strict file flushes for renames and truncates
  Btrfs: fix csum tree corruption, duplicate and outdated checksums
  Btrfs: Fix memory corruption by ulist_add_merge() on 32bit arch
  Btrfs: fix compressed write corruption on enospc
  btrfs: correctly handle return from ulist_add
  btrfs: qgroup: account shared subtrees during snapshot delete
  Btrfs: read lock extent buffer while walking backrefs
  Btrfs: __btrfs_mod_ref should always use no_quota
  btrfs: adjust statfs calculations according to raid profiles

1  2 
fs/btrfs/inode.c
fs/btrfs/super.c

diff --combined fs/btrfs/inode.c
index 3183742d6f0d74d131c16e29dde59205f3e7f8a4,73098328d040983d0c0296425b7a1e9e7961ce90..03708ef3deefb11edd7998ee831bf370e6ad8a66
@@@ -709,6 -709,18 +709,18 @@@ retry
                                unlock_extent(io_tree, async_extent->start,
                                              async_extent->start +
                                              async_extent->ram_size - 1);
+                               /*
+                                * we need to redirty the pages if we decide to
+                                * fallback to uncompressed IO, otherwise we
+                                * will not submit these pages down to lower
+                                * layers.
+                                */
+                               extent_range_redirty_for_io(inode,
+                                               async_extent->start,
+                                               async_extent->start +
+                                               async_extent->ram_size - 1);
                                goto retry;
                        }
                        goto out_free;
@@@ -7938,27 -7950,6 +7950,6 @@@ static int btrfs_truncate(struct inode 
                                      min_size);
        BUG_ON(ret);
  
-       /*
-        * setattr is responsible for setting the ordered_data_close flag,
-        * but that is only tested during the last file release.  That
-        * could happen well after the next commit, leaving a great big
-        * window where new writes may get lost if someone chooses to write
-        * to this file after truncating to zero
-        *
-        * The inode doesn't have any dirty data here, and so if we commit
-        * this is a noop.  If someone immediately starts writing to the inode
-        * it is very likely we'll catch some of their writes in this
-        * transaction, and the commit will find this file on the ordered
-        * data list with good things to send down.
-        *
-        * This is a best effort solution, there is still a window where
-        * using truncate to replace the contents of the file will
-        * end up with a zero length file after a crash.
-        */
-       if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
-                                          &BTRFS_I(inode)->runtime_flags))
-               btrfs_add_ordered_operation(trans, root, inode);
        /*
         * So if we truncate and then write and fsync we normally would just
         * write the extents that changed, which is a problem if we need to
@@@ -8106,7 -8097,6 +8097,6 @@@ struct inode *btrfs_alloc_inode(struct 
        mutex_init(&ei->delalloc_mutex);
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);
        INIT_LIST_HEAD(&ei->delalloc_inodes);
-       INIT_LIST_HEAD(&ei->ordered_operations);
        RB_CLEAR_NODE(&ei->rb_node);
  
        return inode;
@@@ -8146,17 -8136,6 +8136,6 @@@ void btrfs_destroy_inode(struct inode *
        if (!root)
                goto free;
  
-       /*
-        * Make sure we're properly removed from the ordered operation
-        * lists.
-        */
-       smp_mb();
-       if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
-               spin_lock(&root->fs_info->ordered_root_lock);
-               list_del_init(&BTRFS_I(inode)->ordered_operations);
-               spin_unlock(&root->fs_info->ordered_root_lock);
-       }
        if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
                     &BTRFS_I(inode)->runtime_flags)) {
                btrfs_info(root->fs_info, "inode %llu still on the orphan list",
@@@ -8338,12 -8317,10 +8317,10 @@@ static int btrfs_rename(struct inode *o
        ret = 0;
  
        /*
-        * we're using rename to replace one file with another.
-        * and the replacement file is large.  Start IO on it now so
-        * we don't add too much work to the end of the transaction
+        * we're using rename to replace one file with another.  Start IO on it
+        * now so  we don't add too much work to the end of the transaction
         */
-       if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size &&
-           old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+       if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
                filemap_flush(old_inode->i_mapping);
  
        /* close the racy window with snapshot create/destroy ioctl */
                 */
                btrfs_pin_log_trans(root);
        }
-       /*
-        * make sure the inode gets flushed if it is replacing
-        * something.
-        */
-       if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
-               btrfs_add_ordered_operation(trans, root, old_inode);
  
        inode_inc_iversion(old_dir);
        inode_inc_iversion(new_dir);
@@@ -8476,16 -8447,6 +8447,16 @@@ out_notrans
        return ret;
  }
  
 +static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
 +                       struct inode *new_dir, struct dentry *new_dentry,
 +                       unsigned int flags)
 +{
 +      if (flags & ~RENAME_NOREPLACE)
 +              return -EINVAL;
 +
 +      return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
 +}
 +
  static void btrfs_run_delalloc_work(struct btrfs_work *work)
  {
        struct btrfs_delalloc_work *delalloc_work;
@@@ -9029,7 -8990,7 +9000,7 @@@ static const struct inode_operations bt
        .link           = btrfs_link,
        .mkdir          = btrfs_mkdir,
        .rmdir          = btrfs_rmdir,
 -      .rename         = btrfs_rename,
 +      .rename2        = btrfs_rename2,
        .symlink        = btrfs_symlink,
        .setattr        = btrfs_setattr,
        .mknod          = btrfs_mknod,
diff --combined fs/btrfs/super.c
index 67b48b9a03e044eb2d83ab28447c8ead9db5192b,18cdcd1dbe11712e009293beb20eeebbe231701d..c4124de4435bffed06afc3a76ea6aba49a7c5317
@@@ -851,6 -851,7 +851,6 @@@ static struct dentry *get_default_root(
        struct btrfs_path *path;
        struct btrfs_key location;
        struct inode *inode;
 -      struct dentry *dentry;
        u64 dir_id;
        int new = 0;
  
@@@ -921,7 -922,13 +921,7 @@@ setup_root
                return dget(sb->s_root);
        }
  
 -      dentry = d_obtain_alias(inode);
 -      if (!IS_ERR(dentry)) {
 -              spin_lock(&dentry->d_lock);
 -              dentry->d_flags &= ~DCACHE_DISCONNECTED;
 -              spin_unlock(&dentry->d_lock);
 -      }
 -      return dentry;
 +      return d_obtain_root(inode);
  }
  
  static int btrfs_fill_super(struct super_block *sb,
@@@ -1665,6 -1672,21 +1665,21 @@@ static int btrfs_calc_avail_data_space(
        return 0;
  }
  
+ /*
+  * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
+  *
+  * If there's a redundant raid level at DATA block groups, use the respective
+  * multiplier to scale the sizes.
+  *
+  * Unused device space usage is based on simulating the chunk allocator
+  * algorithm that respects the device sizes, order of allocations and the
+  * 'alloc_start' value, this is a close approximation of the actual use but
+  * there are other factors that may change the result (like a new metadata
+  * chunk).
+  *
+  * FIXME: not accurate for mixed block groups, total and free/used are ok,
+  * available appears slightly larger.
+  */
  static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
  {
        struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
        u64 total_free_data = 0;
        int bits = dentry->d_sb->s_blocksize_bits;
        __be32 *fsid = (__be32 *)fs_info->fsid;
+       unsigned factor = 1;
+       struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
        int ret;
  
        /* holding chunk_muext to avoid allocating new chunks */
        rcu_read_lock();
        list_for_each_entry_rcu(found, head, list) {
                if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
+                       int i;
                        total_free_data += found->disk_total - found->disk_used;
                        total_free_data -=
                                btrfs_account_ro_block_groups_free_space(found);
+                       for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+                               if (!list_empty(&found->block_groups[i])) {
+                                       switch (i) {
+                                       case BTRFS_RAID_DUP:
+                                       case BTRFS_RAID_RAID1:
+                                       case BTRFS_RAID_RAID10:
+                                               factor = 2;
+                                       }
+                               }
+                       }
                }
  
                total_used += found->disk_used;
        }
        rcu_read_unlock();
  
-       buf->f_namelen = BTRFS_NAME_LEN;
-       buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
-       buf->f_bfree = buf->f_blocks - (total_used >> bits);
-       buf->f_bsize = dentry->d_sb->s_blocksize;
-       buf->f_type = BTRFS_SUPER_MAGIC;
+       buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
+       buf->f_blocks >>= bits;
+       buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
+       /* Account global block reserve as used, it's in logical size already */
+       spin_lock(&block_rsv->lock);
+       buf->f_bfree -= block_rsv->size >> bits;
+       spin_unlock(&block_rsv->lock);
        buf->f_bavail = total_free_data;
        ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
        if (ret) {
                mutex_unlock(&fs_info->chunk_mutex);
                return ret;
        }
-       buf->f_bavail += total_free_data;
+       buf->f_bavail += div_u64(total_free_data, factor);
        buf->f_bavail = buf->f_bavail >> bits;
        mutex_unlock(&fs_info->chunk_mutex);
  
+       buf->f_type = BTRFS_SUPER_MAGIC;
+       buf->f_bsize = dentry->d_sb->s_blocksize;
+       buf->f_namelen = BTRFS_NAME_LEN;
        /* We treat it as constant endianness (it doesn't matter _which_)
           because we want the fsid to come out the same whether mounted
           on a big-endian or little-endian host */