git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge branch 'integration-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/fdman...
author Chris Mason <clm@fb.com>
Thu, 22 Oct 2015 01:23:59 +0000 (18:23 -0700)
committer Chris Mason <clm@fb.com>
Thu, 22 Oct 2015 01:23:59 +0000 (18:23 -0700)
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/send.c

index ecb1204468c3f2cef32af8b0190e3c9f691ebfa9..6e6df34d74f051df17e9a3b6079cadf2af49d8bb 100644 (file)
@@ -3070,8 +3070,12 @@ static int __do_readpage(struct extent_io_tree *tree,
 
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
                                            &cached, GFP_NOFS);
-                       unlock_extent_cached(tree, cur, cur + iosize - 1,
-                                            &cached, GFP_NOFS);
+                       if (parent_locked)
+                               free_extent_state(cached);
+                       else
+                               unlock_extent_cached(tree, cur,
+                                                    cur + iosize - 1,
+                                                    &cached, GFP_NOFS);
                        cur = cur + iosize;
                        pg_offset += iosize;
                        continue;
index 8161afc32fa0a41de7aa555a941a6c11b0b532eb..5ce55f6eefceb8103ab6d8c7194c1797f0668ce3 100644 (file)
@@ -4216,6 +4216,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,
 
 }
 
+static int truncate_inline_extent(struct inode *inode,
+                                 struct btrfs_path *path,
+                                 struct btrfs_key *found_key,
+                                 const u64 item_end,
+                                 const u64 new_size)
+{
+       struct extent_buffer *leaf = path->nodes[0];
+       int slot = path->slots[0];
+       struct btrfs_file_extent_item *fi;
+       u32 size = (u32)(new_size - found_key->offset);
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+
+       fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+       if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
+               loff_t offset = new_size;
+               loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE);
+
+               /*
+                * Zero out the rest of the last page of our inline extent,
+                * instead of directly truncating our inline extent here - that
+                * would be much more complex (decompressing all the data, then
+                * compressing the truncated data, which might be bigger than
+                * the size of the inline extent, resize the extent, etc).
+                * We release the path because to get the page we might need to
+                * read the extent item from disk (data not in the page cache).
+                */
+               btrfs_release_path(path);
+               return btrfs_truncate_page(inode, offset, page_end - offset, 0);
+       }
+
+       btrfs_set_file_extent_ram_bytes(leaf, fi, size);
+       size = btrfs_file_extent_calc_inline_size(size);
+       btrfs_truncate_item(root, path, size, 1);
+
+       if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+               inode_sub_bytes(inode, item_end + 1 - new_size);
+
+       return 0;
+}
+
 /*
  * this can truncate away extent items, csum items and directory items.
  * It starts at a high offset and removes keys until it can't find
@@ -4410,27 +4451,40 @@ search_again:
                         * special encodings
                         */
                        if (!del_item &&
-                           btrfs_file_extent_compression(leaf, fi) == 0 &&
                            btrfs_file_extent_encryption(leaf, fi) == 0 &&
                            btrfs_file_extent_other_encoding(leaf, fi) == 0) {
-                               u32 size = new_size - found_key.offset;
-
-                               if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
-                                       inode_sub_bytes(inode, item_end + 1 -
-                                                       new_size);
 
                                /*
-                                * update the ram bytes to properly reflect
-                                * the new size of our item
+                                * Need to release path in order to truncate a
+                                * compressed extent. So delete any accumulated
+                                * extent items so far.
                                 */
-                               btrfs_set_file_extent_ram_bytes(leaf, fi, size);
-                               size =
-                                   btrfs_file_extent_calc_inline_size(size);
-                               btrfs_truncate_item(root, path, size, 1);
+                               if (btrfs_file_extent_compression(leaf, fi) !=
+                                   BTRFS_COMPRESS_NONE && pending_del_nr) {
+                                       err = btrfs_del_items(trans, root, path,
+                                                             pending_del_slot,
+                                                             pending_del_nr);
+                                       if (err) {
+                                               btrfs_abort_transaction(trans,
+                                                                       root,
+                                                                       err);
+                                               goto error;
+                                       }
+                                       pending_del_nr = 0;
+                               }
+
+                               err = truncate_inline_extent(inode, path,
+                                                            &found_key,
+                                                            item_end,
+                                                            new_size);
+                               if (err) {
+                                       btrfs_abort_transaction(trans,
+                                                               root, err);
+                                       goto error;
+                               }
                        } else if (test_bit(BTRFS_ROOT_REF_COWS,
                                            &root->state)) {
-                               inode_sub_bytes(inode, item_end + 1 -
-                                               found_key.offset);
+                               inode_sub_bytes(inode, item_end + 1 - new_size);
                        }
                }
 delete:
index a30d32b901da286d632c061f9dae3cb18f710bdd..685df7e1b24e531ae9a6f15c81c0475f8e25251c 100644 (file)
@@ -3327,6 +3327,150 @@ static void clone_update_extent_map(struct inode *inode,
                        &BTRFS_I(inode)->runtime_flags);
 }
 
+/*
+ * Make sure we do not end up inserting an inline extent into a file that has
+ * already other (non-inline) extents. If a file has an inline extent it can
+ * not have any other extents and the (single) inline extent must start at the
+ * file offset 0. Failing to respect these rules will lead to file corruption,
+ * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc
+ *
+ * We can have extents that have been already written to disk or we can have
+ * dirty ranges still in delalloc, in which case the extent maps and items are
+ * created only when we run delalloc, and the delalloc ranges might fall outside
+ * the range we are currently locking in the inode's io tree. So we check the
+ * inode's i_size because of that (i_size updates are done while holding the
+ * i_mutex, which we are holding here).
+ * We also check to see if the inode has a size not greater than "datal" but has
+ * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are
+ * protected against such concurrent fallocate calls by the i_mutex).
+ *
+ * If the file has no extents but a size greater than datal, do not allow the
+ * copy because we would need to turn the inline extent into a non-inline one
+ * (even with NO_HOLES enabled). If we find our destination inode only has one
+ * inline extent, just overwrite it with the source inline extent if its size is
+ * less than the source extent's size, or we could copy the source inline
+ * extent's data into the destination inode's inline extent if the latter is
+ * greater than the former.
+ */
+static int clone_copy_inline_extent(struct inode *src,
+                                   struct inode *dst,
+                                   struct btrfs_trans_handle *trans,
+                                   struct btrfs_path *path,
+                                   struct btrfs_key *new_key,
+                                   const u64 drop_start,
+                                   const u64 datal,
+                                   const u64 skip,
+                                   const u64 size,
+                                   char *inline_data)
+{
+       struct btrfs_root *root = BTRFS_I(dst)->root;
+       const u64 aligned_end = ALIGN(new_key->offset + datal,
+                                     root->sectorsize);
+       int ret;
+       struct btrfs_key key;
+
+       if (new_key->offset > 0)
+               return -EOPNOTSUPP;
+
+       key.objectid = btrfs_ino(dst);
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = 0;
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0) {
+               return ret;
+       } else if (ret > 0) {
+               if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               return ret;
+                       else if (ret > 0)
+                               goto copy_inline_extent;
+               }
+               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+               if (key.objectid == btrfs_ino(dst) &&
+                   key.type == BTRFS_EXTENT_DATA_KEY) {
+                       ASSERT(key.offset > 0);
+                       return -EOPNOTSUPP;
+               }
+       } else if (i_size_read(dst) <= datal) {
+               struct btrfs_file_extent_item *ei;
+               u64 ext_len;
+
+               /*
+                * If the file size is <= datal, make sure there are no other
+                * extents following (can happen due to an fallocate call with
+                * the flag FALLOC_FL_KEEP_SIZE).
+                */
+               ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                   struct btrfs_file_extent_item);
+               /*
+                * If it's an inline extent, it can not have other extents
+                * following it.
+                */
+               if (btrfs_file_extent_type(path->nodes[0], ei) ==
+                   BTRFS_FILE_EXTENT_INLINE)
+                       goto copy_inline_extent;
+
+               ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
+               if (ext_len > aligned_end)
+                       return -EOPNOTSUPP;
+
+               ret = btrfs_next_item(root, path);
+               if (ret < 0) {
+                       return ret;
+               } else if (ret == 0) {
+                       btrfs_item_key_to_cpu(path->nodes[0], &key,
+                                             path->slots[0]);
+                       if (key.objectid == btrfs_ino(dst) &&
+                           key.type == BTRFS_EXTENT_DATA_KEY)
+                               return -EOPNOTSUPP;
+               }
+       }
+
+copy_inline_extent:
+       /*
+        * We have no extent items, or we have an extent at offset 0 which may
+        * or may not be inlined. All these cases are dealt the same way.
+        */
+       if (i_size_read(dst) > datal) {
+               /*
+                * If the destination inode has an inline extent...
+                * This would require copying the data from the source inline
+                * extent into the beginning of the destination's inline extent.
+                * But this is really complex, both extents can be compressed
+                * or just one of them, which would require decompressing and
+                * re-compressing data (which could increase the new compressed
+                * size, not allowing the compressed data to fit anymore in an
+                * inline extent).
+                * So just don't support this case for now (it should be rare,
+                * we are not really saving space when cloning inline extents).
+                */
+               return -EOPNOTSUPP;
+       }
+
+       btrfs_release_path(path);
+       ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
+       if (ret)
+               return ret;
+       ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
+       if (ret)
+               return ret;
+
+       if (skip) {
+               const u32 start = btrfs_file_extent_calc_inline_size(0);
+
+               memmove(inline_data + start, inline_data + start + skip, datal);
+       }
+
+       write_extent_buffer(path->nodes[0], inline_data,
+                           btrfs_item_ptr_offset(path->nodes[0],
+                                                 path->slots[0]),
+                           size);
+       inode_add_bytes(dst, datal);
+
+       return 0;
+}
+
 /**
  * btrfs_clone() - clone a range from inode file to another
  *
@@ -3593,21 +3737,6 @@ process_slot:
                        } else if (type == BTRFS_FILE_EXTENT_INLINE) {
                                u64 skip = 0;
                                u64 trim = 0;
-                               u64 aligned_end = 0;
-
-                               /*
-                                * Don't copy an inline extent into an offset
-                                * greater than zero. Having an inline extent
-                                * at such an offset results in chaos as btrfs
-                                * isn't prepared for such cases. Just skip
-                                * this case for the same reasons as commented
-                                * at btrfs_ioctl_clone().
-                                */
-                               if (last_dest_end > 0) {
-                                       ret = -EOPNOTSUPP;
-                                       btrfs_end_transaction(trans, root);
-                                       goto out;
-                               }
 
                                if (off > key.offset) {
                                        skip = off - key.offset;
@@ -3625,42 +3754,22 @@ process_slot:
                                size -= skip + trim;
                                datal -= skip + trim;
 
-                               aligned_end = ALIGN(new_key.offset + datal,
-                                                   root->sectorsize);
-                               ret = btrfs_drop_extents(trans, root, inode,
-                                                        drop_start,
-                                                        aligned_end,
-                                                        1);
+                               ret = clone_copy_inline_extent(src, inode,
+                                                              trans, path,
+                                                              &new_key,
+                                                              drop_start,
+                                                              datal,
+                                                              skip, size, buf);
                                if (ret) {
                                        if (ret != -EOPNOTSUPP)
                                                btrfs_abort_transaction(trans,
-                                                       root, ret);
-                                       btrfs_end_transaction(trans, root);
-                                       goto out;
-                               }
-
-                               ret = btrfs_insert_empty_item(trans, root, path,
-                                                             &new_key, size);
-                               if (ret) {
-                                       btrfs_abort_transaction(trans, root,
-                                                               ret);
+                                                                       root,
+                                                                       ret);
                                        btrfs_end_transaction(trans, root);
                                        goto out;
                                }
-
-                               if (skip) {
-                                       u32 start =
-                                         btrfs_file_extent_calc_inline_size(0);
-                                       memmove(buf+start, buf+start+skip,
-                                               datal);
-                               }
-
                                leaf = path->nodes[0];
                                slot = path->slots[0];
-                               write_extent_buffer(leaf, buf,
-                                           btrfs_item_ptr_offset(leaf, slot),
-                                           size);
-                               inode_add_bytes(inode, datal);
                        }
 
                        /* If we have an implicit hole (NO_HOLES feature). */
index b5d47b9400ba5a9e9da868a9751881e14baff315..355a458cba1abe29efb3410a6ae93261052ba1e9 100644 (file)
@@ -1434,16 +1434,6 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
        }
 
        if (cur_clone_root) {
-               if (compressed != BTRFS_COMPRESS_NONE) {
-                       /*
-                        * Offsets given by iterate_extent_inodes() are relative
-                        * to the start of the extent, we need to add logical
-                        * offset from the file extent item.
-                        * (See why at backref.c:check_extent_in_eb())
-                        */
-                       cur_clone_root->offset += btrfs_file_extent_offset(eb,
-                                                                          fi);
-               }
                *found = cur_clone_root;
                ret = 0;
        } else {
@@ -2353,8 +2343,14 @@ static int send_subvol_begin(struct send_ctx *sctx)
        }
 
        TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
-       TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
-                       sctx->send_root->root_item.uuid);
+
+       if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
+               TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
+                           sctx->send_root->root_item.received_uuid);
+       else
+               TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
+                           sctx->send_root->root_item.uuid);
+
        TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
                    le64_to_cpu(sctx->send_root->root_item.ctransid));
        if (parent_root) {
@@ -4687,6 +4683,171 @@ tlv_put_failure:
        return ret;
 }
 
+static int send_extent_data(struct send_ctx *sctx,
+                           const u64 offset,
+                           const u64 len)
+{
+       u64 sent = 0;
+
+       if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
+               return send_update_extent(sctx, offset, len);
+
+       while (sent < len) {
+               u64 size = len - sent;
+               int ret;
+
+               if (size > BTRFS_SEND_READ_SIZE)
+                       size = BTRFS_SEND_READ_SIZE;
+               ret = send_write(sctx, offset + sent, size);
+               if (ret < 0)
+                       return ret;
+               if (!ret)
+                       break;
+               sent += ret;
+       }
+       return 0;
+}
+
+static int clone_range(struct send_ctx *sctx,
+                      struct clone_root *clone_root,
+                      const u64 disk_byte,
+                      u64 data_offset,
+                      u64 offset,
+                      u64 len)
+{
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       int ret;
+
+       path = alloc_path_for_send();
+       if (!path)
+               return -ENOMEM;
+
+       /*
+        * We can't send a clone operation for the entire range if we find
+        * extent items in the respective range in the source file that
+        * refer to different extents or if we find holes.
+        * So check for that and do a mix of clone and regular write/copy
+        * operations if needed.
+        *
+        * Example:
+        *
+        * mkfs.btrfs -f /dev/sda
+        * mount /dev/sda /mnt
+        * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
+        * cp --reflink=always /mnt/foo /mnt/bar
+        * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
+        * btrfs subvolume snapshot -r /mnt /mnt/snap
+        *
+        * If when we send the snapshot and we are processing file bar (which
+        * has a higher inode number than foo) we blindly send a clone operation
+        * for the [0, 100K[ range from foo to bar, the receiver ends up getting
+        * a file bar that matches the content of file foo - iow, doesn't match
+        * the content from bar in the original filesystem.
+        */
+       key.objectid = clone_root->ino;
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = clone_root->offset;
+       ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+       if (ret > 0 && path->slots[0] > 0) {
+               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
+               if (key.objectid == clone_root->ino &&
+                   key.type == BTRFS_EXTENT_DATA_KEY)
+                       path->slots[0]--;
+       }
+
+       while (true) {
+               struct extent_buffer *leaf = path->nodes[0];
+               int slot = path->slots[0];
+               struct btrfs_file_extent_item *ei;
+               u8 type;
+               u64 ext_len;
+               u64 clone_len;
+
+               if (slot >= btrfs_header_nritems(leaf)) {
+                       ret = btrfs_next_leaf(clone_root->root, path);
+                       if (ret < 0)
+                               goto out;
+                       else if (ret > 0)
+                               break;
+                       continue;
+               }
+
+               btrfs_item_key_to_cpu(leaf, &key, slot);
+
+               /*
+                * We might have an implicit trailing hole (NO_HOLES feature
+                * enabled). We deal with it after leaving this loop.
+                */
+               if (key.objectid != clone_root->ino ||
+                   key.type != BTRFS_EXTENT_DATA_KEY)
+                       break;
+
+               ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+               type = btrfs_file_extent_type(leaf, ei);
+               if (type == BTRFS_FILE_EXTENT_INLINE) {
+                       ext_len = btrfs_file_extent_inline_len(leaf, slot, ei);
+                       ext_len = PAGE_CACHE_ALIGN(ext_len);
+               } else {
+                       ext_len = btrfs_file_extent_num_bytes(leaf, ei);
+               }
+
+               if (key.offset + ext_len <= clone_root->offset)
+                       goto next;
+
+               if (key.offset > clone_root->offset) {
+                       /* Implicit hole, NO_HOLES feature enabled. */
+                       u64 hole_len = key.offset - clone_root->offset;
+
+                       if (hole_len > len)
+                               hole_len = len;
+                       ret = send_extent_data(sctx, offset, hole_len);
+                       if (ret < 0)
+                               goto out;
+
+                       len -= hole_len;
+                       if (len == 0)
+                               break;
+                       offset += hole_len;
+                       clone_root->offset += hole_len;
+                       data_offset += hole_len;
+               }
+
+               if (key.offset >= clone_root->offset + len)
+                       break;
+
+               clone_len = min_t(u64, ext_len, len);
+
+               if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
+                   btrfs_file_extent_offset(leaf, ei) == data_offset)
+                       ret = send_clone(sctx, offset, clone_len, clone_root);
+               else
+                       ret = send_extent_data(sctx, offset, clone_len);
+
+               if (ret < 0)
+                       goto out;
+
+               len -= clone_len;
+               if (len == 0)
+                       break;
+               offset += clone_len;
+               clone_root->offset += clone_len;
+               data_offset += clone_len;
+next:
+               path->slots[0]++;
+       }
+
+       if (len > 0)
+               ret = send_extent_data(sctx, offset, len);
+       else
+               ret = 0;
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
 static int send_write_or_clone(struct send_ctx *sctx,
                               struct btrfs_path *path,
                               struct btrfs_key *key,
@@ -4695,9 +4856,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
        int ret = 0;
        struct btrfs_file_extent_item *ei;
        u64 offset = key->offset;
-       u64 pos = 0;
        u64 len;
-       u32 l;
        u8 type;
        u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
 
@@ -4725,22 +4884,15 @@ static int send_write_or_clone(struct send_ctx *sctx,
        }
 
        if (clone_root && IS_ALIGNED(offset + len, bs)) {
-               ret = send_clone(sctx, offset, len, clone_root);
-       } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) {
-               ret = send_update_extent(sctx, offset, len);
+               u64 disk_byte;
+               u64 data_offset;
+
+               disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
+               data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
+               ret = clone_range(sctx, clone_root, disk_byte, data_offset,
+                                 offset, len);
        } else {
-               while (pos < len) {
-                       l = len - pos;
-                       if (l > BTRFS_SEND_READ_SIZE)
-                               l = BTRFS_SEND_READ_SIZE;
-                       ret = send_write(sctx, pos + offset, l);
-                       if (ret < 0)
-                               goto out;
-                       if (!ret)
-                               break;
-                       pos += ret;
-               }
-               ret = 0;
+               ret = send_extent_data(sctx, offset, len);
        }
 out:
        return ret;