]> git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - fs/btrfs/file.c
Merge branch 'for-linus-4.6' of git://git.kernel.org/pub/scm/linux/kernel/git/mason...
[karo-tx-linux.git] / fs / btrfs / file.c
index 098bb8f690c992e1ebd01270f49ff2d37e6658bd..8d7b5a45c00523f4ca7ee5d58810b9d571ad4148 100644 (file)
@@ -41,6 +41,7 @@
 #include "locking.h"
 #include "volumes.h"
 #include "qgroup.h"
+#include "compression.h"
 
 static struct kmem_cache *btrfs_inode_defrag_cachep;
 /*
@@ -413,11 +414,11 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
        size_t copied = 0;
        size_t total_copied = 0;
        int pg = 0;
-       int offset = pos & (PAGE_CACHE_SIZE - 1);
+       int offset = pos & (PAGE_SIZE - 1);
 
        while (write_bytes > 0) {
                size_t count = min_t(size_t,
-                                    PAGE_CACHE_SIZE - offset, write_bytes);
+                                    PAGE_SIZE - offset, write_bytes);
                struct page *page = prepared_pages[pg];
                /*
                 * Copy data from userspace to the current page
@@ -447,7 +448,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
                if (unlikely(copied == 0))
                        break;
 
-               if (copied < PAGE_CACHE_SIZE - offset) {
+               if (copied < PAGE_SIZE - offset) {
                        offset += copied;
                } else {
                        pg++;
@@ -472,7 +473,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
                 */
                ClearPageChecked(pages[i]);
                unlock_page(pages[i]);
-               page_cache_release(pages[i]);
+               put_page(pages[i]);
        }
 }
 
@@ -498,7 +499,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
        loff_t isize = i_size_read(inode);
 
        start_pos = pos & ~((u64)root->sectorsize - 1);
-       num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize);
+       num_bytes = round_up(write_bytes + pos - start_pos, root->sectorsize);
 
        end_of_last_block = start_pos + num_bytes - 1;
        err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
@@ -1296,7 +1297,7 @@ static int prepare_uptodate_page(struct inode *inode,
 {
        int ret = 0;
 
-       if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) &&
+       if (((pos & (PAGE_SIZE - 1)) || force_uptodate) &&
            !PageUptodate(page)) {
                ret = btrfs_readpage(NULL, page);
                if (ret)
@@ -1322,7 +1323,7 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages,
                                  size_t write_bytes, bool force_uptodate)
 {
        int i;
-       unsigned long index = pos >> PAGE_CACHE_SHIFT;
+       unsigned long index = pos >> PAGE_SHIFT;
        gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
        int err = 0;
        int faili;
@@ -1344,7 +1345,7 @@ again:
                        err = prepare_uptodate_page(inode, pages[i],
                                                    pos + write_bytes, false);
                if (err) {
-                       page_cache_release(pages[i]);
+                       put_page(pages[i]);
                        if (err == -EAGAIN) {
                                err = 0;
                                goto again;
@@ -1359,7 +1360,7 @@ again:
 fail:
        while (faili >= 0) {
                unlock_page(pages[faili]);
-               page_cache_release(pages[faili]);
+               put_page(pages[faili]);
                faili--;
        }
        return err;
@@ -1379,16 +1380,19 @@ fail:
 static noinline int
 lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
                                size_t num_pages, loff_t pos,
+                               size_t write_bytes,
                                u64 *lockstart, u64 *lockend,
                                struct extent_state **cached_state)
 {
+       struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 start_pos;
        u64 last_pos;
        int i;
        int ret = 0;
 
-       start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
-       last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;
+       start_pos = round_down(pos, root->sectorsize);
+       last_pos = start_pos
+               + round_up(pos + write_bytes - start_pos, root->sectorsize) - 1;
 
        if (start_pos < inode->i_size) {
                struct btrfs_ordered_extent *ordered;
@@ -1404,7 +1408,7 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
                                             cached_state, GFP_NOFS);
                        for (i = 0; i < num_pages; i++) {
                                unlock_page(pages[i]);
-                               page_cache_release(pages[i]);
+                               put_page(pages[i]);
                        }
                        btrfs_start_ordered_extent(inode, ordered, 1);
                        btrfs_put_ordered_extent(ordered);
@@ -1493,8 +1497,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
        bool force_page_uptodate = false;
        bool need_unlock;
 
-       nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_CACHE_SIZE),
-                       PAGE_CACHE_SIZE / (sizeof(struct page *)));
+       nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
+                       PAGE_SIZE / (sizeof(struct page *)));
        nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
        nrptrs = max(nrptrs, 8);
        pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
@@ -1502,15 +1506,18 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                return -ENOMEM;
 
        while (iov_iter_count(i) > 0) {
-               size_t offset = pos & (PAGE_CACHE_SIZE - 1);
+               size_t offset = pos & (PAGE_SIZE - 1);
+               size_t sector_offset;
                size_t write_bytes = min(iov_iter_count(i),
-                                        nrptrs * (size_t)PAGE_CACHE_SIZE -
+                                        nrptrs * (size_t)PAGE_SIZE -
                                         offset);
                size_t num_pages = DIV_ROUND_UP(write_bytes + offset,
-                                               PAGE_CACHE_SIZE);
+                                               PAGE_SIZE);
                size_t reserve_bytes;
                size_t dirty_pages;
                size_t copied;
+               size_t dirty_sectors;
+               size_t num_sectors;
 
                WARN_ON(num_pages > nrptrs);
 
@@ -1523,29 +1530,29 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                        break;
                }
 
-               reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
+               sector_offset = pos & (root->sectorsize - 1);
+               reserve_bytes = round_up(write_bytes + sector_offset,
+                               root->sectorsize);
 
-               if (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
-                                            BTRFS_INODE_PREALLOC)) {
-                       ret = check_can_nocow(inode, pos, &write_bytes);
-                       if (ret < 0)
-                               break;
-                       if (ret > 0) {
-                               /*
-                                * For nodata cow case, no need to reserve
-                                * data space.
-                                */
-                               only_release_metadata = true;
-                               /*
-                                * our prealloc extent may be smaller than
-                                * write_bytes, so scale down.
-                                */
-                               num_pages = DIV_ROUND_UP(write_bytes + offset,
-                                                        PAGE_CACHE_SIZE);
-                               reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
-                               goto reserve_metadata;
-                       }
+               if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+                                             BTRFS_INODE_PREALLOC)) &&
+                   check_can_nocow(inode, pos, &write_bytes) > 0) {
+                       /*
+                        * For nodata cow case, no need to reserve
+                        * data space.
+                        */
+                       only_release_metadata = true;
+                       /*
+                        * our prealloc extent may be smaller than
+                        * write_bytes, so scale down.
+                        */
+                       num_pages = DIV_ROUND_UP(write_bytes + offset,
+                                                PAGE_SIZE);
+                       reserve_bytes = round_up(write_bytes + sector_offset,
+                                       root->sectorsize);
+                       goto reserve_metadata;
                }
+
                ret = btrfs_check_data_free_space(inode, pos, write_bytes);
                if (ret < 0)
                        break;
@@ -1576,8 +1583,8 @@ again:
                        break;
 
                ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
-                                                     pos, &lockstart, &lockend,
-                                                     &cached_state);
+                                               pos, write_bytes, &lockstart,
+                                               &lockend, &cached_state);
                if (ret < 0) {
                        if (ret == -EAGAIN)
                                goto again;
@@ -1602,7 +1609,7 @@ again:
                } else {
                        force_page_uptodate = false;
                        dirty_pages = DIV_ROUND_UP(copied + offset,
-                                                  PAGE_CACHE_SIZE);
+                                                  PAGE_SIZE);
                }
 
                /*
@@ -1612,9 +1619,16 @@ again:
                 * we still have an outstanding extent for the chunk we actually
                 * managed to copy.
                 */
-               if (num_pages > dirty_pages) {
-                       release_bytes = (num_pages - dirty_pages) <<
-                               PAGE_CACHE_SHIFT;
+               num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+                                               reserve_bytes);
+               dirty_sectors = round_up(copied + sector_offset,
+                                       root->sectorsize);
+               dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+                                               dirty_sectors);
+
+               if (num_sectors > dirty_sectors) {
+                       release_bytes = (write_bytes - copied)
+                               & ~((u64)root->sectorsize - 1);
                        if (copied > 0) {
                                spin_lock(&BTRFS_I(inode)->lock);
                                BTRFS_I(inode)->outstanding_extents++;
@@ -1627,13 +1641,14 @@ again:
                                u64 __pos;
 
                                __pos = round_down(pos, root->sectorsize) +
-                                       (dirty_pages << PAGE_CACHE_SHIFT);
+                                       (dirty_pages << PAGE_SHIFT);
                                btrfs_delalloc_release_space(inode, __pos,
                                                             release_bytes);
                        }
                }
 
-               release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
+               release_bytes = round_up(copied + sector_offset,
+                                       root->sectorsize);
 
                if (copied > 0)
                        ret = btrfs_dirty_pages(root, inode, pages,
@@ -1654,8 +1669,7 @@ again:
 
                if (only_release_metadata && copied > 0) {
                        lockstart = round_down(pos, root->sectorsize);
-                       lockend = lockstart +
-                               (dirty_pages << PAGE_CACHE_SHIFT) - 1;
+                       lockend = round_up(pos + copied, root->sectorsize) - 1;
 
                        set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
                                       lockend, EXTENT_NORESERVE, NULL,
@@ -1668,7 +1682,7 @@ again:
                cond_resched();
 
                balance_dirty_pages_ratelimited(inode->i_mapping);
-               if (dirty_pages < (root->nodesize >> PAGE_CACHE_SHIFT) + 1)
+               if (dirty_pages < (root->nodesize >> PAGE_SHIFT) + 1)
                        btrfs_btree_balance_dirty(root);
 
                pos += copied;
@@ -1724,8 +1738,8 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
                goto out;
        written += written_buffered;
        iocb->ki_pos = pos + written_buffered;
-       invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
-                                endbyte >> PAGE_CACHE_SHIFT);
+       invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
+                                endbyte >> PAGE_SHIFT);
 out:
        return written ? written : err;
 }
@@ -1761,6 +1775,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        ssize_t err;
        loff_t pos;
        size_t count;
+       loff_t oldsize;
+       int clean_page = 0;
 
        inode_lock(inode);
        err = generic_write_checks(iocb, from);
@@ -1799,14 +1815,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        pos = iocb->ki_pos;
        count = iov_iter_count(from);
        start_pos = round_down(pos, root->sectorsize);
-       if (start_pos > i_size_read(inode)) {
+       oldsize = i_size_read(inode);
+       if (start_pos > oldsize) {
                /* Expand hole size to cover write data, preventing empty gap */
                end_pos = round_up(pos + count, root->sectorsize);
-               err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
+               err = btrfs_cont_expand(inode, oldsize, end_pos);
                if (err) {
                        inode_unlock(inode);
                        goto out;
                }
+               if (start_pos > round_up(oldsize, root->sectorsize))
+                       clean_page = 1;
        }
 
        if (sync)
@@ -1818,6 +1837,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
                num_written = __btrfs_buffered_write(file, from, pos);
                if (num_written > 0)
                        iocb->ki_pos = pos + num_written;
+               if (clean_page)
+                       pagecache_isize_extended(inode, oldsize,
+                                               i_size_read(inode));
        }
 
        inode_unlock(inode);
@@ -1825,7 +1847,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        /*
         * We also have to set last_sub_trans to the current log transid,
         * otherwise subsequent syncs to a file that's been synced in this
-        * transaction will appear to have already occured.
+        * transaction will appear to have already occurred.
         */
        spin_lock(&BTRFS_I(inode)->lock);
        BTRFS_I(inode)->last_sub_trans = root->log_transid;
@@ -1883,7 +1905,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
  */
 int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 {
-       struct dentry *dentry = file->f_path.dentry;
+       struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
@@ -1996,10 +2018,11 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         */
        smp_mb();
        if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
-           (BTRFS_I(inode)->last_trans <=
-            root->fs_info->last_trans_committed &&
-            (full_sync ||
-             !btrfs_have_ordered_extents_in_range(inode, start, len)))) {
+           (full_sync && BTRFS_I(inode)->last_trans <=
+            root->fs_info->last_trans_committed) ||
+           (!btrfs_have_ordered_extents_in_range(inode, start, len) &&
+            BTRFS_I(inode)->last_trans
+            <= root->fs_info->last_trans_committed)) {
                /*
                 * We'v had everything committed since the last time we were
                 * modified so clear this flag in case it was set for whatever
@@ -2293,10 +2316,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        int ret = 0;
        int err = 0;
        unsigned int rsv_count;
-       bool same_page;
+       bool same_block;
        bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
        u64 ino_size;
-       bool truncated_page = false;
+       bool truncated_block = false;
        bool updated_inode = false;
 
        ret = btrfs_wait_ordered_range(inode, offset, len);
@@ -2304,7 +2327,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                return ret;
 
        inode_lock(inode);
-       ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
+       ino_size = round_up(inode->i_size, root->sectorsize);
        ret = find_first_non_hole(inode, &offset, &len);
        if (ret < 0)
                goto out_only_mutex;
@@ -2317,31 +2340,30 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
        lockend = round_down(offset + len,
                             BTRFS_I(inode)->root->sectorsize) - 1;
-       same_page = ((offset >> PAGE_CACHE_SHIFT) ==
-                   ((offset + len - 1) >> PAGE_CACHE_SHIFT));
-
+       same_block = (BTRFS_BYTES_TO_BLKS(root->fs_info, offset))
+               == (BTRFS_BYTES_TO_BLKS(root->fs_info, offset + len - 1));
        /*
-        * We needn't truncate any page which is beyond the end of the file
+        * We needn't truncate any block which is beyond the end of the file
         * because we are sure there is no data there.
         */
        /*
-        * Only do this if we are in the same page and we aren't doing the
-        * entire page.
+        * Only do this if we are in the same block and we aren't doing the
+        * entire block.
         */
-       if (same_page && len < PAGE_CACHE_SIZE) {
+       if (same_block && len < root->sectorsize) {
                if (offset < ino_size) {
-                       truncated_page = true;
-                       ret = btrfs_truncate_page(inode, offset, len, 0);
+                       truncated_block = true;
+                       ret = btrfs_truncate_block(inode, offset, len, 0);
                } else {
                        ret = 0;
                }
                goto out_only_mutex;
        }
 
-       /* zero back part of the first page */
+       /* zero back part of the first block */
        if (offset < ino_size) {
-               truncated_page = true;
-               ret = btrfs_truncate_page(inode, offset, 0, 0);
+               truncated_block = true;
+               ret = btrfs_truncate_block(inode, offset, 0, 0);
                if (ret) {
                        inode_unlock(inode);
                        return ret;
@@ -2376,9 +2398,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                if (!ret) {
                        /* zero the front end of the last page */
                        if (tail_start + tail_len < ino_size) {
-                               truncated_page = true;
-                               ret = btrfs_truncate_page(inode,
-                                               tail_start + tail_len, 0, 1);
+                               truncated_block = true;
+                               ret = btrfs_truncate_block(inode,
+                                                       tail_start + tail_len,
+                                                       0, 1);
                                if (ret)
                                        goto out_only_mutex;
                        }
@@ -2544,7 +2567,7 @@ out_trans:
                goto out_free;
 
        inode_inc_iversion(inode);
-       inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+       inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
 
        trans->block_rsv = &root->fs_info->trans_block_rsv;
        ret = btrfs_update_inode(trans, root, inode);
@@ -2558,7 +2581,7 @@ out:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                             &cached_state, GFP_NOFS);
 out_only_mutex:
-       if (!updated_inode && truncated_page && !ret && !err) {
+       if (!updated_inode && truncated_block && !ret && !err) {
                /*
                 * If we only end up zeroing part of a page, we still need to
                 * update the inode item, so that all the time fields are
@@ -2611,7 +2634,7 @@ static int add_falloc_range(struct list_head *head, u64 start, u64 len)
                return 0;
        }
 insert:
-       range = kmalloc(sizeof(*range), GFP_NOFS);
+       range = kmalloc(sizeof(*range), GFP_KERNEL);
        if (!range)
                return -ENOMEM;
        range->start = start;
@@ -2659,9 +2682,12 @@ static long btrfs_fallocate(struct file *file, int mode,
                return ret;
 
        inode_lock(inode);
-       ret = inode_newsize_ok(inode, alloc_end);
-       if (ret)
-               goto out;
+
+       if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) {
+               ret = inode_newsize_ok(inode, offset + len);
+               if (ret)
+                       goto out;
+       }
 
        /*
         * TODO: Move these two operations after we have checked
@@ -2678,10 +2704,10 @@ static long btrfs_fallocate(struct file *file, int mode,
        } else if (offset + len > inode->i_size) {
                /*
                 * If we are fallocating from the end of the file onward we
-                * need to zero out the end of the page if i_size lands in the
-                * middle of a page.
+                * need to zero out the end of the block if i_size lands in the
+                * middle of a block.
                 */
-               ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
+               ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
                if (ret)
                        goto out;
        }
@@ -2712,7 +2738,7 @@ static long btrfs_fallocate(struct file *file, int mode,
                        btrfs_put_ordered_extent(ordered);
                        unlock_extent_cached(&BTRFS_I(inode)->io_tree,
                                             alloc_start, locked_end,
-                                            &cached_state, GFP_NOFS);
+                                            &cached_state, GFP_KERNEL);
                        /*
                         * we can't wait on the range with the transaction
                         * running or with the extent lock held
@@ -2794,7 +2820,7 @@ static long btrfs_fallocate(struct file *file, int mode,
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
                } else {
-                       inode->i_ctime = CURRENT_TIME;
+                       inode->i_ctime = current_fs_time(inode->i_sb);
                        i_size_write(inode, actual_end);
                        btrfs_ordered_update_i_size(inode, actual_end, NULL);
                        ret = btrfs_update_inode(trans, root, inode);
@@ -2806,7 +2832,7 @@ static long btrfs_fallocate(struct file *file, int mode,
        }
 out_unlock:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
-                            &cached_state, GFP_NOFS);
+                            &cached_state, GFP_KERNEL);
 out:
        /*
         * As we waited the extent range, the data_rsv_map must be empty
@@ -2939,8 +2965,7 @@ const struct file_operations btrfs_file_operations = {
 
 void btrfs_auto_defrag_exit(void)
 {
-       if (btrfs_inode_defrag_cachep)
-               kmem_cache_destroy(btrfs_inode_defrag_cachep);
+       kmem_cache_destroy(btrfs_inode_defrag_cachep);
 }
 
 int btrfs_auto_defrag_init(void)