Merge branch 'for-4.8/core' of git://git.kernel.dk/linux-block
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ff61a41ac90b75a75d93a330a04d2e8b45ec1a99..cefedabf0a92fd6aaa4c4986d4899fc9cac3b859 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -363,6 +363,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
                kfree(cb);
                return -ENOMEM;
        }
+       bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
        bio->bi_private = cb;
        bio->bi_end_io = end_compressed_bio_write;
        atomic_inc(&cb->pending_bios);
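The write path above (and the read path further down) no longer passes READ/WRITE to btrfs_map_bio() or merge_bio_hook(); with the block-layer op/flags rework this merge brings in, the operation travels on the bio itself and is set once via bio_set_op_attrs(). The following is a minimal, self-contained sketch of the general idea of packing an operation and its flags into one per-request field; the names (my_bio, MY_OP_SHIFT, my_bio_set_op_attrs) are illustrative only and do not reproduce the kernel's actual definitions.

	#include <assert.h>
	#include <stdio.h>

	/* Illustrative encoding only: op in the high bits, flags below. */
	enum my_op { MY_OP_READ = 0, MY_OP_WRITE = 1 };
	#define MY_OP_SHIFT 29u

	struct my_bio {
		unsigned int bi_rw;		/* packed op + flags */
	};

	static void my_bio_set_op_attrs(struct my_bio *bio, enum my_op op,
					unsigned int flags)
	{
		bio->bi_rw = ((unsigned int)op << MY_OP_SHIFT) | flags;
	}

	static enum my_op my_bio_op(const struct my_bio *bio)
	{
		return (enum my_op)(bio->bi_rw >> MY_OP_SHIFT);
	}

	int main(void)
	{
		struct my_bio bio = { 0 };

		/* The submitter tags the request once... */
		my_bio_set_op_attrs(&bio, MY_OP_WRITE, 0);
		/* ...and lower layers read the direction from the request
		 * itself, so mapping helpers need no separate rw argument. */
		assert(my_bio_op(&bio) == MY_OP_WRITE);
		printf("op=%u flags=0x%x\n", my_bio_op(&bio),
		       bio.bi_rw & ((1u << MY_OP_SHIFT) - 1));
		return 0;
	}

This is why the remaining hunks in this file are mostly mechanical: every btrfs_map_bio() and merge_bio_hook() call drops its rw argument, and each freshly allocated bio gets tagged with REQ_OP_WRITE or REQ_OP_READ instead.
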
@@ -373,7 +374,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
                page = compressed_pages[pg_index];
                page->mapping = inode->i_mapping;
                if (bio->bi_iter.bi_size)
-                       ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
+                       ret = io_tree->ops->merge_bio_hook(page, 0,
                                                           PAGE_SIZE,
                                                           bio, 0);
                else
@@ -401,13 +402,14 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
                                BUG_ON(ret); /* -ENOMEM */
                        }
 
-                       ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
+                       ret = btrfs_map_bio(root, bio, 0, 1);
                        BUG_ON(ret); /* -ENOMEM */
 
                        bio_put(bio);
 
                        bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
                        BUG_ON(!bio);
+                       bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
                        bio->bi_private = cb;
                        bio->bi_end_io = end_compressed_bio_write;
                        bio_add_page(bio, page, PAGE_SIZE, 0);
@@ -431,7 +433,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
                BUG_ON(ret); /* -ENOMEM */
        }
 
-       ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
+       ret = btrfs_map_bio(root, bio, 0, 1);
        BUG_ON(ret); /* -ENOMEM */
 
        bio_put(bio);
@@ -646,6 +648,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
        if (!comp_bio)
                goto fail2;
+       bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);

        comp_bio->bi_private = cb;
        comp_bio->bi_end_io = end_compressed_bio_read;
        atomic_inc(&cb->pending_bios);
@@ -656,7 +659,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                page->index = em_start >> PAGE_SHIFT;
 
                if (comp_bio->bi_iter.bi_size)
-                       ret = tree->ops->merge_bio_hook(READ, page, 0,
+                       ret = tree->ops->merge_bio_hook(page, 0,
                                                        PAGE_SIZE,
                                                        comp_bio, 0);
                else
@@ -687,8 +690,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                        sums += DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
                                             root->sectorsize);
 
-                       ret = btrfs_map_bio(root, READ, comp_bio,
-                                           mirror_num, 0);
+                       ret = btrfs_map_bio(root, comp_bio, mirror_num, 0);
                        if (ret) {
                                bio->bi_error = ret;
                                bio_endio(comp_bio);
@@ -699,6 +701,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                        comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
                                                        GFP_NOFS);
                        BUG_ON(!comp_bio);
+                       bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
                        comp_bio->bi_private = cb;
                        comp_bio->bi_end_io = end_compressed_bio_read;
 
@@ -717,7 +720,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                BUG_ON(ret); /* -ENOMEM */
        }
 
-       ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
+       ret = btrfs_map_bio(root, comp_bio, mirror_num, 0);
        if (ret) {
                bio->bi_error = ret;
                bio_endio(comp_bio);
@@ -743,8 +746,11 @@ out:
 static struct {
        struct list_head idle_ws;
        spinlock_t ws_lock;
-       int num_ws;
-       atomic_t alloc_ws;
+       /* Number of free workspaces */
+       int free_ws;
+       /* Total number of allocated workspaces */
+       atomic_t total_ws;
+       /* Waiters for a free workspace */
        wait_queue_head_t ws_wait;
 } btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
 
@@ -758,16 +764,34 @@ void __init btrfs_init_compress(void)
        int i;
 
        for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+               struct list_head *workspace;
+
                INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
                spin_lock_init(&btrfs_comp_ws[i].ws_lock);
-               atomic_set(&btrfs_comp_ws[i].alloc_ws, 0);
+               atomic_set(&btrfs_comp_ws[i].total_ws, 0);
                init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);
+
+               /*
+                * Preallocate one workspace for each compression type so
+                * we can guarantee forward progress in the worst case
+                */
+               workspace = btrfs_compress_op[i]->alloc_workspace();
+               if (IS_ERR(workspace)) {
+                       printk(KERN_WARNING
+       "BTRFS: cannot preallocate compression workspace, will try later");
+               } else {
+                       atomic_set(&btrfs_comp_ws[i].total_ws, 1);
+                       btrfs_comp_ws[i].free_ws = 1;
+                       list_add(workspace, &btrfs_comp_ws[i].idle_ws);
+               }
        }
 }
 
 /*
- * this finds an available workspace or allocates a new one
- * ERR_PTR is returned if things go bad.
+ * This finds an available workspace or allocates a new one.
+ * If it's not possible to allocate a new one, it waits until there's one.
+ * Preallocation provides a forward progress guarantee and we do not return
+ * errors.
  */
 static struct list_head *find_workspace(int type)
 {
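The two hunks above rename the counters (num_ws/alloc_ws become free_ws/total_ws, now documented) and preallocate one workspace per compression type at init time, so that even if a later allocation fails under memory pressure each type still has at least one workspace to fall back on. Below is a small, self-contained user-space analogue of that bookkeeping; the names (struct ws_pool, ws_pool_init, alloc_one_workspace, NUM_TYPES) are assumptions for illustration and are not the btrfs symbols.

	#include <stdio.h>
	#include <stdlib.h>

	#define NUM_TYPES 1			/* stand-in for BTRFS_COMPRESS_TYPES */

	/* Analogue of one btrfs_comp_ws[] slot: an idle list plus counters. */
	struct ws_pool {
		void *idle[16];			/* stand-in for the idle_ws list */
		int free_ws;			/* number of idle (free) workspaces */
		int total_ws;			/* total workspaces allocated so far */
	};

	static struct ws_pool pools[NUM_TYPES];

	/* Stand-in for ->alloc_workspace(); may fail under memory pressure. */
	static void *alloc_one_workspace(void)
	{
		return malloc(4096);
	}

	static void ws_pool_init(void)
	{
		int i;

		for (i = 0; i < NUM_TYPES; i++) {
			void *ws = alloc_one_workspace();

			if (!ws) {
				/* Not fatal: a later allocation may succeed. */
				fprintf(stderr,
					"cannot preallocate workspace, will try later\n");
				continue;
			}
			/* One guaranteed workspace per type. */
			pools[i].idle[0] = ws;
			pools[i].free_ws = 1;
			pools[i].total_ws = 1;
		}
	}

	int main(void)
	{
		ws_pool_init();
		printf("type 0: total=%d free=%d\n",
		       pools[0].total_ws, pools[0].free_ws);
		return 0;
	}
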
@@ -777,36 +801,58 @@ static struct list_head *find_workspace(int type)
 
        struct list_head *idle_ws       = &btrfs_comp_ws[idx].idle_ws;
        spinlock_t *ws_lock             = &btrfs_comp_ws[idx].ws_lock;
-       atomic_t *alloc_ws              = &btrfs_comp_ws[idx].alloc_ws;
+       atomic_t *total_ws              = &btrfs_comp_ws[idx].total_ws;
        wait_queue_head_t *ws_wait      = &btrfs_comp_ws[idx].ws_wait;
-       int *num_ws                     = &btrfs_comp_ws[idx].num_ws;
+       int *free_ws                    = &btrfs_comp_ws[idx].free_ws;
 again:
        spin_lock(ws_lock);
        if (!list_empty(idle_ws)) {
                workspace = idle_ws->next;
                list_del(workspace);
-               (*num_ws)--;
+               (*free_ws)--;
                spin_unlock(ws_lock);
                return workspace;
 
        }
-       if (atomic_read(alloc_ws) > cpus) {
+       if (atomic_read(total_ws) > cpus) {
                DEFINE_WAIT(wait);
 
                spin_unlock(ws_lock);
                prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
-               if (atomic_read(alloc_ws) > cpus && !*num_ws)
+               if (atomic_read(total_ws) > cpus && !*free_ws)
                        schedule();
                finish_wait(ws_wait, &wait);
                goto again;
        }
-       atomic_inc(alloc_ws);
+       atomic_inc(total_ws);
        spin_unlock(ws_lock);
 
        workspace = btrfs_compress_op[idx]->alloc_workspace();
        if (IS_ERR(workspace)) {
-               atomic_dec(alloc_ws);
+               atomic_dec(total_ws);
                wake_up(ws_wait);
+
+               /*
+                * Do not return the error but go back to waiting. There's a
+                * workspace preallocated for each type and the compression
+                * time is bounded so we get to a workspace eventually. This
+                * makes our caller's life easier.
+                *
+                * To prevent silent and low-probability deadlocks (when the
+                * initial preallocation fails), check if there are any
+                * workspaces at all.
+                */
+               if (atomic_read(total_ws) == 0) {
+                       static DEFINE_RATELIMIT_STATE(_rs,
+                                       /* once per minute */ 60 * HZ,
+                                       /* no burst */ 1);
+
+                       if (__ratelimit(&_rs)) {
+                               printk(KERN_WARNING
+                           "no compression workspaces, low memory, retrying");
+                       }
+               }
+               goto again;
        }
        return workspace;
 }
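With the preallocated workspace in place, find_workspace() never returns an error: if the idle list is empty and a fresh allocation fails, it warns (rate limited) and loops back to waiting until some other user releases a workspace. A rough user-space analogue of that wait-and-retry shape, using a pthread mutex and condition variable in place of ws_lock/ws_wait, is sketched below; ws_acquire, pool_lock, pool_cond and WS_LIMIT are invented names, and the limit of one stands in for num_online_cpus().

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define MAX_IDLE 16
	#define WS_LIMIT 1			/* stand-in for num_online_cpus() */

	static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t pool_cond = PTHREAD_COND_INITIALIZER;
	static void *idle_ws[MAX_IDLE];
	static int free_ws;			/* idle entries */
	static int total_ws;			/* all allocated entries */

	static void *alloc_one_workspace(void)
	{
		return malloc(4096);		/* may return NULL under pressure */
	}

	/* Never fails: takes an idle workspace, allocates a new one, or
	 * blocks until another user releases one. */
	static void *ws_acquire(void)
	{
		void *ws;

		for (;;) {
			pthread_mutex_lock(&pool_lock);
			if (free_ws > 0) {
				ws = idle_ws[--free_ws];
				pthread_mutex_unlock(&pool_lock);
				return ws;
			}
			if (total_ws > WS_LIMIT) {
				/* Enough workspaces exist; wait for a free one. */
				pthread_cond_wait(&pool_cond, &pool_lock);
				pthread_mutex_unlock(&pool_lock);
				continue;
			}
			total_ws++;
			pthread_mutex_unlock(&pool_lock);

			ws = alloc_one_workspace();
			if (ws)
				return ws;

			/* Allocation failed: undo the count, warn, and go back
			 * to waiting instead of returning an error. */
			pthread_mutex_lock(&pool_lock);
			total_ws--;
			pthread_mutex_unlock(&pool_lock);
			fprintf(stderr, "no workspaces, low memory, retrying\n");
		}
	}

	int main(void)
	{
		/* Preallocate one workspace so acquisition can always make
		 * progress, mirroring the init-time preallocation above. */
		idle_ws[free_ws++] = alloc_one_workspace();
		total_ws = 1;

		printf("got workspace %p\n", ws_acquire());
		return 0;
	}
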
@@ -820,21 +866,21 @@ static void free_workspace(int type, struct list_head *workspace)
        int idx = type - 1;
        struct list_head *idle_ws       = &btrfs_comp_ws[idx].idle_ws;
        spinlock_t *ws_lock             = &btrfs_comp_ws[idx].ws_lock;
-       atomic_t *alloc_ws              = &btrfs_comp_ws[idx].alloc_ws;
+       atomic_t *total_ws              = &btrfs_comp_ws[idx].total_ws;
        wait_queue_head_t *ws_wait      = &btrfs_comp_ws[idx].ws_wait;
-       int *num_ws                     = &btrfs_comp_ws[idx].num_ws;
+       int *free_ws                    = &btrfs_comp_ws[idx].free_ws;
 
        spin_lock(ws_lock);
-       if (*num_ws < num_online_cpus()) {
+       if (*free_ws < num_online_cpus()) {
                list_add(workspace, idle_ws);
-               (*num_ws)++;
+               (*free_ws)++;
                spin_unlock(ws_lock);
                goto wake;
        }
        spin_unlock(ws_lock);
 
        btrfs_compress_op[idx]->free_workspace(workspace);
-       atomic_dec(alloc_ws);
+       atomic_dec(total_ws);
 wake:
        /*
         * Make sure counter is updated before we wake up waiters.
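free_workspace() is the other half of the protocol: it caches an idle workspace only while fewer than num_online_cpus() are already cached, frees the rest outright, and always wakes any waiter afterwards. Continuing the same user-space analogue, a matching release function might look like the sketch below; as before, ws_release, pool_lock, pool_cond and WS_LIMIT are invented names, not btrfs symbols.

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define MAX_IDLE 16
	#define WS_LIMIT 1			/* stand-in for num_online_cpus() */

	static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t pool_cond = PTHREAD_COND_INITIALIZER;
	static void *idle_ws[MAX_IDLE];
	static int free_ws;			/* idle entries cached for reuse */
	static int total_ws;			/* all live entries */

	/* Put a workspace back: cache it if the idle list is short, otherwise
	 * really free it, then wake anyone waiting in the acquire path. */
	static void ws_release(void *ws)
	{
		pthread_mutex_lock(&pool_lock);
		if (free_ws < WS_LIMIT) {
			idle_ws[free_ws++] = ws;	/* keep it for the next user */
		} else {
			free(ws);			/* too many idle: drop it */
			total_ws--;
		}
		pthread_mutex_unlock(&pool_lock);

		/* Counters were updated under the lock above, so a waiter that
		 * wakes up here sees the new state. */
		pthread_cond_signal(&pool_cond);
	}

	int main(void)
	{
		void *a = malloc(4096), *b = malloc(4096);

		total_ws = 2;
		ws_release(a);			/* cached: free_ws becomes 1 */
		ws_release(b);			/* over the cap: freed outright */
		printf("total=%d free=%d\n", total_ws, free_ws);
		return 0;
	}

Because the acquire side can no longer fail, the remaining hunks in this file simply drop the IS_ERR() checks after find_workspace() in the compress and decompress entry points.
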
@@ -857,7 +903,7 @@ static void free_workspaces(void)
                        workspace = btrfs_comp_ws[i].idle_ws.next;
                        list_del(workspace);
                        btrfs_compress_op[i]->free_workspace(workspace);
-                       atomic_dec(&btrfs_comp_ws[i].alloc_ws);
+                       atomic_dec(&btrfs_comp_ws[i].total_ws);
                }
        }
 }
@@ -894,8 +940,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
        int ret;
 
        workspace = find_workspace(type);
-       if (IS_ERR(workspace))
-               return PTR_ERR(workspace);
 
        ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
                                                      start, len, pages,
@@ -930,8 +974,6 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,
        int ret;
 
        workspace = find_workspace(type);
-       if (IS_ERR(workspace))
-               return PTR_ERR(workspace);
 
        ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
                                                         disk_start,
@@ -952,8 +994,6 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
        int ret;
 
        workspace = find_workspace(type);
-       if (IS_ERR(workspace))
-               return PTR_ERR(workspace);
 
        ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
                                                  dest_page, start_byte,