dm thin: sort the deferred cells
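
The sorting itself appears further down in the new cmp_cells()/sort_cells() helpers: deferred bio-prison cells are gathered per thin device on tc->deferred_cells and ordered by their holder bio's starting sector before process_thin_deferred_cells() hands them to the process_cell/process_discard_cell handlers, so the pool worker issues I/O in ascending sector order. As a rough illustration of that comparator pattern, here is a minimal standalone sketch in plain userspace C; struct cell and qsort() are stand-ins for the kernel's struct dm_bio_prison_cell and sort(), not the real API:

    /*
     * Userspace stand-in for the sort-by-sector idea in cmp_cells()/
     * sort_cells() below: an array of cell pointers is sorted by the
     * holder bio's starting sector so I/O is processed in disk order.
     */
    #include <stdio.h>
    #include <stdlib.h>

    struct cell {                       /* stand-in for dm_bio_prison_cell */
            unsigned long long sector;  /* holder->bi_iter.bi_sector */
    };

    static int cmp_cells(const void *lhs, const void *rhs)
    {
            /* qsort() passes pointers to the array elements, which are
             * themselves pointers to cells, hence the double indirection. */
            const struct cell *l = *(const struct cell * const *)lhs;
            const struct cell *r = *(const struct cell * const *)rhs;

            if (l->sector < r->sector)
                    return -1;
            if (l->sector > r->sector)
                    return 1;
            return 0;
    }

    int main(void)
    {
            struct cell a = { 4096 }, b = { 128 }, c = { 2048 };
            struct cell *cells[] = { &a, &b, &c };
            unsigned i;

            qsort(cells, 3, sizeof(cells[0]), cmp_cells);   /* kernel uses sort() */

            for (i = 0; i < 3; i++)
                    printf("%llu\n", cells[i]->sector);     /* 128 2048 4096 */

            return 0;
    }

In the patch the same comparison runs inside the pool worker, bounded to CELL_SORT_ARRAY_SIZE cells per pass of sort_cells().
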
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index fc9c848a60c9267a44296b54656bbdda60f40fd0..b9d25026ab84ac55afd5df7d67bfc177219d7def 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
 #include <linux/device-mapper.h>
 #include <linux/dm-io.h>
 #include <linux/dm-kcopyd.h>
+#include <linux/log2.h>
 #include <linux/list.h>
 #include <linux/rculist.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/sort.h>
 #include <linux/rbtree.h>
 
 #define        DM_MSG_PREFIX   "thin"
@@ -25,7 +27,6 @@
  */
 #define ENDIO_HOOK_POOL_SIZE 1024
 #define MAPPING_POOL_SIZE 1024
-#define PRISON_CELLS 1024
 #define COMMIT_PERIOD HZ
 #define NO_SPACE_TIMEOUT_SECS 60
 
@@ -127,6 +128,53 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
 
 /*----------------------------------------------------------------*/
 
+#define THROTTLE_THRESHOLD (1 * HZ)
+
+struct throttle {
+       struct rw_semaphore lock;
+       unsigned long threshold;
+       bool throttle_applied;
+};
+
+static void throttle_init(struct throttle *t)
+{
+       init_rwsem(&t->lock);
+       t->throttle_applied = false;
+}
+
+static void throttle_work_start(struct throttle *t)
+{
+       t->threshold = jiffies + THROTTLE_THRESHOLD;
+}
+
+static void throttle_work_update(struct throttle *t)
+{
+       if (!t->throttle_applied && jiffies > t->threshold) {
+               down_write(&t->lock);
+               t->throttle_applied = true;
+       }
+}
+
+static void throttle_work_complete(struct throttle *t)
+{
+       if (t->throttle_applied) {
+               t->throttle_applied = false;
+               up_write(&t->lock);
+       }
+}
+
+static void throttle_lock(struct throttle *t)
+{
+       down_read(&t->lock);
+}
+
+static void throttle_unlock(struct throttle *t)
+{
+       up_read(&t->lock);
+}
+
+/*----------------------------------------------------------------*/
+
 /*
  * A pool device ties together a metadata device and a data device.  It
  * also provides the interface for creating and destroying internal
@@ -155,8 +203,11 @@ struct pool_features {
 
 struct thin_c;
 typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio);
+typedef void (*process_cell_fn)(struct thin_c *tc, struct dm_bio_prison_cell *cell);
 typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m);
 
+#define CELL_SORT_ARRAY_SIZE 8192
+
 struct pool {
        struct list_head list;
        struct dm_target *ti;   /* Only set if a pool target is bound */
@@ -176,6 +227,7 @@ struct pool {
        struct dm_kcopyd_client *copier;
 
        struct workqueue_struct *wq;
+       struct throttle throttle;
        struct work_struct worker;
        struct delayed_work waker;
        struct delayed_work no_space_timeout;
@@ -198,8 +250,13 @@ struct pool {
        process_bio_fn process_bio;
        process_bio_fn process_discard;
 
+       process_cell_fn process_cell;
+       process_cell_fn process_discard_cell;
+
        process_mapping_fn process_prepared_mapping;
        process_mapping_fn process_prepared_discard;
+
+       struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE];
 };
 
 static enum pool_mode get_pool_mode(struct pool *pool);
@@ -227,12 +284,14 @@ struct thin_c {
        struct list_head list;
        struct dm_dev *pool_dev;
        struct dm_dev *origin_dev;
+       sector_t origin_size;
        dm_thin_id dev_id;
 
        struct pool *pool;
        struct dm_thin_device *td;
        bool requeue_mode:1;
        spinlock_t lock;
+       struct list_head deferred_cells;
        struct bio_list deferred_bio_list;
        struct bio_list retry_on_resume_list;
        struct rb_root sort_bio_list; /* sorted list of deferred bios */
@@ -289,6 +348,15 @@ static void cell_release(struct pool *pool,
        dm_bio_prison_free_cell(pool->prison, cell);
 }
 
+static void cell_visit_release(struct pool *pool,
+                              void (*fn)(void *, struct dm_bio_prison_cell *),
+                              void *context,
+                              struct dm_bio_prison_cell *cell)
+{
+       dm_cell_visit_release(pool->prison, fn, context, cell);
+       dm_bio_prison_free_cell(pool->prison, cell);
+}
+
 static void cell_release_no_holder(struct pool *pool,
                                   struct dm_bio_prison_cell *cell,
                                   struct bio_list *bios)
@@ -297,19 +365,6 @@ static void cell_release_no_holder(struct pool *pool,
        dm_bio_prison_free_cell(pool->prison, cell);
 }
 
-static void cell_defer_no_holder_no_free(struct thin_c *tc,
-                                        struct dm_bio_prison_cell *cell)
-{
-       struct pool *pool = tc->pool;
-       unsigned long flags;
-
-       spin_lock_irqsave(&tc->lock, flags);
-       dm_cell_release_no_holder(pool->prison, cell, &tc->deferred_bio_list);
-       spin_unlock_irqrestore(&tc->lock, flags);
-
-       wake_worker(pool);
-}
-
 static void cell_error_with_code(struct pool *pool,
                                 struct dm_bio_prison_cell *cell, int error_code)
 {
@@ -322,6 +377,16 @@ static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
        cell_error_with_code(pool, cell, -EIO);
 }
 
+static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
+{
+       cell_error_with_code(pool, cell, 0);
+}
+
+static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
+{
+       cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE);
+}
+
 /*----------------------------------------------------------------*/
 
 /*
@@ -409,10 +474,28 @@ static void requeue_bio_list(struct thin_c *tc, struct bio_list *master)
                bio_endio(bio, DM_ENDIO_REQUEUE);
 }
 
+static void requeue_deferred_cells(struct thin_c *tc)
+{
+       struct pool *pool = tc->pool;
+       unsigned long flags;
+       struct list_head cells;
+       struct dm_bio_prison_cell *cell, *tmp;
+
+       INIT_LIST_HEAD(&cells);
+
+       spin_lock_irqsave(&tc->lock, flags);
+       list_splice_init(&tc->deferred_cells, &cells);
+       spin_unlock_irqrestore(&tc->lock, flags);
+
+       list_for_each_entry_safe(cell, tmp, &cells, user_list)
+               cell_requeue(pool, cell);
+}
+
 static void requeue_io(struct thin_c *tc)
 {
        requeue_bio_list(tc, &tc->deferred_bio_list);
        requeue_bio_list(tc, &tc->retry_on_resume_list);
+       requeue_deferred_cells(tc);
 }
 
 static void error_thin_retry_list(struct thin_c *tc)
@@ -554,11 +637,16 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio,
 struct dm_thin_new_mapping {
        struct list_head list;
 
-       bool quiesced:1;
-       bool prepared:1;
        bool pass_discard:1;
        bool definitely_not_shared:1;
 
+       /*
+        * Track quiescing, copying and zeroing preparation actions.  When this
+        * counter hits zero the block is prepared and can be inserted into the
+        * btree.
+        */
+       atomic_t prepare_actions;
+
        int err;
        struct thin_c *tc;
        dm_block_t virt_block;
@@ -575,43 +663,41 @@ struct dm_thin_new_mapping {
        bio_end_io_t *saved_bi_end_io;
 };
 
-static void __maybe_add_mapping(struct dm_thin_new_mapping *m)
+static void __complete_mapping_preparation(struct dm_thin_new_mapping *m)
 {
        struct pool *pool = m->tc->pool;
 
-       if (m->quiesced && m->prepared) {
+       if (atomic_dec_and_test(&m->prepare_actions)) {
                list_add_tail(&m->list, &pool->prepared_mappings);
                wake_worker(pool);
        }
 }
 
-static void copy_complete(int read_err, unsigned long write_err, void *context)
+static void complete_mapping_preparation(struct dm_thin_new_mapping *m)
 {
        unsigned long flags;
-       struct dm_thin_new_mapping *m = context;
        struct pool *pool = m->tc->pool;
 
-       m->err = read_err || write_err ? -EIO : 0;
-
        spin_lock_irqsave(&pool->lock, flags);
-       m->prepared = true;
-       __maybe_add_mapping(m);
+       __complete_mapping_preparation(m);
        spin_unlock_irqrestore(&pool->lock, flags);
 }
 
+static void copy_complete(int read_err, unsigned long write_err, void *context)
+{
+       struct dm_thin_new_mapping *m = context;
+
+       m->err = read_err || write_err ? -EIO : 0;
+       complete_mapping_preparation(m);
+}
+
 static void overwrite_endio(struct bio *bio, int err)
 {
-       unsigned long flags;
        struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
        struct dm_thin_new_mapping *m = h->overwrite_mapping;
-       struct pool *pool = m->tc->pool;
 
        m->err = err;
-
-       spin_lock_irqsave(&pool->lock, flags);
-       m->prepared = true;
-       __maybe_add_mapping(m);
-       spin_unlock_irqrestore(&pool->lock, flags);
+       complete_mapping_preparation(m);
 }
 
 /*----------------------------------------------------------------*/
@@ -625,33 +711,75 @@ static void overwrite_endio(struct bio *bio, int err)
  */
 
 /*
- * This sends the bios in the cell back to the deferred_bios list.
+ * This sends the bios in the cell, except the original holder, back
+ * to the deferred_bios list.
  */
-static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell)
+static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell)
 {
        struct pool *pool = tc->pool;
        unsigned long flags;
 
        spin_lock_irqsave(&tc->lock, flags);
-       cell_release(pool, cell, &tc->deferred_bio_list);
+       cell_release_no_holder(pool, cell, &tc->deferred_bio_list);
        spin_unlock_irqrestore(&tc->lock, flags);
 
        wake_worker(pool);
 }
 
-/*
- * Same as cell_defer above, except it omits the original holder of the cell.
- */
-static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell)
+static void thin_defer_bio(struct thin_c *tc, struct bio *bio);
+
+struct remap_info {
+       struct thin_c *tc;
+       struct bio_list defer_bios;
+       struct bio_list issue_bios;
+};
+
+static void __inc_remap_and_issue_cell(void *context,
+                                      struct dm_bio_prison_cell *cell)
 {
-       struct pool *pool = tc->pool;
-       unsigned long flags;
+       struct remap_info *info = context;
+       struct bio *bio;
 
-       spin_lock_irqsave(&tc->lock, flags);
-       cell_release_no_holder(pool, cell, &tc->deferred_bio_list);
-       spin_unlock_irqrestore(&tc->lock, flags);
+       while ((bio = bio_list_pop(&cell->bios))) {
+               if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA))
+                       bio_list_add(&info->defer_bios, bio);
+               else {
+                       inc_all_io_entry(info->tc->pool, bio);
 
-       wake_worker(pool);
+                       /*
+                        * We can't issue the bios with the bio prison lock
+                        * held, so we add them to a list to issue on
+                        * return from this function.
+                        */
+                       bio_list_add(&info->issue_bios, bio);
+               }
+       }
+}
+
+static void inc_remap_and_issue_cell(struct thin_c *tc,
+                                    struct dm_bio_prison_cell *cell,
+                                    dm_block_t block)
+{
+       struct bio *bio;
+       struct remap_info info;
+
+       info.tc = tc;
+       bio_list_init(&info.defer_bios);
+       bio_list_init(&info.issue_bios);
+
+       /*
+        * We have to be careful to inc any bios we're about to issue
+        * before the cell is released, and avoid a race with new bios
+        * being added to the cell.
+        */
+       cell_visit_release(tc->pool, __inc_remap_and_issue_cell,
+                          &info, cell);
+
+       while ((bio = bio_list_pop(&info.defer_bios)))
+               thin_defer_bio(tc, bio);
+
+       while ((bio = bio_list_pop(&info.issue_bios)))
+               remap_and_issue(info.tc, bio, block);
 }
 
 static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
@@ -702,10 +830,13 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
         * the bios in the cell.
         */
        if (bio) {
-               cell_defer_no_holder(tc, m->cell);
+               inc_remap_and_issue_cell(tc, m->cell, m->data_block);
                bio_endio(bio, 0);
-       } else
-               cell_defer(tc, m->cell);
+       } else {
+               inc_all_io_entry(tc->pool, m->cell->holder);
+               remap_and_issue(tc, m->cell->holder, m->data_block);
+               inc_remap_and_issue_cell(tc, m->cell, m->data_block);
+       }
 
 out:
        list_del(&m->list);
@@ -821,10 +952,45 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
        return m;
 }
 
+static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
+                   sector_t begin, sector_t end)
+{
+       int r;
+       struct dm_io_region to;
+
+       to.bdev = tc->pool_dev->bdev;
+       to.sector = begin;
+       to.count = end - begin;
+
+       r = dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
+       if (r < 0) {
+               DMERR_LIMIT("dm_kcopyd_zero() failed");
+               copy_complete(1, 1, m);
+       }
+}
+
+static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio,
+                                     dm_block_t data_block,
+                                     struct dm_thin_new_mapping *m)
+{
+       struct pool *pool = tc->pool;
+       struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
+
+       h->overwrite_mapping = m;
+       m->bio = bio;
+       save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
+       inc_all_io_entry(pool, bio);
+       remap_and_issue(tc, bio, data_block);
+}
+
+/*
+ * A partial copy also needs to zero the uncopied region.
+ */
 static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
                          struct dm_dev *origin, dm_block_t data_origin,
                          dm_block_t data_dest,
-                         struct dm_bio_prison_cell *cell, struct bio *bio)
+                         struct dm_bio_prison_cell *cell, struct bio *bio,
+                         sector_t len)
 {
        int r;
        struct pool *pool = tc->pool;
@@ -835,8 +1001,15 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
        m->data_block = data_dest;
        m->cell = cell;
 
+       /*
+        * quiesce action + copy action + an extra reference held for the
+        * duration of this function (we may need to inc later for a
+        * partial zero).
+        */
+       atomic_set(&m->prepare_actions, 3);
+
        if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list))
-               m->quiesced = true;
+               complete_mapping_preparation(m); /* already quiesced */
 
        /*
         * IO to pool_dev remaps to the pool target's data_dev.
@@ -844,33 +1017,45 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
         * If the whole block of data is being overwritten, we can issue the
         * bio immediately. Otherwise we use kcopyd to clone the data first.
         */
-       if (io_overwrites_block(pool, bio)) {
-               struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
-
-               h->overwrite_mapping = m;
-               m->bio = bio;
-               save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
-               inc_all_io_entry(pool, bio);
-               remap_and_issue(tc, bio, data_dest);
-       } else {
+       if (io_overwrites_block(pool, bio))
+               remap_and_issue_overwrite(tc, bio, data_dest, m);
+       else {
                struct dm_io_region from, to;
 
                from.bdev = origin->bdev;
                from.sector = data_origin * pool->sectors_per_block;
-               from.count = pool->sectors_per_block;
+               from.count = len;
 
                to.bdev = tc->pool_dev->bdev;
                to.sector = data_dest * pool->sectors_per_block;
-               to.count = pool->sectors_per_block;
+               to.count = len;
 
                r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
                                   0, copy_complete, m);
                if (r < 0) {
-                       mempool_free(m, pool->mapping_pool);
                        DMERR_LIMIT("dm_kcopyd_copy() failed");
-                       cell_error(pool, cell);
+                       copy_complete(1, 1, m);
+
+                       /*
+                        * We allow the zero to be issued, to simplify the
+                        * error path.  Otherwise we'd need to start
+                        * worrying about decrementing the prepare_actions
+                        * counter.
+                        */
+               }
+
+               /*
+                * Do we need to zero a tail region?
+                */
+               if (len < pool->sectors_per_block && pool->pf.zero_new_blocks) {
+                       atomic_inc(&m->prepare_actions);
+                       ll_zero(tc, m,
+                               data_dest * pool->sectors_per_block + len,
+                               (data_dest + 1) * pool->sectors_per_block);
                }
        }
+
+       complete_mapping_preparation(m); /* drop our ref */
 }
 
 static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
@@ -878,15 +1063,8 @@ static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
                                   struct dm_bio_prison_cell *cell, struct bio *bio)
 {
        schedule_copy(tc, virt_block, tc->pool_dev,
-                     data_origin, data_dest, cell, bio);
-}
-
-static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
-                                  dm_block_t data_dest,
-                                  struct dm_bio_prison_cell *cell, struct bio *bio)
-{
-       schedule_copy(tc, virt_block, tc->origin_dev,
-                     virt_block, data_dest, cell, bio);
+                     data_origin, data_dest, cell, bio,
+                     tc->pool->sectors_per_block);
 }
 
 static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
@@ -896,8 +1074,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
        struct pool *pool = tc->pool;
        struct dm_thin_new_mapping *m = get_next_mapping(pool);
 
-       m->quiesced = true;
-       m->prepared = false;
+       atomic_set(&m->prepare_actions, 1); /* no need to quiesce */
        m->tc = tc;
        m->virt_block = virt_block;
        m->data_block = data_block;
@@ -911,29 +1088,35 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
        if (!pool->pf.zero_new_blocks)
                process_prepared_mapping(m);
 
-       else if (io_overwrites_block(pool, bio)) {
-               struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
+       else if (io_overwrites_block(pool, bio))
+               remap_and_issue_overwrite(tc, bio, data_block, m);
 
-               h->overwrite_mapping = m;
-               m->bio = bio;
-               save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
-               inc_all_io_entry(pool, bio);
-               remap_and_issue(tc, bio, data_block);
-       } else {
-               int r;
-               struct dm_io_region to;
+       else
+               ll_zero(tc, m,
+                       data_block * pool->sectors_per_block,
+                       (data_block + 1) * pool->sectors_per_block);
+}
 
-               to.bdev = tc->pool_dev->bdev;
-               to.sector = data_block * pool->sectors_per_block;
-               to.count = pool->sectors_per_block;
+static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
+                                  dm_block_t data_dest,
+                                  struct dm_bio_prison_cell *cell, struct bio *bio)
+{
+       struct pool *pool = tc->pool;
+       sector_t virt_block_begin = virt_block * pool->sectors_per_block;
+       sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block;
 
-               r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m);
-               if (r < 0) {
-                       mempool_free(m, pool->mapping_pool);
-                       DMERR_LIMIT("dm_kcopyd_zero() failed");
-                       cell_error(pool, cell);
-               }
-       }
+       if (virt_block_end <= tc->origin_size)
+               schedule_copy(tc, virt_block, tc->origin_dev,
+                             virt_block, data_dest, cell, bio,
+                             pool->sectors_per_block);
+
+       else if (virt_block_begin < tc->origin_size)
+               schedule_copy(tc, virt_block, tc->origin_dev,
+                             virt_block, data_dest, cell, bio,
+                             tc->origin_size - virt_block_begin);
+
+       else
+               schedule_zero(tc, virt_block, data_dest, cell, bio);
 }
 
 /*
@@ -1089,20 +1272,21 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
                        retry_on_resume(bio);
 }
 
-static void process_discard(struct thin_c *tc, struct bio *bio)
+static void process_discard_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
 {
        int r;
-       unsigned long flags;
+       struct bio *bio = cell->holder;
        struct pool *pool = tc->pool;
-       struct dm_bio_prison_cell *cell, *cell2;
-       struct dm_cell_key key, key2;
+       struct dm_bio_prison_cell *cell2;
+       struct dm_cell_key key2;
        dm_block_t block = get_bio_block(tc, bio);
        struct dm_thin_lookup_result lookup_result;
        struct dm_thin_new_mapping *m;
 
-       build_virtual_key(tc->td, block, &key);
-       if (bio_detain(tc->pool, &key, bio, &cell))
+       if (tc->requeue_mode) {
+               cell_requeue(pool, cell);
                return;
+       }
 
        r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
        switch (r) {
@@ -1133,12 +1317,9 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
                        m->cell2 = cell2;
                        m->bio = bio;
 
-                       if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list)) {
-                               spin_lock_irqsave(&pool->lock, flags);
-                               list_add_tail(&m->list, &pool->prepared_discards);
-                               spin_unlock_irqrestore(&pool->lock, flags);
-                               wake_worker(pool);
-                       }
+                       if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
+                               pool->process_prepared_discard(m);
+
                } else {
                        inc_all_io_entry(pool, bio);
                        cell_defer_no_holder(tc, cell);
@@ -1173,6 +1354,19 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
        }
 }
 
+static void process_discard_bio(struct thin_c *tc, struct bio *bio)
+{
+       struct dm_bio_prison_cell *cell;
+       struct dm_cell_key key;
+       dm_block_t block = get_bio_block(tc, bio);
+
+       build_virtual_key(tc->td, block, &key);
+       if (bio_detain(tc->pool, &key, bio, &cell))
+               return;
+
+       process_discard_cell(tc, cell);
+}
+
 static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
                          struct dm_cell_key *key,
                          struct dm_thin_lookup_result *lookup_result,
@@ -1201,11 +1395,53 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
        }
 }
 
+static void __remap_and_issue_shared_cell(void *context,
+                                         struct dm_bio_prison_cell *cell)
+{
+       struct remap_info *info = context;
+       struct bio *bio;
+
+       while ((bio = bio_list_pop(&cell->bios))) {
+               if ((bio_data_dir(bio) == WRITE) ||
+                   (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)))
+                       bio_list_add(&info->defer_bios, bio);
+               else {
+                       struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
+
+                       h->shared_read_entry = dm_deferred_entry_inc(info->tc->pool->shared_read_ds);
+                       inc_all_io_entry(info->tc->pool, bio);
+                       bio_list_add(&info->issue_bios, bio);
+               }
+       }
+}
+
+static void remap_and_issue_shared_cell(struct thin_c *tc,
+                                       struct dm_bio_prison_cell *cell,
+                                       dm_block_t block)
+{
+       struct bio *bio;
+       struct remap_info info;
+
+       info.tc = tc;
+       bio_list_init(&info.defer_bios);
+       bio_list_init(&info.issue_bios);
+
+       cell_visit_release(tc->pool, __remap_and_issue_shared_cell,
+                          &info, cell);
+
+       while ((bio = bio_list_pop(&info.defer_bios)))
+               thin_defer_bio(tc, bio);
+
+       while ((bio = bio_list_pop(&info.issue_bios)))
+               remap_and_issue(tc, bio, block);
+}
+
 static void process_shared_bio(struct thin_c *tc, struct bio *bio,
                               dm_block_t block,
-                              struct dm_thin_lookup_result *lookup_result)
+                              struct dm_thin_lookup_result *lookup_result,
+                              struct dm_bio_prison_cell *virt_cell)
 {
-       struct dm_bio_prison_cell *cell;
+       struct dm_bio_prison_cell *data_cell;
        struct pool *pool = tc->pool;
        struct dm_cell_key key;
 
@@ -1214,19 +1450,23 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio,
         * of being broken so we have nothing further to do here.
         */
        build_data_key(tc->td, lookup_result->block, &key);
-       if (bio_detain(pool, &key, bio, &cell))
+       if (bio_detain(pool, &key, bio, &data_cell)) {
+               cell_defer_no_holder(tc, virt_cell);
                return;
+       }
 
-       if (bio_data_dir(bio) == WRITE && bio->bi_iter.bi_size)
-               break_sharing(tc, bio, block, &key, lookup_result, cell);
-       else {
+       if (bio_data_dir(bio) == WRITE && bio->bi_iter.bi_size) {
+               break_sharing(tc, bio, block, &key, lookup_result, data_cell);
+               cell_defer_no_holder(tc, virt_cell);
+       } else {
                struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
 
                h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds);
                inc_all_io_entry(pool, bio);
-               cell_defer_no_holder(tc, cell);
-
                remap_and_issue(tc, bio, lookup_result->block);
+
+               remap_and_issue_shared_cell(tc, data_cell, lookup_result->block);
+               remap_and_issue_shared_cell(tc, virt_cell, lookup_result->block);
        }
 }
 
@@ -1279,34 +1519,28 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
        }
 }
 
-static void process_bio(struct thin_c *tc, struct bio *bio)
+static void process_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
 {
        int r;
        struct pool *pool = tc->pool;
+       struct bio *bio = cell->holder;
        dm_block_t block = get_bio_block(tc, bio);
-       struct dm_bio_prison_cell *cell;
-       struct dm_cell_key key;
        struct dm_thin_lookup_result lookup_result;
 
-       /*
-        * If cell is already occupied, then the block is already
-        * being provisioned so we have nothing further to do here.
-        */
-       build_virtual_key(tc->td, block, &key);
-       if (bio_detain(pool, &key, bio, &cell))
+       if (tc->requeue_mode) {
+               cell_requeue(pool, cell);
                return;
+       }
 
        r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
        switch (r) {
        case 0:
-               if (lookup_result.shared) {
-                       process_shared_bio(tc, bio, block, &lookup_result);
-                       cell_defer_no_holder(tc, cell); /* FIXME: pass this cell into process_shared? */
-               } else {
+               if (lookup_result.shared)
+                       process_shared_bio(tc, bio, block, &lookup_result, cell);
+               else {
                        inc_all_io_entry(pool, bio);
-                       cell_defer_no_holder(tc, cell);
-
                        remap_and_issue(tc, bio, lookup_result.block);
+                       inc_remap_and_issue_cell(tc, cell, lookup_result.block);
                }
                break;
 
@@ -1315,7 +1549,18 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
                        inc_all_io_entry(pool, bio);
                        cell_defer_no_holder(tc, cell);
 
-                       remap_to_origin_and_issue(tc, bio);
+                       if (bio_end_sector(bio) <= tc->origin_size)
+                               remap_to_origin_and_issue(tc, bio);
+
+                       else if (bio->bi_iter.bi_sector < tc->origin_size) {
+                               zero_fill_bio(bio);
+                               bio->bi_iter.bi_size = (tc->origin_size - bio->bi_iter.bi_sector) << SECTOR_SHIFT;
+                               remap_to_origin_and_issue(tc, bio);
+
+                       } else {
+                               zero_fill_bio(bio);
+                               bio_endio(bio, 0);
+                       }
                } else
                        provision_block(tc, bio, block, cell);
                break;
@@ -1329,7 +1574,26 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
        }
 }
 
-static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
+static void process_bio(struct thin_c *tc, struct bio *bio)
+{
+       struct pool *pool = tc->pool;
+       dm_block_t block = get_bio_block(tc, bio);
+       struct dm_bio_prison_cell *cell;
+       struct dm_cell_key key;
+
+       /*
+        * If cell is already occupied, then the block is already
+        * being provisioned so we have nothing further to do here.
+        */
+       build_virtual_key(tc->td, block, &key);
+       if (bio_detain(pool, &key, bio, &cell))
+               return;
+
+       process_cell(tc, cell);
+}
+
+static void __process_bio_read_only(struct thin_c *tc, struct bio *bio,
+                                   struct dm_bio_prison_cell *cell)
 {
        int r;
        int rw = bio_data_dir(bio);
@@ -1339,15 +1603,21 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
        r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
        switch (r) {
        case 0:
-               if (lookup_result.shared && (rw == WRITE) && bio->bi_iter.bi_size)
+               if (lookup_result.shared && (rw == WRITE) && bio->bi_iter.bi_size) {
                        handle_unserviceable_bio(tc->pool, bio);
-               else {
+                       if (cell)
+                               cell_defer_no_holder(tc, cell);
+               } else {
                        inc_all_io_entry(tc->pool, bio);
                        remap_and_issue(tc, bio, lookup_result.block);
+                       if (cell)
+                               inc_remap_and_issue_cell(tc, cell, lookup_result.block);
                }
                break;
 
        case -ENODATA:
+               if (cell)
+                       cell_defer_no_holder(tc, cell);
                if (rw != READ) {
                        handle_unserviceable_bio(tc->pool, bio);
                        break;
@@ -1366,11 +1636,23 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
        default:
                DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
                            __func__, r);
+               if (cell)
+                       cell_defer_no_holder(tc, cell);
                bio_io_error(bio);
                break;
        }
 }
 
+static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
+{
+       __process_bio_read_only(tc, bio, NULL);
+}
+
+static void process_cell_read_only(struct thin_c *tc, struct dm_bio_prison_cell *cell)
+{
+       __process_bio_read_only(tc, cell->holder, cell);
+}
+
 static void process_bio_success(struct thin_c *tc, struct bio *bio)
 {
        bio_endio(bio, 0);
@@ -1381,6 +1663,16 @@ static void process_bio_fail(struct thin_c *tc, struct bio *bio)
        bio_io_error(bio);
 }
 
+static void process_cell_success(struct thin_c *tc, struct dm_bio_prison_cell *cell)
+{
+       cell_success(tc->pool, cell);
+}
+
+static void process_cell_fail(struct thin_c *tc, struct dm_bio_prison_cell *cell)
+{
+       cell_error(tc->pool, cell);
+}
+
 /*
  * FIXME: should we also commit due to size of transaction, measured in
  * metadata blocks?
@@ -1462,6 +1754,7 @@ static void process_thin_deferred_bios(struct thin_c *tc)
        struct bio *bio;
        struct bio_list bios;
        struct blk_plug plug;
+       unsigned count = 0;
 
        if (tc->requeue_mode) {
                requeue_bio_list(tc, &tc->deferred_bio_list);
@@ -1503,10 +1796,97 @@ static void process_thin_deferred_bios(struct thin_c *tc)
                        pool->process_discard(tc, bio);
                else
                        pool->process_bio(tc, bio);
+
+               if ((count++ & 127) == 0) {
+                       throttle_work_update(&pool->throttle);
+                       dm_pool_issue_prefetches(pool->pmd);
+               }
        }
        blk_finish_plug(&plug);
 }
 
+static int cmp_cells(const void *lhs, const void *rhs)
+{
+       struct dm_bio_prison_cell *lhs_cell = *((struct dm_bio_prison_cell **) lhs);
+       struct dm_bio_prison_cell *rhs_cell = *((struct dm_bio_prison_cell **) rhs);
+
+       BUG_ON(!lhs_cell->holder);
+       BUG_ON(!rhs_cell->holder);
+
+       if (lhs_cell->holder->bi_iter.bi_sector < rhs_cell->holder->bi_iter.bi_sector)
+               return -1;
+
+       if (lhs_cell->holder->bi_iter.bi_sector > rhs_cell->holder->bi_iter.bi_sector)
+               return 1;
+
+       return 0;
+}
+
+static unsigned sort_cells(struct pool *pool, struct list_head *cells)
+{
+       unsigned count = 0;
+       struct dm_bio_prison_cell *cell, *tmp;
+
+       list_for_each_entry_safe(cell, tmp, cells, user_list) {
+               if (count >= CELL_SORT_ARRAY_SIZE)
+                       break;
+
+               pool->cell_sort_array[count++] = cell;
+               list_del(&cell->user_list);
+       }
+
+       sort(pool->cell_sort_array, count, sizeof(cell), cmp_cells, NULL);
+
+       return count;
+}
+
+static void process_thin_deferred_cells(struct thin_c *tc)
+{
+       struct pool *pool = tc->pool;
+       unsigned long flags;
+       struct list_head cells;
+       struct dm_bio_prison_cell *cell;
+       unsigned i, j, count;
+
+       INIT_LIST_HEAD(&cells);
+
+       spin_lock_irqsave(&tc->lock, flags);
+       list_splice_init(&tc->deferred_cells, &cells);
+       spin_unlock_irqrestore(&tc->lock, flags);
+
+       if (list_empty(&cells))
+               return;
+
+       do {
+               count = sort_cells(tc->pool, &cells);
+
+               for (i = 0; i < count; i++) {
+                       cell = pool->cell_sort_array[i];
+                       BUG_ON(!cell->holder);
+
+                       /*
+                        * If we've got no free new_mapping structs, and processing
+                        * this bio might require one, we pause until there are some
+                        * prepared mappings to process.
+                        */
+                       if (ensure_next_mapping(pool)) {
+                               for (j = i; j < count; j++)
+                                       list_add(&pool->cell_sort_array[j]->user_list, &cells);
+
+                               spin_lock_irqsave(&tc->lock, flags);
+                               list_splice(&cells, &tc->deferred_cells);
+                               spin_unlock_irqrestore(&tc->lock, flags);
+                               return;
+                       }
+
+                       if (cell->holder->bi_rw & REQ_DISCARD)
+                               pool->process_discard_cell(tc, cell);
+                       else
+                               pool->process_cell(tc, cell);
+               }
+       } while (!list_empty(&cells));
+}
+
 static void thin_get(struct thin_c *tc);
 static void thin_put(struct thin_c *tc);
 
@@ -1555,6 +1935,7 @@ static void process_deferred_bios(struct pool *pool)
 
        tc = get_first_thin(pool);
        while (tc) {
+               process_thin_deferred_cells(tc);
                process_thin_deferred_bios(tc);
                tc = get_next_thin(pool, tc);
        }
@@ -1588,9 +1969,15 @@ static void do_worker(struct work_struct *ws)
 {
        struct pool *pool = container_of(ws, struct pool, worker);
 
+       throttle_work_start(&pool->throttle);
+       dm_pool_issue_prefetches(pool->pmd);
+       throttle_work_update(&pool->throttle);
        process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping);
+       throttle_work_update(&pool->throttle);
        process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard);
+       throttle_work_update(&pool->throttle);
        process_deferred_bios(pool);
+       throttle_work_complete(&pool->throttle);
 }
 
 /*
@@ -1727,6 +2114,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
                dm_pool_metadata_read_only(pool->pmd);
                pool->process_bio = process_bio_fail;
                pool->process_discard = process_bio_fail;
+               pool->process_cell = process_cell_fail;
+               pool->process_discard_cell = process_cell_fail;
                pool->process_prepared_mapping = process_prepared_mapping_fail;
                pool->process_prepared_discard = process_prepared_discard_fail;
 
@@ -1739,6 +2128,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
                dm_pool_metadata_read_only(pool->pmd);
                pool->process_bio = process_bio_read_only;
                pool->process_discard = process_bio_success;
+               pool->process_cell = process_cell_read_only;
+               pool->process_discard_cell = process_cell_success;
                pool->process_prepared_mapping = process_prepared_mapping_fail;
                pool->process_prepared_discard = process_prepared_discard_passdown;
 
@@ -1757,7 +2148,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
                if (old_mode != new_mode)
                        notify_of_pool_mode_change(pool, "out-of-data-space");
                pool->process_bio = process_bio_read_only;
-               pool->process_discard = process_discard;
+               pool->process_discard = process_discard_bio;
+               pool->process_cell = process_cell_read_only;
+               pool->process_discard_cell = process_discard_cell;
                pool->process_prepared_mapping = process_prepared_mapping;
                pool->process_prepared_discard = process_prepared_discard_passdown;
 
@@ -1770,7 +2163,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
                        notify_of_pool_mode_change(pool, "write");
                dm_pool_metadata_read_write(pool->pmd);
                pool->process_bio = process_bio;
-               pool->process_discard = process_discard;
+               pool->process_discard = process_discard_bio;
+               pool->process_cell = process_cell;
+               pool->process_discard_cell = process_discard_cell;
                pool->process_prepared_mapping = process_prepared_mapping;
                pool->process_prepared_discard = process_prepared_discard;
                break;
@@ -1830,6 +2225,29 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio)
        wake_worker(pool);
 }
 
+static void thin_defer_bio_with_throttle(struct thin_c *tc, struct bio *bio)
+{
+       struct pool *pool = tc->pool;
+
+       throttle_lock(&pool->throttle);
+       thin_defer_bio(tc, bio);
+       throttle_unlock(&pool->throttle);
+}
+
+static void thin_defer_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
+{
+       unsigned long flags;
+       struct pool *pool = tc->pool;
+
+       throttle_lock(&pool->throttle);
+       spin_lock_irqsave(&tc->lock, flags);
+       list_add_tail(&cell->user_list, &tc->deferred_cells);
+       spin_unlock_irqrestore(&tc->lock, flags);
+       throttle_unlock(&pool->throttle);
+
+       wake_worker(pool);
+}
+
 static void thin_hook_bio(struct thin_c *tc, struct bio *bio)
 {
        struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -1850,8 +2268,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
        dm_block_t block = get_bio_block(tc, bio);
        struct dm_thin_device *td = tc->td;
        struct dm_thin_lookup_result result;
-       struct dm_bio_prison_cell cell1, cell2;
-       struct dm_bio_prison_cell *cell_result;
+       struct dm_bio_prison_cell *virt_cell, *data_cell;
        struct dm_cell_key key;
 
        thin_hook_bio(tc, bio);
@@ -1867,10 +2284,18 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
        }
 
        if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) {
-               thin_defer_bio(tc, bio);
+               thin_defer_bio_with_throttle(tc, bio);
                return DM_MAPIO_SUBMITTED;
        }
 
+       /*
+        * We must hold the virtual cell before doing the lookup, otherwise
+        * there's a race with discard.
+        */
+       build_virtual_key(tc->td, block, &key);
+       if (bio_detain(tc->pool, &key, bio, &virt_cell))
+               return DM_MAPIO_SUBMITTED;
+
        r = dm_thin_find_block(td, block, 0, &result);
 
        /*
@@ -1893,23 +2318,19 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
                         * More distant ancestors are irrelevant. The
                         * shared flag will be set in their case.
                         */
-                       thin_defer_bio(tc, bio);
+                       thin_defer_cell(tc, virt_cell);
                        return DM_MAPIO_SUBMITTED;
                }
 
-               build_virtual_key(tc->td, block, &key);
-               if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1, &cell_result))
-                       return DM_MAPIO_SUBMITTED;
-
                build_data_key(tc->td, result.block, &key);
-               if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2, &cell_result)) {
-                       cell_defer_no_holder_no_free(tc, &cell1);
+               if (bio_detain(tc->pool, &key, bio, &data_cell)) {
+                       cell_defer_no_holder(tc, virt_cell);
                        return DM_MAPIO_SUBMITTED;
                }
 
                inc_all_io_entry(tc->pool, bio);
-               cell_defer_no_holder_no_free(tc, &cell2);
-               cell_defer_no_holder_no_free(tc, &cell1);
+               cell_defer_no_holder(tc, data_cell);
+               cell_defer_no_holder(tc, virt_cell);
 
                remap(tc, bio, result.block);
                return DM_MAPIO_REMAPPED;
@@ -1921,16 +2342,13 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
                         * of doing so.
                         */
                        handle_unserviceable_bio(tc->pool, bio);
+                       cell_defer_no_holder(tc, virt_cell);
                        return DM_MAPIO_SUBMITTED;
                }
                /* fall through */
 
        case -EWOULDBLOCK:
-               /*
-                * In future, the failed dm_thin_find_block above could
-                * provide the hint to load the metadata into cache.
-                */
-               thin_defer_bio(tc, bio);
+               thin_defer_cell(tc, virt_cell);
                return DM_MAPIO_SUBMITTED;
 
        default:
@@ -1940,6 +2358,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
                 * pool is switched to fail-io mode.
                 */
                bio_io_error(bio);
+               cell_defer_no_holder(tc, virt_cell);
                return DM_MAPIO_SUBMITTED;
        }
 }
@@ -2120,7 +2539,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
                pool->sectors_per_block_shift = __ffs(block_size);
        pool->low_water_blocks = 0;
        pool_features_init(&pool->pf);
-       pool->prison = dm_bio_prison_create(PRISON_CELLS);
+       pool->prison = dm_bio_prison_create();
        if (!pool->prison) {
                *error = "Error creating pool's bio prison";
                err_p = ERR_PTR(-ENOMEM);
@@ -2146,6 +2565,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
                goto bad_wq;
        }
 
+       throttle_init(&pool->throttle);
        INIT_WORK(&pool->worker, do_worker);
        INIT_DELAYED_WORK(&pool->waker, do_waker);
        INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
@@ -3104,15 +3524,42 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
        struct pool_c *pt = ti->private;
        struct pool *pool = pt->pool;
-       uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
+       sector_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
+
+       /*
+        * Adjust max_sectors_kb to highest possible power-of-2
+        * factor of pool->sectors_per_block.
+        */
+       if (limits->max_hw_sectors & (limits->max_hw_sectors - 1))
+               limits->max_sectors = rounddown_pow_of_two(limits->max_hw_sectors);
+       else
+               limits->max_sectors = limits->max_hw_sectors;
+
+       if (limits->max_sectors < pool->sectors_per_block) {
+               while (!is_factor(pool->sectors_per_block, limits->max_sectors)) {
+                       if ((limits->max_sectors & (limits->max_sectors - 1)) == 0)
+                               limits->max_sectors--;
+                       limits->max_sectors = rounddown_pow_of_two(limits->max_sectors);
+               }
+       } else if (block_size_is_power_of_two(pool)) {
+               /* max_sectors_kb is >= power-of-2 thinp blocksize */
+               while (!is_factor(limits->max_sectors, pool->sectors_per_block)) {
+                       if ((limits->max_sectors & (limits->max_sectors - 1)) == 0)
+                               limits->max_sectors--;
+                       limits->max_sectors = rounddown_pow_of_two(limits->max_sectors);
+               }
+       }
 
        /*
         * If the system-determined stacked limits are compatible with the
         * pool's blocksize (io_opt is a factor) do not override them.
         */
        if (io_opt_sectors < pool->sectors_per_block ||
-           do_div(io_opt_sectors, pool->sectors_per_block)) {
-               blk_limits_io_min(limits, 0);
+           !is_factor(io_opt_sectors, pool->sectors_per_block)) {
+               if (is_factor(pool->sectors_per_block, limits->max_sectors))
+                       blk_limits_io_min(limits, limits->max_sectors << SECTOR_SHIFT);
+               else
+                       blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT);
                blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
        }
 
@@ -3141,7 +3588,7 @@ static struct target_type pool_target = {
        .name = "thin-pool",
        .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
                    DM_TARGET_IMMUTABLE,
-       .version = {1, 12, 0},
+       .version = {1, 14, 0},
        .module = THIS_MODULE,
        .ctr = pool_ctr,
        .dtr = pool_dtr,
@@ -3230,6 +3677,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
                goto out_unlock;
        }
        spin_lock_init(&tc->lock);
+       INIT_LIST_HEAD(&tc->deferred_cells);
        bio_list_init(&tc->deferred_bio_list);
        bio_list_init(&tc->retry_on_resume_list);
        tc->sort_bio_list = RB_ROOT;
@@ -3361,8 +3809,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
                spin_lock_irqsave(&pool->lock, flags);
                list_for_each_entry_safe(m, tmp, &work, list) {
                        list_del(&m->list);
-                       m->quiesced = true;
-                       __maybe_add_mapping(m);
+                       __complete_mapping_preparation(m);
                }
                spin_unlock_irqrestore(&pool->lock, flags);
        }
@@ -3401,6 +3848,16 @@ static void thin_postsuspend(struct dm_target *ti)
        noflush_work(tc, do_noflush_stop);
 }
 
+static int thin_preresume(struct dm_target *ti)
+{
+       struct thin_c *tc = ti->private;
+
+       if (tc->origin_dev)
+               tc->origin_size = get_dev_size(tc->origin_dev->bdev);
+
+       return 0;
+}
+
 /*
  * <nr mapped sectors> <highest mapped sector>
  */
@@ -3459,6 +3916,21 @@ err:
        DMEMIT("Error");
 }
 
+static int thin_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
+                     struct bio_vec *biovec, int max_size)
+{
+       struct thin_c *tc = ti->private;
+       struct request_queue *q = bdev_get_queue(tc->pool_dev->bdev);
+
+       if (!q->merge_bvec_fn)
+               return max_size;
+
+       bvm->bi_bdev = tc->pool_dev->bdev;
+       bvm->bi_sector = dm_target_offset(ti, bvm->bi_sector);
+
+       return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
+}
+
 static int thin_iterate_devices(struct dm_target *ti,
                                iterate_devices_callout_fn fn, void *data)
 {
@@ -3483,15 +3955,17 @@ static int thin_iterate_devices(struct dm_target *ti,
 
 static struct target_type thin_target = {
        .name = "thin",
-       .version = {1, 12, 0},
+       .version = {1, 14, 0},
        .module = THIS_MODULE,
        .ctr = thin_ctr,
        .dtr = thin_dtr,
        .map = thin_map,
        .end_io = thin_endio,
+       .preresume = thin_preresume,
        .presuspend = thin_presuspend,
        .postsuspend = thin_postsuspend,
        .status = thin_status,
+       .merge = thin_merge,
        .iterate_devices = thin_iterate_devices,
 };