dm cache: emit a warning message if there are a lot of cache blocks
drivers/md/dm-cache-target.c
index 2c63326638b6d4d54af4499643ac10dd9d8ee33b..abdd45d07bf66e5a217ee041aaa8cdb7e7b78ddf 100644 (file)
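Note (not part of the patch): the warning named in the subject line comes from the new set_cache_size() helper further down in this diff, which complains when a cache would hold more than 2^20 individually mapped cache blocks. A minimal user-space sketch of that arithmetic follows; the 1 TiB cache device and 512 KiB block size are assumptions chosen for illustration, not values taken from the patch.

    /*
     * Illustrative sketch only: mirrors the 2^20 threshold that the
     * patch's set_cache_size() warns about.  The device and block
     * sizes below are example assumptions.
     */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t cache_sectors     = 1ULL << 31; /* 1 TiB cache device in 512-byte sectors */
            uint64_t sectors_per_block = 1024;       /* 512 KiB cache blocks */
            uint64_t nr_blocks         = cache_sectors / sectors_per_block;

            if (nr_blocks > (1ULL << 20))
                    printf("would warn: %llu cache blocks\n",
                           (unsigned long long) nr_blocks);
            else
                    printf("no warning: %llu cache blocks\n",
                           (unsigned long long) nr_blocks);
            return 0;
    }

With these numbers the cache has 2^21 blocks and the warning fires; doubling the block size to 1 MiB brings the count down to 2^20, which no longer exceeds the limit.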
@@ -95,7 +95,6 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
 
 /*----------------------------------------------------------------*/
 
-#define PRISON_CELLS 1024
 #define MIGRATION_POOL_SIZE 128
 #define COMMIT_PERIOD HZ
 #define MIGRATION_COUNT_WINDOW 10
@@ -237,8 +236,9 @@ struct cache {
        /*
         * origin_blocks entries, discarded if set.
         */
-       dm_oblock_t discard_nr_blocks;
+       dm_dblock_t discard_nr_blocks;
        unsigned long *discard_bitset;
+       uint32_t discard_block_size; /* a power of 2 times sectors per block */
 
        /*
         * Rather than reconstructing the table line for the status we just
@@ -310,6 +310,7 @@ struct dm_cache_migration {
        dm_cblock_t cblock;
 
        bool err:1;
+       bool discard:1;
        bool writeback:1;
        bool demote:1;
        bool promote:1;
@@ -433,11 +434,12 @@ static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cel
 
 /*----------------------------------------------------------------*/
 
-static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
+static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
 {
        key->virtual = 0;
        key->dev = 0;
-       key->block = from_oblock(oblock);
+       key->block_begin = from_oblock(begin);
+       key->block_end = from_oblock(end);
 }
 
 /*
@@ -447,15 +449,15 @@ static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
  */
 typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
 
-static int bio_detain(struct cache *cache, dm_oblock_t oblock,
-                     struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
-                     cell_free_fn free_fn, void *free_context,
-                     struct dm_bio_prison_cell **cell_result)
+static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
+                           struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
+                           cell_free_fn free_fn, void *free_context,
+                           struct dm_bio_prison_cell **cell_result)
 {
        int r;
        struct dm_cell_key key;
 
-       build_key(oblock, &key);
+       build_key(oblock_begin, oblock_end, &key);
        r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
        if (r)
                free_fn(free_context, cell_prealloc);
@@ -463,6 +465,16 @@ static int bio_detain(struct cache *cache, dm_oblock_t oblock,
        return r;
 }
 
+static int bio_detain(struct cache *cache, dm_oblock_t oblock,
+                     struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
+                     cell_free_fn free_fn, void *free_context,
+                     struct dm_bio_prison_cell **cell_result)
+{
+       dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
+       return bio_detain_range(cache, oblock, end, bio,
+                               cell_prealloc, free_fn, free_context, cell_result);
+}
+
 static int get_cell(struct cache *cache,
                    dm_oblock_t oblock,
                    struct prealloc *structs,
@@ -474,7 +486,7 @@ static int get_cell(struct cache *cache,
 
        cell_prealloc = prealloc_get_cell(structs);
 
-       build_key(oblock, &key);
+       build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
        r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
        if (r)
                prealloc_put_cell(structs, cell_prealloc);
@@ -524,33 +536,57 @@ static dm_block_t block_div(dm_block_t b, uint32_t n)
        return b;
 }
 
-static void set_discard(struct cache *cache, dm_oblock_t b)
+static dm_block_t oblocks_per_dblock(struct cache *cache)
+{
+       dm_block_t oblocks = cache->discard_block_size;
+
+       if (block_size_is_power_of_two(cache))
+               oblocks >>= cache->sectors_per_block_shift;
+       else
+               oblocks = block_div(oblocks, cache->sectors_per_block);
+
+       return oblocks;
+}
+
+static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
+{
+       return to_dblock(block_div(from_oblock(oblock),
+                                  oblocks_per_dblock(cache)));
+}
+
+static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
+{
+       return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
+}
+
+static void set_discard(struct cache *cache, dm_dblock_t b)
 {
        unsigned long flags;
 
+       BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
        atomic_inc(&cache->stats.discard_count);
 
        spin_lock_irqsave(&cache->lock, flags);
-       set_bit(from_oblock(b), cache->discard_bitset);
+       set_bit(from_dblock(b), cache->discard_bitset);
        spin_unlock_irqrestore(&cache->lock, flags);
 }
 
-static void clear_discard(struct cache *cache, dm_oblock_t b)
+static void clear_discard(struct cache *cache, dm_dblock_t b)
 {
        unsigned long flags;
 
        spin_lock_irqsave(&cache->lock, flags);
-       clear_bit(from_oblock(b), cache->discard_bitset);
+       clear_bit(from_dblock(b), cache->discard_bitset);
        spin_unlock_irqrestore(&cache->lock, flags);
 }
 
-static bool is_discarded(struct cache *cache, dm_oblock_t b)
+static bool is_discarded(struct cache *cache, dm_dblock_t b)
 {
        int r;
        unsigned long flags;
 
        spin_lock_irqsave(&cache->lock, flags);
-       r = test_bit(from_oblock(b), cache->discard_bitset);
+       r = test_bit(from_dblock(b), cache->discard_bitset);
        spin_unlock_irqrestore(&cache->lock, flags);
 
        return r;
@@ -562,7 +598,8 @@ static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
        unsigned long flags;
 
        spin_lock_irqsave(&cache->lock, flags);
-       r = test_bit(from_oblock(b), cache->discard_bitset);
+       r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
+                    cache->discard_bitset);
        spin_unlock_irqrestore(&cache->lock, flags);
 
        return r;
@@ -687,7 +724,7 @@ static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
        check_if_tick_bio_needed(cache, bio);
        remap_to_origin(cache, bio);
        if (bio_data_dir(bio) == WRITE)
-               clear_discard(cache, oblock);
+               clear_discard(cache, oblock_to_dblock(cache, oblock));
 }
 
 static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
@@ -697,7 +734,7 @@ static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
        remap_to_cache(cache, bio, cblock);
        if (bio_data_dir(bio) == WRITE) {
                set_dirty(cache, oblock, cblock);
-               clear_discard(cache, oblock);
+               clear_discard(cache, oblock_to_dblock(cache, oblock));
        }
 }
 
@@ -718,6 +755,22 @@ static int bio_triggers_commit(struct cache *cache, struct bio *bio)
        return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
 }
 
+/*
+ * You must increment the deferred set whilst the prison cell is held.  To
+ * encourage this, we ask for 'cell' to be passed in.
+ */
+static void inc_ds(struct cache *cache, struct bio *bio,
+                  struct dm_bio_prison_cell *cell)
+{
+       size_t pb_data_size = get_per_bio_data_size(cache);
+       struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
+
+       BUG_ON(!cell);
+       BUG_ON(pb->all_io_entry);
+
+       pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+}
+
 static void issue(struct cache *cache, struct bio *bio)
 {
        unsigned long flags;
@@ -737,6 +790,12 @@ static void issue(struct cache *cache, struct bio *bio)
        spin_unlock_irqrestore(&cache->lock, flags);
 }
 
+static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
+{
+       inc_ds(cache, bio, cell);
+       issue(cache, bio);
+}
+
 static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
 {
        unsigned long flags;
@@ -873,8 +932,8 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg)
        struct cache *cache = mg->cache;
 
        if (mg->writeback) {
-               cell_defer(cache, mg->old_ocell, false);
                clear_dirty(cache, mg->old_oblock, mg->cblock);
+               cell_defer(cache, mg->old_ocell, false);
                cleanup_migration(mg);
                return;
 
@@ -929,13 +988,13 @@ static void migration_success_post_commit(struct dm_cache_migration *mg)
                }
 
        } else {
+               clear_dirty(cache, mg->new_oblock, mg->cblock);
                if (mg->requeue_holder)
                        cell_defer(cache, mg->new_ocell, true);
                else {
                        bio_endio(mg->new_ocell->holder, 0);
                        cell_defer(cache, mg->new_ocell, false);
                }
-               clear_dirty(cache, mg->new_oblock, mg->cblock);
                cleanup_migration(mg);
        }
 }
@@ -956,7 +1015,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
        wake_worker(cache);
 }
 
-static void issue_copy_real(struct dm_cache_migration *mg)
+static void issue_copy(struct dm_cache_migration *mg)
 {
        int r;
        struct dm_io_region o_region, c_region;
@@ -1015,6 +1074,11 @@ static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
 
        dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
        remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
+
+       /*
+        * No need to inc_ds() here, since the cell will be held for the
+        * duration of the io.
+        */
        generic_make_request(bio);
 }
 
@@ -1030,11 +1094,46 @@ static void avoid_copy(struct dm_cache_migration *mg)
        migration_success_pre_commit(mg);
 }
 
-static void issue_copy(struct dm_cache_migration *mg)
+static void calc_discard_block_range(struct cache *cache, struct bio *bio,
+                                    dm_dblock_t *b, dm_dblock_t *e)
+{
+       sector_t sb = bio->bi_iter.bi_sector;
+       sector_t se = bio_end_sector(bio);
+
+       *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
+
+       if (se - sb < cache->discard_block_size)
+               *e = *b;
+       else
+               *e = to_dblock(block_div(se, cache->discard_block_size));
+}
+
+static void issue_discard(struct dm_cache_migration *mg)
+{
+       dm_dblock_t b, e;
+       struct bio *bio = mg->new_ocell->holder;
+
+       calc_discard_block_range(mg->cache, bio, &b, &e);
+       while (b != e) {
+               set_discard(mg->cache, b);
+               b = to_dblock(from_dblock(b) + 1);
+       }
+
+       bio_endio(bio, 0);
+       cell_defer(mg->cache, mg->new_ocell, false);
+       free_migration(mg);
+}
+
+static void issue_copy_or_discard(struct dm_cache_migration *mg)
 {
        bool avoid;
        struct cache *cache = mg->cache;
 
+       if (mg->discard) {
+               issue_discard(mg);
+               return;
+       }
+
        if (mg->writeback || mg->demote)
                avoid = !is_dirty(cache, mg->cblock) ||
                        is_discarded_oblock(cache, mg->old_oblock);
@@ -1049,7 +1148,7 @@ static void issue_copy(struct dm_cache_migration *mg)
                }
        }
 
-       avoid ? avoid_copy(mg) : issue_copy_real(mg);
+       avoid ? avoid_copy(mg) : issue_copy(mg);
 }
 
 static void complete_migration(struct dm_cache_migration *mg)
@@ -1115,8 +1214,7 @@ static void check_for_quiesced_migrations(struct cache *cache,
                return;
 
        INIT_LIST_HEAD(&work);
-       if (pb->all_io_entry)
-               dm_deferred_entry_dec(pb->all_io_entry, &work);
+       dm_deferred_entry_dec(pb->all_io_entry, &work);
 
        if (!list_empty(&work))
                queue_quiesced_migrations(cache, &work);
@@ -1135,6 +1233,7 @@ static void promote(struct cache *cache, struct prealloc *structs,
        struct dm_cache_migration *mg = prealloc_get_migration(structs);
 
        mg->err = false;
+       mg->discard = false;
        mg->writeback = false;
        mg->demote = false;
        mg->promote = true;
@@ -1158,6 +1257,7 @@ static void writeback(struct cache *cache, struct prealloc *structs,
        struct dm_cache_migration *mg = prealloc_get_migration(structs);
 
        mg->err = false;
+       mg->discard = false;
        mg->writeback = true;
        mg->demote = false;
        mg->promote = false;
@@ -1183,6 +1283,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs,
        struct dm_cache_migration *mg = prealloc_get_migration(structs);
 
        mg->err = false;
+       mg->discard = false;
        mg->writeback = false;
        mg->demote = true;
        mg->promote = true;
@@ -1211,6 +1312,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs,
        struct dm_cache_migration *mg = prealloc_get_migration(structs);
 
        mg->err = false;
+       mg->discard = false;
        mg->writeback = false;
        mg->demote = true;
        mg->promote = false;
@@ -1227,6 +1329,26 @@ static void invalidate(struct cache *cache, struct prealloc *structs,
        quiesce_migration(mg);
 }
 
+static void discard(struct cache *cache, struct prealloc *structs,
+                   struct dm_bio_prison_cell *cell)
+{
+       struct dm_cache_migration *mg = prealloc_get_migration(structs);
+
+       mg->err = false;
+       mg->discard = true;
+       mg->writeback = false;
+       mg->demote = false;
+       mg->promote = false;
+       mg->requeue_holder = false;
+       mg->invalidate = false;
+       mg->cache = cache;
+       mg->old_ocell = NULL;
+       mg->new_ocell = cell;
+       mg->start_jiffies = jiffies;
+
+       quiesce_migration(mg);
+}
+
 /*----------------------------------------------------------------
  * bio processing
  *--------------------------------------------------------------*/
@@ -1252,34 +1374,35 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
        else
                remap_to_cache(cache, bio, 0);
 
+       /*
+        * REQ_FLUSH is not directed at any particular block so we don't
+        * need to inc_ds().  REQ_FUA's are split into a write + REQ_FLUSH
+        * by dm-core.
+        */
        issue(cache, bio);
 }
 
-/*
- * People generally discard large parts of a device, eg, the whole device
- * when formatting.  Splitting these large discards up into cache block
- * sized ios and then quiescing (always neccessary for discard) takes too
- * long.
- *
- * We keep it simple, and allow any size of discard to come in, and just
- * mark off blocks on the discard bitset.  No passdown occurs!
- *
- * To implement passdown we need to change the bio_prison such that a cell
- * can have a key that spans many blocks.
- */
-static void process_discard_bio(struct cache *cache, struct bio *bio)
+static void process_discard_bio(struct cache *cache, struct prealloc *structs,
+                               struct bio *bio)
 {
-       dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
-                                                 cache->sectors_per_block);
-       dm_block_t end_block = bio_end_sector(bio);
-       dm_block_t b;
+       int r;
+       dm_dblock_t b, e;
+       struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
 
-       end_block = block_div(end_block, cache->sectors_per_block);
+       calc_discard_block_range(cache, bio, &b, &e);
+       if (b == e) {
+               bio_endio(bio, 0);
+               return;
+       }
 
-       for (b = start_block; b < end_block; b++)
-               set_discard(cache, to_oblock(b));
+       cell_prealloc = prealloc_get_cell(structs);
+       r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
+                            (cell_free_fn) prealloc_put_cell,
+                            structs, &new_ocell);
+       if (r > 0)
+               return;
 
-       bio_endio(bio, 0);
+       discard(cache, structs, new_ocell);
 }
 
 static bool spare_migration_bandwidth(struct cache *cache)
@@ -1301,15 +1424,6 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
                   &cache->stats.read_miss : &cache->stats.write_miss);
 }
 
-static void issue_cache_bio(struct cache *cache, struct bio *bio,
-                           struct per_bio_data *pb,
-                           dm_oblock_t oblock, dm_cblock_t cblock)
-{
-       pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-       remap_to_cache_dirty(cache, bio, oblock, cblock);
-       issue(cache, bio);
-}
-
 static void process_bio(struct cache *cache, struct prealloc *structs,
                        struct bio *bio)
 {
@@ -1318,8 +1432,6 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
        dm_oblock_t block = get_bio_block(cache, bio);
        struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
        struct policy_result lookup_result;
-       size_t pb_data_size = get_per_bio_data_size(cache);
-       struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
        bool discarded_block = is_discarded_oblock(cache, block);
        bool passthrough = passthrough_mode(&cache->features);
        bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
@@ -1359,9 +1471,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
 
                        } else {
                                /* FIXME: factor out issue_origin() */
-                               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
                                remap_to_origin_clear_discard(cache, bio, block);
-                               issue(cache, bio);
+                               inc_and_issue(cache, bio, new_ocell);
                        }
                } else {
                        inc_hit_counter(cache, bio);
@@ -1369,20 +1480,21 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
                        if (bio_data_dir(bio) == WRITE &&
                            writethrough_mode(&cache->features) &&
                            !is_dirty(cache, lookup_result.cblock)) {
-                               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
                                remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
-                               issue(cache, bio);
-                       } else
-                               issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
+                               inc_and_issue(cache, bio, new_ocell);
+
+                       } else  {
+                               remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+                               inc_and_issue(cache, bio, new_ocell);
+                       }
                }
 
                break;
 
        case POLICY_MISS:
                inc_miss_counter(cache, bio);
-               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
                remap_to_origin_clear_discard(cache, bio, block);
-               issue(cache, bio);
+               inc_and_issue(cache, bio, new_ocell);
                break;
 
        case POLICY_NEW:
@@ -1480,7 +1592,7 @@ static void process_deferred_bios(struct cache *cache)
                if (bio->bi_rw & REQ_FLUSH)
                        process_flush_bio(cache, bio);
                else if (bio->bi_rw & REQ_DISCARD)
-                       process_discard_bio(cache, bio);
+                       process_discard_bio(cache, &structs, bio);
                else
                        process_bio(cache, &structs, bio);
        }
@@ -1501,6 +1613,9 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
        bio_list_init(&cache->deferred_flush_bios);
        spin_unlock_irqrestore(&cache->lock, flags);
 
+       /*
+        * These bios have already been through inc_ds()
+        */
        while ((bio = bio_list_pop(&bios)))
                submit_bios ? generic_make_request(bio) : bio_io_error(bio);
 }
@@ -1518,6 +1633,9 @@ static void process_deferred_writethrough_bios(struct cache *cache)
        bio_list_init(&cache->deferred_writethrough_bios);
        spin_unlock_irqrestore(&cache->lock, flags);
 
+       /*
+        * These bios have already been through inc_ds()
+        */
        while ((bio = bio_list_pop(&bios)))
                generic_make_request(bio);
 }
@@ -1689,11 +1807,12 @@ static void do_worker(struct work_struct *ws)
                        process_invalidation_requests(cache);
                }
 
-               process_migrations(cache, &cache->quiesced_migrations, issue_copy);
+               process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard);
                process_migrations(cache, &cache->completed_migrations, complete_migration);
 
                if (commit_if_needed(cache)) {
                        process_deferred_flush_bios(cache, false);
+                       process_migrations(cache, &cache->need_commit_migrations, migration_failure);
 
                        /*
                         * FIXME: rollback metadata or just go into a
@@ -2153,6 +2272,48 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca,
        return 0;
 }
 
+/*
+ * We want the discard block size to be a power of two, at least the size
+ * of the cache block size, and have no more than 2^14 discard blocks
+ * across the origin.
+ */
+#define MAX_DISCARD_BLOCKS (1 << 14)
+
+static bool too_many_discard_blocks(sector_t discard_block_size,
+                                   sector_t origin_size)
+{
+       (void) sector_div(origin_size, discard_block_size);
+
+       return origin_size > MAX_DISCARD_BLOCKS;
+}
+
+static sector_t calculate_discard_block_size(sector_t cache_block_size,
+                                            sector_t origin_size)
+{
+       sector_t discard_block_size;
+
+       discard_block_size = roundup_pow_of_two(cache_block_size);
+
+       if (origin_size)
+               while (too_many_discard_blocks(discard_block_size, origin_size))
+                       discard_block_size *= 2;
+
+       return discard_block_size;
+}
+
+static void set_cache_size(struct cache *cache, dm_cblock_t size)
+{
+       dm_block_t nr_blocks = from_cblock(size);
+
+       if (nr_blocks > (1 << 20) && cache->cache_size != size)
+               DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
+                            "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
+                            "Please consider increasing the cache block size to reduce the overall cache block count.",
+                            (unsigned long long) nr_blocks);
+
+       cache->cache_size = size;
+}
+
 #define DEFAULT_MIGRATION_THRESHOLD 2048
 
 static int cache_create(struct cache_args *ca, struct cache **result)
@@ -2208,10 +2369,10 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
                cache->sectors_per_block_shift = -1;
                cache_size = block_div(cache_size, ca->block_size);
-               cache->cache_size = to_cblock(cache_size);
+               set_cache_size(cache, to_cblock(cache_size));
        } else {
                cache->sectors_per_block_shift = __ffs(ca->block_size);
-               cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift);
+               set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
        }
 
        r = create_cache_policy(cache, ca, error);
@@ -2276,13 +2437,16 @@ static int cache_create(struct cache_args *ca, struct cache **result)
        }
        clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
 
-       cache->discard_nr_blocks = cache->origin_blocks;
-       cache->discard_bitset = alloc_bitset(from_oblock(cache->discard_nr_blocks));
+       cache->discard_block_size =
+               calculate_discard_block_size(cache->sectors_per_block,
+                                            cache->origin_sectors);
+       cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks);
+       cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
        if (!cache->discard_bitset) {
                *error = "could not allocate discard bitset";
                goto bad;
        }
-       clear_bitset(cache->discard_bitset, from_oblock(cache->discard_nr_blocks));
+       clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
 
        cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
        if (IS_ERR(cache->copier)) {
@@ -2300,7 +2464,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
        INIT_DELAYED_WORK(&cache->waker, do_waker);
        cache->last_commit_jiffies = jiffies;
 
-       cache->prison = dm_bio_prison_create(PRISON_CELLS);
+       cache->prison = dm_bio_prison_create();
        if (!cache->prison) {
                *error = "could not create bio prison";
                goto bad;
@@ -2406,16 +2570,13 @@ out:
        return r;
 }
 
-static int cache_map(struct dm_target *ti, struct bio *bio)
+static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell)
 {
-       struct cache *cache = ti->private;
-
        int r;
        dm_oblock_t block = get_bio_block(cache, bio);
        size_t pb_data_size = get_per_bio_data_size(cache);
        bool can_migrate = false;
        bool discarded_block;
-       struct dm_bio_prison_cell *cell;
        struct policy_result lookup_result;
        struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
 
@@ -2437,15 +2598,15 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
        /*
         * Check to see if that block is currently migrating.
         */
-       cell = alloc_prison_cell(cache);
-       if (!cell) {
+       *cell = alloc_prison_cell(cache);
+       if (!*cell) {
                defer_bio(cache, bio);
                return DM_MAPIO_SUBMITTED;
        }
 
-       r = bio_detain(cache, block, bio, cell,
+       r = bio_detain(cache, block, bio, *cell,
                       (cell_free_fn) free_prison_cell,
-                      cache, &cell);
+                      cache, cell);
        if (r) {
                if (r < 0)
                        defer_bio(cache, bio);
@@ -2458,11 +2619,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
        r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
                       bio, &lookup_result);
        if (r == -EWOULDBLOCK) {
-               cell_defer(cache, cell, true);
+               cell_defer(cache, *cell, true);
                return DM_MAPIO_SUBMITTED;
 
        } else if (r) {
                DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
+               cell_defer(cache, *cell, false);
                bio_io_error(bio);
                return DM_MAPIO_SUBMITTED;
        }
@@ -2476,52 +2638,44 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
                                 * We need to invalidate this block, so
                                 * defer for the worker thread.
                                 */
-                               cell_defer(cache, cell, true);
+                               cell_defer(cache, *cell, true);
                                r = DM_MAPIO_SUBMITTED;
 
                        } else {
-                               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
                                inc_miss_counter(cache, bio);
                                remap_to_origin_clear_discard(cache, bio, block);
-
-                               cell_defer(cache, cell, false);
                        }
 
                } else {
                        inc_hit_counter(cache, bio);
-                       pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-
                        if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
                            !is_dirty(cache, lookup_result.cblock))
                                remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
                        else
                                remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
-
-                       cell_defer(cache, cell, false);
                }
                break;
 
        case POLICY_MISS:
                inc_miss_counter(cache, bio);
-               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-
                if (pb->req_nr != 0) {
                        /*
                         * This is a duplicate writethrough io that is no
                         * longer needed because the block has been demoted.
                         */
                        bio_endio(bio, 0);
-                       cell_defer(cache, cell, false);
-                       return DM_MAPIO_SUBMITTED;
-               } else {
+                       cell_defer(cache, *cell, false);
+                       r = DM_MAPIO_SUBMITTED;
+
+               } else
                        remap_to_origin_clear_discard(cache, bio, block);
-                       cell_defer(cache, cell, false);
-               }
+
                break;
 
        default:
                DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
                            (unsigned) lookup_result.op);
+               cell_defer(cache, *cell, false);
                bio_io_error(bio);
                r = DM_MAPIO_SUBMITTED;
        }
@@ -2529,6 +2683,21 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
        return r;
 }
 
+static int cache_map(struct dm_target *ti, struct bio *bio)
+{
+       int r;
+       struct dm_bio_prison_cell *cell;
+       struct cache *cache = ti->private;
+
+       r = __cache_map(cache, bio, &cell);
+       if (r == DM_MAPIO_REMAPPED) {
+               inc_ds(cache, bio, cell);
+               cell_defer(cache, cell, false);
+       }
+
+       return r;
+}
+
 static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
        struct cache *cache = ti->private;
@@ -2567,16 +2736,16 @@ static int write_discard_bitset(struct cache *cache)
 {
        unsigned i, r;
 
-       r = dm_cache_discard_bitset_resize(cache->cmd, cache->sectors_per_block,
-                                          cache->origin_blocks);
+       r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
+                                          cache->discard_nr_blocks);
        if (r) {
                DMERR("could not resize on-disk discard bitset");
                return r;
        }
 
-       for (i = 0; i < from_oblock(cache->discard_nr_blocks); i++) {
-               r = dm_cache_set_discard(cache->cmd, to_oblock(i),
-                                        is_discarded(cache, to_oblock(i)));
+       for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
+               r = dm_cache_set_discard(cache->cmd, to_dblock(i),
+                                        is_discarded(cache, to_dblock(i)));
                if (r)
                        return r;
        }
@@ -2649,14 +2818,16 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
 }
 
 static int load_discard(void *context, sector_t discard_block_size,
-                       dm_oblock_t oblock, bool discard)
+                       dm_dblock_t dblock, bool discard)
 {
        struct cache *cache = context;
 
+       /* FIXME: handle mis-matched block size */
+
        if (discard)
-               set_discard(cache, oblock);
+               set_discard(cache, dblock);
        else
-               clear_discard(cache, oblock);
+               clear_discard(cache, dblock);
 
        return 0;
 }
@@ -2698,7 +2869,7 @@ static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
                return r;
        }
 
-       cache->cache_size = new_size;
+       set_cache_size(cache, new_size);
 
        return 0;
 }
@@ -2808,7 +2979,7 @@ static void cache_status(struct dm_target *ti, status_type_t type,
                residency = policy_residency(cache->policy);
 
                DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
-                      (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT),
+                      (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
                       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
                       (unsigned long long)nr_blocks_metadata,
                       cache->sectors_per_block,
@@ -3047,8 +3218,9 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
        /*
         * FIXME: these limits may be incompatible with the cache device
         */
-       limits->max_discard_sectors = cache->sectors_per_block;
-       limits->discard_granularity = cache->sectors_per_block << SECTOR_SHIFT;
+       limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
+                                           cache->origin_sectors);
+       limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
 }
 
 static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -3062,7 +3234,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
         */
        if (io_opt_sectors < cache->sectors_per_block ||
            do_div(io_opt_sectors, cache->sectors_per_block)) {
-               blk_limits_io_min(limits, 0);
+               blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
                blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
        }
        set_discard_limits(cache, limits);
@@ -3072,7 +3244,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type cache_target = {
        .name = "cache",
-       .version = {1, 4, 0},
+       .version = {1, 6, 0},
        .module = THIS_MODULE,
        .ctr = cache_ctr,
        .dtr = cache_dtr,