dm cache: emit a warning message if there are a lot of cache blocks
drivers/md/dm-cache-target.c
index 2c63326638b6d4d54af4499643ac10dd9d8ee33b..abdd45d07bf66e5a217ee041aaa8cdb7e7b78ddf 100644 (file)
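Note (not part of the patch): the warning named in the subject line comes from the new set_cache_size() helper further down in this diff, which complains when a cache would hold more than 2^20 individually mapped cache blocks. A minimal user-space sketch of that arithmetic follows; the 1 TiB cache device and 512 KiB block size are assumptions chosen for illustration, not values taken from the patch.

    /*
     * Illustrative sketch only: mirrors the 2^20 threshold that the
     * patch's set_cache_size() warns about.  The device and block
     * sizes below are example assumptions.
     */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t cache_sectors     = 1ULL << 31; /* 1 TiB cache device in 512-byte sectors */
            uint64_t sectors_per_block = 1024;       /* 512 KiB cache blocks */
            uint64_t nr_blocks         = cache_sectors / sectors_per_block;

            if (nr_blocks > (1ULL << 20))
                    printf("would warn: %llu cache blocks\n",
                           (unsigned long long) nr_blocks);
            else
                    printf("no warning: %llu cache blocks\n",
                           (unsigned long long) nr_blocks);
            return 0;
    }

With these numbers the cache has 2^21 blocks and the warning fires; doubling the block size to 1 MiB brings the count down to 2^20, which no longer exceeds the limit.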
@@ -95,7 +95,6 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
 
 /*----------------------------------------------------------------*/
 
-#define PRISON_CELLS 1024
 #define MIGRATION_POOL_SIZE 128
 #define COMMIT_PERIOD HZ
 #define MIGRATION_COUNT_WINDOW 10
@@ -237,8 +236,9 @@ struct cache {
        /*
         * origin_blocks entries, discarded if set.
         */
-       dm_oblock_t discard_nr_blocks;
+       dm_dblock_t discard_nr_blocks;
        unsigned long *discard_bitset;
+       uint32_t discard_block_size; /* a power of 2 times sectors per block */
 
        /*
         * Rather than reconstructing the table line for the status we just
@@ -310,6 +310,7 @@ struct dm_cache_migration {
        dm_cblock_t cblock;
 
        bool err:1;
+       bool discard:1;
        bool writeback:1;
        bool demote:1;
        bool promote:1;
@@ -433,11 +434,12 @@ static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cel
 
 /*----------------------------------------------------------------*/
 
-static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
+static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
 {
        key->virtual = 0;
        key->dev = 0;
-       key->block = from_oblock(oblock);
+       key->block_begin = from_oblock(begin);
+       key->block_end = from_oblock(end);
 }
 
 /*
@@ -447,15 +449,15 @@ static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
  */
 typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
 
-static int bio_detain(struct cache *cache, dm_oblock_t oblock,
-                     struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
-                     cell_free_fn free_fn, void *free_context,
-                     struct dm_bio_prison_cell **cell_result)
+static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
+                           struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
+                           cell_free_fn free_fn, void *free_context,
+                           struct dm_bio_prison_cell **cell_result)
 {
        int r;
        struct dm_cell_key key;
 
-       build_key(oblock, &key);
+       build_key(oblock_begin, oblock_end, &key);
        r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
        if (r)
                free_fn(free_context, cell_prealloc);
@@ -463,6 +465,16 @@ static int bio_detain(struct cache *cache, dm_oblock_t oblock,
        return r;
 }
 
+static int bio_detain(struct cache *cache, dm_oblock_t oblock,
+                     struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
+                     cell_free_fn free_fn, void *free_context,
+                     struct dm_bio_prison_cell **cell_result)
+{
+       dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
+       return bio_detain_range(cache, oblock, end, bio,
+                               cell_prealloc, free_fn, free_context, cell_result);
+}
+
 static int get_cell(struct cache *cache,
                    dm_oblock_t oblock,
                    struct prealloc *structs,
@@ -474,7 +486,7 @@ static int get_cell(struct cache *cache,
 
        cell_prealloc = prealloc_get_cell(structs);
 
-       build_key(oblock, &key);
+       build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
        r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
        if (r)
                prealloc_put_cell(structs, cell_prealloc);
@@ -524,33 +536,57 @@ static dm_block_t block_div(dm_block_t b, uint32_t n)
        return b;
 }
 
-static void set_discard(struct cache *cache, dm_oblock_t b)
+static dm_block_t oblocks_per_dblock(struct cache *cache)
+{
+       dm_block_t oblocks = cache->discard_block_size;
+
+       if (block_size_is_power_of_two(cache))
+               oblocks >>= cache->sectors_per_block_shift;
+       else
+               oblocks = block_div(oblocks, cache->sectors_per_block);
+
+       return oblocks;
+}
+
+static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
+{
+       return to_dblock(block_div(from_oblock(oblock),
+                                  oblocks_per_dblock(cache)));
+}
+
+static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
+{
+       return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
+}
+
+static void set_discard(struct cache *cache, dm_dblock_t b)
 {
        unsigned long flags;
 
+       BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
        atomic_inc(&cache->stats.discard_count);
 
        spin_lock_irqsave(&cache->lock, flags);
-       set_bit(from_oblock(b), cache->discard_bitset);
+       set_bit(from_dblock(b), cache->discard_bitset);
        spin_unlock_irqrestore(&cache->lock, flags);
 }
 
-static void clear_discard(struct cache *cache, dm_oblock_t b)
+static void clear_discard(struct cache *cache, dm_dblock_t b)
 {
        unsigned long flags;
 
        spin_lock_irqsave(&cache->lock, flags);
-       clear_bit(from_oblock(b), cache->discard_bitset);
+       clear_bit(from_dblock(b), cache->discard_bitset);
        spin_unlock_irqrestore(&cache->lock, flags);
 }
 
-static bool is_discarded(struct cache *cache, dm_oblock_t b)
+static bool is_discarded(struct cache *cache, dm_dblock_t b)
 {
        int r;
        unsigned long flags;
 
        spin_lock_irqsave(&cache->lock, flags);
-       r = test_bit(from_oblock(b), cache->discard_bitset);
+       r = test_bit(from_dblock(b), cache->discard_bitset);
        spin_unlock_irqrestore(&cache->lock, flags);
 
        return r;
@@ -562,7 +598,8 @@ static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
        unsigned long flags;
 
        spin_lock_irqsave(&cache->lock, flags);
-       r = test_bit(from_oblock(b), cache->discard_bitset);
+       r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
+                    cache->discard_bitset);
        spin_unlock_irqrestore(&cache->lock, flags);
 
        return r;
@@ -687,7 +724,7 @@ static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
        check_if_tick_bio_needed(cache, bio);
        remap_to_origin(cache, bio);
        if (bio_data_dir(bio) == WRITE)
-               clear_discard(cache, oblock);
+               clear_discard(cache, oblock_to_dblock(cache, oblock));
 }
 
 static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
@@ -697,7 +734,7 @@ static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
        remap_to_cache(cache, bio, cblock);
        if (bio_data_dir(bio) == WRITE) {
                set_dirty(cache, oblock, cblock);
-               clear_discard(cache, oblock);
+               clear_discard(cache, oblock_to_dblock(cache, oblock));
        }
 }
 
@@ -718,6 +755,22 @@ static int bio_triggers_commit(struct cache *cache, struct bio *bio)
        return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
 }
 
+/*
+ * You must increment the deferred set whilst the prison cell is held.  To
+ * encourage this, we ask for 'cell' to be passed in.
+ */
+static void inc_ds(struct cache *cache, struct bio *bio,
+                  struct dm_bio_prison_cell *cell)
+{
+       size_t pb_data_size = get_per_bio_data_size(cache);
+       struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
+
+       BUG_ON(!cell);
+       BUG_ON(pb->all_io_entry);
+
+       pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+}
+
 static void issue(struct cache *cache, struct bio *bio)
 {
        unsigned long flags;
@@ -737,6 +790,12 @@ static void issue(struct cache *cache, struct bio *bio)
        spin_unlock_irqrestore(&cache->lock, flags);
 }
 
+static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
+{
+       inc_ds(cache, bio, cell);
+       issue(cache, bio);
+}
+
 static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
 {
        unsigned long flags;
@@ -873,8 +932,8 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg)
        struct cache *cache = mg->cache;
 
        if (mg->writeback) {
-               cell_defer(cache, mg->old_ocell, false);
                clear_dirty(cache, mg->old_oblock, mg->cblock);
+               cell_defer(cache, mg->old_ocell, false);
                cleanup_migration(mg);
                return;
 
@@ -929,13 +988,13 @@ static void migration_success_post_commit(struct dm_cache_migration *mg)
                }
 
        } else {
+               clear_dirty(cache, mg->new_oblock, mg->cblock);
                if (mg->requeue_holder)
                        cell_defer(cache, mg->new_ocell, true);
                else {
                        bio_endio(mg->new_ocell->holder, 0);
                        cell_defer(cache, mg->new_ocell, false);
                }
-               clear_dirty(cache, mg->new_oblock, mg->cblock);
                cleanup_migration(mg);
        }
 }
@@ -956,7 +1015,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
        wake_worker(cache);
 }
 
-static void issue_copy_real(struct dm_cache_migration *mg)
+static void issue_copy(struct dm_cache_migration *mg)
 {
        int r;
        struct dm_io_region o_region, c_region;
@@ -1015,6 +1074,11 @@ static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
 
        dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
        remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
+
+       /*
+        * No need to inc_ds() here, since the cell will be held for the
+        * duration of the io.
+        */
        generic_make_request(bio);
 }
 
@@ -1030,11 +1094,46 @@ static void avoid_copy(struct dm_cache_migration *mg)
        migration_success_pre_commit(mg);
 }
 
-static void issue_copy(struct dm_cache_migration *mg)
+static void calc_discard_block_range(struct cache *cache, struct bio *bio,
+                                    dm_dblock_t *b, dm_dblock_t *e)
+{
+       sector_t sb = bio->bi_iter.bi_sector;
+       sector_t se = bio_end_sector(bio);
+
+       *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
+
+       if (se - sb < cache->discard_block_size)
+               *e = *b;
+       else
+               *e = to_dblock(block_div(se, cache->discard_block_size));
+}
+
+static void issue_discard(struct dm_cache_migration *mg)
+{
+       dm_dblock_t b, e;
+       struct bio *bio = mg->new_ocell->holder;
+
+       calc_discard_block_range(mg->cache, bio, &b, &e);
+       while (b != e) {
+               set_discard(mg->cache, b);
+               b = to_dblock(from_dblock(b) + 1);
+       }
+
+       bio_endio(bio, 0);
+       cell_defer(mg->cache, mg->new_ocell, false);
+       free_migration(mg);
+}
+
+static void issue_copy_or_discard(struct dm_cache_migration *mg)
 {
        bool avoid;
        struct cache *cache = mg->cache;
 
+       if (mg->discard) {
+               issue_discard(mg);
+               return;
+       }
+
        if (mg->writeback || mg->demote)
                avoid = !is_dirty(cache, mg->cblock) ||
                        is_discarded_oblock(cache, mg->old_oblock);
@@ -1049,7 +1148,7 @@ static void issue_copy(struct dm_cache_migration *mg)
                }
        }
 
-       avoid ? avoid_copy(mg) : issue_copy_real(mg);
+       avoid ? avoid_copy(mg) : issue_copy(mg);
 }
 
 static void complete_migration(struct dm_cache_migration *mg)
@@ -1115,8 +1214,7 @@ static void check_for_quiesced_migrations(struct cache *cache,
                return;
 
        INIT_LIST_HEAD(&work);
-       if (pb->all_io_entry)
-               dm_deferred_entry_dec(pb->all_io_entry, &work);
+       dm_deferred_entry_dec(pb->all_io_entry, &work);
 
        if (!list_empty(&work))
                queue_quiesced_migrations(cache, &work);
@@ -1135,6 +1233,7 @@ static void promote(struct cache *cache, struct prealloc *structs,
        struct dm_cache_migration *mg = prealloc_get_migration(structs);
 
        mg->err = false;
+       mg->discard = false;
        mg->writeback = false;
        mg->demote = false;
        mg->promote = true;
@@ -1158,6 +1257,7 @@ static void writeback(struct cache *cache, struct prealloc *structs,
        struct dm_cache_migration *mg = prealloc_get_migration(structs);
 
        mg->err = false;
+       mg->discard = false;
        mg->writeback = true;
        mg->demote = false;
        mg->promote = false;
@@ -1183,6 +1283,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs,
        struct dm_cache_migration *mg = prealloc_get_migration(structs);
 
        mg->err = false;
+       mg->discard = false;
        mg->writeback = false;
        mg->demote = true;
        mg->promote = true;
@@ -1211,6 +1312,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs,
        struct dm_cache_migration *mg = prealloc_get_migration(structs);
 
        mg->err = false;
+       mg->discard = false;
        mg->writeback = false;
        mg->demote = true;
        mg->promote = false;
@@ -1227,6 +1329,26 @@ static void invalidate(struct cache *cache, struct prealloc *structs,
        quiesce_migration(mg);
 }
 
+static void discard(struct cache *cache, struct prealloc *structs,
+                   struct dm_bio_prison_cell *cell)
+{
+       struct dm_cache_migration *mg = prealloc_get_migration(structs);
+
+       mg->err = false;
+       mg->discard = true;
+       mg->writeback = false;
+       mg->demote = false;
+       mg->promote = false;
+       mg->requeue_holder = false;
+       mg->invalidate = false;
+       mg->cache = cache;
+       mg->old_ocell = NULL;
+       mg->new_ocell = cell;
+       mg->start_jiffies = jiffies;
+
+       quiesce_migration(mg);
+}
+
 /*----------------------------------------------------------------
  * bio processing
  *--------------------------------------------------------------*/
@@ -1252,34 +1374,35 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
        else
                remap_to_cache(cache, bio, 0);
 
+       /*
+        * REQ_FLUSH is not directed at any particular block so we don't
+        * need to inc_ds().  REQ_FUA's are split into a write + REQ_FLUSH
+        * by dm-core.
+        */
        issue(cache, bio);
 }
 
-/*
- * People generally discard large parts of a device, eg, the whole device
- * when formatting.  Splitting these large discards up into cache block
- * sized ios and then quiescing (always neccessary for discard) takes too
- * long.
- *
- * We keep it simple, and allow any size of discard to come in, and just
- * mark off blocks on the discard bitset.  No passdown occurs!
- *
- * To implement passdown we need to change the bio_prison such that a cell
- * can have a key that spans many blocks.
- */
-static void process_discard_bio(struct cache *cache, struct bio *bio)
+static void process_discard_bio(struct cache *cache, struct prealloc *structs,
+                               struct bio *bio)
 {
-       dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
-                                                 cache->sectors_per_block);
-       dm_block_t end_block = bio_end_sector(bio);
-       dm_block_t b;
+       int r;
+       dm_dblock_t b, e;
+       struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
 
-       end_block = block_div(end_block, cache->sectors_per_block);
+       calc_discard_block_range(cache, bio, &b, &e);
+       if (b == e) {
+               bio_endio(bio, 0);
+               return;
+       }
 
-       for (b = start_block; b < end_block; b++)
-               set_discard(cache, to_oblock(b));
+       cell_prealloc = prealloc_get_cell(structs);
+       r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
+                            (cell_free_fn) prealloc_put_cell,
+                            structs, &new_ocell);
+       if (r > 0)
+               return;
 
-       bio_endio(bio, 0);
+       discard(cache, structs, new_ocell);
 }
 
 static bool spare_migration_bandwidth(struct cache *cache)
@@ -1301,15 +1424,6 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
                   &cache->stats.read_miss : &cache->stats.write_miss);
 }
 
-static void issue_cache_bio(struct cache *cache, struct bio *bio,
-                           struct per_bio_data *pb,
-                           dm_oblock_t oblock, dm_cblock_t cblock)
-{
-       pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-       remap_to_cache_dirty(cache, bio, oblock, cblock);
-       issue(cache, bio);
-}
-
 static void process_bio(struct cache *cache, struct prealloc *structs,
                        struct bio *bio)
 {
@@ -1318,8 +1432,6 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
        dm_oblock_t block = get_bio_block(cache, bio);
        struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
        struct policy_result lookup_result;
-       size_t pb_data_size = get_per_bio_data_size(cache);
-       struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
        bool discarded_block = is_discarded_oblock(cache, block);
        bool passthrough = passthrough_mode(&cache->features);
        bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
@@ -1359,9 +1471,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
 
                        } else {
                                /* FIXME: factor out issue_origin() */
-                               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
                                remap_to_origin_clear_discard(cache, bio, block);
-                               issue(cache, bio);
+                               inc_and_issue(cache, bio, new_ocell);
                        }
                } else {
                        inc_hit_counter(cache, bio);
@@ -1369,20 +1480,21 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
                        if (bio_data_dir(bio) == WRITE &&
                            writethrough_mode(&cache->features) &&
                            !is_dirty(cache, lookup_result.cblock)) {
-                               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
                                remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
-                               issue(cache, bio);
-                       } else
-                               issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
+                               inc_and_issue(cache, bio, new_ocell);
+
+                       } else  {
+                               remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+                               inc_and_issue(cache, bio, new_ocell);
+                       }
                }
 
                break;
 
        case POLICY_MISS:
                inc_miss_counter(cache, bio);
-               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
                remap_to_origin_clear_discard(cache, bio, block);
-               issue(cache, bio);
+               inc_and_issue(cache, bio, new_ocell);
                break;
 
        case POLICY_NEW:
@@ -1480,7 +1592,7 @@ static void process_deferred_bios(struct cache *cache)
                if (bio->bi_rw & REQ_FLUSH)
                        process_flush_bio(cache, bio);
                else if (bio->bi_rw & REQ_DISCARD)
-                       process_discard_bio(cache, bio);
+                       process_discard_bio(cache, &structs, bio);
                else
                        process_bio(cache, &structs, bio);
        }
@@ -1501,6 +1613,9 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
        bio_list_init(&cache->deferred_flush_bios);
        spin_unlock_irqrestore(&cache->lock, flags);
 
+       /*
+        * These bios have already been through inc_ds()
+        */
        while ((bio = bio_list_pop(&bios)))
                submit_bios ? generic_make_request(bio) : bio_io_error(bio);
 }
@@ -1518,6 +1633,9 @@ static void process_deferred_writethrough_bios(struct cache *cache)
        bio_list_init(&cache->deferred_writethrough_bios);
        spin_unlock_irqrestore(&cache->lock, flags);
 
+       /*
+        * These bios have already been through inc_ds()
+        */
        while ((bio = bio_list_pop(&bios)))
                generic_make_request(bio);
 }
@@ -1689,11 +1807,12 @@ static void do_worker(struct work_struct *ws)
                        process_invalidation_requests(cache);
                }
 
-               process_migrations(cache, &cache->quiesced_migrations, issue_copy);
+               process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard);
                process_migrations(cache, &cache->completed_migrations, complete_migration);
 
                if (commit_if_needed(cache)) {
                        process_deferred_flush_bios(cache, false);
+                       process_migrations(cache, &cache->need_commit_migrations, migration_failure);
 
                        /*
                         * FIXME: rollback metadata or just go into a
@@ -2153,6 +2272,48 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca,
        return 0;
 }
 
+/*
+ * We want the discard block size to be a power of two, at least the size
+ * of the cache block size, and have no more than 2^14 discard blocks
+ * across the origin.
+ */
+#define MAX_DISCARD_BLOCKS (1 << 14)
+
+static bool too_many_discard_blocks(sector_t discard_block_size,
+                                   sector_t origin_size)
+{
+       (void) sector_div(origin_size, discard_block_size);
+
+       return origin_size > MAX_DISCARD_BLOCKS;
+}
+
+static sector_t calculate_discard_block_size(sector_t cache_block_size,
+                                            sector_t origin_size)
+{
+       sector_t discard_block_size;
+
+       discard_block_size = roundup_pow_of_two(cache_block_size);
+
+       if (origin_size)
+               while (too_many_discard_blocks(discard_block_size, origin_size))
+                       discard_block_size *= 2;
+
+       return discard_block_size;
+}
+
+static void set_cache_size(struct cache *cache, dm_cblock_t size)
+{
+       dm_block_t nr_blocks = from_cblock(size);
+
+       if (nr_blocks > (1 << 20) && cache->cache_size != size)
+               DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
+                            "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
+                            "Please consider increasing the cache block size to reduce the overall cache block count.",
+                            (unsigned long long) nr_blocks);
+
+       cache->cache_size = size;
+}
+
 #define DEFAULT_MIGRATION_THRESHOLD 2048
 
 static int cache_create(struct cache_args *ca, struct cache **result)
@@ -2208,10 +2369,10 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
                cache->sectors_per_block_shift = -1;
                cache_size = block_div(cache_size, ca->block_size);
-               cache->cache_size = to_cblock(cache_size);
+               set_cache_size(cache, to_cblock(cache_size));
        } else {
                cache->sectors_per_block_shift = __ffs(ca->block_size);
-               cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift);
+               set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
        }
 
        r = create_cache_policy(cache, ca, error);
@@ -2276,13 +2437,16 @@ static int cache_create(struct cache_args *ca, struct cache **result)
        }
        clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
 
-       cache->discard_nr_blocks = cache->origin_blocks;
-       cache->discard_bitset = alloc_bitset(from_oblock(cache->discard_nr_blocks));
+       cache->discard_block_size =
+               calculate_discard_block_size(cache->sectors_per_block,
+                                            cache->origin_sectors);
+       cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks);
+       cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
        if (!cache->discard_bitset) {
                *error = "could not allocate discard bitset";
                goto bad;
        }
-       clear_bitset(cache->discard_bitset, from_oblock(cache->discard_nr_blocks));
+       clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
 
        cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
        if (IS_ERR(cache->copier)) {
@@ -2300,7 +2464,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
        INIT_DELAYED_WORK(&cache->waker, do_waker);
        cache->last_commit_jiffies = jiffies;
 
-       cache->prison = dm_bio_prison_create(PRISON_CELLS);
+       cache->prison = dm_bio_prison_create();
        if (!cache->prison) {
                *error = "could not create bio prison";
                goto bad;
@@ -2406,16 +2570,13 @@ out:
        return r;
 }
 
-static int cache_map(struct dm_target *ti, struct bio *bio)
+static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell)
 {
-       struct cache *cache = ti->private;
-
        int r;
        dm_oblock_t block = get_bio_block(cache, bio);
        size_t pb_data_size = get_per_bio_data_size(cache);
        bool can_migrate = false;
        bool discarded_block;
-       struct dm_bio_prison_cell *cell;
        struct policy_result lookup_result;
        struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
 
@@ -2437,15 +2598,15 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
        /*
         * Check to see if that block is currently migrating.
         */
-       cell = alloc_prison_cell(cache);
-       if (!cell) {
+       *cell = alloc_prison_cell(cache);
+       if (!*cell) {
                defer_bio(cache, bio);
                return DM_MAPIO_SUBMITTED;
        }
 
-       r = bio_detain(cache, block, bio, cell,
+       r = bio_detain(cache, block, bio, *cell,
                       (cell_free_fn) free_prison_cell,
-                      cache, &cell);
+                      cache, cell);
        if (r) {
                if (r < 0)
                        defer_bio(cache, bio);
@@ -2458,11 +2619,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
        r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
                       bio, &lookup_result);
        if (r == -EWOULDBLOCK) {
-               cell_defer(cache, cell, true);
+               cell_defer(cache, *cell, true);
                return DM_MAPIO_SUBMITTED;
 
        } else if (r) {
                DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
+               cell_defer(cache, *cell, false);
                bio_io_error(bio);
                return DM_MAPIO_SUBMITTED;
        }
@@ -2476,52 +2638,44 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
                                 * We need to invalidate this block, so
                                 * defer for the worker thread.
                                 */
-                               cell_defer(cache, cell, true);
+                               cell_defer(cache, *cell, true);
                                r = DM_MAPIO_SUBMITTED;
 
                        } else {
-                               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
                                inc_miss_counter(cache, bio);
                                remap_to_origin_clear_discard(cache, bio, block);
-
-                               cell_defer(cache, cell, false);
                        }
 
                } else {
                        inc_hit_counter(cache, bio);
-                       pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-
                        if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
                            !is_dirty(cache, lookup_result.cblock))
                                remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
                        else
                                remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
-
-                       cell_defer(cache, cell, false);
                }
                break;
 
        case POLICY_MISS:
                inc_miss_counter(cache, bio);
-               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-
                if (pb->req_nr != 0) {
                        /*
                         * This is a duplicate writethrough io that is no
                         * longer needed because the block has been demoted.
                         */
                        bio_endio(bio, 0);
-                       cell_defer(cache, cell, false);
-                       return DM_MAPIO_SUBMITTED;
-               } else {
+                       cell_defer(cache, *cell, false);
+                       r = DM_MAPIO_SUBMITTED;
+
+               } else
                        remap_to_origin_clear_discard(cache, bio, block);
-                       cell_defer(cache, cell, false);
-               }
+
                break;
 
        default:
                DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
                            (unsigned) lookup_result.op);
+               cell_defer(cache, *cell, false);
                bio_io_error(bio);
                r = DM_MAPIO_SUBMITTED;
        }
@@ -2529,6 +2683,21 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
        return r;
 }
 
+static int cache_map(struct dm_target *ti, struct bio *bio)
+{
+       int r;
+       struct dm_bio_prison_cell *cell;
+       struct cache *cache = ti->private;
+
+       r = __cache_map(cache, bio, &cell);
+       if (r == DM_MAPIO_REMAPPED) {
+               inc_ds(cache, bio, cell);
+               cell_defer(cache, cell, false);
+       }
+
+       return r;
+}
+
 static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
        struct cache *cache = ti->private;
@@ -2567,16 +2736,16 @@ static int write_discard_bitset(struct cache *cache)
 {
        unsigned i, r;
 
-       r = dm_cache_discard_bitset_resize(cache->cmd, cache->sectors_per_block,
-                                          cache->origin_blocks);
+       r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
+                                          cache->discard_nr_blocks);
        if (r) {
                DMERR("could not resize on-disk discard bitset");
                return r;
        }
 
-       for (i = 0; i < from_oblock(cache->discard_nr_blocks); i++) {
-               r = dm_cache_set_discard(cache->cmd, to_oblock(i),
-                                        is_discarded(cache, to_oblock(i)));
+       for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
+               r = dm_cache_set_discard(cache->cmd, to_dblock(i),
+                                        is_discarded(cache, to_dblock(i)));
                if (r)
                        return r;
        }
@@ -2649,14 +2818,16 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
 }
 
 static int load_discard(void *context, sector_t discard_block_size,
-                       dm_oblock_t oblock, bool discard)
+                       dm_dblock_t dblock, bool discard)
 {
        struct cache *cache = context;
 
+       /* FIXME: handle mis-matched block size */
+
        if (discard)
-               set_discard(cache, oblock);
+               set_discard(cache, dblock);
        else
-               clear_discard(cache, oblock);
+               clear_discard(cache, dblock);
 
        return 0;
 }
@@ -2698,7 +2869,7 @@ static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
                return r;
        }
 
-       cache->cache_size = new_size;
+       set_cache_size(cache, new_size);
 
        return 0;
 }
@@ -2808,7 +2979,7 @@ static void cache_status(struct dm_target *ti, status_type_t type,
                residency = policy_residency(cache->policy);
 
                DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
-                      (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT),
+                      (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
                       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
                       (unsigned long long)nr_blocks_metadata,
                       cache->sectors_per_block,
@@ -3047,8 +3218,9 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
        /*
         * FIXME: these limits may be incompatible with the cache device
         */
-       limits->max_discard_sectors = cache->sectors_per_block;
-       limits->discard_granularity = cache->sectors_per_block << SECTOR_SHIFT;
+       limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
+                                           cache->origin_sectors);
+       limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
 }
 
 static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -3062,7 +3234,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
         */
        if (io_opt_sectors < cache->sectors_per_block ||
            do_div(io_opt_sectors, cache->sectors_per_block)) {
-               blk_limits_io_min(limits, 0);
+               blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
                blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
        }
        set_discard_limits(cache, limits);
@@ -3072,7 +3244,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type cache_target = {
        .name = "cache",
-       .version = {1, 4, 0},
+       .version = {1, 6, 0},
        .module = THIS_MODULE,
        .ctr = cache_ctr,
        .dtr = cache_dtr,