/*----------------------------------------------------------------*/
-#define PRISON_CELLS 1024
#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10
/*
* origin_blocks entries, discarded if set.
*/
- dm_oblock_t discard_nr_blocks;
+ dm_dblock_t discard_nr_blocks;
unsigned long *discard_bitset;
+ uint32_t discard_block_size; /* a power of 2 times sectors per block */
/*
* Rather than reconstructing the table line for the status we just
dm_cblock_t cblock;
bool err:1;
+ bool discard:1;
bool writeback:1;
bool demote:1;
bool promote:1;
/*----------------------------------------------------------------*/
-static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
+static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
{
key->virtual = 0;
key->dev = 0;
- key->block = from_oblock(oblock);
+ key->block_begin = from_oblock(begin); /* first origin block covered by the key */
+ key->block_end = from_oblock(end); /* end of the range; the single-block callers in this patch pass begin + 1 */
}
/*
*/
typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
-static int bio_detain(struct cache *cache, dm_oblock_t oblock,
- struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
- cell_free_fn free_fn, void *free_context,
- struct dm_bio_prison_cell **cell_result)
+static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
+ struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
+ cell_free_fn free_fn, void *free_context,
+ struct dm_bio_prison_cell **cell_result)
{
int r;
struct dm_cell_key key;
- build_key(oblock, &key);
+ build_key(oblock_begin, oblock_end, &key); /* the key now describes a range of origin blocks instead of a single block */
r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
if (r)
free_fn(free_context, cell_prealloc);
return r;
}
+static int bio_detain(struct cache *cache, dm_oblock_t oblock,
+ struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
+ cell_free_fn free_fn, void *free_context,
+ struct dm_bio_prison_cell **cell_result)
+{ /*
+ * Single-block wrapper around bio_detain_range(): detains the bio
+ * against the range that starts at oblock and ends at oblock + 1, and
+ * returns bio_detain_range()'s result unchanged.
+ * NOTE(review): the end value looks like an exclusive bound -- confirm
+ * against the bio prison key semantics.
+ */
+ dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
+ return bio_detain_range(cache, oblock, end, bio,
+ cell_prealloc, free_fn, free_context, cell_result);
+}
+
static int get_cell(struct cache *cache,
dm_oblock_t oblock,
struct prealloc *structs,
cell_prealloc = prealloc_get_cell(structs);
- build_key(oblock, &key);
+ build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
if (r)
prealloc_put_cell(structs, cell_prealloc);
return b;
}
-static void set_discard(struct cache *cache, dm_oblock_t b)
+static dm_block_t oblocks_per_dblock(struct cache *cache)
+{ /*
+ * Returns how many origin blocks make up one discard block:
+ * cache->discard_block_size divided by the sectors per block.
+ */
+ dm_block_t oblocks = cache->discard_block_size;
+
+ if (block_size_is_power_of_two(cache))
+ oblocks >>= cache->sectors_per_block_shift; /* a shift stands in for the division when the block size is a power of two */
+ else
+ oblocks = block_div(oblocks, cache->sectors_per_block);
+
+ return oblocks;
+}
+
+static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
+{ /* Converts an origin block number to a discard block number by dividing it by oblocks_per_dblock(). */
+ return to_dblock(block_div(from_oblock(oblock),
+ oblocks_per_dblock(cache)));
+}
+
+static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
+{ /* Converts a discard block number to the origin block at its start: dblock * oblocks_per_dblock(). */
+ return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
+}
+
+static void set_discard(struct cache *cache, dm_dblock_t b)
{
unsigned long flags;
+ BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks)); /* b must index an existing discard block */
atomic_inc(&cache->stats.discard_count);
spin_lock_irqsave(&cache->lock, flags);
- set_bit(from_oblock(b), cache->discard_bitset);
+ set_bit(from_dblock(b), cache->discard_bitset); /* the bitset is indexed by discard block, no longer by origin block */
spin_unlock_irqrestore(&cache->lock, flags);
}
-static void clear_discard(struct cache *cache, dm_oblock_t b)
+static void clear_discard(struct cache *cache, dm_dblock_t b)
{
unsigned long flags;
spin_lock_irqsave(&cache->lock, flags);
- clear_bit(from_oblock(b), cache->discard_bitset);
+ clear_bit(from_dblock(b), cache->discard_bitset); /* indexed by discard block; NOTE(review): unlike set_discard(), b is not range-checked against discard_nr_blocks here */
spin_unlock_irqrestore(&cache->lock, flags);
}
-static bool is_discarded(struct cache *cache, dm_oblock_t b)
+static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
int r;
unsigned long flags;
spin_lock_irqsave(&cache->lock, flags);
- r = test_bit(from_oblock(b), cache->discard_bitset);
+ r = test_bit(from_dblock(b), cache->discard_bitset);
spin_unlock_irqrestore(&cache->lock, flags);
return r;
unsigned long flags;
spin_lock_irqsave(&cache->lock, flags);
- r = test_bit(from_oblock(b), cache->discard_bitset);
+ r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
+ cache->discard_bitset);
spin_unlock_irqrestore(&cache->lock, flags);
return r;
check_if_tick_bio_needed(cache, bio);
remap_to_origin(cache, bio);
if (bio_data_dir(bio) == WRITE)
- clear_discard(cache, oblock);
+ clear_discard(cache, oblock_to_dblock(cache, oblock));
}
static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
remap_to_cache(cache, bio, cblock);
if (bio_data_dir(bio) == WRITE) {
set_dirty(cache, oblock, cblock);
- clear_discard(cache, oblock);
+ clear_discard(cache, oblock_to_dblock(cache, oblock));
}
}
struct cache *cache = mg->cache;
if (mg->writeback) {
- cell_defer(cache, mg->old_ocell, false);
clear_dirty(cache, mg->old_oblock, mg->cblock);
+ cell_defer(cache, mg->old_ocell, false);
cleanup_migration(mg);
return;
}
} else {
+ clear_dirty(cache, mg->new_oblock, mg->cblock);
if (mg->requeue_holder)
cell_defer(cache, mg->new_ocell, true);
else {
bio_endio(mg->new_ocell->holder, 0);
cell_defer(cache, mg->new_ocell, false);
}
- clear_dirty(cache, mg->new_oblock, mg->cblock);
cleanup_migration(mg);
}
}
wake_worker(cache);
}
-static void issue_copy_real(struct dm_cache_migration *mg)
+static void issue_copy(struct dm_cache_migration *mg)
{
int r;
struct dm_io_region o_region, c_region;
migration_success_pre_commit(mg);
}
-static void issue_copy(struct dm_cache_migration *mg)
+static void calc_discard_block_range(struct cache *cache, struct bio *bio,
+ dm_dblock_t *b, dm_dblock_t *e)
+{ /*
+ * Computes the range of discard blocks covered by a discard bio and
+ * stores it in *b (first block) and *e (end of the range). Callers in
+ * this patch treat *b == *e as "nothing to discard" and otherwise
+ * walk from *b up to, but not including, *e.
+ */
+ sector_t sb = bio->bi_iter.bi_sector;
+ sector_t se = bio_end_sector(bio);
+
+ *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size)); /* start sector divided by the discard block size, rounding up */
+
+ if (se - sb < cache->discard_block_size)
+ *e = *b; /* bio is shorter than one discard block: return an empty range */
+ else
+ *e = to_dblock(block_div(se, cache->discard_block_size)); /* end sector divided by the discard block size */
+}
+
+static void issue_discard(struct dm_cache_migration *mg)
+{ /*
+ * Carries out a discard migration: sets the discard bit for every
+ * discard block in the range computed from the bio holding
+ * mg->new_ocell, ends that bio with status 0, passes the cell to
+ * cell_defer() and frees the migration.
+ */
+ dm_dblock_t b, e;
+ struct bio *bio = mg->new_ocell->holder;
+
+ calc_discard_block_range(mg->cache, bio, &b, &e);
+ while (b != e) { /* e itself is not marked */
+ set_discard(mg->cache, b);
+ b = to_dblock(from_dblock(b) + 1);
+ }
+
+ bio_endio(bio, 0);
+ cell_defer(mg->cache, mg->new_ocell, false);
+ free_migration(mg);
+}
+
+static void issue_copy_or_discard(struct dm_cache_migration *mg)
{
bool avoid;
struct cache *cache = mg->cache;
+ if (mg->discard) {
+ issue_discard(mg);
+ return;
+ }
+
if (mg->writeback || mg->demote)
avoid = !is_dirty(cache, mg->cblock) ||
is_discarded_oblock(cache, mg->old_oblock);
}
}
- avoid ? avoid_copy(mg) : issue_copy_real(mg);
+ avoid ? avoid_copy(mg) : issue_copy(mg);
}
static void complete_migration(struct dm_cache_migration *mg)
struct dm_cache_migration *mg = prealloc_get_migration(structs);
mg->err = false;
+ mg->discard = false;
mg->writeback = false;
mg->demote = false;
mg->promote = true;
struct dm_cache_migration *mg = prealloc_get_migration(structs);
mg->err = false;
+ mg->discard = false;
mg->writeback = true;
mg->demote = false;
mg->promote = false;
struct dm_cache_migration *mg = prealloc_get_migration(structs);
mg->err = false;
+ mg->discard = false;
mg->writeback = false;
mg->demote = true;
mg->promote = true;
struct dm_cache_migration *mg = prealloc_get_migration(structs);
mg->err = false;
+ mg->discard = false;
mg->writeback = false;
mg->demote = true;
mg->promote = false;
quiesce_migration(mg);
}
+static void discard(struct cache *cache, struct prealloc *structs,
+ struct dm_bio_prison_cell *cell)
+{ /*
+ * Takes a preallocated migration from structs, flags it as a discard
+ * (writeback, demote, promote, requeue_holder and invalidate all
+ * cleared) with cell stored as new_ocell and no old_ocell, then hands
+ * it to quiesce_migration().
+ */
+ struct dm_cache_migration *mg = prealloc_get_migration(structs);
+
+ mg->err = false;
+ mg->discard = true;
+ mg->writeback = false;
+ mg->demote = false;
+ mg->promote = false;
+ mg->requeue_holder = false;
+ mg->invalidate = false;
+ mg->cache = cache;
+ mg->old_ocell = NULL;
+ mg->new_ocell = cell;
+ mg->start_jiffies = jiffies;
+
+ quiesce_migration(mg);
+}
+
/*----------------------------------------------------------------
* bio processing
*--------------------------------------------------------------*/
issue(cache, bio);
}
-/*
- * People generally discard large parts of a device, eg, the whole device
- * when formatting. Splitting these large discards up into cache block
- * sized ios and then quiescing (always neccessary for discard) takes too
- * long.
- *
- * We keep it simple, and allow any size of discard to come in, and just
- * mark off blocks on the discard bitset. No passdown occurs!
- *
- * To implement passdown we need to change the bio_prison such that a cell
- * can have a key that spans many blocks.
- */
-static void process_discard_bio(struct cache *cache, struct bio *bio)
+static void process_discard_bio(struct cache *cache, struct prealloc *structs,
+ struct bio *bio)
{
- dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
- cache->sectors_per_block);
- dm_block_t end_block = bio_end_sector(bio);
- dm_block_t b;
+ int r;
+ dm_dblock_t b, e;
+ struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
- end_block = block_div(end_block, cache->sectors_per_block);
+ calc_discard_block_range(cache, bio, &b, &e);
+ if (b == e) { /* empty discard block range: end the bio without touching the discard bitset */
+ bio_endio(bio, 0);
+ return;
+ }
- for (b = start_block; b < end_block; b++)
- set_discard(cache, to_oblock(b));
+ cell_prealloc = prealloc_get_cell(structs);
+ r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
+ (cell_free_fn) prealloc_put_cell,
+ structs, &new_ocell);
+ if (r > 0) /* NOTE(review): a positive result presumably means the bio was queued behind an existing cell -- confirm against dm_bio_detain() */
+ return;
- bio_endio(bio, 0);
+ discard(cache, structs, new_ocell); /* the bitset update and bio completion now happen later, in the discard migration */
}
static bool spare_migration_bandwidth(struct cache *cache)
if (bio->bi_rw & REQ_FLUSH)
process_flush_bio(cache, bio);
else if (bio->bi_rw & REQ_DISCARD)
- process_discard_bio(cache, bio);
+ process_discard_bio(cache, &structs, bio);
else
process_bio(cache, &structs, bio);
}
process_invalidation_requests(cache);
}
- process_migrations(cache, &cache->quiesced_migrations, issue_copy);
+ process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard);
process_migrations(cache, &cache->completed_migrations, complete_migration);
if (commit_if_needed(cache)) {
return 0;
}
+/*
+ * We want the discard block size to be a power of two, at least the size
+ * of the cache block size, and have no more than 2^14 discard blocks
+ * across the origin.
+ */
+#define MAX_DISCARD_BLOCKS (1 << 14)
+
+static bool too_many_discard_blocks(sector_t discard_block_size,
+ sector_t origin_size)
+{ /* Returns true when origin_size divided by discard_block_size exceeds MAX_DISCARD_BLOCKS. */
+ (void) sector_div(origin_size, discard_block_size); /* origin_size is used as the quotient below; the return value is deliberately discarded */
+
+ return origin_size > MAX_DISCARD_BLOCKS;
+}
+
+static sector_t calculate_discard_block_size(sector_t cache_block_size,
+ sector_t origin_size)
+{ /*
+ * Chooses the discard block size: cache_block_size rounded up to a
+ * power of two, then doubled for as long as too_many_discard_blocks()
+ * reports that the origin would hold too many discard blocks.
+ */
+ sector_t discard_block_size;
+
+ discard_block_size = roundup_pow_of_two(cache_block_size);
+
+ if (origin_size) /* a zero origin_size skips the doubling loop entirely */
+ while (too_many_discard_blocks(discard_block_size, origin_size))
+ discard_block_size *= 2;
+
+ return discard_block_size;
+}
+
+static void set_cache_size(struct cache *cache, dm_cblock_t size)
+{ /*
+ * Stores size in cache->cache_size, first emitting a rate-limited
+ * warning when the new size is more than 2^20 cache blocks.
+ */
+ dm_block_t nr_blocks = from_cblock(size);
+
+ if (nr_blocks > (1 << 20) && cache->cache_size != size) /* no warning when the size is unchanged */
+ DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
+ "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
+ "Please consider increasing the cache block size to reduce the overall cache block count.",
+ (unsigned long long) nr_blocks);
+
+ cache->cache_size = size;
+}
+
#define DEFAULT_MIGRATION_THRESHOLD 2048
static int cache_create(struct cache_args *ca, struct cache **result)
cache->sectors_per_block_shift = -1;
cache_size = block_div(cache_size, ca->block_size);
- cache->cache_size = to_cblock(cache_size);
+ set_cache_size(cache, to_cblock(cache_size));
} else {
cache->sectors_per_block_shift = __ffs(ca->block_size);
- cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift);
+ set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
}
r = create_cache_policy(cache, ca, error);
}
clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
- cache->discard_nr_blocks = cache->origin_blocks;
- cache->discard_bitset = alloc_bitset(from_oblock(cache->discard_nr_blocks));
+ cache->discard_block_size =
+ calculate_discard_block_size(cache->sectors_per_block,
+ cache->origin_sectors);
+ cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks);
+ cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
if (!cache->discard_bitset) {
*error = "could not allocate discard bitset";
goto bad;
}
- clear_bitset(cache->discard_bitset, from_oblock(cache->discard_nr_blocks));
+ clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
if (IS_ERR(cache->copier)) {
INIT_DELAYED_WORK(&cache->waker, do_waker);
cache->last_commit_jiffies = jiffies;
- cache->prison = dm_bio_prison_create(PRISON_CELLS);
+ cache->prison = dm_bio_prison_create();
if (!cache->prison) {
*error = "could not create bio prison";
goto bad;
{
unsigned i, r;
- r = dm_cache_discard_bitset_resize(cache->cmd, cache->sectors_per_block,
- cache->origin_blocks);
+ r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
+ cache->discard_nr_blocks);
if (r) {
DMERR("could not resize on-disk discard bitset");
return r;
}
- for (i = 0; i < from_oblock(cache->discard_nr_blocks); i++) {
- r = dm_cache_set_discard(cache->cmd, to_oblock(i),
- is_discarded(cache, to_oblock(i)));
+ for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
+ r = dm_cache_set_discard(cache->cmd, to_dblock(i),
+ is_discarded(cache, to_dblock(i)));
if (r)
return r;
}
}
static int load_discard(void *context, sector_t discard_block_size,
- dm_oblock_t oblock, bool discard)
+ dm_dblock_t dblock, bool discard)
{
struct cache *cache = context;
+ /* FIXME: handle mis-matched block size; the discard_block_size argument is currently ignored */
+
if (discard)
- set_discard(cache, oblock);
+ set_discard(cache, dblock); /* dblock is taken as an index in the cache's own discard block units */
else
- clear_discard(cache, oblock);
+ clear_discard(cache, dblock);
return 0;
}
return r;
}
- cache->cache_size = new_size;
+ set_cache_size(cache, new_size);
return 0;
}
/*
* FIXME: these limits may be incompatible with the cache device
*/
- limits->max_discard_sectors = cache->sectors_per_block;
- limits->discard_granularity = cache->sectors_per_block << SECTOR_SHIFT;
+ limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
+ cache->origin_sectors);
+ limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
}
static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type cache_target = {
.name = "cache",
- .version = {1, 5, 0},
+ .version = {1, 6, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,