X-Git-Url: https://git.kernelconcepts.de/?a=blobdiff_plain;f=drivers%2Fmd%2Fdm-cache-target.c;h=abdd45d07bf66e5a217ee041aaa8cdb7e7b78ddf;hb=d1d9220cbaeecce910f3ecfeb71cc897a678eb68;hp=1af40ee209e2b9c0d46b9873f3bc32fd78234cd7;hpb=179c0ac67b9d947d2de69e9f08a743e7c74a8dce;p=karo-tx-linux.git diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1af40ee209e2..abdd45d07bf6 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -95,7 +95,6 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio) /*----------------------------------------------------------------*/ -#define PRISON_CELLS 1024 #define MIGRATION_POOL_SIZE 128 #define COMMIT_PERIOD HZ #define MIGRATION_COUNT_WINDOW 10 @@ -237,8 +236,9 @@ struct cache { /* * origin_blocks entries, discarded if set. */ - dm_oblock_t discard_nr_blocks; + dm_dblock_t discard_nr_blocks; unsigned long *discard_bitset; + uint32_t discard_block_size; /* a power of 2 times sectors per block */ /* * Rather than reconstructing the table line for the status we just @@ -310,6 +310,7 @@ struct dm_cache_migration { dm_cblock_t cblock; bool err:1; + bool discard:1; bool writeback:1; bool demote:1; bool promote:1; @@ -433,11 +434,12 @@ static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cel /*----------------------------------------------------------------*/ -static void build_key(dm_oblock_t oblock, struct dm_cell_key *key) +static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key) { key->virtual = 0; key->dev = 0; - key->block = from_oblock(oblock); + key->block_begin = from_oblock(begin); + key->block_end = from_oblock(end); } /* @@ -447,15 +449,15 @@ static void build_key(dm_oblock_t oblock, struct dm_cell_key *key) */ typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell); -static int bio_detain(struct cache *cache, dm_oblock_t oblock, - struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, - cell_free_fn free_fn, void *free_context, - struct dm_bio_prison_cell **cell_result) +static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end, + struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, + cell_free_fn free_fn, void *free_context, + struct dm_bio_prison_cell **cell_result) { int r; struct dm_cell_key key; - build_key(oblock, &key); + build_key(oblock_begin, oblock_end, &key); r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result); if (r) free_fn(free_context, cell_prealloc); @@ -463,6 +465,16 @@ static int bio_detain(struct cache *cache, dm_oblock_t oblock, return r; } +static int bio_detain(struct cache *cache, dm_oblock_t oblock, + struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, + cell_free_fn free_fn, void *free_context, + struct dm_bio_prison_cell **cell_result) +{ + dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL); + return bio_detain_range(cache, oblock, end, bio, + cell_prealloc, free_fn, free_context, cell_result); +} + static int get_cell(struct cache *cache, dm_oblock_t oblock, struct prealloc *structs, @@ -474,7 +486,7 @@ static int get_cell(struct cache *cache, cell_prealloc = prealloc_get_cell(structs); - build_key(oblock, &key); + build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key); r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result); if (r) prealloc_put_cell(structs, cell_prealloc); @@ -524,33 +536,57 @@ static dm_block_t block_div(dm_block_t b, uint32_t n) return b; } -static void set_discard(struct cache *cache, dm_oblock_t b) +static dm_block_t oblocks_per_dblock(struct cache *cache) +{ + dm_block_t oblocks = cache->discard_block_size; + + if (block_size_is_power_of_two(cache)) + oblocks >>= cache->sectors_per_block_shift; + else + oblocks = block_div(oblocks, cache->sectors_per_block); + + return oblocks; +} + +static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) +{ + return to_dblock(block_div(from_oblock(oblock), + oblocks_per_dblock(cache))); +} + +static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock) +{ + return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache)); +} + +static void set_discard(struct cache *cache, dm_dblock_t b) { unsigned long flags; + BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks)); atomic_inc(&cache->stats.discard_count); spin_lock_irqsave(&cache->lock, flags); - set_bit(from_oblock(b), cache->discard_bitset); + set_bit(from_dblock(b), cache->discard_bitset); spin_unlock_irqrestore(&cache->lock, flags); } -static void clear_discard(struct cache *cache, dm_oblock_t b) +static void clear_discard(struct cache *cache, dm_dblock_t b) { unsigned long flags; spin_lock_irqsave(&cache->lock, flags); - clear_bit(from_oblock(b), cache->discard_bitset); + clear_bit(from_dblock(b), cache->discard_bitset); spin_unlock_irqrestore(&cache->lock, flags); } -static bool is_discarded(struct cache *cache, dm_oblock_t b) +static bool is_discarded(struct cache *cache, dm_dblock_t b) { int r; unsigned long flags; spin_lock_irqsave(&cache->lock, flags); - r = test_bit(from_oblock(b), cache->discard_bitset); + r = test_bit(from_dblock(b), cache->discard_bitset); spin_unlock_irqrestore(&cache->lock, flags); return r; @@ -562,7 +598,8 @@ static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b) unsigned long flags; spin_lock_irqsave(&cache->lock, flags); - r = test_bit(from_oblock(b), cache->discard_bitset); + r = test_bit(from_dblock(oblock_to_dblock(cache, b)), + cache->discard_bitset); spin_unlock_irqrestore(&cache->lock, flags); return r; @@ -687,7 +724,7 @@ static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, check_if_tick_bio_needed(cache, bio); remap_to_origin(cache, bio); if (bio_data_dir(bio) == WRITE) - clear_discard(cache, oblock); + clear_discard(cache, oblock_to_dblock(cache, oblock)); } static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, @@ -697,7 +734,7 @@ static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, remap_to_cache(cache, bio, cblock); if (bio_data_dir(bio) == WRITE) { set_dirty(cache, oblock, cblock); - clear_discard(cache, oblock); + clear_discard(cache, oblock_to_dblock(cache, oblock)); } } @@ -895,8 +932,8 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg) struct cache *cache = mg->cache; if (mg->writeback) { - cell_defer(cache, mg->old_ocell, false); clear_dirty(cache, mg->old_oblock, mg->cblock); + cell_defer(cache, mg->old_ocell, false); cleanup_migration(mg); return; @@ -951,13 +988,13 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) } } else { + clear_dirty(cache, mg->new_oblock, mg->cblock); if (mg->requeue_holder) cell_defer(cache, mg->new_ocell, true); else { bio_endio(mg->new_ocell->holder, 0); cell_defer(cache, mg->new_ocell, false); } - clear_dirty(cache, mg->new_oblock, mg->cblock); cleanup_migration(mg); } } @@ -978,7 +1015,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) wake_worker(cache); } -static void issue_copy_real(struct dm_cache_migration *mg) +static void issue_copy(struct dm_cache_migration *mg) { int r; struct dm_io_region o_region, c_region; @@ -1057,11 +1094,46 @@ static void avoid_copy(struct dm_cache_migration *mg) migration_success_pre_commit(mg); } -static void issue_copy(struct dm_cache_migration *mg) +static void calc_discard_block_range(struct cache *cache, struct bio *bio, + dm_dblock_t *b, dm_dblock_t *e) +{ + sector_t sb = bio->bi_iter.bi_sector; + sector_t se = bio_end_sector(bio); + + *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size)); + + if (se - sb < cache->discard_block_size) + *e = *b; + else + *e = to_dblock(block_div(se, cache->discard_block_size)); +} + +static void issue_discard(struct dm_cache_migration *mg) +{ + dm_dblock_t b, e; + struct bio *bio = mg->new_ocell->holder; + + calc_discard_block_range(mg->cache, bio, &b, &e); + while (b != e) { + set_discard(mg->cache, b); + b = to_dblock(from_dblock(b) + 1); + } + + bio_endio(bio, 0); + cell_defer(mg->cache, mg->new_ocell, false); + free_migration(mg); +} + +static void issue_copy_or_discard(struct dm_cache_migration *mg) { bool avoid; struct cache *cache = mg->cache; + if (mg->discard) { + issue_discard(mg); + return; + } + if (mg->writeback || mg->demote) avoid = !is_dirty(cache, mg->cblock) || is_discarded_oblock(cache, mg->old_oblock); @@ -1076,7 +1148,7 @@ static void issue_copy(struct dm_cache_migration *mg) } } - avoid ? avoid_copy(mg) : issue_copy_real(mg); + avoid ? avoid_copy(mg) : issue_copy(mg); } static void complete_migration(struct dm_cache_migration *mg) @@ -1161,6 +1233,7 @@ static void promote(struct cache *cache, struct prealloc *structs, struct dm_cache_migration *mg = prealloc_get_migration(structs); mg->err = false; + mg->discard = false; mg->writeback = false; mg->demote = false; mg->promote = true; @@ -1184,6 +1257,7 @@ static void writeback(struct cache *cache, struct prealloc *structs, struct dm_cache_migration *mg = prealloc_get_migration(structs); mg->err = false; + mg->discard = false; mg->writeback = true; mg->demote = false; mg->promote = false; @@ -1209,6 +1283,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs, struct dm_cache_migration *mg = prealloc_get_migration(structs); mg->err = false; + mg->discard = false; mg->writeback = false; mg->demote = true; mg->promote = true; @@ -1237,6 +1312,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs, struct dm_cache_migration *mg = prealloc_get_migration(structs); mg->err = false; + mg->discard = false; mg->writeback = false; mg->demote = true; mg->promote = false; @@ -1253,6 +1329,26 @@ static void invalidate(struct cache *cache, struct prealloc *structs, quiesce_migration(mg); } +static void discard(struct cache *cache, struct prealloc *structs, + struct dm_bio_prison_cell *cell) +{ + struct dm_cache_migration *mg = prealloc_get_migration(structs); + + mg->err = false; + mg->discard = true; + mg->writeback = false; + mg->demote = false; + mg->promote = false; + mg->requeue_holder = false; + mg->invalidate = false; + mg->cache = cache; + mg->old_ocell = NULL; + mg->new_ocell = cell; + mg->start_jiffies = jiffies; + + quiesce_migration(mg); +} + /*---------------------------------------------------------------- * bio processing *--------------------------------------------------------------*/ @@ -1286,31 +1382,27 @@ static void process_flush_bio(struct cache *cache, struct bio *bio) issue(cache, bio); } -/* - * People generally discard large parts of a device, eg, the whole device - * when formatting. Splitting these large discards up into cache block - * sized ios and then quiescing (always neccessary for discard) takes too - * long. - * - * We keep it simple, and allow any size of discard to come in, and just - * mark off blocks on the discard bitset. No passdown occurs! - * - * To implement passdown we need to change the bio_prison such that a cell - * can have a key that spans many blocks. - */ -static void process_discard_bio(struct cache *cache, struct bio *bio) +static void process_discard_bio(struct cache *cache, struct prealloc *structs, + struct bio *bio) { - dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector, - cache->sectors_per_block); - dm_block_t end_block = bio_end_sector(bio); - dm_block_t b; + int r; + dm_dblock_t b, e; + struct dm_bio_prison_cell *cell_prealloc, *new_ocell; - end_block = block_div(end_block, cache->sectors_per_block); + calc_discard_block_range(cache, bio, &b, &e); + if (b == e) { + bio_endio(bio, 0); + return; + } - for (b = start_block; b < end_block; b++) - set_discard(cache, to_oblock(b)); + cell_prealloc = prealloc_get_cell(structs); + r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc, + (cell_free_fn) prealloc_put_cell, + structs, &new_ocell); + if (r > 0) + return; - bio_endio(bio, 0); + discard(cache, structs, new_ocell); } static bool spare_migration_bandwidth(struct cache *cache) @@ -1500,7 +1592,7 @@ static void process_deferred_bios(struct cache *cache) if (bio->bi_rw & REQ_FLUSH) process_flush_bio(cache, bio); else if (bio->bi_rw & REQ_DISCARD) - process_discard_bio(cache, bio); + process_discard_bio(cache, &structs, bio); else process_bio(cache, &structs, bio); } @@ -1715,7 +1807,7 @@ static void do_worker(struct work_struct *ws) process_invalidation_requests(cache); } - process_migrations(cache, &cache->quiesced_migrations, issue_copy); + process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard); process_migrations(cache, &cache->completed_migrations, complete_migration); if (commit_if_needed(cache)) { @@ -2180,6 +2272,48 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca, return 0; } +/* + * We want the discard block size to be a power of two, at least the size + * of the cache block size, and have no more than 2^14 discard blocks + * across the origin. + */ +#define MAX_DISCARD_BLOCKS (1 << 14) + +static bool too_many_discard_blocks(sector_t discard_block_size, + sector_t origin_size) +{ + (void) sector_div(origin_size, discard_block_size); + + return origin_size > MAX_DISCARD_BLOCKS; +} + +static sector_t calculate_discard_block_size(sector_t cache_block_size, + sector_t origin_size) +{ + sector_t discard_block_size; + + discard_block_size = roundup_pow_of_two(cache_block_size); + + if (origin_size) + while (too_many_discard_blocks(discard_block_size, origin_size)) + discard_block_size *= 2; + + return discard_block_size; +} + +static void set_cache_size(struct cache *cache, dm_cblock_t size) +{ + dm_block_t nr_blocks = from_cblock(size); + + if (nr_blocks > (1 << 20) && cache->cache_size != size) + DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n" + "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n" + "Please consider increasing the cache block size to reduce the overall cache block count.", + (unsigned long long) nr_blocks); + + cache->cache_size = size; +} + #define DEFAULT_MIGRATION_THRESHOLD 2048 static int cache_create(struct cache_args *ca, struct cache **result) @@ -2235,10 +2369,10 @@ static int cache_create(struct cache_args *ca, struct cache **result) cache->sectors_per_block_shift = -1; cache_size = block_div(cache_size, ca->block_size); - cache->cache_size = to_cblock(cache_size); + set_cache_size(cache, to_cblock(cache_size)); } else { cache->sectors_per_block_shift = __ffs(ca->block_size); - cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift); + set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift)); } r = create_cache_policy(cache, ca, error); @@ -2303,13 +2437,16 @@ static int cache_create(struct cache_args *ca, struct cache **result) } clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); - cache->discard_nr_blocks = cache->origin_blocks; - cache->discard_bitset = alloc_bitset(from_oblock(cache->discard_nr_blocks)); + cache->discard_block_size = + calculate_discard_block_size(cache->sectors_per_block, + cache->origin_sectors); + cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks); + cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks)); if (!cache->discard_bitset) { *error = "could not allocate discard bitset"; goto bad; } - clear_bitset(cache->discard_bitset, from_oblock(cache->discard_nr_blocks)); + clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(cache->copier)) { @@ -2327,7 +2464,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) INIT_DELAYED_WORK(&cache->waker, do_waker); cache->last_commit_jiffies = jiffies; - cache->prison = dm_bio_prison_create(PRISON_CELLS); + cache->prison = dm_bio_prison_create(); if (!cache->prison) { *error = "could not create bio prison"; goto bad; @@ -2599,16 +2736,16 @@ static int write_discard_bitset(struct cache *cache) { unsigned i, r; - r = dm_cache_discard_bitset_resize(cache->cmd, cache->sectors_per_block, - cache->origin_blocks); + r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size, + cache->discard_nr_blocks); if (r) { DMERR("could not resize on-disk discard bitset"); return r; } - for (i = 0; i < from_oblock(cache->discard_nr_blocks); i++) { - r = dm_cache_set_discard(cache->cmd, to_oblock(i), - is_discarded(cache, to_oblock(i))); + for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) { + r = dm_cache_set_discard(cache->cmd, to_dblock(i), + is_discarded(cache, to_dblock(i))); if (r) return r; } @@ -2681,14 +2818,16 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, } static int load_discard(void *context, sector_t discard_block_size, - dm_oblock_t oblock, bool discard) + dm_dblock_t dblock, bool discard) { struct cache *cache = context; + /* FIXME: handle mis-matched block size */ + if (discard) - set_discard(cache, oblock); + set_discard(cache, dblock); else - clear_discard(cache, oblock); + clear_discard(cache, dblock); return 0; } @@ -2730,7 +2869,7 @@ static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) return r; } - cache->cache_size = new_size; + set_cache_size(cache, new_size); return 0; } @@ -3079,8 +3218,9 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits) /* * FIXME: these limits may be incompatible with the cache device */ - limits->max_discard_sectors = cache->sectors_per_block; - limits->discard_granularity = cache->sectors_per_block << SECTOR_SHIFT; + limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024, + cache->origin_sectors); + limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; } static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -3104,7 +3244,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {1, 5, 0}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr,