]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - drivers/md/dm-cache-metadata.c
pci: use device_remove_file_self() instead of device_schedule_callback()
[karo-tx-linux.git] / drivers / md / dm-cache-metadata.c
1 /*
2  * Copyright (C) 2012 Red Hat, Inc.
3  *
4  * This file is released under the GPL.
5  */
6
7 #include "dm-cache-metadata.h"
8
9 #include "persistent-data/dm-array.h"
10 #include "persistent-data/dm-bitset.h"
11 #include "persistent-data/dm-space-map.h"
12 #include "persistent-data/dm-space-map-disk.h"
13 #include "persistent-data/dm-transaction-manager.h"
14
15 #include <linux/device-mapper.h>
16
17 /*----------------------------------------------------------------*/
18
/* Message prefix; presumably picked up by the DMERR() calls in this file. */
#define DM_MSG_PREFIX "cache metadata"

/*
 * NOTE: the leading zero makes the magic an *octal* literal (decimal
 * 1623043).  sb_check() compares the on-disk value against it, so the
 * spelling must not be "corrected".
 */
#define CACHE_SUPERBLOCK_MAGIC		06142003
#define CACHE_SUPERBLOCK_LOCATION	0

/*
 * Range of metadata versions that this module can handle
 * (enforced by check_metadata_version()).
 */
#define MIN_CACHE_VERSION		1
#define MAX_CACHE_VERSION		1

/* Passed to dm_block_manager_create() as the block cache size. */
#define CACHE_METADATA_CACHE_SIZE	64

/*
 * Lock budget handed to dm_block_manager_create():
 *   3 for btree insert +
 *   2 for btree lookup used within space map
 */
#define CACHE_MAX_CONCURRENT_LOCKS	5

/* Bytes reserved in the superblock for the metadata space map root. */
#define SPACE_MAP_ROOT_SIZE		128
38
/*
 * Bit numbers within the superblock 'flags' word; used with
 * set_bit()/clear_bit()/test_bit() elsewhere in this file.
 */
enum superblock_flag_bits {
	/* Used to spot crashes that would invalidate the dirty bitset. */
	CLEAN_SHUTDOWN = 0,
};
43
/*
 * Flags carried by every cache block -> origin block mapping.  They live
 * in the low bits of the packed array value (see pack_value()).
 */
enum mapping_bits {
	/*
	 * The mapping is valid.  Mappings are stored in an array, so a
	 * non-existent mapping is represented by an entry with this flag
	 * cleared.
	 */
	M_VALID = 1 << 0,

	/*
	 * The data held on the cache differs from the data on the origin.
	 */
	M_DIRTY = 1 << 1
};
59
60 struct cache_disk_superblock {
61         __le32 csum;
62         __le32 flags;
63         __le64 blocknr;
64
65         __u8 uuid[16];
66         __le64 magic;
67         __le32 version;
68
69         __u8 policy_name[CACHE_POLICY_NAME_SIZE];
70         __le32 policy_hint_size;
71
72         __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
73         __le64 mapping_root;
74         __le64 hint_root;
75
76         __le64 discard_root;
77         __le64 discard_block_size;
78         __le64 discard_nr_blocks;
79
80         __le32 data_block_size;
81         __le32 metadata_block_size;
82         __le32 cache_blocks;
83
84         __le32 compat_flags;
85         __le32 compat_ro_flags;
86         __le32 incompat_flags;
87
88         __le32 read_hits;
89         __le32 read_misses;
90         __le32 write_hits;
91         __le32 write_misses;
92
93         __le32 policy_version[CACHE_POLICY_VERSION_SIZE];
94 } __packed;
95
96 struct dm_cache_metadata {
97         struct block_device *bdev;
98         struct dm_block_manager *bm;
99         struct dm_space_map *metadata_sm;
100         struct dm_transaction_manager *tm;
101
102         struct dm_array_info info;
103         struct dm_array_info hint_info;
104         struct dm_disk_bitset discard_info;
105
106         struct rw_semaphore root_lock;
107         dm_block_t root;
108         dm_block_t hint_root;
109         dm_block_t discard_root;
110
111         sector_t discard_block_size;
112         dm_dblock_t discard_nr_blocks;
113
114         sector_t data_block_size;
115         dm_cblock_t cache_blocks;
116         bool changed:1;
117         bool clean_when_opened:1;
118
119         char policy_name[CACHE_POLICY_NAME_SIZE];
120         unsigned policy_version[CACHE_POLICY_VERSION_SIZE];
121         size_t policy_hint_size;
122         struct dm_cache_statistics stats;
123 };
124
125 /*-------------------------------------------------------------------
126  * superblock validator
127  *-----------------------------------------------------------------*/
128
129 #define SUPERBLOCK_CSUM_XOR 9031977
130
131 static void sb_prepare_for_write(struct dm_block_validator *v,
132                                  struct dm_block *b,
133                                  size_t sb_block_size)
134 {
135         struct cache_disk_superblock *disk_super = dm_block_data(b);
136
137         disk_super->blocknr = cpu_to_le64(dm_block_location(b));
138         disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
139                                                       sb_block_size - sizeof(__le32),
140                                                       SUPERBLOCK_CSUM_XOR));
141 }
142
143 static int check_metadata_version(struct cache_disk_superblock *disk_super)
144 {
145         uint32_t metadata_version = le32_to_cpu(disk_super->version);
146         if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) {
147                 DMERR("Cache metadata version %u found, but only versions between %u and %u supported.",
148                       metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION);
149                 return -EINVAL;
150         }
151
152         return 0;
153 }
154
155 static int sb_check(struct dm_block_validator *v,
156                     struct dm_block *b,
157                     size_t sb_block_size)
158 {
159         struct cache_disk_superblock *disk_super = dm_block_data(b);
160         __le32 csum_le;
161
162         if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
163                 DMERR("sb_check failed: blocknr %llu: wanted %llu",
164                       le64_to_cpu(disk_super->blocknr),
165                       (unsigned long long)dm_block_location(b));
166                 return -ENOTBLK;
167         }
168
169         if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) {
170                 DMERR("sb_check failed: magic %llu: wanted %llu",
171                       le64_to_cpu(disk_super->magic),
172                       (unsigned long long)CACHE_SUPERBLOCK_MAGIC);
173                 return -EILSEQ;
174         }
175
176         csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
177                                              sb_block_size - sizeof(__le32),
178                                              SUPERBLOCK_CSUM_XOR));
179         if (csum_le != disk_super->csum) {
180                 DMERR("sb_check failed: csum %u: wanted %u",
181                       le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
182                 return -EILSEQ;
183         }
184
185         return check_metadata_version(disk_super);
186 }
187
188 static struct dm_block_validator sb_validator = {
189         .name = "superblock",
190         .prepare_for_write = sb_prepare_for_write,
191         .check = sb_check
192 };
193
194 /*----------------------------------------------------------------*/
195
196 static int superblock_read_lock(struct dm_cache_metadata *cmd,
197                                 struct dm_block **sblock)
198 {
199         return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
200                                &sb_validator, sblock);
201 }
202
203 static int superblock_lock_zero(struct dm_cache_metadata *cmd,
204                                 struct dm_block **sblock)
205 {
206         return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
207                                      &sb_validator, sblock);
208 }
209
210 static int superblock_lock(struct dm_cache_metadata *cmd,
211                            struct dm_block **sblock)
212 {
213         return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
214                                 &sb_validator, sblock);
215 }
216
217 /*----------------------------------------------------------------*/
218
219 static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *result)
220 {
221         int r;
222         unsigned i;
223         struct dm_block *b;
224         __le64 *data_le, zero = cpu_to_le64(0);
225         unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);
226
227         /*
228          * We can't use a validator here - it may be all zeroes.
229          */
230         r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b);
231         if (r)
232                 return r;
233
234         data_le = dm_block_data(b);
235         *result = true;
236         for (i = 0; i < sb_block_size; i++) {
237                 if (data_le[i] != zero) {
238                         *result = false;
239                         break;
240                 }
241         }
242
243         return dm_bm_unlock(b);
244 }
245
246 static void __setup_mapping_info(struct dm_cache_metadata *cmd)
247 {
248         struct dm_btree_value_type vt;
249
250         vt.context = NULL;
251         vt.size = sizeof(__le64);
252         vt.inc = NULL;
253         vt.dec = NULL;
254         vt.equal = NULL;
255         dm_array_info_init(&cmd->info, cmd->tm, &vt);
256
257         if (cmd->policy_hint_size) {
258                 vt.size = sizeof(__le32);
259                 dm_array_info_init(&cmd->hint_info, cmd->tm, &vt);
260         }
261 }
262
263 static int __write_initial_superblock(struct dm_cache_metadata *cmd)
264 {
265         int r;
266         struct dm_block *sblock;
267         size_t metadata_len;
268         struct cache_disk_superblock *disk_super;
269         sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT;
270
271         /* FIXME: see if we can lose the max sectors limit */
272         if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS)
273                 bdev_size = DM_CACHE_METADATA_MAX_SECTORS;
274
275         r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
276         if (r < 0)
277                 return r;
278
279         r = dm_tm_pre_commit(cmd->tm);
280         if (r < 0)
281                 return r;
282
283         r = superblock_lock_zero(cmd, &sblock);
284         if (r)
285                 return r;
286
287         disk_super = dm_block_data(sblock);
288         disk_super->flags = 0;
289         memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
290         disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
291         disk_super->version = cpu_to_le32(MAX_CACHE_VERSION);
292         memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
293         memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
294         disk_super->policy_hint_size = 0;
295
296         r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
297                             metadata_len);
298         if (r < 0)
299                 goto bad_locked;
300
301         disk_super->mapping_root = cpu_to_le64(cmd->root);
302         disk_super->hint_root = cpu_to_le64(cmd->hint_root);
303         disk_super->discard_root = cpu_to_le64(cmd->discard_root);
304         disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
305         disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
306         disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
307         disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
308         disk_super->cache_blocks = cpu_to_le32(0);
309
310         disk_super->read_hits = cpu_to_le32(0);
311         disk_super->read_misses = cpu_to_le32(0);
312         disk_super->write_hits = cpu_to_le32(0);
313         disk_super->write_misses = cpu_to_le32(0);
314
315         return dm_tm_commit(cmd->tm, sblock);
316
317 bad_locked:
318         dm_bm_unlock(sblock);
319         return r;
320 }
321
322 static int __format_metadata(struct dm_cache_metadata *cmd)
323 {
324         int r;
325
326         r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
327                                  &cmd->tm, &cmd->metadata_sm);
328         if (r < 0) {
329                 DMERR("tm_create_with_sm failed");
330                 return r;
331         }
332
333         __setup_mapping_info(cmd);
334
335         r = dm_array_empty(&cmd->info, &cmd->root);
336         if (r < 0)
337                 goto bad;
338
339         dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
340
341         r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
342         if (r < 0)
343                 goto bad;
344
345         cmd->discard_block_size = 0;
346         cmd->discard_nr_blocks = 0;
347
348         r = __write_initial_superblock(cmd);
349         if (r)
350                 goto bad;
351
352         cmd->clean_when_opened = true;
353         return 0;
354
355 bad:
356         dm_tm_destroy(cmd->tm);
357         dm_sm_destroy(cmd->metadata_sm);
358
359         return r;
360 }
361
362 static int __check_incompat_features(struct cache_disk_superblock *disk_super,
363                                      struct dm_cache_metadata *cmd)
364 {
365         uint32_t features;
366
367         features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
368         if (features) {
369                 DMERR("could not access metadata due to unsupported optional features (%lx).",
370                       (unsigned long)features);
371                 return -EINVAL;
372         }
373
374         /*
375          * Check for read-only metadata to skip the following RDWR checks.
376          */
377         if (get_disk_ro(cmd->bdev->bd_disk))
378                 return 0;
379
380         features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP;
381         if (features) {
382                 DMERR("could not access metadata RDWR due to unsupported optional features (%lx).",
383                       (unsigned long)features);
384                 return -EINVAL;
385         }
386
387         return 0;
388 }
389
390 static int __open_metadata(struct dm_cache_metadata *cmd)
391 {
392         int r;
393         struct dm_block *sblock;
394         struct cache_disk_superblock *disk_super;
395         unsigned long sb_flags;
396
397         r = superblock_read_lock(cmd, &sblock);
398         if (r < 0) {
399                 DMERR("couldn't read lock superblock");
400                 return r;
401         }
402
403         disk_super = dm_block_data(sblock);
404
405         r = __check_incompat_features(disk_super, cmd);
406         if (r < 0)
407                 goto bad;
408
409         r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
410                                disk_super->metadata_space_map_root,
411                                sizeof(disk_super->metadata_space_map_root),
412                                &cmd->tm, &cmd->metadata_sm);
413         if (r < 0) {
414                 DMERR("tm_open_with_sm failed");
415                 goto bad;
416         }
417
418         __setup_mapping_info(cmd);
419         dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
420         sb_flags = le32_to_cpu(disk_super->flags);
421         cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags);
422         return dm_bm_unlock(sblock);
423
424 bad:
425         dm_bm_unlock(sblock);
426         return r;
427 }
428
429 static int __open_or_format_metadata(struct dm_cache_metadata *cmd,
430                                      bool format_device)
431 {
432         int r;
433         bool unformatted = false;
434
435         r = __superblock_all_zeroes(cmd->bm, &unformatted);
436         if (r)
437                 return r;
438
439         if (unformatted)
440                 return format_device ? __format_metadata(cmd) : -EPERM;
441
442         return __open_metadata(cmd);
443 }
444
445 static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
446                                             bool may_format_device)
447 {
448         int r;
449         cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE,
450                                           CACHE_METADATA_CACHE_SIZE,
451                                           CACHE_MAX_CONCURRENT_LOCKS);
452         if (IS_ERR(cmd->bm)) {
453                 DMERR("could not create block manager");
454                 return PTR_ERR(cmd->bm);
455         }
456
457         r = __open_or_format_metadata(cmd, may_format_device);
458         if (r)
459                 dm_block_manager_destroy(cmd->bm);
460
461         return r;
462 }
463
464 static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
465 {
466         dm_sm_destroy(cmd->metadata_sm);
467         dm_tm_destroy(cmd->tm);
468         dm_block_manager_destroy(cmd->bm);
469 }
470
471 typedef unsigned long (*flags_mutator)(unsigned long);
472
473 static void update_flags(struct cache_disk_superblock *disk_super,
474                          flags_mutator mutator)
475 {
476         uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags));
477         disk_super->flags = cpu_to_le32(sb_flags);
478 }
479
480 static unsigned long set_clean_shutdown(unsigned long flags)
481 {
482         set_bit(CLEAN_SHUTDOWN, &flags);
483         return flags;
484 }
485
486 static unsigned long clear_clean_shutdown(unsigned long flags)
487 {
488         clear_bit(CLEAN_SHUTDOWN, &flags);
489         return flags;
490 }
491
492 static void read_superblock_fields(struct dm_cache_metadata *cmd,
493                                    struct cache_disk_superblock *disk_super)
494 {
495         cmd->root = le64_to_cpu(disk_super->mapping_root);
496         cmd->hint_root = le64_to_cpu(disk_super->hint_root);
497         cmd->discard_root = le64_to_cpu(disk_super->discard_root);
498         cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size);
499         cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
500         cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
501         cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
502         strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
503         cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
504         cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
505         cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
506         cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size);
507
508         cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits);
509         cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses);
510         cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits);
511         cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses);
512
513         cmd->changed = false;
514 }
515
516 /*
517  * The mutator updates the superblock flags.
518  */
519 static int __begin_transaction_flags(struct dm_cache_metadata *cmd,
520                                      flags_mutator mutator)
521 {
522         int r;
523         struct cache_disk_superblock *disk_super;
524         struct dm_block *sblock;
525
526         r = superblock_lock(cmd, &sblock);
527         if (r)
528                 return r;
529
530         disk_super = dm_block_data(sblock);
531         update_flags(disk_super, mutator);
532         read_superblock_fields(cmd, disk_super);
533
534         return dm_bm_flush_and_unlock(cmd->bm, sblock);
535 }
536
/*
 * Begins a transaction by refreshing the in-core fields from the
 * superblock.  Returns 0 on success or the error from the read lock; the
 * result of the unlock is not checked.
 */
static int __begin_transaction(struct dm_cache_metadata *cmd)
{
	struct dm_block *sblock;
	int r;

	/*
	 * The superblock is re-read every time, which shouldn't really be
	 * necessary.
	 */
	r = superblock_read_lock(cmd, &sblock);
	if (r)
		return r;

	read_superblock_fields(cmd, dm_block_data(sblock));
	dm_bm_unlock(sblock);

	return 0;
}
557
558 static int __commit_transaction(struct dm_cache_metadata *cmd,
559                                 flags_mutator mutator)
560 {
561         int r;
562         size_t metadata_len;
563         struct cache_disk_superblock *disk_super;
564         struct dm_block *sblock;
565
566         /*
567          * We need to know if the cache_disk_superblock exceeds a 512-byte sector.
568          */
569         BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512);
570
571         r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root,
572                             &cmd->discard_root);
573         if (r)
574                 return r;
575
576         r = dm_tm_pre_commit(cmd->tm);
577         if (r < 0)
578                 return r;
579
580         r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
581         if (r < 0)
582                 return r;
583
584         r = superblock_lock(cmd, &sblock);
585         if (r)
586                 return r;
587
588         disk_super = dm_block_data(sblock);
589
590         if (mutator)
591                 update_flags(disk_super, mutator);
592
593         disk_super->mapping_root = cpu_to_le64(cmd->root);
594         disk_super->hint_root = cpu_to_le64(cmd->hint_root);
595         disk_super->discard_root = cpu_to_le64(cmd->discard_root);
596         disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
597         disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
598         disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
599         strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
600         disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
601         disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
602         disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
603
604         disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
605         disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
606         disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits);
607         disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses);
608
609         r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
610                             metadata_len);
611         if (r < 0) {
612                 dm_bm_unlock(sblock);
613                 return r;
614         }
615
616         return dm_tm_commit(cmd->tm, sblock);
617 }
618
619 /*----------------------------------------------------------------*/
620
621 /*
622  * The mappings are held in a dm-array that has 64-bit values stored in
623  * little-endian format.  The index is the cblock, the high 48bits of the
624  * value are the oblock and the low 16 bit the flags.
625  */
626 #define FLAGS_MASK ((1 << 16) - 1)
627
628 static __le64 pack_value(dm_oblock_t block, unsigned flags)
629 {
630         uint64_t value = from_oblock(block);
631         value <<= 16;
632         value = value | (flags & FLAGS_MASK);
633         return cpu_to_le64(value);
634 }
635
636 static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
637 {
638         uint64_t value = le64_to_cpu(value_le);
639         uint64_t b = value >> 16;
640         *block = to_oblock(b);
641         *flags = value & FLAGS_MASK;
642 }
643
644 /*----------------------------------------------------------------*/
645
646 struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
647                                                  sector_t data_block_size,
648                                                  bool may_format_device,
649                                                  size_t policy_hint_size)
650 {
651         int r;
652         struct dm_cache_metadata *cmd;
653
654         cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
655         if (!cmd) {
656                 DMERR("could not allocate metadata struct");
657                 return NULL;
658         }
659
660         init_rwsem(&cmd->root_lock);
661         cmd->bdev = bdev;
662         cmd->data_block_size = data_block_size;
663         cmd->cache_blocks = 0;
664         cmd->policy_hint_size = policy_hint_size;
665         cmd->changed = true;
666
667         r = __create_persistent_data_objects(cmd, may_format_device);
668         if (r) {
669                 kfree(cmd);
670                 return ERR_PTR(r);
671         }
672
673         r = __begin_transaction_flags(cmd, clear_clean_shutdown);
674         if (r < 0) {
675                 dm_cache_metadata_close(cmd);
676                 return ERR_PTR(r);
677         }
678
679         return cmd;
680 }
681
/*
 * Releases a handle returned by dm_cache_metadata_open(): destroys its
 * persistent-data objects and frees the handle.  Nothing is committed here.
 */
void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
{
	struct dm_cache_metadata *handle = cmd;

	__destroy_persistent_data_objects(handle);
	kfree(handle);
}
687
688 /*
689  * Checks that the given cache block is either unmapped or clean.
690  */
691 static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
692                                    bool *result)
693 {
694         int r;
695         __le64 value;
696         dm_oblock_t ob;
697         unsigned flags;
698
699         r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value);
700         if (r) {
701                 DMERR("block_unmapped_or_clean failed");
702                 return r;
703         }
704
705         unpack_value(value, &ob, &flags);
706         *result = !((flags & M_VALID) && (flags & M_DIRTY));
707
708         return 0;
709 }
710
711 static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
712                                         dm_cblock_t begin, dm_cblock_t end,
713                                         bool *result)
714 {
715         int r;
716         *result = true;
717
718         while (begin != end) {
719                 r = block_unmapped_or_clean(cmd, begin, result);
720                 if (r)
721                         return r;
722
723                 if (!*result) {
724                         DMERR("cache block %llu is dirty",
725                               (unsigned long long) from_cblock(begin));
726                         return 0;
727                 }
728
729                 begin = to_cblock(from_cblock(begin) + 1);
730         }
731
732         return 0;
733 }
734
735 int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
736 {
737         int r;
738         bool clean;
739         __le64 null_mapping = pack_value(0, 0);
740
741         down_write(&cmd->root_lock);
742         __dm_bless_for_disk(&null_mapping);
743
744         if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) {
745                 r = blocks_are_unmapped_or_clean(cmd, new_cache_size, cmd->cache_blocks, &clean);
746                 if (r) {
747                         __dm_unbless_for_disk(&null_mapping);
748                         goto out;
749                 }
750
751                 if (!clean) {
752                         DMERR("unable to shrink cache due to dirty blocks");
753                         r = -EINVAL;
754                         __dm_unbless_for_disk(&null_mapping);
755                         goto out;
756                 }
757         }
758
759         r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
760                             from_cblock(new_cache_size),
761                             &null_mapping, &cmd->root);
762         if (!r)
763                 cmd->cache_blocks = new_cache_size;
764         cmd->changed = true;
765
766 out:
767         up_write(&cmd->root_lock);
768
769         return r;
770 }
771
772 int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
773                                    sector_t discard_block_size,
774                                    dm_dblock_t new_nr_entries)
775 {
776         int r;
777
778         down_write(&cmd->root_lock);
779         r = dm_bitset_resize(&cmd->discard_info,
780                              cmd->discard_root,
781                              from_dblock(cmd->discard_nr_blocks),
782                              from_dblock(new_nr_entries),
783                              false, &cmd->discard_root);
784         if (!r) {
785                 cmd->discard_block_size = discard_block_size;
786                 cmd->discard_nr_blocks = new_nr_entries;
787         }
788
789         cmd->changed = true;
790         up_write(&cmd->root_lock);
791
792         return r;
793 }
794
795 static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
796 {
797         return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root,
798                                  from_dblock(b), &cmd->discard_root);
799 }
800
801 static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
802 {
803         return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root,
804                                    from_dblock(b), &cmd->discard_root);
805 }
806
807 static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b,
808                           bool *is_discarded)
809 {
810         return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root,
811                                   from_dblock(b), &cmd->discard_root,
812                                   is_discarded);
813 }
814
815 static int __discard(struct dm_cache_metadata *cmd,
816                      dm_dblock_t dblock, bool discard)
817 {
818         int r;
819
820         r = (discard ? __set_discard : __clear_discard)(cmd, dblock);
821         if (r)
822                 return r;
823
824         cmd->changed = true;
825         return 0;
826 }
827
828 int dm_cache_set_discard(struct dm_cache_metadata *cmd,
829                          dm_dblock_t dblock, bool discard)
830 {
831         int r;
832
833         down_write(&cmd->root_lock);
834         r = __discard(cmd, dblock, discard);
835         up_write(&cmd->root_lock);
836
837         return r;
838 }
839
840 static int __load_discards(struct dm_cache_metadata *cmd,
841                            load_discard_fn fn, void *context)
842 {
843         int r = 0;
844         dm_block_t b;
845         bool discard;
846
847         for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
848                 dm_dblock_t dblock = to_dblock(b);
849
850                 if (cmd->clean_when_opened) {
851                         r = __is_discarded(cmd, dblock, &discard);
852                         if (r)
853                                 return r;
854                 } else
855                         discard = false;
856
857                 r = fn(context, cmd->discard_block_size, dblock, discard);
858                 if (r)
859                         break;
860         }
861
862         return r;
863 }
864
865 int dm_cache_load_discards(struct dm_cache_metadata *cmd,
866                            load_discard_fn fn, void *context)
867 {
868         int r;
869
870         down_read(&cmd->root_lock);
871         r = __load_discards(cmd, fn, context);
872         up_read(&cmd->root_lock);
873
874         return r;
875 }
876
877 dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd)
878 {
879         dm_cblock_t r;
880
881         down_read(&cmd->root_lock);
882         r = cmd->cache_blocks;
883         up_read(&cmd->root_lock);
884
885         return r;
886 }
887
888 static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
889 {
890         int r;
891         __le64 value = pack_value(0, 0);
892
893         __dm_bless_for_disk(&value);
894         r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
895                                &value, &cmd->root);
896         if (r)
897                 return r;
898
899         cmd->changed = true;
900         return 0;
901 }
902
903 int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
904 {
905         int r;
906
907         down_write(&cmd->root_lock);
908         r = __remove(cmd, cblock);
909         up_write(&cmd->root_lock);
910
911         return r;
912 }
913
914 static int __insert(struct dm_cache_metadata *cmd,
915                     dm_cblock_t cblock, dm_oblock_t oblock)
916 {
917         int r;
918         __le64 value = pack_value(oblock, M_VALID);
919         __dm_bless_for_disk(&value);
920
921         r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
922                                &value, &cmd->root);
923         if (r)
924                 return r;
925
926         cmd->changed = true;
927         return 0;
928 }
929
930 int dm_cache_insert_mapping(struct dm_cache_metadata *cmd,
931                             dm_cblock_t cblock, dm_oblock_t oblock)
932 {
933         int r;
934
935         down_write(&cmd->root_lock);
936         r = __insert(cmd, cblock, oblock);
937         up_write(&cmd->root_lock);
938
939         return r;
940 }
941
942 struct thunk {
943         load_mapping_fn fn;
944         void *context;
945
946         struct dm_cache_metadata *cmd;
947         bool respect_dirty_flags;
948         bool hints_valid;
949 };
950
951 static bool policy_unchanged(struct dm_cache_metadata *cmd,
952                              struct dm_cache_policy *policy)
953 {
954         const char *policy_name = dm_cache_policy_get_name(policy);
955         const unsigned *policy_version = dm_cache_policy_get_version(policy);
956         size_t policy_hint_size = dm_cache_policy_get_hint_size(policy);
957
958         /*
959          * Ensure policy names match.
960          */
961         if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name)))
962                 return false;
963
964         /*
965          * Ensure policy major versions match.
966          */
967         if (cmd->policy_version[0] != policy_version[0])
968                 return false;
969
970         /*
971          * Ensure policy hint sizes match.
972          */
973         if (cmd->policy_hint_size != policy_hint_size)
974                 return false;
975
976         return true;
977 }
978
979 static bool hints_array_initialized(struct dm_cache_metadata *cmd)
980 {
981         return cmd->hint_root && cmd->policy_hint_size;
982 }
983
984 static bool hints_array_available(struct dm_cache_metadata *cmd,
985                                   struct dm_cache_policy *policy)
986 {
987         return cmd->clean_when_opened && policy_unchanged(cmd, policy) &&
988                 hints_array_initialized(cmd);
989 }
990
991 static int __load_mapping(void *context, uint64_t cblock, void *leaf)
992 {
993         int r = 0;
994         bool dirty;
995         __le64 value;
996         __le32 hint_value = 0;
997         dm_oblock_t oblock;
998         unsigned flags;
999         struct thunk *thunk = context;
1000         struct dm_cache_metadata *cmd = thunk->cmd;
1001
1002         memcpy(&value, leaf, sizeof(value));
1003         unpack_value(value, &oblock, &flags);
1004
1005         if (flags & M_VALID) {
1006                 if (thunk->hints_valid) {
1007                         r = dm_array_get_value(&cmd->hint_info, cmd->hint_root,
1008                                                cblock, &hint_value);
1009                         if (r && r != -ENODATA)
1010                                 return r;
1011                 }
1012
1013                 dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true;
1014                 r = thunk->fn(thunk->context, oblock, to_cblock(cblock),
1015                               dirty, le32_to_cpu(hint_value), thunk->hints_valid);
1016         }
1017
1018         return r;
1019 }
1020
1021 static int __load_mappings(struct dm_cache_metadata *cmd,
1022                            struct dm_cache_policy *policy,
1023                            load_mapping_fn fn, void *context)
1024 {
1025         struct thunk thunk;
1026
1027         thunk.fn = fn;
1028         thunk.context = context;
1029
1030         thunk.cmd = cmd;
1031         thunk.respect_dirty_flags = cmd->clean_when_opened;
1032         thunk.hints_valid = hints_array_available(cmd, policy);
1033
1034         return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk);
1035 }
1036
1037 int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
1038                            struct dm_cache_policy *policy,
1039                            load_mapping_fn fn, void *context)
1040 {
1041         int r;
1042
1043         down_read(&cmd->root_lock);
1044         r = __load_mappings(cmd, policy, fn, context);
1045         up_read(&cmd->root_lock);
1046
1047         return r;
1048 }
1049
1050 static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
1051 {
1052         int r = 0;
1053         __le64 value;
1054         dm_oblock_t oblock;
1055         unsigned flags;
1056
1057         memcpy(&value, leaf, sizeof(value));
1058         unpack_value(value, &oblock, &flags);
1059
1060         return r;
1061 }
1062
1063 static int __dump_mappings(struct dm_cache_metadata *cmd)
1064 {
1065         return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL);
1066 }
1067
1068 void dm_cache_dump(struct dm_cache_metadata *cmd)
1069 {
1070         down_read(&cmd->root_lock);
1071         __dump_mappings(cmd);
1072         up_read(&cmd->root_lock);
1073 }
1074
1075 int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd)
1076 {
1077         int r;
1078
1079         down_read(&cmd->root_lock);
1080         r = cmd->changed;
1081         up_read(&cmd->root_lock);
1082
1083         return r;
1084 }
1085
1086 static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty)
1087 {
1088         int r;
1089         unsigned flags;
1090         dm_oblock_t oblock;
1091         __le64 value;
1092
1093         r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value);
1094         if (r)
1095                 return r;
1096
1097         unpack_value(value, &oblock, &flags);
1098
1099         if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty))
1100                 /* nothing to be done */
1101                 return 0;
1102
1103         value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0));
1104         __dm_bless_for_disk(&value);
1105
1106         r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1107                                &value, &cmd->root);
1108         if (r)
1109                 return r;
1110
1111         cmd->changed = true;
1112         return 0;
1113
1114 }
1115
1116 int dm_cache_set_dirty(struct dm_cache_metadata *cmd,
1117                        dm_cblock_t cblock, bool dirty)
1118 {
1119         int r;
1120
1121         down_write(&cmd->root_lock);
1122         r = __dirty(cmd, cblock, dirty);
1123         up_write(&cmd->root_lock);
1124
1125         return r;
1126 }
1127
1128 void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
1129                                  struct dm_cache_statistics *stats)
1130 {
1131         down_read(&cmd->root_lock);
1132         *stats = cmd->stats;
1133         up_read(&cmd->root_lock);
1134 }
1135
1136 void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
1137                                  struct dm_cache_statistics *stats)
1138 {
1139         down_write(&cmd->root_lock);
1140         cmd->stats = *stats;
1141         up_write(&cmd->root_lock);
1142 }
1143
1144 int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
1145 {
1146         int r;
1147         flags_mutator mutator = (clean_shutdown ? set_clean_shutdown :
1148                                  clear_clean_shutdown);
1149
1150         down_write(&cmd->root_lock);
1151         r = __commit_transaction(cmd, mutator);
1152         if (r)
1153                 goto out;
1154
1155         r = __begin_transaction(cmd);
1156
1157 out:
1158         up_write(&cmd->root_lock);
1159         return r;
1160 }
1161
1162 int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd,
1163                                            dm_block_t *result)
1164 {
1165         int r = -EINVAL;
1166
1167         down_read(&cmd->root_lock);
1168         r = dm_sm_get_nr_free(cmd->metadata_sm, result);
1169         up_read(&cmd->root_lock);
1170
1171         return r;
1172 }
1173
1174 int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
1175                                    dm_block_t *result)
1176 {
1177         int r = -EINVAL;
1178
1179         down_read(&cmd->root_lock);
1180         r = dm_sm_get_nr_blocks(cmd->metadata_sm, result);
1181         up_read(&cmd->root_lock);
1182
1183         return r;
1184 }
1185
1186 /*----------------------------------------------------------------*/
1187
1188 static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
1189 {
1190         int r;
1191         __le32 value;
1192         size_t hint_size;
1193         const char *policy_name = dm_cache_policy_get_name(policy);
1194         const unsigned *policy_version = dm_cache_policy_get_version(policy);
1195
1196         if (!policy_name[0] ||
1197             (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
1198                 return -EINVAL;
1199
1200         if (!policy_unchanged(cmd, policy)) {
1201                 strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
1202                 memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
1203
1204                 hint_size = dm_cache_policy_get_hint_size(policy);
1205                 if (!hint_size)
1206                         return 0; /* short-circuit hints initialization */
1207                 cmd->policy_hint_size = hint_size;
1208
1209                 if (cmd->hint_root) {
1210                         r = dm_array_del(&cmd->hint_info, cmd->hint_root);
1211                         if (r)
1212                                 return r;
1213                 }
1214
1215                 r = dm_array_empty(&cmd->hint_info, &cmd->hint_root);
1216                 if (r)
1217                         return r;
1218
1219                 value = cpu_to_le32(0);
1220                 __dm_bless_for_disk(&value);
1221                 r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0,
1222                                     from_cblock(cmd->cache_blocks),
1223                                     &value, &cmd->hint_root);
1224                 if (r)
1225                         return r;
1226         }
1227
1228         return 0;
1229 }
1230
1231 int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
1232 {
1233         int r;
1234
1235         down_write(&cmd->root_lock);
1236         r = begin_hints(cmd, policy);
1237         up_write(&cmd->root_lock);
1238
1239         return r;
1240 }
1241
1242 static int save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
1243                      uint32_t hint)
1244 {
1245         int r;
1246         __le32 value = cpu_to_le32(hint);
1247         __dm_bless_for_disk(&value);
1248
1249         r = dm_array_set_value(&cmd->hint_info, cmd->hint_root,
1250                                from_cblock(cblock), &value, &cmd->hint_root);
1251         cmd->changed = true;
1252
1253         return r;
1254 }
1255
1256 int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
1257                        uint32_t hint)
1258 {
1259         int r;
1260
1261         if (!hints_array_initialized(cmd))
1262                 return 0;
1263
1264         down_write(&cmd->root_lock);
1265         r = save_hint(cmd, cblock, hint);
1266         up_write(&cmd->root_lock);
1267
1268         return r;
1269 }
1270
1271 int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result)
1272 {
1273         return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
1274 }