]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - drivers/md/bitmap.c
Merge tag 'mfd-fixes-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd
[karo-tx-linux.git] / drivers / md / bitmap.c
1 /*
2  * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
3  *
4  * bitmap_create  - sets up the bitmap structure
5  * bitmap_destroy - destroys the bitmap structure
6  *
7  * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
8  * - added disk storage for bitmap
9  * - changes to allow various bitmap chunk sizes
10  */
11
12 /*
13  * Still to do:
14  *
15  * flush after percent set rather than just time based. (maybe both).
16  */
17
18 #include <linux/blkdev.h>
19 #include <linux/module.h>
20 #include <linux/errno.h>
21 #include <linux/slab.h>
22 #include <linux/init.h>
23 #include <linux/timer.h>
24 #include <linux/sched.h>
25 #include <linux/list.h>
26 #include <linux/file.h>
27 #include <linux/mount.h>
28 #include <linux/buffer_head.h>
29 #include <linux/seq_file.h>
30 #include "md.h"
31 #include "bitmap.h"
32
33 static inline char *bmname(struct bitmap *bitmap)
34 {
35         return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
36 }
37
/*
 * Make sure counter page 'page' is usable, allocating it on demand
 * (or hijacking it if the allocation fails).
 *
 * Returns:
 *   -EINVAL  'page' is at or beyond bitmap->pages
 *   -ENOENT  the page has neither a map nor the hijacked flag, and
 *            'create' is zero
 *   0        otherwise; on return bp[page] either has a map installed
 *            or has its hijacked flag set
 *
 * When an allocation is needed, bitmap->lock is dropped around kzalloc()
 * and re-taken afterwards (hence the __releases/__acquires annotations),
 * so the page's state is checked again once the lock is held.
 *
 * If the allocation fails, the page's hijacked flag is set so that the
 * page pointer can be used directly as a counter.
 */
static int bitmap_checkpage(struct bitmap_counts *bitmap,
                            unsigned long page, int create)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
        unsigned char *mappage;

        if (page >= bitmap->pages) {
                /* This can happen if bitmap_start_sync goes beyond
                 * End-of-device while looking for a whole page.
                 * It is harmless.
                 */
                return -EINVAL;
        }

        if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
                return 0;

        if (bitmap->bp[page].map) /* page is already allocated, just return */
                return 0;

        if (!create)
                return -ENOENT;

        /* this page has not been allocated yet */

        spin_unlock_irq(&bitmap->lock);
        /* It is possible that this is being called inside a
         * prepare_to_wait/finish_wait loop from raid5c:make_request().
         * In general it is not permitted to sleep in that context as it
         * can cause the loop to spin freely.
         * That doesn't apply here as we can only reach this point
         * once with any loop.
         * When this function completes, either bp[page].map or
         * bp[page].hijacked will be set.  In either case, this function
         * will abort before getting to this point again.  So there is
         * no risk of a free-spin, and so it is safe to assert
         * that sleeping here is allowed.
         */
        sched_annotate_sleep();
        mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
        spin_lock_irq(&bitmap->lock);

        /* The lock was released above, so bp[page] may have changed. */
        if (mappage == NULL) {
                pr_debug("md/bitmap: map page allocation failed, hijacking\n");
                /* failed - set the hijacked flag so that we can use the
                 * pointer as a counter; skipped if a map was installed
                 * while the lock was dropped */
                if (!bitmap->bp[page].map)
                        bitmap->bp[page].hijacked = 1;
        } else if (bitmap->bp[page].map ||
                   bitmap->bp[page].hijacked) {
                /* somebody beat us to getting the page */
                kfree(mappage);
                return 0;
        } else {

                /* no page was in place and we have one, so install it */

                bitmap->bp[page].map = mappage;
                bitmap->missing_pages--;
        }
        return 0;
}
111
112 /* if page is completely empty, put it back on the free list, or dealloc it */
113 /* if page was hijacked, unmark the flag so it might get alloced next time */
114 /* Note: lock should be held when calling this */
115 static void bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
116 {
117         char *ptr;
118
119         if (bitmap->bp[page].count) /* page is still busy */
120                 return;
121
122         /* page is no longer in use, it can be released */
123
124         if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
125                 bitmap->bp[page].hijacked = 0;
126                 bitmap->bp[page].map = NULL;
127         } else {
128                 /* normal case, free the page */
129                 ptr = bitmap->bp[page].map;
130                 bitmap->bp[page].map = NULL;
131                 bitmap->missing_pages++;
132                 kfree(ptr);
133         }
134 }
135
136 /*
137  * bitmap file handling - read and write the bitmap file and its superblock
138  */
139
140 /*
141  * basic page I/O operations
142  */
143
144 /* IO operations when bitmap is stored near all superblocks */
145 static int read_sb_page(struct mddev *mddev, loff_t offset,
146                         struct page *page,
147                         unsigned long index, int size)
148 {
149         /* choose a good rdev and read the page from there */
150
151         struct md_rdev *rdev;
152         sector_t target;
153
154         rdev_for_each(rdev, mddev) {
155                 if (! test_bit(In_sync, &rdev->flags)
156                     || test_bit(Faulty, &rdev->flags))
157                         continue;
158
159                 target = offset + index * (PAGE_SIZE/512);
160
161                 if (sync_page_io(rdev, target,
162                                  roundup(size, bdev_logical_block_size(rdev->bdev)),
163                                  page, READ, true)) {
164                         page->index = index;
165                         return 0;
166                 }
167         }
168         return -EIO;
169 }
170
/*
 * Return the next usable rdev of 'mddev' after 'rdev' (or the first usable
 * one when 'rdev' is NULL), or NULL when the list is exhausted.
 *
 * "Usable" here means raid_disk >= 0 and the Faulty flag clear.  The
 * returned rdev has had nr_pending incremented; passing it back in as
 * 'rdev' on the next call drops that reference through rdev_dec_pending().
 * A caller that stops iterating before NULL is returned therefore still
 * holds the reference on the last rdev it was given.
 */
static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
        /* Iterate the disks of an mddev, using rcu to protect access to the
         * linked list, and raising the refcount of devices we return to ensure
         * they don't disappear while in use.
         * As devices are only added or removed when raid_disk is < 0 and
         * nr_pending is 0 and In_sync is clear, the entries we return will
         * still be in the same position on the list when we re-enter
         * list_for_each_entry_continue_rcu.
         *
         * Note that if entered with 'rdev == NULL' to start at the
         * beginning, we temporarily assign 'rdev' to an address which
         * isn't really an rdev, but which can be used by
         * list_for_each_entry_continue_rcu() to find the first entry.
         */
        rcu_read_lock();
        if (rdev == NULL)
                /* start at the beginning */
                rdev = list_entry(&mddev->disks, struct md_rdev, same_set);
        else {
                /* release the previous rdev and start from there. */
                rdev_dec_pending(rdev, mddev);
        }
        list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) {
                if (rdev->raid_disk >= 0 &&
                    !test_bit(Faulty, &rdev->flags)) {
                        /* this is a usable device */
                        atomic_inc(&rdev->nr_pending);
                        rcu_read_unlock();
                        return rdev;
                }
        }
        rcu_read_unlock();
        return NULL;
}
206
207 static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
208 {
209         struct md_rdev *rdev = NULL;
210         struct block_device *bdev;
211         struct mddev *mddev = bitmap->mddev;
212         struct bitmap_storage *store = &bitmap->storage;
213         int node_offset = 0;
214
215         if (mddev_is_clustered(bitmap->mddev))
216                 node_offset = bitmap->cluster_slot * store->file_pages;
217
218         while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
219                 int size = PAGE_SIZE;
220                 loff_t offset = mddev->bitmap_info.offset;
221
222                 bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
223
224                 if (page->index == store->file_pages-1) {
225                         int last_page_size = store->bytes & (PAGE_SIZE-1);
226                         if (last_page_size == 0)
227                                 last_page_size = PAGE_SIZE;
228                         size = roundup(last_page_size,
229                                        bdev_logical_block_size(bdev));
230                 }
231                 /* Just make sure we aren't corrupting data or
232                  * metadata
233                  */
234                 if (mddev->external) {
235                         /* Bitmap could be anywhere. */
236                         if (rdev->sb_start + offset + (page->index
237                                                        * (PAGE_SIZE/512))
238                             > rdev->data_offset
239                             &&
240                             rdev->sb_start + offset
241                             < (rdev->data_offset + mddev->dev_sectors
242                              + (PAGE_SIZE/512)))
243                                 goto bad_alignment;
244                 } else if (offset < 0) {
245                         /* DATA  BITMAP METADATA  */
246                         if (offset
247                             + (long)(page->index * (PAGE_SIZE/512))
248                             + size/512 > 0)
249                                 /* bitmap runs in to metadata */
250                                 goto bad_alignment;
251                         if (rdev->data_offset + mddev->dev_sectors
252                             > rdev->sb_start + offset)
253                                 /* data runs in to bitmap */
254                                 goto bad_alignment;
255                 } else if (rdev->sb_start < rdev->data_offset) {
256                         /* METADATA BITMAP DATA */
257                         if (rdev->sb_start
258                             + offset
259                             + page->index*(PAGE_SIZE/512) + size/512
260                             > rdev->data_offset)
261                                 /* bitmap runs in to data */
262                                 goto bad_alignment;
263                 } else {
264                         /* DATA METADATA BITMAP - no problems */
265                 }
266                 md_super_write(mddev, rdev,
267                                rdev->sb_start + offset
268                                + page->index * (PAGE_SIZE/512),
269                                size,
270                                page);
271         }
272
273         if (wait)
274                 md_super_wait(mddev);
275         return 0;
276
277  bad_alignment:
278         return -EINVAL;
279 }
280
281 static void bitmap_file_kick(struct bitmap *bitmap);
282 /*
283  * write out a page to a file
284  */
285 static void write_page(struct bitmap *bitmap, struct page *page, int wait)
286 {
287         struct buffer_head *bh;
288
289         if (bitmap->storage.file == NULL) {
290                 switch (write_sb_page(bitmap, page, wait)) {
291                 case -EINVAL:
292                         set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
293                 }
294         } else {
295
296                 bh = page_buffers(page);
297
298                 while (bh && bh->b_blocknr) {
299                         atomic_inc(&bitmap->pending_writes);
300                         set_buffer_locked(bh);
301                         set_buffer_mapped(bh);
302                         submit_bh(WRITE | REQ_SYNC, bh);
303                         bh = bh->b_this_page;
304                 }
305
306                 if (wait)
307                         wait_event(bitmap->write_wait,
308                                    atomic_read(&bitmap->pending_writes)==0);
309         }
310         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
311                 bitmap_file_kick(bitmap);
312 }
313
314 static void end_bitmap_write(struct buffer_head *bh, int uptodate)
315 {
316         struct bitmap *bitmap = bh->b_private;
317
318         if (!uptodate)
319                 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
320         if (atomic_dec_and_test(&bitmap->pending_writes))
321                 wake_up(&bitmap->write_wait);
322 }
323
/*
 * Detach the private (buffer) data from 'page': clear the Private flag,
 * zero the private field, and drop one page reference with
 * page_cache_release().
 *
 * copied from buffer.c
 */
static void
__clear_page_buffers(struct page *page)
{
        ClearPagePrivate(page);
        set_page_private(page, 0);
        page_cache_release(page);
}
332 static void free_buffers(struct page *page)
333 {
334         struct buffer_head *bh;
335
336         if (!PagePrivate(page))
337                 return;
338
339         bh = page_buffers(page);
340         while (bh) {
341                 struct buffer_head *next = bh->b_this_page;
342                 free_buffer_head(bh);
343                 bh = next;
344         }
345         __clear_page_buffers(page);
346         put_page(page);
347 }
348
/* read a page from a file.
 * We both read the page, and attach buffers to the page to record the
 * address of each block (using bmap).  These addresses will be used
 * to write the block later, completely bypassing the filesystem.
 * This usage is similar to how swap files are handled, and allows us
 * to write to a file with no concerns of memory allocation failing.
 *
 * 'index' is the page number within the file and 'count' the number of
 * bytes of this page that should be read; buffers past 'count' get
 * block number 0 and are not read.
 *
 * Returns 0 on success, -ENOMEM if the buffers cannot be allocated,
 * -EINVAL if bmap() reports block 0 for a needed block, or -EIO if
 * BITMAP_WRITE_ERROR is set once the reads have completed.
 *
 * NOTE(review): reads share pending_writes, write_wait, end_bitmap_write
 * and the BITMAP_WRITE_ERROR flag with the write path, so -EIO is also
 * returned if that flag was already set before this call.
 */
static int read_page(struct file *file, unsigned long index,
                     struct bitmap *bitmap,
                     unsigned long count,
                     struct page *page)
{
        int ret = 0;
        struct inode *inode = file_inode(file);
        struct buffer_head *bh;
        sector_t block;

        pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
                 (unsigned long long)index << PAGE_SHIFT);

        bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
        if (!bh) {
                ret = -ENOMEM;
                goto out;
        }
        attach_page_buffers(page, bh);
        /* first filesystem block covered by this page */
        block = index << (PAGE_SHIFT - inode->i_blkbits);
        while (bh) {
                if (count == 0)
                        /* past the requested data: mark as "no block" */
                        bh->b_blocknr = 0;
                else {
                        bh->b_blocknr = bmap(inode, block);
                        if (bh->b_blocknr == 0) {
                                /* Cannot use this file! */
                                /* the buffers stay attached to the page
                                 * on this path */
                                ret = -EINVAL;
                                goto out;
                        }
                        bh->b_bdev = inode->i_sb->s_bdev;
                        /* consume one block's worth of 'count',
                         * clamping at zero */
                        if (count < (1<<inode->i_blkbits))
                                count = 0;
                        else
                                count -= (1<<inode->i_blkbits);

                        bh->b_end_io = end_bitmap_write;
                        bh->b_private = bitmap;
                        atomic_inc(&bitmap->pending_writes);
                        set_buffer_locked(bh);
                        set_buffer_mapped(bh);
                        submit_bh(READ, bh);
                }
                block++;
                bh = bh->b_this_page;
        }
        page->index = index;

        /* wait for every submitted buffer to complete */
        wait_event(bitmap->write_wait,
                   atomic_read(&bitmap->pending_writes)==0);
        if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
                ret = -EIO;
out:
        if (ret)
                printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n",
                        (int)PAGE_SIZE,
                        (unsigned long long)index << PAGE_SHIFT,
                        ret);
        return ret;
}
416
417 /*
418  * bitmap file superblock operations
419  */
420
421 /* update the event counter and sync the superblock to disk */
422 void bitmap_update_sb(struct bitmap *bitmap)
423 {
424         bitmap_super_t *sb;
425
426         if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
427                 return;
428         if (bitmap->mddev->bitmap_info.external)
429                 return;
430         if (!bitmap->storage.sb_page) /* no superblock */
431                 return;
432         sb = kmap_atomic(bitmap->storage.sb_page);
433         sb->events = cpu_to_le64(bitmap->mddev->events);
434         if (bitmap->mddev->events < bitmap->events_cleared)
435                 /* rocking back to read-only */
436                 bitmap->events_cleared = bitmap->mddev->events;
437         sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
438         sb->state = cpu_to_le32(bitmap->flags);
439         /* Just in case these have been changed via sysfs: */
440         sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
441         sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
442         /* This might have been changed by a reshape */
443         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
444         sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
445         sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
446         sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
447                                            bitmap_info.space);
448         kunmap_atomic(sb);
449         write_page(bitmap, bitmap->storage.sb_page, 1);
450 }
451
/*
 * print out the bitmap file superblock
 *
 * Dumps the fields of the in-memory superblock page at KERN_DEBUG level.
 * Does nothing when 'bitmap' is NULL or has no superblock page.
 */
void bitmap_print_sb(struct bitmap *bitmap)
{
        bitmap_super_t *sb;

        if (!bitmap || !bitmap->storage.sb_page)
                return;
        sb = kmap_atomic(bitmap->storage.sb_page);
        printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
        printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
        printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
        /* the uuid is printed as four raw 32-bit words, without any
         * byte-order conversion */
        printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
                                        *(__u32 *)(sb->uuid+0),
                                        *(__u32 *)(sb->uuid+4),
                                        *(__u32 *)(sb->uuid+8),
                                        *(__u32 *)(sb->uuid+12));
        printk(KERN_DEBUG "        events: %llu\n",
                        (unsigned long long) le64_to_cpu(sb->events));
        printk(KERN_DEBUG "events cleared: %llu\n",
                        (unsigned long long) le64_to_cpu(sb->events_cleared));
        printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
        printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
        printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
        /* sync_size is halved before being labelled as KB */
        printk(KERN_DEBUG "     sync size: %llu KB\n",
                        (unsigned long long)le64_to_cpu(sb->sync_size)/2);
        printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
        kunmap_atomic(sb);
}
480
481 /*
482  * bitmap_new_disk_sb
483  * @bitmap
484  *
485  * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
486  * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
487  * This function verifies 'bitmap_info' and populates the on-disk bitmap
488  * structure, which is to be written to disk.
489  *
490  * Returns: 0 on success, -Exxx on error
491  */
492 static int bitmap_new_disk_sb(struct bitmap *bitmap)
493 {
494         bitmap_super_t *sb;
495         unsigned long chunksize, daemon_sleep, write_behind;
496
497         bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
498         if (bitmap->storage.sb_page == NULL)
499                 return -ENOMEM;
500         bitmap->storage.sb_page->index = 0;
501
502         sb = kmap_atomic(bitmap->storage.sb_page);
503
504         sb->magic = cpu_to_le32(BITMAP_MAGIC);
505         sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
506
507         chunksize = bitmap->mddev->bitmap_info.chunksize;
508         BUG_ON(!chunksize);
509         if (!is_power_of_2(chunksize)) {
510                 kunmap_atomic(sb);
511                 printk(KERN_ERR "bitmap chunksize not a power of 2\n");
512                 return -EINVAL;
513         }
514         sb->chunksize = cpu_to_le32(chunksize);
515
516         daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
517         if (!daemon_sleep ||
518             (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
519                 printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
520                 daemon_sleep = 5 * HZ;
521         }
522         sb->daemon_sleep = cpu_to_le32(daemon_sleep);
523         bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
524
525         /*
526          * FIXME: write_behind for RAID1.  If not specified, what
527          * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
528          */
529         write_behind = bitmap->mddev->bitmap_info.max_write_behind;
530         if (write_behind > COUNTER_MAX)
531                 write_behind = COUNTER_MAX / 2;
532         sb->write_behind = cpu_to_le32(write_behind);
533         bitmap->mddev->bitmap_info.max_write_behind = write_behind;
534
535         /* keep the array size field of the bitmap superblock up to date */
536         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
537
538         memcpy(sb->uuid, bitmap->mddev->uuid, 16);
539
540         set_bit(BITMAP_STALE, &bitmap->flags);
541         sb->state = cpu_to_le32(bitmap->flags);
542         bitmap->events_cleared = bitmap->mddev->events;
543         sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
544         bitmap->mddev->bitmap_info.nodes = 0;
545
546         kunmap_atomic(sb);
547
548         return 0;
549 }
550
/*
 * read the superblock from the bitmap file and initialize some bitmap fields
 *
 * With neither a backing file nor a bitmap offset configured there is no
 * superblock to read: defaults are chosen, the bitmap is marked
 * BITMAP_STALE and 0 is returned.
 *
 * Otherwise a page is allocated, stored in bitmap->storage.sb_page and
 * filled from the file or from a member device.  The superblock fields are
 * validated, and on a persistent array the UUID and event count are
 * compared with the mddev's.  If the superblock names a node count and no
 * cluster slot has been assigned yet, the cluster is set up and the
 * superblock is read again from this node's slot ("re_read").
 *
 * On exit through out_no_sb, chunksize, daemon_sleep, max_write_behind,
 * nodes and (conditionally) space in bitmap_info are updated.
 *
 * Returns 0 on success, -ENOMEM if the page cannot be allocated, the read
 * error if reading fails, -EINVAL for an invalid or mismatching superblock,
 * or the md_setup_cluster() error.
 */
static int bitmap_read_sb(struct bitmap *bitmap)
{
        char *reason = NULL;
        bitmap_super_t *sb;
        unsigned long chunksize, daemon_sleep, write_behind;
        unsigned long long events;
        int nodes = 0;
        unsigned long sectors_reserved = 0;
        int err = -EINVAL;
        struct page *sb_page;
        loff_t offset = bitmap->mddev->bitmap_info.offset;

        if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
                /* no on-disk superblock at all: use defaults */
                chunksize = 128 * 1024 * 1024;
                daemon_sleep = 5 * HZ;
                write_behind = 0;
                set_bit(BITMAP_STALE, &bitmap->flags);
                err = 0;
                goto out_no_sb;
        }
        /* page 0 is the superblock, read it... */
        sb_page = alloc_page(GFP_KERNEL);
        if (!sb_page)
                return -ENOMEM;
        bitmap->storage.sb_page = sb_page;

re_read:
        /* If cluster_slot is set, the cluster is setup */
        if (bitmap->cluster_slot >= 0) {
                sector_t bm_blocks = bitmap->mddev->resync_max_sectors;

                /* number of chunks (one bit each) covering the array */
                sector_div(bm_blocks,
                           bitmap->mddev->bitmap_info.chunksize >> 9);
                /* bits to bytes */
                bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
                /* to 4k blocks */
                bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
                /* each slot's bitmap follows the previous one; << 3 turns
                 * 4k blocks into 512-byte sectors */
                offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3));
                pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
                        bitmap->cluster_slot, offset);
        }

        if (bitmap->storage.file) {
                loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
                /* read at most one page, less if the file is shorter */
                int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;

                err = read_page(bitmap->storage.file, 0,
                                bitmap, bytes, sb_page);
        } else {
                err = read_sb_page(bitmap->mddev,
                                   offset,
                                   sb_page,
                                   0, sizeof(bitmap_super_t));
        }
        if (err)
                /* sb_page stays in bitmap->storage.sb_page on this path */
                return err;

        err = -EINVAL;
        sb = kmap_atomic(sb_page);

        chunksize = le32_to_cpu(sb->chunksize);
        /* the superblock value is multiplied by HZ to obtain jiffies */
        daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
        write_behind = le32_to_cpu(sb->write_behind);
        sectors_reserved = le32_to_cpu(sb->sectors_reserved);
        /* XXX: This is a hack to ensure that we don't use clustering
         *  in case:
         *      - dm-raid is in use and
         *      - the nodes written in bitmap_sb is erroneous.
         */
        if (!bitmap->mddev->sync_super) {
                nodes = le32_to_cpu(sb->nodes);
                strlcpy(bitmap->mddev->bitmap_info.cluster_name,
                                sb->cluster_name, 64);
        }

        /* verify that the bitmap-specific fields are valid */
        if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
                reason = "bad magic";
        else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
                 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
                reason = "unrecognized superblock version";
        else if (chunksize < 512)
                reason = "bitmap chunksize too small";
        else if (!is_power_of_2(chunksize))
                reason = "bitmap chunksize not a power of 2";
        else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
                reason = "daemon sleep period out of range";
        else if (write_behind > COUNTER_MAX)
                reason = "write-behind limit out of range (0 - 16383)";
        if (reason) {
                printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
                        bmname(bitmap), reason);
                goto out;
        }

        /* keep the array size field of the bitmap superblock up to date */
        sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);

        if (bitmap->mddev->persistent) {
                /*
                 * We have a persistent array superblock, so compare the
                 * bitmap's UUID and event counter to the mddev's
                 */
                if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
                        printk(KERN_INFO
                               "%s: bitmap superblock UUID mismatch\n",
                               bmname(bitmap));
                        goto out;
                }
                events = le64_to_cpu(sb->events);
                /* the event-count check is skipped when 'nodes' is set */
                if (!nodes && (events < bitmap->mddev->events)) {
                        printk(KERN_INFO
                               "%s: bitmap file is out of date (%llu < %llu) "
                               "-- forcing full recovery\n",
                               bmname(bitmap), events,
                               (unsigned long long) bitmap->mddev->events);
                        set_bit(BITMAP_STALE, &bitmap->flags);
                }
        }

        /* assign fields using values from superblock */
        bitmap->flags |= le32_to_cpu(sb->state);
        if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
                set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
        bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
        strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
        err = 0;

out:
        kunmap_atomic(sb);
        /* Assigning chunksize is required for "re_read" */
        bitmap->mddev->bitmap_info.chunksize = chunksize;
        if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
                /* clustered bitmap and no slot yet: join the cluster, then
                 * read the superblock that belongs to our slot */
                err = md_setup_cluster(bitmap->mddev, nodes);
                if (err) {
                        pr_err("%s: Could not setup cluster service (%d)\n",
                                        bmname(bitmap), err);
                        goto out_no_sb;
                }
                bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
                goto re_read;
        }


out_no_sb:
        if (test_bit(BITMAP_STALE, &bitmap->flags))
                bitmap->events_cleared = bitmap->mddev->events;
        bitmap->mddev->bitmap_info.chunksize = chunksize;
        bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
        bitmap->mddev->bitmap_info.max_write_behind = write_behind;
        bitmap->mddev->bitmap_info.nodes = nodes;
        /* only ever set or shrink the recorded space, never grow it */
        if (bitmap->mddev->bitmap_info.space == 0 ||
            bitmap->mddev->bitmap_info.space > sectors_reserved)
                bitmap->mddev->bitmap_info.space = sectors_reserved;
        if (err) {
                bitmap_print_sb(bitmap);
                if (bitmap->cluster_slot < 0)
                        md_cluster_stop(bitmap->mddev);
        }
        return err;
}
713
714 /*
715  * general bitmap file operations
716  */
717
718 /*
719  * on-disk bitmap:
720  *
721  * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
722  * file a page at a time. There's a superblock at the start of the file.
723  */
724 /* calculate the index of the page that contains this bit */
725 static inline unsigned long file_page_index(struct bitmap_storage *store,
726                                             unsigned long chunk)
727 {
728         if (store->sb_page)
729                 chunk += sizeof(bitmap_super_t) << 3;
730         return chunk >> PAGE_BIT_SHIFT;
731 }
732
733 /* calculate the (bit) offset of this bit within a page */
734 static inline unsigned long file_page_offset(struct bitmap_storage *store,
735                                              unsigned long chunk)
736 {
737         if (store->sb_page)
738                 chunk += sizeof(bitmap_super_t) << 3;
739         return chunk & (PAGE_BITS - 1);
740 }
741
742 /*
743  * return a pointer to the page in the filemap that contains the given bit
744  *
745  */
746 static inline struct page *filemap_get_page(struct bitmap_storage *store,
747                                             unsigned long chunk)
748 {
749         if (file_page_index(store, chunk) >= store->file_pages)
750                 return NULL;
751         return store->filemap[file_page_index(store, chunk)];
752 }
753
754 static int bitmap_storage_alloc(struct bitmap_storage *store,
755                                 unsigned long chunks, int with_super,
756                                 int slot_number)
757 {
758         int pnum, offset = 0;
759         unsigned long num_pages;
760         unsigned long bytes;
761
762         bytes = DIV_ROUND_UP(chunks, 8);
763         if (with_super)
764                 bytes += sizeof(bitmap_super_t);
765
766         num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
767         offset = slot_number * (num_pages - 1);
768
769         store->filemap = kmalloc(sizeof(struct page *)
770                                  * num_pages, GFP_KERNEL);
771         if (!store->filemap)
772                 return -ENOMEM;
773
774         if (with_super && !store->sb_page) {
775                 store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
776                 if (store->sb_page == NULL)
777                         return -ENOMEM;
778         }
779
780         pnum = 0;
781         if (store->sb_page) {
782                 store->filemap[0] = store->sb_page;
783                 pnum = 1;
784                 store->sb_page->index = offset;
785         }
786
787         for ( ; pnum < num_pages; pnum++) {
788                 store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
789                 if (!store->filemap[pnum]) {
790                         store->file_pages = pnum;
791                         return -ENOMEM;
792                 }
793                 store->filemap[pnum]->index = pnum + offset;
794         }
795         store->file_pages = pnum;
796
797         /* We need 4 bits per page, rounded up to a multiple
798          * of sizeof(unsigned long) */
799         store->filemap_attr = kzalloc(
800                 roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
801                 GFP_KERNEL);
802         if (!store->filemap_attr)
803                 return -ENOMEM;
804
805         store->bytes = bytes;
806
807         return 0;
808 }
809
810 static void bitmap_file_unmap(struct bitmap_storage *store)
811 {
812         struct page **map, *sb_page;
813         int pages;
814         struct file *file;
815
816         file = store->file;
817         map = store->filemap;
818         pages = store->file_pages;
819         sb_page = store->sb_page;
820
821         while (pages--)
822                 if (map[pages] != sb_page) /* 0 is sb_page, release it below */
823                         free_buffers(map[pages]);
824         kfree(map);
825         kfree(store->filemap_attr);
826
827         if (sb_page)
828                 free_buffers(sb_page);
829
830         if (file) {
831                 struct inode *inode = file_inode(file);
832                 invalidate_mapping_pages(inode->i_mapping, 0, -1);
833                 fput(file);
834         }
835 }
836
837 /*
838  * bitmap_file_kick - if an error occurs while manipulating the bitmap file
839  * then it is no longer reliable, so we stop using it and we mark the file
840  * as failed in the superblock
841  */
842 static void bitmap_file_kick(struct bitmap *bitmap)
843 {
844         char *path, *ptr = NULL;
845
846         if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
847                 bitmap_update_sb(bitmap);
848
849                 if (bitmap->storage.file) {
850                         path = kmalloc(PAGE_SIZE, GFP_KERNEL);
851                         if (path)
852                                 ptr = file_path(bitmap->storage.file,
853                                              path, PAGE_SIZE);
854
855                         printk(KERN_ALERT
856                               "%s: kicking failed bitmap file %s from array!\n",
857                               bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
858
859                         kfree(path);
860                 } else
861                         printk(KERN_ALERT
862                                "%s: disabling internal bitmap due to errors\n",
863                                bmname(bitmap));
864         }
865 }
866
/*
 * Per-page attribute flags.  The page-attr helpers in this file address
 * them as bit (pnum << 2) + attr in storage.filemap_attr, so each page
 * has a group of four bits of which three are defined here.
 */
enum bitmap_page_attr {
        BITMAP_PAGE_DIRTY = 0,     /* there are set bits that need to be synced */
        BITMAP_PAGE_PENDING = 1,   /* there are bits that are being cleaned.
                                    * i.e. counter is 1 or 2. */
        BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */
};
873
874 static inline void set_page_attr(struct bitmap *bitmap, int pnum,
875                                  enum bitmap_page_attr attr)
876 {
877         set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
878 }
879
880 static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
881                                    enum bitmap_page_attr attr)
882 {
883         clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
884 }
885
886 static inline int test_page_attr(struct bitmap *bitmap, int pnum,
887                                  enum bitmap_page_attr attr)
888 {
889         return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
890 }
891
892 static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
893                                            enum bitmap_page_attr attr)
894 {
895         return test_and_clear_bit((pnum<<2) + attr,
896                                   bitmap->storage.filemap_attr);
897 }
898 /*
899  * bitmap_file_set_bit -- called before performing a write to the md device
900  * to set (and eventually sync) a particular bit in the bitmap file
901  *
902  * we set the bit immediately, then we record the page number so that
903  * when an unplug occurs, we can flush the dirty pages out to disk
904  */
905 static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
906 {
907         unsigned long bit;
908         struct page *page;
909         void *kaddr;
910         unsigned long chunk = block >> bitmap->counts.chunkshift;
911
912         page = filemap_get_page(&bitmap->storage, chunk);
913         if (!page)
914                 return;
915         bit = file_page_offset(&bitmap->storage, chunk);
916
917         /* set the bit */
918         kaddr = kmap_atomic(page);
919         if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
920                 set_bit(bit, kaddr);
921         else
922                 set_bit_le(bit, kaddr);
923         kunmap_atomic(kaddr);
924         pr_debug("set file bit %lu page %lu\n", bit, page->index);
925         /* record page number so it gets flushed to disk when unplug occurs */
926         set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
927 }
928
929 static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
930 {
931         unsigned long bit;
932         struct page *page;
933         void *paddr;
934         unsigned long chunk = block >> bitmap->counts.chunkshift;
935
936         page = filemap_get_page(&bitmap->storage, chunk);
937         if (!page)
938                 return;
939         bit = file_page_offset(&bitmap->storage, chunk);
940         paddr = kmap_atomic(page);
941         if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
942                 clear_bit(bit, paddr);
943         else
944                 clear_bit_le(bit, paddr);
945         kunmap_atomic(paddr);
946         if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
947                 set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
948                 bitmap->allclean = 0;
949         }
950 }
951
952 static int bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
953 {
954         unsigned long bit;
955         struct page *page;
956         void *paddr;
957         unsigned long chunk = block >> bitmap->counts.chunkshift;
958         int set = 0;
959
960         page = filemap_get_page(&bitmap->storage, chunk);
961         if (!page)
962                 return -EINVAL;
963         bit = file_page_offset(&bitmap->storage, chunk);
964         paddr = kmap_atomic(page);
965         if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
966                 set = test_bit(bit, paddr);
967         else
968                 set = test_bit_le(bit, paddr);
969         kunmap_atomic(paddr);
970         return set;
971 }
972
973
974 /* this gets called when the md device is ready to unplug its underlying
975  * (slave) device queues -- before we let any writes go down, we need to
976  * sync the dirty pages of the bitmap file to disk */
977 void bitmap_unplug(struct bitmap *bitmap)
978 {
979         unsigned long i;
980         int dirty, need_write;
981
982         if (!bitmap || !bitmap->storage.filemap ||
983             test_bit(BITMAP_STALE, &bitmap->flags))
984                 return;
985
986         /* look at each page to see if there are any set bits that need to be
987          * flushed out to disk */
988         for (i = 0; i < bitmap->storage.file_pages; i++) {
989                 if (!bitmap->storage.filemap)
990                         return;
991                 dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
992                 need_write = test_and_clear_page_attr(bitmap, i,
993                                                       BITMAP_PAGE_NEEDWRITE);
994                 if (dirty || need_write) {
995                         clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
996                         write_page(bitmap, bitmap->storage.filemap[i], 0);
997                 }
998         }
999         if (bitmap->storage.file)
1000                 wait_event(bitmap->write_wait,
1001                            atomic_read(&bitmap->pending_writes)==0);
1002         else
1003                 md_super_wait(bitmap->mddev);
1004
1005         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1006                 bitmap_file_kick(bitmap);
1007 }
1008 EXPORT_SYMBOL(bitmap_unplug);
1009
static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
/*
 * bitmap_init_from_disk -- called at bitmap_create time to initialize
 * the in-memory bitmap from the on-disk bitmap -- also, sets up the
 * memory mapping of the bitmap file
 * Special cases:
 *   if there's no bitmap file, or if the bitmap file had been
 *   previously kicked from the array, we mark all the bits as
 *   1's in order to cause a full resync.
 *
 * We ignore all bits for sectors that end earlier than 'start'.
 * This is used when reading an out-of-date bitmap...
 *
 * Returns 0 on success.  On failure returns -ENOSPC if the bitmap file is
 * shorter than store->bytes, the error from read_page()/read_sb_page(),
 * or -EIO if BITMAP_WRITE_ERROR is set after rewriting a stale page.
 */
static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
{
        unsigned long i, chunks, index, oldindex, bit, node_offset = 0;
        struct page *page = NULL;
        unsigned long bit_cnt = 0;
        struct file *file;
        unsigned long offset;
        int outofdate;
        int ret = -ENOSPC;
        void *paddr;
        struct bitmap_storage *store = &bitmap->storage;

        chunks = bitmap->counts.chunks;
        file = store->file;

        if (!file && !bitmap->mddev->bitmap_info.offset) {
                /* No permanent bitmap - fill with '1s'. */
                store->filemap = NULL;
                store->file_pages = 0;
                for (i = 0; i < chunks ; i++) {
                        /* there is no disk bit to consult: set the memory
                         * bits of every chunk; 'needed' is true when the
                         * chunk ends at or after 'start' */
                        int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
                                      >= start);
                        bitmap_set_memory_bits(bitmap,
                                               (sector_t)i << bitmap->counts.chunkshift,
                                               needed);
                }
                return 0;
        }

        outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
        if (outofdate)
                printk(KERN_INFO "%s: bitmap file is out of date, doing full "
                        "recovery\n", bmname(bitmap));

        if (file && i_size_read(file->f_mapping->host) < store->bytes) {
                printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
                       bmname(bitmap),
                       (unsigned long) i_size_read(file->f_mapping->host),
                       store->bytes);
                goto err;
        }

        /* ~0L never matches a real page index, so the first chunk always
         * triggers a page read */
        oldindex = ~0L;
        /* 'offset' is the number of leading bytes to preserve when a stale
         * page is overwritten with 1s: the superblock on the first page of
         * a non-external bitmap, nothing afterwards (it is reset to 0 at
         * the bottom of the loop) */
        offset = 0;
        if (!bitmap->mddev->bitmap_info.external)
                offset = sizeof(bitmap_super_t);

        if (mddev_is_clustered(bitmap->mddev))
                node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));

        for (i = 0; i < chunks; i++) {
                int b;
                index = file_page_index(&bitmap->storage, i);
                bit = file_page_offset(&bitmap->storage, i);
                if (index != oldindex) { /* this is a new page, read it in */
                        int count;
                        /* the last page may be only partly used; read just
                         * the bytes that belong to the bitmap */
                        if (index == store->file_pages-1)
                                count = store->bytes - index * PAGE_SIZE;
                        else
                                count = PAGE_SIZE;
                        page = store->filemap[index];
                        if (file)
                                ret = read_page(file, index, bitmap,
                                                count, page);
                        else
                                ret = read_sb_page(
                                        bitmap->mddev,
                                        bitmap->mddev->bitmap_info.offset,
                                        page,
                                        index + node_offset, count);

                        if (ret)
                                goto err;

                        oldindex = index;

                        if (outofdate) {
                                /*
                                 * if bitmap is out of date, dirty the
                                 * whole page and write it out
                                 */
                                paddr = kmap_atomic(page);
                                memset(paddr + offset, 0xff,
                                       PAGE_SIZE - offset);
                                kunmap_atomic(paddr);
                                write_page(bitmap, page, 1);

                                /* pre-load the error reported if the write
                                 * above flagged a failure */
                                ret = -EIO;
                                if (test_bit(BITMAP_WRITE_ERROR,
                                             &bitmap->flags))
                                        goto err;
                        }
                }
                paddr = kmap_atomic(page);
                if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
                        b = test_bit(bit, paddr);
                else
                        b = test_bit_le(bit, paddr);
                kunmap_atomic(paddr);
                if (b) {
                        /* if the disk bit is set, set the memory bit */
                        int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
                                      >= start);
                        bitmap_set_memory_bits(bitmap,
                                               (sector_t)i << bitmap->counts.chunkshift,
                                               needed);
                        bit_cnt++;
                }
                offset = 0;
        }

        printk(KERN_INFO "%s: bitmap initialized from disk: "
               "read %lu pages, set %lu of %lu bits\n",
               bmname(bitmap), store->file_pages,
               bit_cnt, chunks);

        return 0;

 err:
        printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
               bmname(bitmap), ret);
        return ret;
}
1147
1148 void bitmap_write_all(struct bitmap *bitmap)
1149 {
1150         /* We don't actually write all bitmap blocks here,
1151          * just flag them as needing to be written
1152          */
1153         int i;
1154
1155         if (!bitmap || !bitmap->storage.filemap)
1156                 return;
1157         if (bitmap->storage.file)
1158                 /* Only one copy, so nothing needed */
1159                 return;
1160
1161         for (i = 0; i < bitmap->storage.file_pages; i++)
1162                 set_page_attr(bitmap, i,
1163                               BITMAP_PAGE_NEEDWRITE);
1164         bitmap->allclean = 0;
1165 }
1166
1167 static void bitmap_count_page(struct bitmap_counts *bitmap,
1168                               sector_t offset, int inc)
1169 {
1170         sector_t chunk = offset >> bitmap->chunkshift;
1171         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1172         bitmap->bp[page].count += inc;
1173         bitmap_checkfree(bitmap, page);
1174 }
1175
1176 static void bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
1177 {
1178         sector_t chunk = offset >> bitmap->chunkshift;
1179         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1180         struct bitmap_page *bp = &bitmap->bp[page];
1181
1182         if (!bp->pending)
1183                 bp->pending = 1;
1184 }
1185
static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
                                            sector_t offset, sector_t *blocks,
                                            int create);

/*
 * bitmap daemon -- periodically wakes up to clean bits and flush pages
 *                      out to disk
 *
 * Runs under mddev->bitmap_info.mutex.  One pass:
 *  - promotes PENDING file pages to NEEDWRITE,
 *  - refreshes events_cleared in the superblock page if need_sync is set
 *    and the bitmap is not external,
 *  - ages the in-memory counters (2 -> 1, 1 -> 0 with the file bit
 *    cleared),
 *  - starts writeout of NEEDWRITE pages up to the first DIRTY page.
 * The md thread timeout is set according to whether work remains.
 */

void bitmap_daemon_work(struct mddev *mddev)
{
        struct bitmap *bitmap;
        unsigned long j;
        unsigned long nextpage;
        sector_t blocks;
        struct bitmap_counts *counts;

        /* Use a mutex to guard daemon_work against
         * bitmap_destroy.
         */
        mutex_lock(&mddev->bitmap_info.mutex);
        bitmap = mddev->bitmap;
        if (bitmap == NULL) {
                mutex_unlock(&mddev->bitmap_info.mutex);
                return;
        }
        /* not yet due: less than daemon_sleep since the last run */
        if (time_before(jiffies, bitmap->daemon_lastrun
                        + mddev->bitmap_info.daemon_sleep))
                goto done;

        bitmap->daemon_lastrun = jiffies;
        if (bitmap->allclean) {
                /* nothing to do: stop the periodic wakeups */
                mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
                goto done;
        }
        /* assume clean; anything found below that still needs work
         * resets this to 0 */
        bitmap->allclean = 1;

        /* Any file-page which is PENDING now needs to be written.
         * So set NEEDWRITE now, then after we make any last-minute changes
         * we will write it.
         */
        for (j = 0; j < bitmap->storage.file_pages; j++)
                if (test_and_clear_page_attr(bitmap, j,
                                             BITMAP_PAGE_PENDING))
                        set_page_attr(bitmap, j,
                                      BITMAP_PAGE_NEEDWRITE);

        if (bitmap->need_sync &&
            mddev->bitmap_info.external == 0) {
                /* Arrange for superblock update as well as
                 * other changes */
                bitmap_super_t *sb;
                bitmap->need_sync = 0;
                if (bitmap->storage.filemap) {
                        sb = kmap_atomic(bitmap->storage.sb_page);
                        sb->events_cleared =
                                cpu_to_le64(bitmap->events_cleared);
                        kunmap_atomic(sb);
                        /* page 0 is flagged so the updated superblock
                         * gets written */
                        set_page_attr(bitmap, 0,
                                      BITMAP_PAGE_NEEDWRITE);
                }
        }
        /* Now look at the bitmap counters and if any are '2' or '1',
         * decrement and handle accordingly.
         */
        counts = &bitmap->counts;
        spin_lock_irq(&counts->lock);
        nextpage = 0;
        for (j = 0; j < counts->chunks; j++) {
                bitmap_counter_t *bmc;
                sector_t  block = (sector_t)j << counts->chunkshift;

                if (j == nextpage) {
                        /* first chunk of a counter page: skip the whole
                         * page unless it was flagged pending */
                        nextpage += PAGE_COUNTER_RATIO;
                        if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
                                /* jump to the end of this counter page;
                                 * the loop's j++ then moves to the next */
                                j |= PAGE_COUNTER_MASK;
                                continue;
                        }
                        counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
                }
                bmc = bitmap_get_counter(counts,
                                         block,
                                         &blocks, 0);

                if (!bmc) {
                        /* no counter available here: skip the rest of
                         * this counter page */
                        j |= PAGE_COUNTER_MASK;
                        continue;
                }
                if (*bmc == 1 && !bitmap->need_sync) {
                        /* We can clear the bit */
                        *bmc = 0;
                        bitmap_count_page(counts, block, -1);
                        bitmap_file_clear_bit(bitmap, block);
                } else if (*bmc && *bmc <= 2) {
                        /* age the counter; it is revisited on a later
                         * pass, so the page stays pending */
                        *bmc = 1;
                        bitmap_set_pending(counts, block);
                        bitmap->allclean = 0;
                }
        }
        spin_unlock_irq(&counts->lock);

        /* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
         * DIRTY pages need to be written by bitmap_unplug so it can wait
         * for them.
         * If we find any DIRTY page we stop there and let bitmap_unplug
         * handle all the rest.  This is important in the case where
         * the first block holds the superblock and it has been updated.
         * We mustn't write any other blocks before the superblock.
         */
        for (j = 0;
             j < bitmap->storage.file_pages
                     && !test_bit(BITMAP_STALE, &bitmap->flags);
             j++) {
                if (test_page_attr(bitmap, j,
                                   BITMAP_PAGE_DIRTY))
                        /* bitmap_unplug will handle the rest */
                        break;
                if (test_and_clear_page_attr(bitmap, j,
                                             BITMAP_PAGE_NEEDWRITE)) {
                        write_page(bitmap, bitmap->storage.filemap[j], 0);
                }
        }

 done:
        /* work remains: make sure the thread wakes up again after
         * daemon_sleep */
        if (bitmap->allclean == 0)
                mddev->thread->timeout =
                        mddev->bitmap_info.daemon_sleep;
        mutex_unlock(&mddev->bitmap_info.mutex);
}
1315
/*
 * bitmap_get_counter - find the in-memory counter covering sector @offset
 * @bitmap: counter set to search (caller holds bitmap->lock)
 * @offset: sector of interest
 * @blocks: out: number of sectors from @offset to the end of the range
 *          covered by the counter; written even when NULL is returned
 * @create: passed on to bitmap_checkpage()
 *
 * Returns a pointer to the counter, or NULL if bitmap_checkpage() reports
 * an error.
 */
static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
                                            sector_t offset, sector_t *blocks,
                                            int create)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
        /* If 'create', we might release the lock and reclaim it.
         * The lock must have been taken with interrupts enabled.
         * If !create, we don't release the lock.
         */
        sector_t chunk = offset >> bitmap->chunkshift;
        unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
        unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
        sector_t csize;
        int err;

        err = bitmap_checkpage(bitmap, page, create);

        /* A hijacked or unallocated page has no per-chunk counters, so the
         * range covered by one counter is larger by a factor of
         * 2^(PAGE_COUNTER_SHIFT - 1) than the single chunk covered on an
         * allocated page. */
        if (bitmap->bp[page].hijacked ||
            bitmap->bp[page].map == NULL)
                csize = ((sector_t)1) << (bitmap->chunkshift +
                                          PAGE_COUNTER_SHIFT - 1);
        else
                csize = ((sector_t)1) << bitmap->chunkshift;
        /* sectors remaining in the csize-aligned range containing offset */
        *blocks = csize - (offset & (csize - 1));

        if (err < 0)
                return NULL;

        /* now locked ... */

        if (bitmap->bp[page].hijacked) { /* hijacked pointer */
                /* should we use the first or second counter field
                 * of the hijacked pointer? */
                int hi = (pageoff > PAGE_COUNTER_MASK);
                /* the storage of the 'map' pointer itself is reused as
                 * counters */
                return  &((bitmap_counter_t *)
                          &bitmap->bp[page].map)[hi];
        } else /* page is allocated */
                return (bitmap_counter_t *)
                        &(bitmap->bp[page].map[pageoff]);
}
1357
/*
 * bitmap_startwrite - account for a write about to be issued
 * @bitmap:  bitmap to update; NULL is accepted and ignored
 * @offset:  first sector of the write
 * @sectors: length of the write in sectors
 * @behind:  non-zero if this is a write-behind request
 *
 * Walks the range one counter at a time and increments each counter.  A
 * counter that was 0 also gets its file bit set and its page count
 * bumped.  Sleeps while a counter is at COUNTER_MAX.
 *
 * Always returns 0, including when no counter could be obtained and the
 * walk stops early.
 */
int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
{
        if (!bitmap)
                return 0;

        if (behind) {
                int bw;
                atomic_inc(&bitmap->behind_writes);
                bw = atomic_read(&bitmap->behind_writes);
                /* track the high-water mark of concurrent behind writes */
                if (bw > bitmap->behind_writes_used)
                        bitmap->behind_writes_used = bw;

                pr_debug("inc write-behind count %d/%lu\n",
                         bw, bitmap->mddev->bitmap_info.max_write_behind);
        }

        while (sectors) {
                sector_t blocks;
                bitmap_counter_t *bmc;

                spin_lock_irq(&bitmap->counts.lock);
                bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
                if (!bmc) {
                        spin_unlock_irq(&bitmap->counts.lock);
                        return 0;
                }

                if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
                        DEFINE_WAIT(__wait);
                        /* note that it is safe to do the prepare_to_wait
                         * after the test as long as we do it before dropping
                         * the spinlock.
                         */
                        prepare_to_wait(&bitmap->overflow_wait, &__wait,
                                        TASK_UNINTERRUPTIBLE);
                        spin_unlock_irq(&bitmap->counts.lock);
                        schedule();
                        finish_wait(&bitmap->overflow_wait, &__wait);
                        /* retry the same offset once woken */
                        continue;
                }

                /* counters 0 and 1 are both raised to 2 here, so after the
                 * increment below they read 3 */
                switch (*bmc) {
                case 0:
                        bitmap_file_set_bit(bitmap, offset);
                        bitmap_count_page(&bitmap->counts, offset, 1);
                        /* fall through */
                case 1:
                        *bmc = 2;
                }

                (*bmc)++;

                spin_unlock_irq(&bitmap->counts.lock);

                /* advance by the range this counter covered */
                offset += blocks;
                if (sectors > blocks)
                        sectors -= blocks;
                else
                        sectors = 0;
        }
        return 0;
}
EXPORT_SYMBOL(bitmap_startwrite);
1421
1422 void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
1423                      int success, int behind)
1424 {
1425         if (!bitmap)
1426                 return;
1427         if (behind) {
1428                 if (atomic_dec_and_test(&bitmap->behind_writes))
1429                         wake_up(&bitmap->behind_wait);
1430                 pr_debug("dec write-behind count %d/%lu\n",
1431                          atomic_read(&bitmap->behind_writes),
1432                          bitmap->mddev->bitmap_info.max_write_behind);
1433         }
1434
1435         while (sectors) {
1436                 sector_t blocks;
1437                 unsigned long flags;
1438                 bitmap_counter_t *bmc;
1439
1440                 spin_lock_irqsave(&bitmap->counts.lock, flags);
1441                 bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
1442                 if (!bmc) {
1443                         spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1444                         return;
1445                 }
1446
1447                 if (success && !bitmap->mddev->degraded &&
1448                     bitmap->events_cleared < bitmap->mddev->events) {
1449                         bitmap->events_cleared = bitmap->mddev->events;
1450                         bitmap->need_sync = 1;
1451                         sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
1452                 }
1453
1454                 if (!success && !NEEDED(*bmc))
1455                         *bmc |= NEEDED_MASK;
1456
1457                 if (COUNTER(*bmc) == COUNTER_MAX)
1458                         wake_up(&bitmap->overflow_wait);
1459
1460                 (*bmc)--;
1461                 if (*bmc <= 2) {
1462                         bitmap_set_pending(&bitmap->counts, offset);
1463                         bitmap->allclean = 0;
1464                 }
1465                 spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1466                 offset += blocks;
1467                 if (sectors > blocks)
1468                         sectors -= blocks;
1469                 else
1470                         sectors = 0;
1471         }
1472 }
1473 EXPORT_SYMBOL(bitmap_endwrite);
1474
1475 static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
1476                                int degraded)
1477 {
1478         bitmap_counter_t *bmc;
1479         int rv;
1480         if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
1481                 *blocks = 1024;
1482                 return 1; /* always resync if no bitmap */
1483         }
1484         spin_lock_irq(&bitmap->counts.lock);
1485         bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1486         rv = 0;
1487         if (bmc) {
1488                 /* locked */
1489                 if (RESYNC(*bmc))
1490                         rv = 1;
1491                 else if (NEEDED(*bmc)) {
1492                         rv = 1;
1493                         if (!degraded) { /* don't set/clear bits if degraded */
1494                                 *bmc |= RESYNC_MASK;
1495                                 *bmc &= ~NEEDED_MASK;
1496                         }
1497                 }
1498         }
1499         spin_unlock_irq(&bitmap->counts.lock);
1500         return rv;
1501 }
1502
1503 int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
1504                       int degraded)
1505 {
1506         /* bitmap_start_sync must always report on multiples of whole
1507          * pages, otherwise resync (which is very PAGE_SIZE based) will
1508          * get confused.
1509          * So call __bitmap_start_sync repeatedly (if needed) until
1510          * At least PAGE_SIZE>>9 blocks are covered.
1511          * Return the 'or' of the result.
1512          */
1513         int rv = 0;
1514         sector_t blocks1;
1515
1516         *blocks = 0;
1517         while (*blocks < (PAGE_SIZE>>9)) {
1518                 rv |= __bitmap_start_sync(bitmap, offset,
1519                                           &blocks1, degraded);
1520                 offset += blocks1;
1521                 *blocks += blocks1;
1522         }
1523         return rv;
1524 }
1525 EXPORT_SYMBOL(bitmap_start_sync);
1526
1527 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
1528 {
1529         bitmap_counter_t *bmc;
1530         unsigned long flags;
1531
1532         if (bitmap == NULL) {
1533                 *blocks = 1024;
1534                 return;
1535         }
1536         spin_lock_irqsave(&bitmap->counts.lock, flags);
1537         bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1538         if (bmc == NULL)
1539                 goto unlock;
1540         /* locked */
1541         if (RESYNC(*bmc)) {
1542                 *bmc &= ~RESYNC_MASK;
1543
1544                 if (!NEEDED(*bmc) && aborted)
1545                         *bmc |= NEEDED_MASK;
1546                 else {
1547                         if (*bmc <= 2) {
1548                                 bitmap_set_pending(&bitmap->counts, offset);
1549                                 bitmap->allclean = 0;
1550                         }
1551                 }
1552         }
1553  unlock:
1554         spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1555 }
1556 EXPORT_SYMBOL(bitmap_end_sync);
1557
1558 void bitmap_close_sync(struct bitmap *bitmap)
1559 {
1560         /* Sync has finished, and any bitmap chunks that weren't synced
1561          * properly have been aborted.  It remains to us to clear the
1562          * RESYNC bit wherever it is still on
1563          */
1564         sector_t sector = 0;
1565         sector_t blocks;
1566         if (!bitmap)
1567                 return;
1568         while (sector < bitmap->mddev->resync_max_sectors) {
1569                 bitmap_end_sync(bitmap, sector, &blocks, 0);
1570                 sector += blocks;
1571         }
1572 }
1573 EXPORT_SYMBOL(bitmap_close_sync);
1574
1575 void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
1576 {
1577         sector_t s = 0;
1578         sector_t blocks;
1579
1580         if (!bitmap)
1581                 return;
1582         if (sector == 0) {
1583                 bitmap->last_end_sync = jiffies;
1584                 return;
1585         }
1586         if (time_before(jiffies, (bitmap->last_end_sync
1587                                   + bitmap->mddev->bitmap_info.daemon_sleep)))
1588                 return;
1589         wait_event(bitmap->mddev->recovery_wait,
1590                    atomic_read(&bitmap->mddev->recovery_active) == 0);
1591
1592         bitmap->mddev->curr_resync_completed = sector;
1593         set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
1594         sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
1595         s = 0;
1596         while (s < sector && s < bitmap->mddev->resync_max_sectors) {
1597                 bitmap_end_sync(bitmap, s, &blocks, 0);
1598                 s += blocks;
1599         }
1600         bitmap->last_end_sync = jiffies;
1601         sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed");
1602 }
1603 EXPORT_SYMBOL(bitmap_cond_end_sync);
1604
1605 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
1606 {
1607         /* For each chunk covered by any of these sectors, set the
1608          * counter to 2 and possibly set resync_needed.  They should all
1609          * be 0 at this point
1610          */
1611
1612         sector_t secs;
1613         bitmap_counter_t *bmc;
1614         spin_lock_irq(&bitmap->counts.lock);
1615         bmc = bitmap_get_counter(&bitmap->counts, offset, &secs, 1);
1616         if (!bmc) {
1617                 spin_unlock_irq(&bitmap->counts.lock);
1618                 return;
1619         }
1620         if (!*bmc) {
1621                 *bmc = 2;
1622                 bitmap_count_page(&bitmap->counts, offset, 1);
1623                 bitmap_set_pending(&bitmap->counts, offset);
1624                 bitmap->allclean = 0;
1625         }
1626         if (needed)
1627                 *bmc |= NEEDED_MASK;
1628         spin_unlock_irq(&bitmap->counts.lock);
1629 }
1630
1631 /* dirty the memory and file bits for bitmap chunks "s" to "e" */
1632 void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
1633 {
1634         unsigned long chunk;
1635
1636         for (chunk = s; chunk <= e; chunk++) {
1637                 sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
1638                 bitmap_set_memory_bits(bitmap, sec, 1);
1639                 bitmap_file_set_bit(bitmap, sec);
1640                 if (sec < bitmap->mddev->recovery_cp)
1641                         /* We are asserting that the array is dirty,
1642                          * so move the recovery_cp address back so
1643                          * that it is obvious that it is dirty
1644                          */
1645                         bitmap->mddev->recovery_cp = sec;
1646         }
1647 }
1648
1649 /*
1650  * flush out any pending updates
1651  */
1652 void bitmap_flush(struct mddev *mddev)
1653 {
1654         struct bitmap *bitmap = mddev->bitmap;
1655         long sleep;
1656
1657         if (!bitmap) /* there was no bitmap */
1658                 return;
1659
1660         /* run the daemon_work three time to ensure everything is flushed
1661          * that can be
1662          */
1663         sleep = mddev->bitmap_info.daemon_sleep * 2;
1664         bitmap->daemon_lastrun -= sleep;
1665         bitmap_daemon_work(mddev);
1666         bitmap->daemon_lastrun -= sleep;
1667         bitmap_daemon_work(mddev);
1668         bitmap->daemon_lastrun -= sleep;
1669         bitmap_daemon_work(mddev);
1670         bitmap_update_sb(bitmap);
1671 }
1672
1673 /*
1674  * free memory that was allocated
1675  */
1676 static void bitmap_free(struct bitmap *bitmap)
1677 {
1678         unsigned long k, pages;
1679         struct bitmap_page *bp;
1680
1681         if (!bitmap) /* there was no bitmap */
1682                 return;
1683
1684         if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
1685                 bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev))
1686                 md_cluster_stop(bitmap->mddev);
1687
1688         /* Shouldn't be needed - but just in case.... */
1689         wait_event(bitmap->write_wait,
1690                    atomic_read(&bitmap->pending_writes) == 0);
1691
1692         /* release the bitmap file  */
1693         bitmap_file_unmap(&bitmap->storage);
1694
1695         bp = bitmap->counts.bp;
1696         pages = bitmap->counts.pages;
1697
1698         /* free all allocated memory */
1699
1700         if (bp) /* deallocate the page memory */
1701                 for (k = 0; k < pages; k++)
1702                         if (bp[k].map && !bp[k].hijacked)
1703                                 kfree(bp[k].map);
1704         kfree(bp);
1705         kfree(bitmap);
1706 }
1707
1708 void bitmap_destroy(struct mddev *mddev)
1709 {
1710         struct bitmap *bitmap = mddev->bitmap;
1711
1712         if (!bitmap) /* there was no bitmap */
1713                 return;
1714
1715         mutex_lock(&mddev->bitmap_info.mutex);
1716         spin_lock(&mddev->lock);
1717         mddev->bitmap = NULL; /* disconnect from the md device */
1718         spin_unlock(&mddev->lock);
1719         mutex_unlock(&mddev->bitmap_info.mutex);
1720         if (mddev->thread)
1721                 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1722
1723         if (bitmap->sysfs_can_clear)
1724                 sysfs_put(bitmap->sysfs_can_clear);
1725
1726         bitmap_free(bitmap);
1727 }
1728
1729 /*
1730  * initialize the bitmap structure
1731  * if this returns an error, bitmap_destroy must be called to do clean up
1732  */
1733 struct bitmap *bitmap_create(struct mddev *mddev, int slot)
1734 {
1735         struct bitmap *bitmap;
1736         sector_t blocks = mddev->resync_max_sectors;
1737         struct file *file = mddev->bitmap_info.file;
1738         int err;
1739         struct kernfs_node *bm = NULL;
1740
1741         BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
1742
1743         BUG_ON(file && mddev->bitmap_info.offset);
1744
1745         bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
1746         if (!bitmap)
1747                 return ERR_PTR(-ENOMEM);
1748
1749         spin_lock_init(&bitmap->counts.lock);
1750         atomic_set(&bitmap->pending_writes, 0);
1751         init_waitqueue_head(&bitmap->write_wait);
1752         init_waitqueue_head(&bitmap->overflow_wait);
1753         init_waitqueue_head(&bitmap->behind_wait);
1754
1755         bitmap->mddev = mddev;
1756         bitmap->cluster_slot = slot;
1757
1758         if (mddev->kobj.sd)
1759                 bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
1760         if (bm) {
1761                 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
1762                 sysfs_put(bm);
1763         } else
1764                 bitmap->sysfs_can_clear = NULL;
1765
1766         bitmap->storage.file = file;
1767         if (file) {
1768                 get_file(file);
1769                 /* As future accesses to this file will use bmap,
1770                  * and bypass the page cache, we must sync the file
1771                  * first.
1772                  */
1773                 vfs_fsync(file, 1);
1774         }
1775         /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
1776         if (!mddev->bitmap_info.external) {
1777                 /*
1778                  * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
1779                  * instructing us to create a new on-disk bitmap instance.
1780                  */
1781                 if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
1782                         err = bitmap_new_disk_sb(bitmap);
1783                 else
1784                         err = bitmap_read_sb(bitmap);
1785         } else {
1786                 err = 0;
1787                 if (mddev->bitmap_info.chunksize == 0 ||
1788                     mddev->bitmap_info.daemon_sleep == 0)
1789                         /* chunksize and time_base need to be
1790                          * set first. */
1791                         err = -EINVAL;
1792         }
1793         if (err)
1794                 goto error;
1795
1796         bitmap->daemon_lastrun = jiffies;
1797         err = bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1);
1798         if (err)
1799                 goto error;
1800
1801         printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
1802                bitmap->counts.pages, bmname(bitmap));
1803
1804         err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
1805         if (err)
1806                 goto error;
1807
1808         return bitmap;
1809  error:
1810         bitmap_free(bitmap);
1811         return ERR_PTR(err);
1812 }
1813
1814 int bitmap_load(struct mddev *mddev)
1815 {
1816         int err = 0;
1817         sector_t start = 0;
1818         sector_t sector = 0;
1819         struct bitmap *bitmap = mddev->bitmap;
1820
1821         if (!bitmap)
1822                 goto out;
1823
1824         /* Clear out old bitmap info first:  Either there is none, or we
1825          * are resuming after someone else has possibly changed things,
1826          * so we should forget old cached info.
1827          * All chunks should be clean, but some might need_sync.
1828          */
1829         while (sector < mddev->resync_max_sectors) {
1830                 sector_t blocks;
1831                 bitmap_start_sync(bitmap, sector, &blocks, 0);
1832                 sector += blocks;
1833         }
1834         bitmap_close_sync(bitmap);
1835
1836         if (mddev->degraded == 0
1837             || bitmap->events_cleared == mddev->events)
1838                 /* no need to keep dirty bits to optimise a
1839                  * re-add of a missing device */
1840                 start = mddev->recovery_cp;
1841
1842         mutex_lock(&mddev->bitmap_info.mutex);
1843         err = bitmap_init_from_disk(bitmap, start);
1844         mutex_unlock(&mddev->bitmap_info.mutex);
1845
1846         if (err)
1847                 goto out;
1848         clear_bit(BITMAP_STALE, &bitmap->flags);
1849
1850         /* Kick recovery in case any bits were set */
1851         set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
1852
1853         mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
1854         md_wakeup_thread(mddev->thread);
1855
1856         bitmap_update_sb(bitmap);
1857
1858         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1859                 err = -EIO;
1860 out:
1861         return err;
1862 }
1863 EXPORT_SYMBOL_GPL(bitmap_load);
1864
1865 /* Loads the bitmap associated with slot and copies the resync information
1866  * to our bitmap
1867  */
1868 int bitmap_copy_from_slot(struct mddev *mddev, int slot,
1869                 sector_t *low, sector_t *high, bool clear_bits)
1870 {
1871         int rv = 0, i, j;
1872         sector_t block, lo = 0, hi = 0;
1873         struct bitmap_counts *counts;
1874         struct bitmap *bitmap = bitmap_create(mddev, slot);
1875
1876         if (IS_ERR(bitmap))
1877                 return PTR_ERR(bitmap);
1878
1879         rv = bitmap_init_from_disk(bitmap, 0);
1880         if (rv)
1881                 goto err;
1882
1883         counts = &bitmap->counts;
1884         for (j = 0; j < counts->chunks; j++) {
1885                 block = (sector_t)j << counts->chunkshift;
1886                 if (bitmap_file_test_bit(bitmap, block)) {
1887                         if (!lo)
1888                                 lo = block;
1889                         hi = block;
1890                         bitmap_file_clear_bit(bitmap, block);
1891                         bitmap_set_memory_bits(mddev->bitmap, block, 1);
1892                         bitmap_file_set_bit(mddev->bitmap, block);
1893                 }
1894         }
1895
1896         if (clear_bits) {
1897                 bitmap_update_sb(bitmap);
1898                 /* Setting this for the ev_page should be enough.
1899                  * And we do not require both write_all and PAGE_DIRT either
1900                  */
1901                 for (i = 0; i < bitmap->storage.file_pages; i++)
1902                         set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
1903                 bitmap_write_all(bitmap);
1904                 bitmap_unplug(bitmap);
1905         }
1906         *low = lo;
1907         *high = hi;
1908 err:
1909         bitmap_free(bitmap);
1910         return rv;
1911 }
1912 EXPORT_SYMBOL_GPL(bitmap_copy_from_slot);
1913
1914
1915 void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
1916 {
1917         unsigned long chunk_kb;
1918         struct bitmap_counts *counts;
1919
1920         if (!bitmap)
1921                 return;
1922
1923         counts = &bitmap->counts;
1924
1925         chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
1926         seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
1927                    "%lu%s chunk",
1928                    counts->pages - counts->missing_pages,
1929                    counts->pages,
1930                    (counts->pages - counts->missing_pages)
1931                    << (PAGE_SHIFT - 10),
1932                    chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
1933                    chunk_kb ? "KB" : "B");
1934         if (bitmap->storage.file) {
1935                 seq_printf(seq, ", file: ");
1936                 seq_file_path(seq, bitmap->storage.file, " \t\n");
1937         }
1938
1939         seq_printf(seq, "\n");
1940 }
1941
1942 int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
1943                   int chunksize, int init)
1944 {
1945         /* If chunk_size is 0, choose an appropriate chunk size.
1946          * Then possibly allocate new storage space.
1947          * Then quiesce, copy bits, replace bitmap, and re-start
1948          *
1949          * This function is called both to set up the initial bitmap
1950          * and to resize the bitmap while the array is active.
1951          * If this happens as a result of the array being resized,
1952          * chunksize will be zero, and we need to choose a suitable
1953          * chunksize, otherwise we use what we are given.
1954          */
1955         struct bitmap_storage store;
1956         struct bitmap_counts old_counts;
1957         unsigned long chunks;
1958         sector_t block;
1959         sector_t old_blocks, new_blocks;
1960         int chunkshift;
1961         int ret = 0;
1962         long pages;
1963         struct bitmap_page *new_bp;
1964
1965         if (chunksize == 0) {
1966                 /* If there is enough space, leave the chunk size unchanged,
1967                  * else increase by factor of two until there is enough space.
1968                  */
1969                 long bytes;
1970                 long space = bitmap->mddev->bitmap_info.space;
1971
1972                 if (space == 0) {
1973                         /* We don't know how much space there is, so limit
1974                          * to current size - in sectors.
1975                          */
1976                         bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
1977                         if (!bitmap->mddev->bitmap_info.external)
1978                                 bytes += sizeof(bitmap_super_t);
1979                         space = DIV_ROUND_UP(bytes, 512);
1980                         bitmap->mddev->bitmap_info.space = space;
1981                 }
1982                 chunkshift = bitmap->counts.chunkshift;
1983                 chunkshift--;
1984                 do {
1985                         /* 'chunkshift' is shift from block size to chunk size */
1986                         chunkshift++;
1987                         chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
1988                         bytes = DIV_ROUND_UP(chunks, 8);
1989                         if (!bitmap->mddev->bitmap_info.external)
1990                                 bytes += sizeof(bitmap_super_t);
1991                 } while (bytes > (space << 9));
1992         } else
1993                 chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
1994
1995         chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
1996         memset(&store, 0, sizeof(store));
1997         if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
1998                 ret = bitmap_storage_alloc(&store, chunks,
1999                                            !bitmap->mddev->bitmap_info.external,
2000                                            mddev_is_clustered(bitmap->mddev)
2001                                            ? bitmap->cluster_slot : 0);
2002         if (ret)
2003                 goto err;
2004
2005         pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);
2006
2007         new_bp = kzalloc(pages * sizeof(*new_bp), GFP_KERNEL);
2008         ret = -ENOMEM;
2009         if (!new_bp) {
2010                 bitmap_file_unmap(&store);
2011                 goto err;
2012         }
2013
2014         if (!init)
2015                 bitmap->mddev->pers->quiesce(bitmap->mddev, 1);
2016
2017         store.file = bitmap->storage.file;
2018         bitmap->storage.file = NULL;
2019
2020         if (store.sb_page && bitmap->storage.sb_page)
2021                 memcpy(page_address(store.sb_page),
2022                        page_address(bitmap->storage.sb_page),
2023                        sizeof(bitmap_super_t));
2024         bitmap_file_unmap(&bitmap->storage);
2025         bitmap->storage = store;
2026
2027         old_counts = bitmap->counts;
2028         bitmap->counts.bp = new_bp;
2029         bitmap->counts.pages = pages;
2030         bitmap->counts.missing_pages = pages;
2031         bitmap->counts.chunkshift = chunkshift;
2032         bitmap->counts.chunks = chunks;
2033         bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift +
2034                                                      BITMAP_BLOCK_SHIFT);
2035
2036         blocks = min(old_counts.chunks << old_counts.chunkshift,
2037                      chunks << chunkshift);
2038
2039         spin_lock_irq(&bitmap->counts.lock);
2040         for (block = 0; block < blocks; ) {
2041                 bitmap_counter_t *bmc_old, *bmc_new;
2042                 int set;
2043
2044                 bmc_old = bitmap_get_counter(&old_counts, block,
2045                                              &old_blocks, 0);
2046                 set = bmc_old && NEEDED(*bmc_old);
2047
2048                 if (set) {
2049                         bmc_new = bitmap_get_counter(&bitmap->counts, block,
2050                                                      &new_blocks, 1);
2051                         if (*bmc_new == 0) {
2052                                 /* need to set on-disk bits too. */
2053                                 sector_t end = block + new_blocks;
2054                                 sector_t start = block >> chunkshift;
2055                                 start <<= chunkshift;
2056                                 while (start < end) {
2057                                         bitmap_file_set_bit(bitmap, block);
2058                                         start += 1 << chunkshift;
2059                                 }
2060                                 *bmc_new = 2;
2061                                 bitmap_count_page(&bitmap->counts,
2062                                                   block, 1);
2063                                 bitmap_set_pending(&bitmap->counts,
2064                                                    block);
2065                         }
2066                         *bmc_new |= NEEDED_MASK;
2067                         if (new_blocks < old_blocks)
2068                                 old_blocks = new_blocks;
2069                 }
2070                 block += old_blocks;
2071         }
2072
2073         if (!init) {
2074                 int i;
2075                 while (block < (chunks << chunkshift)) {
2076                         bitmap_counter_t *bmc;
2077                         bmc = bitmap_get_counter(&bitmap->counts, block,
2078                                                  &new_blocks, 1);
2079                         if (bmc) {
2080                                 /* new space.  It needs to be resynced, so
2081                                  * we set NEEDED_MASK.
2082                                  */
2083                                 if (*bmc == 0) {
2084                                         *bmc = NEEDED_MASK | 2;
2085                                         bitmap_count_page(&bitmap->counts,
2086                                                           block, 1);
2087                                         bitmap_set_pending(&bitmap->counts,
2088                                                            block);
2089                                 }
2090                         }
2091                         block += new_blocks;
2092                 }
2093                 for (i = 0; i < bitmap->storage.file_pages; i++)
2094                         set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
2095         }
2096         spin_unlock_irq(&bitmap->counts.lock);
2097
2098         if (!init) {
2099                 bitmap_unplug(bitmap);
2100                 bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
2101         }
2102         ret = 0;
2103 err:
2104         return ret;
2105 }
2106 EXPORT_SYMBOL_GPL(bitmap_resize);
2107
2108 static ssize_t
2109 location_show(struct mddev *mddev, char *page)
2110 {
2111         ssize_t len;
2112         if (mddev->bitmap_info.file)
2113                 len = sprintf(page, "file");
2114         else if (mddev->bitmap_info.offset)
2115                 len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
2116         else
2117                 len = sprintf(page, "none");
2118         len += sprintf(page+len, "\n");
2119         return len;
2120 }
2121
2122 static ssize_t
2123 location_store(struct mddev *mddev, const char *buf, size_t len)
2124 {
2125
2126         if (mddev->pers) {
2127                 if (!mddev->pers->quiesce)
2128                         return -EBUSY;
2129                 if (mddev->recovery || mddev->sync_thread)
2130                         return -EBUSY;
2131         }
2132
2133         if (mddev->bitmap || mddev->bitmap_info.file ||
2134             mddev->bitmap_info.offset) {
2135                 /* bitmap already configured.  Only option is to clear it */
2136                 if (strncmp(buf, "none", 4) != 0)
2137                         return -EBUSY;
2138                 if (mddev->pers) {
2139                         mddev->pers->quiesce(mddev, 1);
2140                         bitmap_destroy(mddev);
2141                         mddev->pers->quiesce(mddev, 0);
2142                 }
2143                 mddev->bitmap_info.offset = 0;
2144                 if (mddev->bitmap_info.file) {
2145                         struct file *f = mddev->bitmap_info.file;
2146                         mddev->bitmap_info.file = NULL;
2147                         fput(f);
2148                 }
2149         } else {
2150                 /* No bitmap, OK to set a location */
2151                 long long offset;
2152                 if (strncmp(buf, "none", 4) == 0)
2153                         /* nothing to be done */;
2154                 else if (strncmp(buf, "file:", 5) == 0) {
2155                         /* Not supported yet */
2156                         return -EINVAL;
2157                 } else {
2158                         int rv;
2159                         if (buf[0] == '+')
2160                                 rv = kstrtoll(buf+1, 10, &offset);
2161                         else
2162                                 rv = kstrtoll(buf, 10, &offset);
2163                         if (rv)
2164                                 return rv;
2165                         if (offset == 0)
2166                                 return -EINVAL;
2167                         if (mddev->bitmap_info.external == 0 &&
2168                             mddev->major_version == 0 &&
2169                             offset != mddev->bitmap_info.default_offset)
2170                                 return -EINVAL;
2171                         mddev->bitmap_info.offset = offset;
2172                         if (mddev->pers) {
2173                                 struct bitmap *bitmap;
2174                                 mddev->pers->quiesce(mddev, 1);
2175                                 bitmap = bitmap_create(mddev, -1);
2176                                 if (IS_ERR(bitmap))
2177                                         rv = PTR_ERR(bitmap);
2178                                 else {
2179                                         mddev->bitmap = bitmap;
2180                                         rv = bitmap_load(mddev);
2181                                         if (rv) {
2182                                                 bitmap_destroy(mddev);
2183                                                 mddev->bitmap_info.offset = 0;
2184                                         }
2185                                 }
2186                                 mddev->pers->quiesce(mddev, 0);
2187                                 if (rv)
2188                                         return rv;
2189                         }
2190                 }
2191         }
2192         if (!mddev->external) {
2193                 /* Ensure new bitmap info is stored in
2194                  * metadata promptly.
2195                  */
2196                 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2197                 md_wakeup_thread(mddev->thread);
2198         }
2199         return len;
2200 }
2201
/* sysfs attribute "location": readable by all, writable by owner. */
static struct md_sysfs_entry bitmap_location =
__ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
2204
2205 /* 'bitmap/space' is the space available at 'location' for the
2206  * bitmap.  This allows the kernel to know when it is safe to
2207  * resize the bitmap to match a resized array.
2208  */
2209 static ssize_t
2210 space_show(struct mddev *mddev, char *page)
2211 {
2212         return sprintf(page, "%lu\n", mddev->bitmap_info.space);
2213 }
2214
2215 static ssize_t
2216 space_store(struct mddev *mddev, const char *buf, size_t len)
2217 {
2218         unsigned long sectors;
2219         int rv;
2220
2221         rv = kstrtoul(buf, 10, &sectors);
2222         if (rv)
2223                 return rv;
2224
2225         if (sectors == 0)
2226                 return -EINVAL;
2227
2228         if (mddev->bitmap &&
2229             sectors < (mddev->bitmap->storage.bytes + 511) >> 9)
2230                 return -EFBIG; /* Bitmap is too big for this small space */
2231
2232         /* could make sure it isn't too big, but that isn't really
2233          * needed - user-space should be careful.
2234          */
2235         mddev->bitmap_info.space = sectors;
2236         return len;
2237 }
2238
/* sysfs attribute "space": readable by all, writable by owner. */
static struct md_sysfs_entry bitmap_space =
__ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);
2241
2242 static ssize_t
2243 timeout_show(struct mddev *mddev, char *page)
2244 {
2245         ssize_t len;
2246         unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
2247         unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
2248
2249         len = sprintf(page, "%lu", secs);
2250         if (jifs)
2251                 len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
2252         len += sprintf(page+len, "\n");
2253         return len;
2254 }
2255
2256 static ssize_t
2257 timeout_store(struct mddev *mddev, const char *buf, size_t len)
2258 {
2259         /* timeout can be set at any time */
2260         unsigned long timeout;
2261         int rv = strict_strtoul_scaled(buf, &timeout, 4);
2262         if (rv)
2263                 return rv;
2264
2265         /* just to make sure we don't overflow... */
2266         if (timeout >= LONG_MAX / HZ)
2267                 return -EINVAL;
2268
2269         timeout = timeout * HZ / 10000;
2270
2271         if (timeout >= MAX_SCHEDULE_TIMEOUT)
2272                 timeout = MAX_SCHEDULE_TIMEOUT-1;
2273         if (timeout < 1)
2274                 timeout = 1;
2275         mddev->bitmap_info.daemon_sleep = timeout;
2276         if (mddev->thread) {
2277                 /* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
2278                  * the bitmap is all clean and we don't need to
2279                  * adjust the timeout right now
2280                  */
2281                 if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
2282                         mddev->thread->timeout = timeout;
2283                         md_wakeup_thread(mddev->thread);
2284                 }
2285         }
2286         return len;
2287 }
2288
/* sysfs attribute "time_base": readable by all, writable by owner. */
static struct md_sysfs_entry bitmap_timeout =
__ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
2291
2292 static ssize_t
2293 backlog_show(struct mddev *mddev, char *page)
2294 {
2295         return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
2296 }
2297
2298 static ssize_t
2299 backlog_store(struct mddev *mddev, const char *buf, size_t len)
2300 {
2301         unsigned long backlog;
2302         int rv = kstrtoul(buf, 10, &backlog);
2303         if (rv)
2304                 return rv;
2305         if (backlog > COUNTER_MAX)
2306                 return -EINVAL;
2307         mddev->bitmap_info.max_write_behind = backlog;
2308         return len;
2309 }
2310
/* sysfs attribute "backlog": readable by all, writable by owner. */
static struct md_sysfs_entry bitmap_backlog =
__ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
2313
2314 static ssize_t
2315 chunksize_show(struct mddev *mddev, char *page)
2316 {
2317         return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
2318 }
2319
2320 static ssize_t
2321 chunksize_store(struct mddev *mddev, const char *buf, size_t len)
2322 {
2323         /* Can only be changed when no bitmap is active */
2324         int rv;
2325         unsigned long csize;
2326         if (mddev->bitmap)
2327                 return -EBUSY;
2328         rv = kstrtoul(buf, 10, &csize);
2329         if (rv)
2330                 return rv;
2331         if (csize < 512 ||
2332             !is_power_of_2(csize))
2333                 return -EINVAL;
2334         mddev->bitmap_info.chunksize = csize;
2335         return len;
2336 }
2337
2338 static struct md_sysfs_entry bitmap_chunksize =
2339 __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
2340
2341 static ssize_t metadata_show(struct mddev *mddev, char *page)
2342 {
2343         if (mddev_is_clustered(mddev))
2344                 return sprintf(page, "clustered\n");
2345         return sprintf(page, "%s\n", (mddev->bitmap_info.external
2346                                       ? "external" : "internal"));
2347 }
2348
2349 static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
2350 {
2351         if (mddev->bitmap ||
2352             mddev->bitmap_info.file ||
2353             mddev->bitmap_info.offset)
2354                 return -EBUSY;
2355         if (strncmp(buf, "external", 8) == 0)
2356                 mddev->bitmap_info.external = 1;
2357         else if ((strncmp(buf, "internal", 8) == 0) ||
2358                         (strncmp(buf, "clustered", 9) == 0))
2359                 mddev->bitmap_info.external = 0;
2360         else
2361                 return -EINVAL;
2362         return len;
2363 }
2364
2365 static struct md_sysfs_entry bitmap_metadata =
2366 __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2367
2368 static ssize_t can_clear_show(struct mddev *mddev, char *page)
2369 {
2370         int len;
2371         spin_lock(&mddev->lock);
2372         if (mddev->bitmap)
2373                 len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
2374                                              "false" : "true"));
2375         else
2376                 len = sprintf(page, "\n");
2377         spin_unlock(&mddev->lock);
2378         return len;
2379 }
2380
2381 static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
2382 {
2383         if (mddev->bitmap == NULL)
2384                 return -ENOENT;
2385         if (strncmp(buf, "false", 5) == 0)
2386                 mddev->bitmap->need_sync = 1;
2387         else if (strncmp(buf, "true", 4) == 0) {
2388                 if (mddev->degraded)
2389                         return -EBUSY;
2390                 mddev->bitmap->need_sync = 0;
2391         } else
2392                 return -EINVAL;
2393         return len;
2394 }
2395
2396 static struct md_sysfs_entry bitmap_can_clear =
2397 __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
2398
2399 static ssize_t
2400 behind_writes_used_show(struct mddev *mddev, char *page)
2401 {
2402         ssize_t ret;
2403         spin_lock(&mddev->lock);
2404         if (mddev->bitmap == NULL)
2405                 ret = sprintf(page, "0\n");
2406         else
2407                 ret = sprintf(page, "%lu\n",
2408                               mddev->bitmap->behind_writes_used);
2409         spin_unlock(&mddev->lock);
2410         return ret;
2411 }
2412
2413 static ssize_t
2414 behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
2415 {
2416         if (mddev->bitmap)
2417                 mddev->bitmap->behind_writes_used = 0;
2418         return len;
2419 }
2420
2421 static struct md_sysfs_entry max_backlog_used =
2422 __ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
2423        behind_writes_used_show, behind_writes_used_reset);
2424
2425 static struct attribute *md_bitmap_attrs[] = {
2426         &bitmap_location.attr,
2427         &bitmap_space.attr,
2428         &bitmap_timeout.attr,
2429         &bitmap_backlog.attr,
2430         &bitmap_chunksize.attr,
2431         &bitmap_metadata.attr,
2432         &bitmap_can_clear.attr,
2433         &max_backlog_used.attr,
2434         NULL
2435 };
2436 struct attribute_group md_bitmap_group = {
2437         .name = "bitmap",
2438         .attrs = md_bitmap_attrs,
2439 };
2440