Merge tag 'md/4.3-rc6-fixes' of git://neil.brown.name/md
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 26 Oct 2015 22:41:48 +0000 (07:41 +0900)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 26 Oct 2015 22:41:48 +0000 (07:41 +0900)
Pull md fixes from Neil Brown:
 "Some raid1/raid10 fixes.

  I meant to get this to you before -rc7, but what with all the travel
  plans..

  Two fixes for bugs that are in both raid1 and raid10.  Both are related
  to bad-block lists, and at least one needs to be backported to 3.1.

  Also a revision for the "new" layout in raid10.  This "new" code
  (which aims to improve robustness) actually reduces robustness in some
  cases.  It probably isn't in use at all, as no public user-space code
  makes use of these new layouts.  However just in case someone has
  their own code, it would be good to get the WARNing out for them
  sooner"

* tag 'md/4.3-rc6-fixes' of git://neil.brown.name/md:
  md/raid10: fix the 'new' raid10 layout to work correctly.
  md/raid10: don't clear bitmap bit when bad-block-list write fails.
  md/raid1: don't clear bitmap bit when bad-block-list write fails.
  md/raid10: submit_bio_wait() returns 0 on success
  md/raid1: submit_bio_wait() returns 0 on success

drivers/md/raid1.c
drivers/md/raid10.c

index ddd8a5f..d9d031e 100644 (file)
@@ -2195,7 +2195,7 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
                bio_trim(wbio, sector - r1_bio->sector, sectors);
                wbio->bi_iter.bi_sector += rdev->data_offset;
                wbio->bi_bdev = rdev->bdev;
-               if (submit_bio_wait(WRITE, wbio) == 0)
+               if (submit_bio_wait(WRITE, wbio) < 0)
                        /* failure! */
                        ok = rdev_set_badblocks(rdev, sector,
                                                sectors, 0)
@@ -2258,15 +2258,16 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
                        rdev_dec_pending(conf->mirrors[m].rdev,
                                         conf->mddev);
                }
-       if (test_bit(R1BIO_WriteError, &r1_bio->state))
-               close_write(r1_bio);
        if (fail) {
                spin_lock_irq(&conf->device_lock);
                list_add(&r1_bio->retry_list, &conf->bio_end_io_list);
                spin_unlock_irq(&conf->device_lock);
                md_wakeup_thread(conf->mddev->thread);
-       } else
+       } else {
+               if (test_bit(R1BIO_WriteError, &r1_bio->state))
+                       close_write(r1_bio);
                raid_end_bio_io(r1_bio);
+       }
 }
 
 static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
@@ -2385,6 +2386,10 @@ static void raid1d(struct md_thread *thread)
                        r1_bio = list_first_entry(&tmp, struct r1bio,
                                                  retry_list);
                        list_del(&r1_bio->retry_list);
+                       if (mddev->degraded)
+                               set_bit(R1BIO_Degraded, &r1_bio->state);
+                       if (test_bit(R1BIO_WriteError, &r1_bio->state))
+                               close_write(r1_bio);
                        raid_end_bio_io(r1_bio);
                }
        }
index 9f69dc5..96f3659 100644 (file)
@@ -39,6 +39,7 @@
  *    far_copies (stored in second byte of layout)
  *    far_offset (stored in bit 16 of layout )
  *    use_far_sets (stored in bit 17 of layout )
+ *    use_far_sets_bugfixed (stored in bit 18 of layout )
  *
  * The data to be stored is divided into chunks using chunksize.  Each device
  * is divided into far_copies sections.   In each section, chunks are laid out
@@ -1497,6 +1498,8 @@ static void status(struct seq_file *seq, struct mddev *mddev)
                        seq_printf(seq, " %d offset-copies", conf->geo.far_copies);
                else
                        seq_printf(seq, " %d far-copies", conf->geo.far_copies);
+               if (conf->geo.far_set_size != conf->geo.raid_disks)
+                       seq_printf(seq, " %d devices per set", conf->geo.far_set_size);
        }
        seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks,
                                        conf->geo.raid_disks - mddev->degraded);
@@ -2467,7 +2470,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
                                   choose_data_offset(r10_bio, rdev) +
                                   (sector - r10_bio->sector));
                wbio->bi_bdev = rdev->bdev;
-               if (submit_bio_wait(WRITE, wbio) == 0)
+               if (submit_bio_wait(WRITE, wbio) < 0)
                        /* Failure! */
                        ok = rdev_set_badblocks(rdev, sector,
                                                sectors, 0)
@@ -2654,16 +2657,17 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
                                rdev_dec_pending(rdev, conf->mddev);
                        }
                }
-               if (test_bit(R10BIO_WriteError,
-                            &r10_bio->state))
-                       close_write(r10_bio);
                if (fail) {
                        spin_lock_irq(&conf->device_lock);
                        list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
                        spin_unlock_irq(&conf->device_lock);
                        md_wakeup_thread(conf->mddev->thread);
-               } else
+               } else {
+                       if (test_bit(R10BIO_WriteError,
+                                    &r10_bio->state))
+                               close_write(r10_bio);
                        raid_end_bio_io(r10_bio);
+               }
        }
 }
 
@@ -2691,6 +2695,12 @@ static void raid10d(struct md_thread *thread)
                        r10_bio = list_first_entry(&tmp, struct r10bio,
                                                   retry_list);
                        list_del(&r10_bio->retry_list);
+                       if (mddev->degraded)
+                               set_bit(R10BIO_Degraded, &r10_bio->state);
+
+                       if (test_bit(R10BIO_WriteError,
+                                    &r10_bio->state))
+                               close_write(r10_bio);
                        raid_end_bio_io(r10_bio);
                }
        }
@@ -3387,7 +3397,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
                disks = mddev->raid_disks + mddev->delta_disks;
                break;
        }
-       if (layout >> 18)
+       if (layout >> 19)
                return -1;
        if (chunk < (PAGE_SIZE >> 9) ||
            !is_power_of_2(chunk))
@@ -3399,7 +3409,22 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
        geo->near_copies = nc;
        geo->far_copies = fc;
        geo->far_offset = fo;
-       geo->far_set_size = (layout & (1<<17)) ? disks / fc : disks;
+       switch (layout >> 17) {
+       case 0: /* original layout.  simple but not always optimal */
+               geo->far_set_size = disks;
+               break;
+       case 1: /* "improved" layout which was buggy.  Hopefully no-one is
+                * actually using this, but leave code here just in case.*/
+               geo->far_set_size = disks/fc;
+               WARN(geo->far_set_size < fc,
+                    "This RAID10 layout does not provide data safety - please backup and create new array\n");
+               break;
+       case 2: /* "improved" layout fixed to match documentation */
+               geo->far_set_size = fc * nc;
+               break;
+       default: /* Not a valid layout */
+               return -1;
+       }
        geo->chunk_mask = chunk - 1;
        geo->chunk_shift = ffz(~chunk);
        return nc*fc;