]> git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge branch 'for-linus' of git://neil.brown.name/md
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 14 Nov 2009 20:59:32 +0000 (12:59 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 14 Nov 2009 20:59:32 +0000 (12:59 -0800)
* 'for-linus' of git://neil.brown.name/md:
  md/raid5: Allow dirty-degraded arrays to be assembled when only party is degraded.
  Don't unconditionally set in_sync on newly added device in raid5_reshape
  md: allow v0.91 metadata to record devices as being active but not in-sync.
  md: factor out updating of 'recovery_offset'.

drivers/md/md.c
drivers/md/raid5.c

index e64c971038d14a4ddc588876d010e5b748b05d8f..b182f86a19dda9c71d12553e9d6891f2e24d0cfe 100644 (file)
@@ -944,6 +944,14 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                            desc->raid_disk < mddev->raid_disks */) {
                        set_bit(In_sync, &rdev->flags);
                        rdev->raid_disk = desc->raid_disk;
+               } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
+                       /* active but not in sync implies recovery up to
+                        * reshape position.  We don't know exactly where
+                        * that is, so set to zero for now */
+                       if (mddev->minor_version >= 91) {
+                               rdev->recovery_offset = 0;
+                               rdev->raid_disk = desc->raid_disk;
+                       }
                }
                if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
                        set_bit(WriteMostly, &rdev->flags);
@@ -1032,8 +1040,19 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        list_for_each_entry(rdev2, &mddev->disks, same_set) {
                mdp_disk_t *d;
                int desc_nr;
-               if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
-                   && !test_bit(Faulty, &rdev2->flags))
+               int is_active = test_bit(In_sync, &rdev2->flags);
+
+               if (rdev2->raid_disk >= 0 &&
+                   sb->minor_version >= 91)
+                       /* we have nowhere to store the recovery_offset,
+                        * but if it is not below the reshape_position,
+                        * we can piggy-back on that.
+                        */
+                       is_active = 1;
+               if (rdev2->raid_disk < 0 ||
+                   test_bit(Faulty, &rdev2->flags))
+                       is_active = 0;
+               if (is_active)
                        desc_nr = rdev2->raid_disk;
                else
                        desc_nr = next_spare++;
@@ -1043,16 +1062,16 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
                d->number = rdev2->desc_nr;
                d->major = MAJOR(rdev2->bdev->bd_dev);
                d->minor = MINOR(rdev2->bdev->bd_dev);
-               if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
-                   && !test_bit(Faulty, &rdev2->flags))
+               if (is_active)
                        d->raid_disk = rdev2->raid_disk;
                else
                        d->raid_disk = rdev2->desc_nr; /* compatibility */
                if (test_bit(Faulty, &rdev2->flags))
                        d->state = (1<<MD_DISK_FAULTY);
-               else if (test_bit(In_sync, &rdev2->flags)) {
+               else if (is_active) {
                        d->state = (1<<MD_DISK_ACTIVE);
-                       d->state |= (1<<MD_DISK_SYNC);
+                       if (test_bit(In_sync, &rdev2->flags))
+                               d->state |= (1<<MD_DISK_SYNC);
                        active++;
                        working++;
                } else {
@@ -1382,8 +1401,6 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
        if (rdev->raid_disk >= 0 &&
            !test_bit(In_sync, &rdev->flags)) {
-               if (mddev->curr_resync_completed > rdev->recovery_offset)
-                       rdev->recovery_offset = mddev->curr_resync_completed;
                if (rdev->recovery_offset > 0) {
                        sb->feature_map |=
                                cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
@@ -1917,6 +1934,14 @@ static void sync_sbs(mddev_t * mddev, int nospares)
         */
        mdk_rdev_t *rdev;
 
+       /* First make sure individual recovery_offsets are correct */
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
+               if (rdev->raid_disk >= 0 &&
+                   !test_bit(In_sync, &rdev->flags) &&
+                   mddev->curr_resync_completed > rdev->recovery_offset)
+                               rdev->recovery_offset = mddev->curr_resync_completed;
+
+       }       
        list_for_each_entry(rdev, &mddev->disks, same_set) {
                if (rdev->sb_events == mddev->events ||
                    (nospares &&
index dcce204b6c733bb36887ab1981e25ac35ece189b..d29215d966dadc23f4c1a6844a725dc89d6a8304 100644 (file)
@@ -4823,11 +4823,40 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
                return ERR_PTR(-ENOMEM);
 }
 
+
+static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
+{
+       switch (algo) {
+       case ALGORITHM_PARITY_0:
+               if (raid_disk < max_degraded)
+                       return 1;
+               break;
+       case ALGORITHM_PARITY_N:
+               if (raid_disk >= raid_disks - max_degraded)
+                       return 1;
+               break;
+       case ALGORITHM_PARITY_0_6:
+               if (raid_disk == 0 || 
+                   raid_disk == raid_disks - 1)
+                       return 1;
+               break;
+       case ALGORITHM_LEFT_ASYMMETRIC_6:
+       case ALGORITHM_RIGHT_ASYMMETRIC_6:
+       case ALGORITHM_LEFT_SYMMETRIC_6:
+       case ALGORITHM_RIGHT_SYMMETRIC_6:
+               if (raid_disk == raid_disks - 1)
+                       return 1;
+       }
+       return 0;
+}
+
 static int run(mddev_t *mddev)
 {
        raid5_conf_t *conf;
        int working_disks = 0, chunk_size;
+       int dirty_parity_disks = 0;
        mdk_rdev_t *rdev;
+       sector_t reshape_offset = 0;
 
        if (mddev->recovery_cp != MaxSector)
                printk(KERN_NOTICE "raid5: %s is not clean"
@@ -4861,6 +4890,7 @@ static int run(mddev_t *mddev)
                               "on a stripe boundary\n");
                        return -EINVAL;
                }
+               reshape_offset = here_new * mddev->new_chunk_sectors;
                /* here_new is the stripe we will write to */
                here_old = mddev->reshape_position;
                sector_div(here_old, mddev->chunk_sectors *
@@ -4916,10 +4946,51 @@ static int run(mddev_t *mddev)
        /*
         * 0 for a fully functional array, 1 or 2 for a degraded array.
         */
-       list_for_each_entry(rdev, &mddev->disks, same_set)
-               if (rdev->raid_disk >= 0 &&
-                   test_bit(In_sync, &rdev->flags))
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
+               if (rdev->raid_disk < 0)
+                       continue;
+               if (test_bit(In_sync, &rdev->flags))
                        working_disks++;
+               /* This disc is not fully in-sync.  However if it
+                * just stored parity (beyond the recovery_offset),
+                * when we don't need to be concerned about the
+                * array being dirty.
+                * When reshape goes 'backwards', we never have
+                * partially completed devices, so we only need
+                * to worry about reshape going forwards.
+                */
+               /* Hack because v0.91 doesn't store recovery_offset properly. */
+               if (mddev->major_version == 0 &&
+                   mddev->minor_version > 90)
+                       rdev->recovery_offset = reshape_offset;
+                       
+               printk("%d: w=%d pa=%d pr=%d m=%d a=%d r=%d op1=%d op2=%d\n",
+                      rdev->raid_disk, working_disks, conf->prev_algo,
+                      conf->previous_raid_disks, conf->max_degraded,
+                      conf->algorithm, conf->raid_disks, 
+                      only_parity(rdev->raid_disk,
+                                  conf->prev_algo,
+                                  conf->previous_raid_disks,
+                                  conf->max_degraded),
+                      only_parity(rdev->raid_disk,
+                                  conf->algorithm,
+                                  conf->raid_disks,
+                                  conf->max_degraded));
+               if (rdev->recovery_offset < reshape_offset) {
+                       /* We need to check old and new layout */
+                       if (!only_parity(rdev->raid_disk,
+                                        conf->algorithm,
+                                        conf->raid_disks,
+                                        conf->max_degraded))
+                               continue;
+               }
+               if (!only_parity(rdev->raid_disk,
+                                conf->prev_algo,
+                                conf->previous_raid_disks,
+                                conf->max_degraded))
+                       continue;
+               dirty_parity_disks++;
+       }
 
        mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks)
                           - working_disks);
@@ -4935,7 +5006,7 @@ static int run(mddev_t *mddev)
        mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
        mddev->resync_max_sectors = mddev->dev_sectors;
 
-       if (mddev->degraded > 0 &&
+       if (mddev->degraded > dirty_parity_disks &&
            mddev->recovery_cp != MaxSector) {
                if (mddev->ok_start_degraded)
                        printk(KERN_WARNING
@@ -5361,9 +5432,11 @@ static int raid5_start_reshape(mddev_t *mddev)
                    !test_bit(Faulty, &rdev->flags)) {
                        if (raid5_add_disk(mddev, rdev) == 0) {
                                char nm[20];
-                               set_bit(In_sync, &rdev->flags);
+                               if (rdev->raid_disk >= conf->previous_raid_disks)
+                                       set_bit(In_sync, &rdev->flags);
+                               else
+                                       rdev->recovery_offset = 0;
                                added_devices++;
-                               rdev->recovery_offset = 0;
                                sprintf(nm, "rd%d", rdev->raid_disk);
                                if (sysfs_create_link(&mddev->kobj,
                                                      &rdev->kobj, nm))