bio_list_add(&conf->pending_bio_list, mbio);
spin_unlock_irqrestore(&conf->device_lock, flags);
}
- r1_bio_write_done(r1_bio);
-
- /* In case raid1d snuck in to freeze_array */
- wake_up(&conf->wait_barrier);
-
+ /* Mustn't call r1_bio_write_done before this next test,
+ * as it could result in the bio being freed.
+ */
if (sectors_handled < (bio->bi_size >> 9)) {
+ r1_bio_write_done(r1_bio);
/* We need another r1_bio. It has already been counted
* in bio->bi_phys_segments
*/
goto retry_write;
}
+ r1_bio_write_done(r1_bio);
+
+ /* In case raid1d snuck in to freeze_array */
+ wake_up(&conf->wait_barrier);
+
if (do_sync || !bitmap || !plugged)
md_wakeup_thread(mddev->thread);
s += sync_blocks;
sectors_to_go -= sync_blocks;
} while (sectors_to_go > 0);
- md_error(mddev, conf->mirrors[mirror].rdev);
+ set_bit(WriteErrorSeen,
+ &conf->mirrors[mirror].rdev->flags);
+ set_bit(R1BIO_WriteError, &r1_bio->state);
} else if (is_badblock(conf->mirrors[mirror].rdev,
r1_bio->sector,
r1_bio->sectors,
- &first_bad, &bad_sectors))
+ &first_bad, &bad_sectors) &&
+ !is_badblock(conf->mirrors[r1_bio->read_disk].rdev,
+ r1_bio->sector,
+ r1_bio->sectors,
+ &first_bad, &bad_sectors)
+ )
set_bit(R1BIO_MadeGood, &r1_bio->state);
update_head_pos(mirror, r1_bio);
if (atomic_dec_and_test(&r1_bio->remaining)) {
int s = r1_bio->sectors;
- if (test_bit(R1BIO_MadeGood, &r1_bio->state))
+ if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+ test_bit(R1BIO_WriteError, &r1_bio->state))
reschedule_retry(r1_bio);
else {
put_buf(r1_bio);
}
}
+static int r1_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
+ int sectors, struct page *page, int rw)
+{
+ if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
+ /* success */
+ return 1;
+ if (rw == WRITE)
+ set_bit(WriteErrorSeen, &rdev->flags);
+ /* need to record an error - either for the block or the device */
+ if (!rdev_set_badblocks(rdev, sector, sectors, 0))
+ md_error(rdev->mddev, rdev);
+ return 0;
+}
+
static int fix_sync_read_error(r1bio_t *r1_bio)
{
/* Try some synchronous reads of other devices to get
if (!success) {
char b[BDEVNAME_SIZE];
- /* Cannot read from anywhere, array is toast */
- md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+ int abort = 0;
+ /* Cannot read from anywhere, this block is lost.
+ * Record a bad block on each device. If that doesn't
+ * work just disable and interrupt the recovery.
+ * Don't fail devices as that won't really help.
+ */
printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
" for block %llu\n",
mdname(mddev),
bdevname(bio->bi_bdev, b),
(unsigned long long)r1_bio->sector);
- md_done_sync(mddev, r1_bio->sectors, 0);
- put_buf(r1_bio);
- return 0;
+ for (d = 0; d < conf->raid_disks; d++) {
+ rdev = conf->mirrors[d].rdev;
+ if (!rdev || test_bit(Faulty, &rdev->flags))
+ continue;
+ if (!rdev_set_badblocks(rdev, sect, s, 0))
+ abort = 1;
+ }
+ if (abort) {
+ mddev->recovery_disabled = 1;
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+ md_done_sync(mddev, r1_bio->sectors, 0);
+ put_buf(r1_bio);
+ return 0;
+ }
+ /* Try next page */
+ sectors -= s;
+ sect += s;
+ idx++;
+ continue;
}
start = d;
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
continue;
rdev = conf->mirrors[d].rdev;
- if (sync_page_io(rdev, sect, s<<9,
- bio->bi_io_vec[idx].bv_page,
- WRITE, false) == 0) {
+ if (r1_sync_page_io(rdev, sect, s,
+ bio->bi_io_vec[idx].bv_page,
+ WRITE) == 0) {
r1_bio->bios[d]->bi_end_io = NULL;
rdev_dec_pending(rdev, mddev);
- md_error(mddev, rdev);
}
}
d = start;
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
continue;
rdev = conf->mirrors[d].rdev;
- if (sync_page_io(rdev, sect, s<<9,
- bio->bi_io_vec[idx].bv_page,
- READ, false) == 0)
- md_error(mddev, rdev);
- else
+ if (r1_sync_page_io(rdev, sect, s,
+ bio->bi_io_vec[idx].bv_page,
+ READ) != 0)
atomic_add(s, &rdev->corrected_errors);
}
sectors -= s;
} while (!success && d != read_disk);
if (!success) {
- /* Cannot read from anywhere -- bye bye array */
- md_error(mddev, conf->mirrors[read_disk].rdev);
+ /* Cannot read from anywhere - mark it bad */
+ mdk_rdev_t *rdev = conf->mirrors[read_disk].rdev;
+ if (!rdev_set_badblocks(rdev, sect, s, 0))
+ md_error(mddev, rdev);
break;
}
/* write it back and re-read */
d--;
rdev = conf->mirrors[d].rdev;
if (rdev &&
- test_bit(In_sync, &rdev->flags)) {
- if (sync_page_io(rdev, sect, s<<9,
- conf->tmppage, WRITE, false)
- == 0)
- /* Well, this device is dead */
- md_error(mddev, rdev);
- }
+ test_bit(In_sync, &rdev->flags))
+ r1_sync_page_io(rdev, sect, s,
+ conf->tmppage, WRITE);
}
d = start;
while (d != read_disk) {
rdev = conf->mirrors[d].rdev;
if (rdev &&
test_bit(In_sync, &rdev->flags)) {
- if (sync_page_io(rdev, sect, s<<9,
- conf->tmppage, READ, false)
- == 0)
- /* Well, this device is dead */
- md_error(mddev, rdev);
- else {
+ if (r1_sync_page_io(rdev, sect, s,
+ conf->tmppage, READ)) {
atomic_add(s, &rdev->corrected_errors);
printk(KERN_INFO
"md/raid1:%s: read error corrected "
return ok;
}
+static void handle_sync_write_finished(conf_t *conf, r1bio_t *r1_bio)
+{
+ int m;
+ int s = r1_bio->sectors;
+ for (m = 0; m < conf->raid_disks ; m++) {
+ mdk_rdev_t *rdev = conf->mirrors[m].rdev;
+ struct bio *bio = r1_bio->bios[m];
+ if (bio->bi_end_io == NULL)
+ continue;
+ if (test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+ test_bit(R1BIO_MadeGood, &r1_bio->state)) {
+ rdev_clear_badblocks(rdev, r1_bio->sector, s);
+ }
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+ test_bit(R1BIO_WriteError, &r1_bio->state)) {
+ if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
+ md_error(conf->mddev, rdev);
+ }
+ }
+ put_buf(r1_bio);
+ md_done_sync(conf->mddev, s, 1);
+}
+
+static void handle_write_finished(conf_t *conf, r1bio_t *r1_bio)
+{
+ int m;
+ for (m = 0; m < conf->raid_disks ; m++)
+ if (r1_bio->bios[m] == IO_MADE_GOOD) {
+ mdk_rdev_t *rdev = conf->mirrors[m].rdev;
+ rdev_clear_badblocks(rdev,
+ r1_bio->sector,
+ r1_bio->sectors);
+ rdev_dec_pending(rdev, conf->mddev);
+ } else if (r1_bio->bios[m] != NULL) {
+ /* This drive got a write error. We need to
+ * narrow down and record precise write
+ * errors.
+ */
+ if (!narrow_write_error(r1_bio, m)) {
+ md_error(conf->mddev,
+ conf->mirrors[m].rdev);
+ /* an I/O failed, we can't clear the bitmap */
+ set_bit(R1BIO_Degraded, &r1_bio->state);
+ }
+ rdev_dec_pending(conf->mirrors[m].rdev,
+ conf->mddev);
+ }
+ if (test_bit(R1BIO_WriteError, &r1_bio->state))
+ close_write(r1_bio);
+ raid_end_bio_io(r1_bio);
+}
+
+static void handle_read_error(conf_t *conf, r1bio_t *r1_bio)
+{
+ int disk;
+ int max_sectors;
+ mddev_t *mddev = conf->mddev;
+ struct bio *bio;
+ char b[BDEVNAME_SIZE];
+ mdk_rdev_t *rdev;
+
+ clear_bit(R1BIO_ReadError, &r1_bio->state);
+ /* we got a read error. Maybe the drive is bad. Maybe just
+ * the block and we can fix it.
+ * We freeze all other IO, and try reading the block from
+ * other devices. When we find one, we re-write
+ * and check it that fixes the read error.
+ * This is all done synchronously while the array is
+ * frozen
+ */
+ if (mddev->ro == 0) {
+ freeze_array(conf);
+ fix_read_error(conf, r1_bio->read_disk,
+ r1_bio->sector, r1_bio->sectors);
+ unfreeze_array(conf);
+ } else
+ md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+
+ bio = r1_bio->bios[r1_bio->read_disk];
+ bdevname(bio->bi_bdev, b);
+read_more:
+ disk = read_balance(conf, r1_bio, &max_sectors);
+ if (disk == -1) {
+ printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O"
+ " read error for block %llu\n",
+ mdname(mddev), b, (unsigned long long)r1_bio->sector);
+ raid_end_bio_io(r1_bio);
+ } else {
+ const unsigned long do_sync
+ = r1_bio->master_bio->bi_rw & REQ_SYNC;
+ if (bio) {
+ r1_bio->bios[r1_bio->read_disk] =
+ mddev->ro ? IO_BLOCKED : NULL;
+ bio_put(bio);
+ }
+ r1_bio->read_disk = disk;
+ bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
+ md_trim_bio(bio, r1_bio->sector - bio->bi_sector, max_sectors);
+ r1_bio->bios[r1_bio->read_disk] = bio;
+ rdev = conf->mirrors[disk].rdev;
+ printk_ratelimited(KERN_ERR
+ "md/raid1:%s: redirecting sector %llu"
+ " to other mirror: %s\n",
+ mdname(mddev),
+ (unsigned long long)r1_bio->sector,
+ bdevname(rdev->bdev, b));
+ bio->bi_sector = r1_bio->sector + rdev->data_offset;
+ bio->bi_bdev = rdev->bdev;
+ bio->bi_end_io = raid1_end_read_request;
+ bio->bi_rw = READ | do_sync;
+ bio->bi_private = r1_bio;
+ if (max_sectors < r1_bio->sectors) {
+ /* Drat - have to split this up more */
+ struct bio *mbio = r1_bio->master_bio;
+ int sectors_handled = (r1_bio->sector + max_sectors
+ - mbio->bi_sector);
+ r1_bio->sectors = max_sectors;
+ spin_lock_irq(&conf->device_lock);
+ if (mbio->bi_phys_segments == 0)
+ mbio->bi_phys_segments = 2;
+ else
+ mbio->bi_phys_segments++;
+ spin_unlock_irq(&conf->device_lock);
+ generic_make_request(bio);
+ bio = NULL;
+
+ r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+ r1_bio->master_bio = mbio;
+ r1_bio->sectors = (mbio->bi_size >> 9)
+ - sectors_handled;
+ r1_bio->state = 0;
+ set_bit(R1BIO_ReadError, &r1_bio->state);
+ r1_bio->mddev = mddev;
+ r1_bio->sector = mbio->bi_sector + sectors_handled;
+
+ goto read_more;
+ } else
+ generic_make_request(bio);
+ }
+}
+
static void raid1d(mddev_t *mddev)
{
r1bio_t *r1_bio;
- struct bio *bio;
unsigned long flags;
conf_t *conf = mddev->private;
struct list_head *head = &conf->retry_list;
- mdk_rdev_t *rdev;
struct blk_plug plug;
md_check_recovery(mddev);
blk_start_plug(&plug);
for (;;) {
- char b[BDEVNAME_SIZE];
if (atomic_read(&mddev->plug_cnt) == 0)
flush_pending_writes(conf);
mddev = r1_bio->mddev;
conf = mddev->private;
if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
- if (test_bit(R1BIO_MadeGood, &r1_bio->state)) {
- int m;
- int s = r1_bio->sectors;
- for (m = 0; m < conf->raid_disks ; m++) {
- struct bio *bio = r1_bio->bios[m];
- if (bio->bi_end_io != NULL &&
- test_bit(BIO_UPTODATE,
- &bio->bi_flags)) {
- rdev = conf->mirrors[m].rdev;
- rdev_clear_badblocks(
- rdev,
- r1_bio->sector,
- r1_bio->sectors);
- }
- }
- put_buf(r1_bio);
- md_done_sync(mddev, s, 1);
- } else
+ if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+ test_bit(R1BIO_WriteError, &r1_bio->state))
+ handle_sync_write_finished(conf, r1_bio);
+ else
sync_request_write(mddev, r1_bio);
} else if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
- test_bit(R1BIO_WriteError, &r1_bio->state)) {
- int m;
- for (m = 0; m < conf->raid_disks ; m++)
- if (r1_bio->bios[m] == IO_MADE_GOOD) {
- rdev = conf->mirrors[m].rdev;
- rdev_clear_badblocks(
- rdev,
- r1_bio->sector,
- r1_bio->sectors);
- rdev_dec_pending(rdev, mddev);
- } else if (r1_bio->bios[m] != NULL) {
- /* This drive got a write error. We
- * need to narrow down and record
- * precise write errors.
- */
- if (!narrow_write_error(r1_bio, m)) {
- md_error(mddev,
- conf->mirrors[m].rdev);
- /* an I/O failed, we can't clear
- * the bitmap */
- set_bit(R1BIO_Degraded,
- &r1_bio->state);
- }
- rdev_dec_pending(conf->mirrors[m].rdev,
- mddev);
- }
- if (test_bit(R1BIO_WriteError, &r1_bio->state))
- close_write(r1_bio);
- raid_end_bio_io(r1_bio);
- } else if (test_bit(R1BIO_ReadError, &r1_bio->state)) {
- int disk;
- int max_sectors;
-
- clear_bit(R1BIO_ReadError, &r1_bio->state);
- /* we got a read error. Maybe the drive is bad. Maybe just
- * the block and we can fix it.
- * We freeze all other IO, and try reading the block from
- * other devices. When we find one, we re-write
- * and check it that fixes the read error.
- * This is all done synchronously while the array is
- * frozen
- */
- if (mddev->ro == 0) {
- freeze_array(conf);
- fix_read_error(conf, r1_bio->read_disk,
- r1_bio->sector,
- r1_bio->sectors);
- unfreeze_array(conf);
- } else
- md_error(mddev,
- conf->mirrors[r1_bio->read_disk].rdev);
-
- bio = r1_bio->bios[r1_bio->read_disk];
- bdevname(bio->bi_bdev, b);
-read_more:
- disk = read_balance(conf, r1_bio, &max_sectors);
- if (disk == -1) {
- printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O"
- " read error for block %llu\n",
- mdname(mddev), b,
- (unsigned long long)r1_bio->sector);
- raid_end_bio_io(r1_bio);
- } else {
- const unsigned long do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC;
- if (bio) {
- r1_bio->bios[r1_bio->read_disk] =
- mddev->ro ? IO_BLOCKED : NULL;
- bio_put(bio);
- }
- r1_bio->read_disk = disk;
- bio = bio_clone_mddev(r1_bio->master_bio,
- GFP_NOIO, mddev);
- md_trim_bio(bio,
- r1_bio->sector - bio->bi_sector,
- max_sectors);
- r1_bio->bios[r1_bio->read_disk] = bio;
- rdev = conf->mirrors[disk].rdev;
- printk_ratelimited(
- KERN_ERR
- "md/raid1:%s: redirecting sector %llu"
- " to other mirror: %s\n",
- mdname(mddev),
- (unsigned long long)r1_bio->sector,
- bdevname(rdev->bdev, b));
- bio->bi_sector = r1_bio->sector + rdev->data_offset;
- bio->bi_bdev = rdev->bdev;
- bio->bi_end_io = raid1_end_read_request;
- bio->bi_rw = READ | do_sync;
- bio->bi_private = r1_bio;
- if (max_sectors < r1_bio->sectors) {
- /* Drat - have to split this up more */
- struct bio *mbio = r1_bio->master_bio;
- int sectors_handled =
- r1_bio->sector + max_sectors
- - mbio->bi_sector;
- r1_bio->sectors = max_sectors;
- spin_lock_irq(&conf->device_lock);
- if (mbio->bi_phys_segments == 0)
- mbio->bi_phys_segments = 2;
- else
- mbio->bi_phys_segments++;
- spin_unlock_irq(&conf->device_lock);
- generic_make_request(bio);
- bio = NULL;
-
- r1_bio = mempool_alloc(conf->r1bio_pool,
- GFP_NOIO);
-
- r1_bio->master_bio = mbio;
- r1_bio->sectors = (mbio->bi_size >> 9)
- - sectors_handled;
- r1_bio->state = 0;
- set_bit(R1BIO_ReadError,
- &r1_bio->state);
- r1_bio->mddev = mddev;
- r1_bio->sector = mbio->bi_sector
- + sectors_handled;
-
- goto read_more;
- } else
- generic_make_request(bio);
- }
- } else {
+ test_bit(R1BIO_WriteError, &r1_bio->state))
+ handle_write_finished(conf, r1_bio);
+ else if (test_bit(R1BIO_ReadError, &r1_bio->state))
+ handle_read_error(conf, r1_bio);
+ else
/* just a partial read to be scheduled from separate
* context
*/
generic_make_request(r1_bio->bios[r1_bio->read_disk]);
- }
+
cond_resched();
if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
md_check_recovery(mddev);
raise_barrier(conf);
lower_barrier(conf);
- md_unregister_thread(mddev->thread);
- mddev->thread = NULL;
+ md_unregister_thread(&mddev->thread);
if (conf->r1bio_pool)
mempool_destroy(conf->r1bio_pool);
kfree(conf->mirrors);