#include <linux/seq_file.h>
#include <linux/cpu.h>
#include <linux/slab.h>
+#include <linux/ratelimit.h>
#include "md.h"
#include "raid5.h"
#include "raid0.h"
#define __inline__
#endif
-#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args)))
-
/*
* We maintain a biased count of active stripes in the bottom 16 bits of
* bi_phys_segments, and a count of processed stripes in the upper 16 bits
(unsigned long long)sh->sector, i, dev->toread,
dev->read, dev->towrite, dev->written,
test_bit(R5_LOCKED, &dev->flags));
- BUG();
+ WARN_ON(1);
}
dev->flags = 0;
raid5_build_block(sh, i, previous);
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
+ /* We have already checked bad blocks for reads. Now
+ * need to check for writes.
+ */
+ while ((rw & WRITE) && rdev &&
+ test_bit(WriteErrorSeen, &rdev->flags)) {
+ sector_t first_bad;
+ int bad_sectors;
+ int bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+ &first_bad, &bad_sectors);
+ if (!bad)
+ break;
+
+ if (bad < 0) {
+ set_bit(BlockedBadBlocks, &rdev->flags);
+ if (!conf->mddev->external &&
+ conf->mddev->flags) {
+ /* It is very unlikely, but we might
+ * still need to write out the
+ * bad block log - better give it
+ * a chance*/
+ md_check_recovery(conf->mddev);
+ }
+ md_wait_for_blocked_rdev(rdev, conf->mddev);
+ } else {
+ /* Acknowledged bad block - skip the write */
+ rdev_dec_pending(rdev, conf->mddev);
+ rdev = NULL;
+ }
+ }
+
if (rdev) {
if (s->syncing || s->expanding || s->expanded)
md_sync_acct(rdev->bdev, STRIPE_SECTORS);
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_size = STRIPE_SIZE;
bi->bi_next = NULL;
- if ((rw & WRITE) &&
- test_bit(R5_ReWrite, &sh->dev[i].flags))
- atomic_add(STRIPE_SECTORS,
- &rdev->corrected_errors);
generic_make_request(bi);
} else {
if (rw & WRITE)
set_bit(R5_UPTODATE, &sh->dev[i].flags);
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
rdev = conf->disks[i].rdev;
- printk_rl(KERN_INFO "md/raid:%s: read error corrected"
- " (%lu sectors at %llu on %s)\n",
- mdname(conf->mddev), STRIPE_SECTORS,
- (unsigned long long)(sh->sector
- + rdev->data_offset),
- bdevname(rdev->bdev, b));
+ printk_ratelimited(
+ KERN_INFO
+ "md/raid:%s: read error corrected"
+ " (%lu sectors at %llu on %s)\n",
+ mdname(conf->mddev), STRIPE_SECTORS,
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdevname(rdev->bdev, b));
+ atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
clear_bit(R5_ReadError, &sh->dev[i].flags);
clear_bit(R5_ReWrite, &sh->dev[i].flags);
}
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
atomic_inc(&rdev->read_errors);
if (conf->mddev->degraded >= conf->max_degraded)
- printk_rl(KERN_WARNING
- "md/raid:%s: read error not correctable "
- "(sector %llu on %s).\n",
- mdname(conf->mddev),
- (unsigned long long)(sh->sector
- + rdev->data_offset),
- bdn);
+ printk_ratelimited(
+ KERN_WARNING
+ "md/raid:%s: read error not correctable "
+ "(sector %llu on %s).\n",
+ mdname(conf->mddev),
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdn);
else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
/* Oh, no!!! */
- printk_rl(KERN_WARNING
- "md/raid:%s: read error NOT corrected!! "
- "(sector %llu on %s).\n",
- mdname(conf->mddev),
- (unsigned long long)(sh->sector
- + rdev->data_offset),
- bdn);
+ printk_ratelimited(
+ KERN_WARNING
+ "md/raid:%s: read error NOT corrected!! "
+ "(sector %llu on %s).\n",
+ mdname(conf->mddev),
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdn);
else if (atomic_read(&rdev->read_errors)
> conf->max_nr_stripes)
printk(KERN_WARNING
raid5_conf_t *conf = sh->raid_conf;
int disks = sh->disks, i;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
+ sector_t first_bad;
+ int bad_sectors;
for (i=0 ; i<disks; i++)
if (bi == &sh->dev[i].req)
return;
}
- if (!uptodate)
- md_error(conf->mddev, conf->disks[i].rdev);
+ if (!uptodate) {
+ set_bit(WriteErrorSeen, &conf->disks[i].rdev->flags);
+ set_bit(R5_WriteError, &sh->dev[i].flags);
+ } else if (is_badblock(conf->disks[i].rdev, sh->sector, STRIPE_SECTORS,
+ &first_bad, &bad_sectors))
+ set_bit(R5_MadeGood, &sh->dev[i].flags);
rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
}
+ set_bit(Blocked, &rdev->flags);
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT
rcu_read_lock();
rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && test_bit(In_sync, &rdev->flags))
- /* multiple read failures in one stripe */
- md_error(conf->mddev, rdev);
+ atomic_inc(&rdev->nr_pending);
+ else
+ rdev = NULL;
rcu_read_unlock();
+ if (rdev) {
+ if (!rdev_set_badblocks(
+ rdev,
+ sh->sector,
+ STRIPE_SECTORS, 0))
+ md_error(conf->mddev, rdev);
+ rdev_dec_pending(rdev, conf->mddev);
+ }
}
spin_lock_irq(&conf->device_lock);
/* fail all writes first */
if (bitmap_end)
bitmap_endwrite(conf->mddev->bitmap, sh->sector,
STRIPE_SECTORS, 0, 0);
+ /* If we were in the middle of a write the parity block might
+ * still be locked - so just clear all R5_LOCKED flags
+ */
+ clear_bit(R5_LOCKED, &sh->dev[i].flags);
}
if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
md_wakeup_thread(conf->mddev->thread);
}
+static void
+handle_failed_sync(raid5_conf_t *conf, struct stripe_head *sh,
+ struct stripe_head_state *s)
+{
+ int abort = 0;
+ int i;
+
+ md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
+ clear_bit(STRIPE_SYNCING, &sh->state);
+ s->syncing = 0;
+ /* There is nothing more to do for sync/check/repair.
+ * For recover we need to record a bad block on all
+ * non-sync devices, or abort the recovery
+ */
+ if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
+ return;
+ /* During recovery devices cannot be removed, so locking and
+ * refcounting of rdevs is not needed
+ */
+ for (i = 0; i < conf->raid_disks; i++) {
+ mdk_rdev_t *rdev = conf->disks[i].rdev;
+ if (!rdev
+ || test_bit(Faulty, &rdev->flags)
+ || test_bit(In_sync, &rdev->flags))
+ continue;
+ if (!rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ abort = 1;
+ }
+ if (abort) {
+ conf->recovery_disabled = conf->mddev->recovery_disabled;
+ set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
+ }
+}
+
/* fetch_block - checks the given member device to see if its data needs
* to be read or computed to satisfy a request.
*
*
*/
-static int handle_stripe5(struct stripe_head *sh, struct stripe_head_state *s)
+static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
{
raid5_conf_t *conf = sh->raid_conf;
- int disks = sh->disks, i;
+ int disks = sh->disks;
struct r5dev *dev;
+ int i;
- /* Now to look around and see what can be done */
- rcu_read_lock();
- spin_lock_irq(&conf->device_lock);
- for (i=disks; i--; ) {
- mdk_rdev_t *rdev;
-
- dev = &sh->dev[i];
-
- pr_debug("check %d: state 0x%lx toread %p read %p write %p "
- "written %p\n", i, dev->flags, dev->toread, dev->read,
- dev->towrite, dev->written);
-
- /* maybe we can request a biofill operation
- *
- * new wantfill requests are only permitted while
- * ops_complete_biofill is guaranteed to be inactive
- */
- if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
- !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
- set_bit(R5_Wantfill, &dev->flags);
-
- /* now count some things */
- if (test_bit(R5_LOCKED, &dev->flags))
- s->locked++;
- if (test_bit(R5_UPTODATE, &dev->flags))
- s->uptodate++;
- if (test_bit(R5_Wantcompute, &dev->flags))
- s->compute++;
-
- if (test_bit(R5_Wantfill, &dev->flags))
- s->to_fill++;
- else if (dev->toread)
- s->to_read++;
- if (dev->towrite) {
- s->to_write++;
- if (!test_bit(R5_OVERWRITE, &dev->flags))
- s->non_overwrite++;
- }
- if (dev->written)
- s->written++;
- rdev = rcu_dereference(conf->disks[i].rdev);
- if (s->blocked_rdev == NULL &&
- rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
- s->blocked_rdev = rdev;
- atomic_inc(&rdev->nr_pending);
- }
- clear_bit(R5_Insync, &dev->flags);
- if (!rdev)
- /* Not in-sync */;
- else if (test_bit(In_sync, &rdev->flags))
- set_bit(R5_Insync, &dev->flags);
- else {
- /* could be in-sync depending on recovery/reshape status */
- if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
- set_bit(R5_Insync, &dev->flags);
- }
- if (!test_bit(R5_Insync, &dev->flags)) {
- /* The ReadError flag will just be confusing now */
- clear_bit(R5_ReadError, &dev->flags);
- clear_bit(R5_ReWrite, &dev->flags);
- }
- if (test_bit(R5_ReadError, &dev->flags))
- clear_bit(R5_Insync, &dev->flags);
- if (!test_bit(R5_Insync, &dev->flags)) {
- s->failed++;
- s->failed_num[0] = i;
- }
- }
- spin_unlock_irq(&conf->device_lock);
- rcu_read_unlock();
-
- if (unlikely(s->blocked_rdev)) {
- if (s->syncing || s->expanding || s->expanded ||
- s->to_write || s->written) {
- set_bit(STRIPE_HANDLE, &sh->state);
- return 1;
- }
- /* There is nothing for the blocked_rdev to block */
- rdev_dec_pending(s->blocked_rdev, conf->mddev);
- s->blocked_rdev = NULL;
- }
-
- if (s->to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
- set_bit(STRIPE_OP_BIOFILL, &s->ops_request);
- set_bit(STRIPE_BIOFILL_RUN, &sh->state);
- }
-
- pr_debug("locked=%d uptodate=%d to_read=%d"
- " to_write=%d failed=%d failed_num=%d\n",
- s->locked, s->uptodate, s->to_read, s->to_write,
- s->failed, s->failed_num[0]);
- /* check if the array has lost two devices and, if so, some requests might
- * need to be failed
- */
- if (s->failed > 1 && s->to_read+s->to_write+s->written)
- handle_failed_stripe(conf, sh, s, disks, &s->return_bi);
- if (s->failed > 1 && s->syncing) {
- md_done_sync(conf->mddev, STRIPE_SECTORS,0);
- clear_bit(STRIPE_SYNCING, &sh->state);
- s->syncing = 0;
- }
-
- /* might be able to return some write requests if the parity block
- * is safe, or on a failed drive
- */
- dev = &sh->dev[sh->pd_idx];
- if (s->written &&
- ((test_bit(R5_Insync, &dev->flags) &&
- !test_bit(R5_LOCKED, &dev->flags) &&
- test_bit(R5_UPTODATE, &dev->flags)) ||
- (s->failed == 1 && s->failed_num[0] == sh->pd_idx)))
- handle_stripe_clean_event(conf, sh, disks, &s->return_bi);
-
- /* Now we might consider reading some blocks, either to check/generate
- * parity, or to satisfy requests
- * or to load a block that is being partially written.
- */
- if (s->to_read || s->non_overwrite ||
- (s->syncing && (s->uptodate + s->compute < disks)) || s->expanding)
- handle_stripe_fill(sh, s, disks);
-
- return 0;
-}
+ memset(s, 0, sizeof(*s));
-static int handle_stripe6(struct stripe_head *sh, struct stripe_head_state *s)
-{
- raid5_conf_t *conf = sh->raid_conf;
- int disks = sh->disks;
- int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx;
- struct r5dev *dev, *pdev, *qdev;
+ s->syncing = test_bit(STRIPE_SYNCING, &sh->state);
+ s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+ s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+ s->failed_num[0] = -1;
+ s->failed_num[1] = -1;
/* Now to look around and see what can be done */
-
rcu_read_lock();
spin_lock_irq(&conf->device_lock);
for (i=disks; i--; ) {
mdk_rdev_t *rdev;
+ sector_t first_bad;
+ int bad_sectors;
+ int is_bad = 0;
+
dev = &sh->dev[i];
pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
BUG_ON(s->compute > 2);
}
- if (test_bit(R5_Wantfill, &dev->flags)) {
+ if (test_bit(R5_Wantfill, &dev->flags))
s->to_fill++;
- } else if (dev->toread)
+ else if (dev->toread)
s->to_read++;
if (dev->towrite) {
s->to_write++;
if (dev->written)
s->written++;
rdev = rcu_dereference(conf->disks[i].rdev);
- if (s->blocked_rdev == NULL &&
- rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
- s->blocked_rdev = rdev;
- atomic_inc(&rdev->nr_pending);
+ if (rdev) {
+ is_bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+ &first_bad, &bad_sectors);
+ if (s->blocked_rdev == NULL
+ && (test_bit(Blocked, &rdev->flags)
+ || is_bad < 0)) {
+ if (is_bad < 0)
+ set_bit(BlockedBadBlocks,
+ &rdev->flags);
+ s->blocked_rdev = rdev;
+ atomic_inc(&rdev->nr_pending);
+ }
}
clear_bit(R5_Insync, &dev->flags);
if (!rdev)
/* Not in-sync */;
- else if (test_bit(In_sync, &rdev->flags))
+ else if (is_bad) {
+ /* also not in-sync */
+ if (!test_bit(WriteErrorSeen, &rdev->flags)) {
+ /* treat as in-sync, but with a read error
+ * which we can now try to correct
+ */
+ set_bit(R5_Insync, &dev->flags);
+ set_bit(R5_ReadError, &dev->flags);
+ }
+ } else if (test_bit(In_sync, &rdev->flags))
set_bit(R5_Insync, &dev->flags);
else {
/* in sync if before recovery_offset */
if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
set_bit(R5_Insync, &dev->flags);
}
+ if (test_bit(R5_WriteError, &dev->flags)) {
+ clear_bit(R5_Insync, &dev->flags);
+ if (!test_bit(Faulty, &rdev->flags)) {
+ s->handle_bad_blocks = 1;
+ atomic_inc(&rdev->nr_pending);
+ } else
+ clear_bit(R5_WriteError, &dev->flags);
+ }
+ if (test_bit(R5_MadeGood, &dev->flags)) {
+ if (!test_bit(Faulty, &rdev->flags)) {
+ s->handle_bad_blocks = 1;
+ atomic_inc(&rdev->nr_pending);
+ } else
+ clear_bit(R5_MadeGood, &dev->flags);
+ }
if (!test_bit(R5_Insync, &dev->flags)) {
/* The ReadError flag will just be confusing now */
clear_bit(R5_ReadError, &dev->flags);
}
spin_unlock_irq(&conf->device_lock);
rcu_read_unlock();
-
- if (unlikely(s->blocked_rdev)) {
- if (s->syncing || s->expanding || s->expanded ||
- s->to_write || s->written) {
- set_bit(STRIPE_HANDLE, &sh->state);
- return 1;
- }
- /* There is nothing for the blocked_rdev to block */
- rdev_dec_pending(s->blocked_rdev, conf->mddev);
- s->blocked_rdev = NULL;
- }
-
- if (s->to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
- set_bit(STRIPE_OP_BIOFILL, &s->ops_request);
- set_bit(STRIPE_BIOFILL_RUN, &sh->state);
- }
-
- pr_debug("locked=%d uptodate=%d to_read=%d"
- " to_write=%d failed=%d failed_num=%d,%d\n",
- s->locked, s->uptodate, s->to_read, s->to_write, s->failed,
- s->failed_num[0], s->failed_num[1]);
- /* check if the array has lost >2 devices and, if so, some requests
- * might need to be failed
- */
- if (s->failed > 2 && s->to_read+s->to_write+s->written)
- handle_failed_stripe(conf, sh, s, disks, &s->return_bi);
- if (s->failed > 2 && s->syncing) {
- md_done_sync(conf->mddev, STRIPE_SECTORS,0);
- clear_bit(STRIPE_SYNCING, &sh->state);
- s->syncing = 0;
- }
-
- /*
- * might be able to return some write requests if the parity blocks
- * are safe, or on a failed drive
- */
- pdev = &sh->dev[pd_idx];
- s->p_failed = (s->failed >= 1 && s->failed_num[0] == pd_idx)
- || (s->failed >= 2 && s->failed_num[1] == pd_idx);
- qdev = &sh->dev[qd_idx];
- s->q_failed = (s->failed >= 1 && s->failed_num[0] == qd_idx)
- || (s->failed >= 2 && s->failed_num[1] == qd_idx);
-
- if (s->written &&
- (s->p_failed || ((test_bit(R5_Insync, &pdev->flags)
- && !test_bit(R5_LOCKED, &pdev->flags)
- && test_bit(R5_UPTODATE, &pdev->flags)))) &&
- (s->q_failed || ((test_bit(R5_Insync, &qdev->flags)
- && !test_bit(R5_LOCKED, &qdev->flags)
- && test_bit(R5_UPTODATE, &qdev->flags)))))
- handle_stripe_clean_event(conf, sh, disks, &s->return_bi);
-
- /* Now we might consider reading some blocks, either to check/generate
- * parity, or to satisfy requests
- * or to load a block that is being partially written.
- */
- if (s->to_read || s->non_overwrite || (s->to_write && s->failed) ||
- (s->syncing && (s->uptodate + s->compute < disks)) || s->expanding)
- handle_stripe_fill(sh, s, disks);
-
- return 0;
}
static void handle_stripe(struct stripe_head *sh)
{
struct stripe_head_state s;
- int done;
+ raid5_conf_t *conf = sh->raid_conf;
int i;
int prexor;
int disks = sh->disks;
- raid5_conf_t *conf = sh->raid_conf;
+ struct r5dev *pdev, *qdev;
clear_bit(STRIPE_HANDLE, &sh->state);
if (test_and_set_bit(STRIPE_ACTIVE, &sh->state)) {
(unsigned long long)sh->sector, sh->state,
atomic_read(&sh->count), sh->pd_idx, sh->qd_idx,
sh->check_state, sh->reconstruct_state);
- memset(&s, 0, sizeof(s));
- s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
- s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
- s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
- s.failed_num[0] = -1;
- s.failed_num[1] = -1;
+ analyse_stripe(sh, &s);
- if (conf->level == 6)
- done = handle_stripe6(sh, &s);
- else
- done = handle_stripe5(sh, &s);
-
- if (done)
+ if (s.handle_bad_blocks) {
+ set_bit(STRIPE_HANDLE, &sh->state);
goto finish;
+ }
+
+ if (unlikely(s.blocked_rdev)) {
+ if (s.syncing || s.expanding || s.expanded ||
+ s.to_write || s.written) {
+ set_bit(STRIPE_HANDLE, &sh->state);
+ goto finish;
+ }
+ /* There is nothing for the blocked_rdev to block */
+ rdev_dec_pending(s.blocked_rdev, conf->mddev);
+ s.blocked_rdev = NULL;
+ }
+
+ if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
+ set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
+ set_bit(STRIPE_BIOFILL_RUN, &sh->state);
+ }
+
+ pr_debug("locked=%d uptodate=%d to_read=%d"
+ " to_write=%d failed=%d failed_num=%d,%d\n",
+ s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
+ s.failed_num[0], s.failed_num[1]);
+ /* check if the array has lost more than max_degraded devices and,
+ * if so, some requests might need to be failed.
+ */
+ if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written)
+ handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
+ if (s.failed > conf->max_degraded && s.syncing)
+ handle_failed_sync(conf, sh, &s);
+
+ /*
+ * might be able to return some write requests if the parity blocks
+ * are safe, or on a failed drive
+ */
+ pdev = &sh->dev[sh->pd_idx];
+ s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
+ || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
+ qdev = &sh->dev[sh->qd_idx];
+ s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
+ || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
+ || conf->level < 6;
+
+ if (s.written &&
+ (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
+ && !test_bit(R5_LOCKED, &pdev->flags)
+ && test_bit(R5_UPTODATE, &pdev->flags)))) &&
+ (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
+ && !test_bit(R5_LOCKED, &qdev->flags)
+ && test_bit(R5_UPTODATE, &qdev->flags)))))
+ handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
+
+ /* Now we might consider reading some blocks, either to check/generate
+ * parity, or to satisfy requests
+ * or to load a block that is being partially written.
+ */
+ if (s.to_read || s.non_overwrite
+ || (conf->level == 6 && s.to_write && s.failed)
+ || (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
+ handle_stripe_fill(sh, &s, disks);
/* Now we check to see if any write operations have recently
* completed
finish:
/* wait for this device to become unblocked */
- if (unlikely(s.blocked_rdev))
+ if (conf->mddev->external && unlikely(s.blocked_rdev))
md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
+ if (s.handle_bad_blocks)
+ for (i = disks; i--; ) {
+ mdk_rdev_t *rdev;
+ struct r5dev *dev = &sh->dev[i];
+ if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
+ /* We own a safe reference to the rdev */
+ rdev = conf->disks[i].rdev;
+ if (!rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ md_error(conf->mddev, rdev);
+ rdev_dec_pending(rdev, conf->mddev);
+ }
+ if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
+ rdev = conf->disks[i].rdev;
+ rdev_clear_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS);
+ rdev_dec_pending(rdev, conf->mddev);
+ }
+ }
+
if (s.ops_request)
raid_run_ops(sh, s.ops_request);
ops_run_io(sh, &s);
-
if (s.dec_preread_active) {
/* We delay this until after ops_run_io so that if make_request
* is waiting on a flush, it won't continue until the writes
rcu_read_lock();
rdev = rcu_dereference(conf->disks[dd_idx].rdev);
if (rdev && test_bit(In_sync, &rdev->flags)) {
+ sector_t first_bad;
+ int bad_sectors;
+
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
raid_bio->bi_next = (void*)rdev;
align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
align_bi->bi_sector += rdev->data_offset;
- if (!bio_fits_rdev(align_bi)) {
- /* too big in some way */
+ if (!bio_fits_rdev(align_bi) ||
+ is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
+ &first_bad, &bad_sectors)) {
+ /* too big in some way, or has a known bad block */
bio_put(align_bi);
rdev_dec_pending(rdev, mddev);
return 0;
release_stripe(sh);
cond_resched();
+ if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+ md_check_recovery(mddev);
+
spin_lock_irq(&conf->device_lock);
}
pr_debug("%d stripes handled\n", handled);
return 0;
abort:
- md_unregister_thread(mddev->thread);
- mddev->thread = NULL;
+ md_unregister_thread(&mddev->thread);
if (conf) {
print_raid5_conf(conf);
free_conf(conf);
{
raid5_conf_t *conf = mddev->private;
- md_unregister_thread(mddev->thread);
- mddev->thread = NULL;
+ md_unregister_thread(&mddev->thread);
if (mddev->queue)
mddev->queue->backing_dev_info.congested_fn = NULL;
free_conf(conf);
* isn't possible.
*/
if (!test_bit(Faulty, &rdev->flags) &&
+ mddev->recovery_disabled != conf->recovery_disabled &&
!has_failed(conf) &&
number < conf->raid_disks) {
err = -EBUSY;
int first = 0;
int last = conf->raid_disks - 1;
+ if (mddev->recovery_disabled == conf->recovery_disabled)
+ return -EBUSY;
+
if (has_failed(conf))
/* no point adding a device */
return -EINVAL;
if (rdev->raid_disk < 0 &&
!test_bit(Faulty, &rdev->flags)) {
if (raid5_add_disk(mddev, rdev) == 0) {
- char nm[20];
if (rdev->raid_disk
>= conf->previous_raid_disks) {
set_bit(In_sync, &rdev->flags);
added_devices++;
} else
rdev->recovery_offset = 0;
- sprintf(nm, "rd%d", rdev->raid_disk);
- if (sysfs_create_link(&mddev->kobj,
- &rdev->kobj, nm))
+
+ if (sysfs_link_rdev(mddev, rdev))
/* Failure here is OK */;
}
} else if (rdev->raid_disk >= conf->previous_raid_disks
d++) {
mdk_rdev_t *rdev = conf->disks[d].rdev;
if (rdev && raid5_remove_disk(mddev, d) == 0) {
- char nm[20];
- sprintf(nm, "rd%d", rdev->raid_disk);
- sysfs_remove_link(&mddev->kobj, nm);
+ sysfs_unlink_rdev(mddev, rdev);
rdev->raid_disk = -1;
}
}