#include "md.h"
#include "raid5.h"
#include "bitmap.h"
+#include "raid5-log.h"
/*
* metadata/data stored in disk with 4k size unit (a block) regardless
return;
pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n",
mdname(mddev));
+
+ /* wait superblock change before suspend */
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+
mddev_suspend(mddev);
log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
mddev_resume(mddev);
return ret;
}
-void r5c_update_on_rdev_error(struct mddev *mddev)
+void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
{
struct r5conf *conf = mddev->private;
struct r5l_log *log = conf->log;
if (!log)
return;
- if (raid5_calc_degraded(conf) > 0 &&
+ if ((raid5_calc_degraded(conf) > 0 ||
+ test_bit(Journal, &rdev->flags)) &&
conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
schedule_work(&log->disable_writeback_work);
}
bdevname(rdev->bdev, b),
mdname(mddev),
conf->raid_disks - mddev->degraded);
- r5c_update_on_rdev_error(mddev);
+ r5c_update_on_rdev_error(mddev, rdev);
}
/*
* When LOG_CRITICAL, stripes with injournal == 0 will be sent to
* no_space_stripes list.
*
+ * 3. during journal failure
+ * In journal failure, we try to flush all cached data to raid disks
+ * based on data in stripe cache. The array is read-only to upper
+ * layers, so we would skip all pending writes.
+ *
*/
static inline bool delay_towrite(struct r5conf *conf,
struct r5dev *dev,
if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) &&
s->injournal > 0)
return true;
+ /* case 3 above */
+ if (s->log_failed && s->injournal)
+ return true;
return false;
}
" to_write=%d failed=%d failed_num=%d,%d\n",
s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
s.failed_num[0], s.failed_num[1]);
- /* check if the array has lost more than max_degraded devices and,
+ /*
+ * check if the array has lost more than max_degraded devices and,
* if so, some requests might need to be failed.
+ *
+ * When journal device failed (log_failed), we will only process
+ * the stripe if there is data need write to raid disks
*/
- if (s.failed > conf->max_degraded || s.log_failed) {
+ if (s.failed > conf->max_degraded ||
+ (s.log_failed && s.injournal == 0)) {
sh->check_state = 0;
sh->reconstruct_state = 0;
break_stripe_batch_list(sh, 0);
struct stripe_head *sh, *tmp;
struct list_head *handle_list = NULL;
struct r5worker_group *wg;
- bool second_try = !r5c_is_writeback(conf->log);
- bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state);
+ bool second_try = !r5c_is_writeback(conf->log) &&
+ !r5l_log_disk_error(conf);
+ bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) ||
+ r5l_log_disk_error(conf);
again:
wg = NULL;
* neilb: there is no locking about new writes here,
* so this cannot be safe.
*/
- if (atomic_read(&conf->active_stripes)) {
+ if (atomic_read(&conf->active_stripes) ||
+ atomic_read(&conf->r5c_cached_full_stripes) ||
+ atomic_read(&conf->r5c_cached_partial_stripes)) {
return -EBUSY;
}
log_exit(conf);