]> git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - drivers/md/raid5-cache.c
Merge branches 'pm-cpuidle', 'pm-cpufreq' and 'pm-sleep'
[karo-tx-linux.git] / drivers / md / raid5-cache.c
index 0e8ed2c327b07fd849c1720d7b272dd860b949b9..302dea3296ba5ccd07740365314f45d74df49ec2 100644 (file)
@@ -162,6 +162,8 @@ struct r5l_log {
 
        /* to submit async io_units, to fulfill ordering of flush */
        struct work_struct deferred_io_work;
+       /* to disable write back during in degraded mode */
+       struct work_struct disable_writeback_work;
 };
 
 /*
@@ -611,6 +613,21 @@ static void r5l_submit_io_async(struct work_struct *work)
                r5l_do_submit_io(log, io);
 }
 
+static void r5c_disable_writeback_async(struct work_struct *work)
+{
+       struct r5l_log *log = container_of(work, struct r5l_log,
+                                          disable_writeback_work);
+       struct mddev *mddev = log->rdev->mddev;
+
+       if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
+               return;
+       pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n",
+               mdname(mddev));
+       mddev_suspend(mddev);
+       log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
+       mddev_resume(mddev);
+}
+
 static void r5l_submit_current_io(struct r5l_log *log)
 {
        struct r5l_io_unit *io = log->current_io;
@@ -1393,8 +1410,6 @@ static void r5l_do_reclaim(struct r5l_log *log)
        next_checkpoint = r5c_calculate_new_cp(conf);
        spin_unlock_irq(&log->io_list_lock);
 
-       BUG_ON(reclaimable < 0);
-
        if (reclaimable == 0 || !write_super)
                return;
 
@@ -2062,7 +2077,7 @@ static int
 r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
                                       struct r5l_recovery_ctx *ctx)
 {
-       struct stripe_head *sh, *next;
+       struct stripe_head *sh;
        struct mddev *mddev = log->rdev->mddev;
        struct page *page;
        sector_t next_checkpoint = MaxSector;
@@ -2076,7 +2091,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 
        WARN_ON(list_empty(&ctx->cached_list));
 
-       list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
+       list_for_each_entry(sh, &ctx->cached_list, lru) {
                struct r5l_meta_block *mb;
                int i;
                int offset;
@@ -2126,14 +2141,39 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
                ctx->pos = write_pos;
                ctx->seq += 1;
                next_checkpoint = sh->log_start;
-               list_del_init(&sh->lru);
-               raid5_release_stripe(sh);
        }
        log->next_checkpoint = next_checkpoint;
        __free_page(page);
        return 0;
 }
 
+static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
+                                                struct r5l_recovery_ctx *ctx)
+{
+       struct mddev *mddev = log->rdev->mddev;
+       struct r5conf *conf = mddev->private;
+       struct stripe_head *sh, *next;
+
+       if (ctx->data_only_stripes == 0)
+               return;
+
+       log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_BACK;
+
+       list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
+               r5c_make_stripe_write_out(sh);
+               set_bit(STRIPE_HANDLE, &sh->state);
+               list_del_init(&sh->lru);
+               raid5_release_stripe(sh);
+       }
+
+       md_wakeup_thread(conf->mddev->thread);
+       /* reuse conf->wait_for_quiescent in recovery */
+       wait_event(conf->wait_for_quiescent,
+                  atomic_read(&conf->active_stripes) == 0);
+
+       log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
+}
+
 static int r5l_recovery_log(struct r5l_log *log)
 {
        struct mddev *mddev = log->rdev->mddev;
@@ -2160,32 +2200,31 @@ static int r5l_recovery_log(struct r5l_log *log)
        pos = ctx.pos;
        ctx.seq += 10000;
 
-       if (ctx.data_only_stripes == 0) {
-               log->next_checkpoint = ctx.pos;
-               r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
-               ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
-       }
 
        if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
                pr_debug("md/raid:%s: starting from clean shutdown\n",
                         mdname(mddev));
-       else {
+       else
                pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
                         mdname(mddev), ctx.data_only_stripes,
                         ctx.data_parity_stripes);
 
-               if (ctx.data_only_stripes > 0)
-                       if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
-                               pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
-                                      mdname(mddev));
-                               return -EIO;
-                       }
+       if (ctx.data_only_stripes == 0) {
+               log->next_checkpoint = ctx.pos;
+               r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
+               ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
+       } else if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
+               pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
+                      mdname(mddev));
+               return -EIO;
        }
 
        log->log_start = ctx.pos;
        log->seq = ctx.seq;
        log->last_checkpoint = pos;
        r5l_write_super(log, pos);
+
+       r5c_recovery_flush_data_only_stripes(log, &ctx);
        return 0;
 }
 
@@ -2247,6 +2286,10 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev,
            val > R5C_JOURNAL_MODE_WRITE_BACK)
                return -EINVAL;
 
+       if (raid5_calc_degraded(conf) > 0 &&
+           val == R5C_JOURNAL_MODE_WRITE_BACK)
+               return -EINVAL;
+
        mddev_suspend(mddev);
        conf->log->r5c_journal_mode = val;
        mddev_resume(mddev);
@@ -2301,6 +2344,16 @@ int r5c_try_caching_write(struct r5conf *conf,
                set_bit(STRIPE_R5C_CACHING, &sh->state);
        }
 
+       /*
+        * When run in degraded mode, array is set to write-through mode.
+        * This check helps drain pending write safely in the transition to
+        * write-through mode.
+        */
+       if (s->failed) {
+               r5c_make_stripe_write_out(sh);
+               return -EAGAIN;
+       }
+
        for (i = disks; i--; ) {
                dev = &sh->dev[i];
                /* if non-overwrite, use writing-out phase */
@@ -2351,6 +2404,8 @@ void r5c_release_extra_page(struct stripe_head *sh)
                        struct page *p = sh->dev[i].orig_page;
 
                        sh->dev[i].orig_page = sh->dev[i].page;
+                       clear_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags);
+
                        if (!using_disk_info_extra_page)
                                put_page(p);
                }
@@ -2555,6 +2610,19 @@ ioerr:
        return ret;
 }
 
+void r5c_update_on_rdev_error(struct mddev *mddev)
+{
+       struct r5conf *conf = mddev->private;
+       struct r5l_log *log = conf->log;
+
+       if (!log)
+               return;
+
+       if (raid5_calc_degraded(conf) > 0 &&
+           conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
+               schedule_work(&log->disable_writeback_work);
+}
+
 int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 {
        struct request_queue *q = bdev_get_queue(rdev->bdev);
@@ -2627,6 +2695,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
        spin_lock_init(&log->no_space_stripes_lock);
 
        INIT_WORK(&log->deferred_io_work, r5l_submit_io_async);
+       INIT_WORK(&log->disable_writeback_work, r5c_disable_writeback_async);
 
        log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
        INIT_LIST_HEAD(&log->stripe_in_journal_list);
@@ -2659,6 +2728,7 @@ io_kc:
 
 void r5l_exit_log(struct r5l_log *log)
 {
+       flush_work(&log->disable_writeback_work);
        md_unregister_thread(&log->reclaim_thread);
        mempool_destroy(log->meta_pool);
        bioset_free(log->bs);