]> git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - drivers/md/raid5.c
Merge remote-tracking branch 'md/for-next'
[karo-tx-linux.git] / drivers / md / raid5.c
index 45933c1606972c007fee4393b82c05db7b24f7ec..704ef7fcfbf83239ff7ac119d37354576dea5221 100644 (file)
@@ -353,7 +353,7 @@ static void release_inactive_stripe_list(struct r5conf *conf,
                struct list_head *list = &temp_inactive_list[size - 1];
 
                /*
-                * We don't hold any lock here yet, get_active_stripe() might
+                * We don't hold any lock here yet, raid5_get_active_stripe() might
                 * remove stripes from the list
                 */
                if (!list_empty_careful(list)) {
@@ -413,7 +413,7 @@ static int release_stripe_list(struct r5conf *conf,
        return count;
 }
 
-static void release_stripe(struct stripe_head *sh)
+void raid5_release_stripe(struct stripe_head *sh)
 {
        struct r5conf *conf = sh->raid_conf;
        unsigned long flags;
@@ -658,9 +658,9 @@ static int has_failed(struct r5conf *conf)
        return 0;
 }
 
-static struct stripe_head *
-get_active_stripe(struct r5conf *conf, sector_t sector,
-                 int previous, int noblock, int noquiesce)
+struct stripe_head *
+raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
+                       int previous, int noblock, int noquiesce)
 {
        struct stripe_head *sh;
        int hash = stripe_hash_locks_hash(sector);
@@ -755,6 +755,10 @@ static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 /* Only freshly new full stripe normal write stripe can be added to a batch list */
 static bool stripe_can_batch(struct stripe_head *sh)
 {
+       struct r5conf *conf = sh->raid_conf;
+
+       if (conf->log)
+               return false;
        return test_bit(STRIPE_BATCH_READY, &sh->state) &&
                !test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
                is_full_stripe_write(sh);
@@ -858,7 +862,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
 unlock_out:
        unlock_two_stripes(head, sh);
 out:
-       release_stripe(head);
+       raid5_release_stripe(head);
 }
 
 /* Determine if 'data_offset' or 'new_data_offset' should be used
@@ -895,6 +899,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 
        might_sleep();
 
+       if (r5l_write_stripe(conf->log, sh) == 0)
+               return;
        for (i = disks; i--; ) {
                int rw;
                int replace_only = 0;
@@ -1208,7 +1214,7 @@ static void ops_complete_biofill(void *stripe_head_ref)
        return_io(&return_bi);
 
        set_bit(STRIPE_HANDLE, &sh->state);
-       release_stripe(sh);
+       raid5_release_stripe(sh);
 }
 
 static void ops_run_biofill(struct stripe_head *sh)
@@ -1271,7 +1277,7 @@ static void ops_complete_compute(void *stripe_head_ref)
        if (sh->check_state == check_state_compute_run)
                sh->check_state = check_state_compute_result;
        set_bit(STRIPE_HANDLE, &sh->state);
-       release_stripe(sh);
+       raid5_release_stripe(sh);
 }
 
 /* return a pointer to the address conversion region of the scribble buffer */
@@ -1697,7 +1703,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
        }
 
        set_bit(STRIPE_HANDLE, &sh->state);
-       release_stripe(sh);
+       raid5_release_stripe(sh);
 }
 
 static void
@@ -1855,7 +1861,7 @@ static void ops_complete_check(void *stripe_head_ref)
 
        sh->check_state = check_state_check_result;
        set_bit(STRIPE_HANDLE, &sh->state);
-       release_stripe(sh);
+       raid5_release_stripe(sh);
 }
 
 static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
@@ -2017,7 +2023,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
        /* we just created an active stripe so... */
        atomic_inc(&conf->active_stripes);
 
-       release_stripe(sh);
+       raid5_release_stripe(sh);
        conf->max_nr_stripes++;
        return 1;
 }
@@ -2236,7 +2242,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
                                if (!p)
                                        err = -ENOMEM;
                        }
-               release_stripe(nsh);
+               raid5_release_stripe(nsh);
        }
        /* critical section pass, GFP_NOIO no longer needed */
 
@@ -2394,7 +2400,7 @@ static void raid5_end_read_request(struct bio * bi)
        rdev_dec_pending(rdev, conf->mddev);
        clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
-       release_stripe(sh);
+       raid5_release_stripe(sh);
 }
 
 static void raid5_end_write_request(struct bio *bi)
@@ -2468,14 +2474,12 @@ static void raid5_end_write_request(struct bio *bi)
        if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
                clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
-       release_stripe(sh);
+       raid5_release_stripe(sh);
 
        if (sh->batch_head && sh != sh->batch_head)
-               release_stripe(sh->batch_head);
+               raid5_release_stripe(sh->batch_head);
 }
 
-static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
-
 static void raid5_build_block(struct stripe_head *sh, int i, int previous)
 {
        struct r5dev *dev = &sh->dev[i];
@@ -2491,7 +2495,7 @@ static void raid5_build_block(struct stripe_head *sh, int i, int previous)
        dev->rreq.bi_private = sh;
 
        dev->flags = 0;
-       dev->sector = compute_blocknr(sh, i, previous);
+       dev->sector = raid5_compute_blocknr(sh, i, previous);
 }
 
 static void error(struct mddev *mddev, struct md_rdev *rdev)
@@ -2524,9 +2528,9 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
  * Input: a 'big' sector number,
  * Output: index of the data and parity disk, and the sector # in them.
  */
-static sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
-                                    int previous, int *dd_idx,
-                                    struct stripe_head *sh)
+sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
+                             int previous, int *dd_idx,
+                             struct stripe_head *sh)
 {
        sector_t stripe, stripe2;
        sector_t chunk_number;
@@ -2726,7 +2730,7 @@ static sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
        return new_sector;
 }
 
-static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
+sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
 {
        struct r5conf *conf = sh->raid_conf;
        int raid_disks = sh->disks;
@@ -3098,6 +3102,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                if (bi)
                        bitmap_end = 1;
 
+               r5l_stripe_write_finished(sh);
+
                if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
                        wake_up(&conf->wait_for_overlap);
 
@@ -3141,6 +3147,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                 * the data has not reached the cache yet.
                 */
                if (!test_bit(R5_Wantfill, &sh->dev[i].flags) &&
+                   s->failed > conf->max_degraded &&
                    (!test_bit(R5_Insync, &sh->dev[i].flags) ||
                      test_bit(R5_ReadError, &sh->dev[i].flags))) {
                        spin_lock_irq(&sh->stripe_lock);
@@ -3497,6 +3504,9 @@ returnbi:
                        WARN_ON(test_bit(R5_SkipCopy, &dev->flags));
                        WARN_ON(dev->page != dev->orig_page);
                }
+
+       r5l_stripe_write_finished(sh);
+
        if (!discard_pending &&
            test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
                int hash;
@@ -3939,10 +3949,10 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
                        struct stripe_head *sh2;
                        struct async_submit_ctl submit;
 
-                       sector_t bn = compute_blocknr(sh, i, 1);
+                       sector_t bn = raid5_compute_blocknr(sh, i, 1);
                        sector_t s = raid5_compute_sector(conf, bn, 0,
                                                          &dd_idx, NULL);
-                       sh2 = get_active_stripe(conf, s, 0, 1, 1);
+                       sh2 = raid5_get_active_stripe(conf, s, 0, 1, 1);
                        if (sh2 == NULL)
                                /* so far only the early blocks of this stripe
                                 * have been requested.  When later blocks
@@ -3952,7 +3962,7 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
                        if (!test_bit(STRIPE_EXPANDING, &sh2->state) ||
                           test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) {
                                /* must have already done this block */
-                               release_stripe(sh2);
+                               raid5_release_stripe(sh2);
                                continue;
                        }
 
@@ -3973,7 +3983,7 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
                                set_bit(STRIPE_EXPAND_READY, &sh2->state);
                                set_bit(STRIPE_HANDLE, &sh2->state);
                        }
-                       release_stripe(sh2);
+                       raid5_release_stripe(sh2);
 
                }
        /* done submitting copies, wait for them to complete */
@@ -4008,6 +4018,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
        s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state) && !sh->batch_head;
        s->failed_num[0] = -1;
        s->failed_num[1] = -1;
+       s->log_failed = r5l_log_disk_error(conf);
 
        /* Now to look around and see what can be done */
        rcu_read_lock();
@@ -4259,7 +4270,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
                if (handle_flags == 0 ||
                    sh->state & handle_flags)
                        set_bit(STRIPE_HANDLE, &sh->state);
-               release_stripe(sh);
+               raid5_release_stripe(sh);
        }
        spin_lock_irq(&head_sh->stripe_lock);
        head_sh->batch_head = NULL;
@@ -4320,6 +4331,9 @@ static void handle_stripe(struct stripe_head *sh)
 
        analyse_stripe(sh, &s);
 
+       if (test_bit(STRIPE_LOG_TRAPPED, &sh->state))
+               goto finish;
+
        if (s.handle_bad_blocks) {
                set_bit(STRIPE_HANDLE, &sh->state);
                goto finish;
@@ -4348,7 +4362,7 @@ static void handle_stripe(struct stripe_head *sh)
        /* check if the array has lost more than max_degraded devices and,
         * if so, some requests might need to be failed.
         */
-       if (s.failed > conf->max_degraded) {
+       if (s.failed > conf->max_degraded || s.log_failed) {
                sh->check_state = 0;
                sh->reconstruct_state = 0;
                break_stripe_batch_list(sh, 0);
@@ -4506,7 +4520,7 @@ static void handle_stripe(struct stripe_head *sh)
        /* Finish reconstruct operations initiated by the expansion process */
        if (sh->reconstruct_state == reconstruct_state_result) {
                struct stripe_head *sh_src
-                       = get_active_stripe(conf, sh->sector, 1, 1, 1);
+                       = raid5_get_active_stripe(conf, sh->sector, 1, 1, 1);
                if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) {
                        /* sh cannot be written until sh_src has been read.
                         * so arrange for sh to be delayed a little
@@ -4516,11 +4530,11 @@ static void handle_stripe(struct stripe_head *sh)
                        if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
                                              &sh_src->state))
                                atomic_inc(&conf->preread_active_stripes);
-                       release_stripe(sh_src);
+                       raid5_release_stripe(sh_src);
                        goto finish;
                }
                if (sh_src)
-                       release_stripe(sh_src);
+                       raid5_release_stripe(sh_src);
 
                sh->reconstruct_state = reconstruct_state_idle;
                clear_bit(STRIPE_EXPANDING, &sh->state);
@@ -5012,7 +5026,7 @@ static void release_stripe_plug(struct mddev *mddev,
        struct raid5_plug_cb *cb;
 
        if (!blk_cb) {
-               release_stripe(sh);
+               raid5_release_stripe(sh);
                return;
        }
 
@@ -5028,7 +5042,7 @@ static void release_stripe_plug(struct mddev *mddev,
        if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
                list_add_tail(&sh->lru, &cb->list);
        else
-               release_stripe(sh);
+               raid5_release_stripe(sh);
 }
 
 static void make_discard_request(struct mddev *mddev, struct bio *bi)
@@ -5063,12 +5077,12 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
                DEFINE_WAIT(w);
                int d;
        again:
-               sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
+               sh = raid5_get_active_stripe(conf, logical_sector, 0, 0, 0);
                prepare_to_wait(&conf->wait_for_overlap, &w,
                                TASK_UNINTERRUPTIBLE);
                set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
                if (test_bit(STRIPE_SYNCING, &sh->state)) {
-                       release_stripe(sh);
+                       raid5_release_stripe(sh);
                        schedule();
                        goto again;
                }
@@ -5080,7 +5094,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
                        if (sh->dev[d].towrite || sh->dev[d].toread) {
                                set_bit(R5_Overlap, &sh->dev[d].flags);
                                spin_unlock_irq(&sh->stripe_lock);
-                               release_stripe(sh);
+                               raid5_release_stripe(sh);
                                schedule();
                                goto again;
                        }
@@ -5136,8 +5150,15 @@ static void make_request(struct mddev *mddev, struct bio * bi)
        bool do_prepare;
 
        if (unlikely(bi->bi_rw & REQ_FLUSH)) {
-               md_flush_request(mddev, bi);
-               return;
+               int ret = r5l_handle_flush_request(conf->log, bi);
+
+               if (ret == 0)
+                       return;
+               if (ret == -ENODEV) {
+                       md_flush_request(mddev, bi);
+                       return;
+               }
+               /* ret == -EAGAIN, fallback */
        }
 
        md_write_start(mddev, bi);
@@ -5210,7 +5231,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                        (unsigned long long)new_sector,
                        (unsigned long long)logical_sector);
 
-               sh = get_active_stripe(conf, new_sector, previous,
+               sh = raid5_get_active_stripe(conf, new_sector, previous,
                                       (bi->bi_rw&RWA_MASK), 0);
                if (sh) {
                        if (unlikely(previous)) {
@@ -5231,7 +5252,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                                        must_retry = 1;
                                spin_unlock_irq(&conf->device_lock);
                                if (must_retry) {
-                                       release_stripe(sh);
+                                       raid5_release_stripe(sh);
                                        schedule();
                                        do_prepare = true;
                                        goto retry;
@@ -5241,14 +5262,14 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                                /* Might have got the wrong stripe_head
                                 * by accident
                                 */
-                               release_stripe(sh);
+                               raid5_release_stripe(sh);
                                goto retry;
                        }
 
                        if (rw == WRITE &&
                            logical_sector >= mddev->suspend_lo &&
                            logical_sector < mddev->suspend_hi) {
-                               release_stripe(sh);
+                               raid5_release_stripe(sh);
                                /* As the suspend_* range is controlled by
                                 * userspace, we want an interruptible
                                 * wait.
@@ -5271,7 +5292,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                                 * and wait a while
                                 */
                                md_wakeup_thread(mddev->thread);
-                               release_stripe(sh);
+                               raid5_release_stripe(sh);
                                schedule();
                                do_prepare = true;
                                goto retry;
@@ -5458,7 +5479,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
        for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
                int j;
                int skipped_disk = 0;
-               sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
+               sh = raid5_get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
                set_bit(STRIPE_EXPANDING, &sh->state);
                atomic_inc(&conf->reshape_stripes);
                /* If any of this stripe is beyond the end of the old
@@ -5471,7 +5492,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
                        if (conf->level == 6 &&
                            j == sh->qd_idx)
                                continue;
-                       s = compute_blocknr(sh, j, 0);
+                       s = raid5_compute_blocknr(sh, j, 0);
                        if (s < raid5_size(mddev, 0, 0)) {
                                skipped_disk = 1;
                                continue;
@@ -5507,10 +5528,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
        if (last_sector >= mddev->dev_sectors)
                last_sector = mddev->dev_sectors - 1;
        while (first_sector <= last_sector) {
-               sh = get_active_stripe(conf, first_sector, 1, 0, 1);
+               sh = raid5_get_active_stripe(conf, first_sector, 1, 0, 1);
                set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
                set_bit(STRIPE_HANDLE, &sh->state);
-               release_stripe(sh);
+               raid5_release_stripe(sh);
                first_sector += STRIPE_SECTORS;
        }
        /* Now that the sources are clearly marked, we can release
@@ -5519,7 +5540,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
        while (!list_empty(&stripes)) {
                sh = list_entry(stripes.next, struct stripe_head, lru);
                list_del_init(&sh->lru);
-               release_stripe(sh);
+               raid5_release_stripe(sh);
        }
        /* If this takes us to the resync_max point where we have to pause,
         * then we need to write out the superblock.
@@ -5615,11 +5636,11 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
                return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
        }
 
-       bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+       bitmap_cond_end_sync(mddev->bitmap, sector_nr, false);
 
-       sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
+       sh = raid5_get_active_stripe(conf, sector_nr, 0, 1, 0);
        if (sh == NULL) {
-               sh = get_active_stripe(conf, sector_nr, 0, 0, 0);
+               sh = raid5_get_active_stripe(conf, sector_nr, 0, 0, 0);
                /* make sure we don't swamp the stripe cache if someone else
                 * is trying to get access
                 */
@@ -5643,7 +5664,7 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
        set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
        set_bit(STRIPE_HANDLE, &sh->state);
 
-       release_stripe(sh);
+       raid5_release_stripe(sh);
 
        return STRIPE_SECTORS;
 }
@@ -5682,7 +5703,7 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
                        /* already done this stripe */
                        continue;
 
-               sh = get_active_stripe(conf, sector, 0, 1, 1);
+               sh = raid5_get_active_stripe(conf, sector, 0, 1, 1);
 
                if (!sh) {
                        /* failed to get a stripe - must wait */
@@ -5692,7 +5713,7 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
                }
 
                if (!add_stripe_bio(sh, raid_bio, dd_idx, 0, 0)) {
-                       release_stripe(sh);
+                       raid5_release_stripe(sh);
                        raid5_set_bi_processed_stripes(raid_bio, scnt);
                        conf->retry_read_aligned = raid_bio;
                        return handled;
@@ -5700,7 +5721,7 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 
                set_bit(R5_ReadNoMerge, &sh->dev[dd_idx].flags);
                handle_stripe(sh);
-               release_stripe(sh);
+               raid5_release_stripe(sh);
                handled++;
        }
        remaining = raid5_dec_bi_active_stripes(raid_bio);
@@ -5730,8 +5751,12 @@ static int handle_active_stripes(struct r5conf *conf, int group,
                for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
                        if (!list_empty(temp_inactive_list + i))
                                break;
-               if (i == NR_STRIPE_HASH_LOCKS)
+               if (i == NR_STRIPE_HASH_LOCKS) {
+                       spin_unlock_irq(&conf->device_lock);
+                       r5l_flush_stripe_to_raid(conf->log);
+                       spin_lock_irq(&conf->device_lock);
                        return batch_size;
+               }
                release_inactive = true;
        }
        spin_unlock_irq(&conf->device_lock);
@@ -5739,6 +5764,7 @@ static int handle_active_stripes(struct r5conf *conf, int group,
        release_inactive_stripe_list(conf, temp_inactive_list,
                                     NR_STRIPE_HASH_LOCKS);
 
+       r5l_flush_stripe_to_raid(conf->log);
        if (release_inactive) {
                spin_lock_irq(&conf->device_lock);
                return 0;
@@ -5746,6 +5772,7 @@ static int handle_active_stripes(struct r5conf *conf, int group,
 
        for (i = 0; i < batch_size; i++)
                handle_stripe(batch[i]);
+       r5l_write_stripe_run(conf->log);
 
        cond_resched();
 
@@ -5879,6 +5906,8 @@ static void raid5d(struct md_thread *thread)
                mutex_unlock(&conf->cache_size_mutex);
        }
 
+       r5l_flush_stripe_to_raid(conf->log);
+
        async_tx_issue_pending_all();
        blk_finish_plug(&plug);
 
@@ -6316,8 +6345,11 @@ static void raid5_free_percpu(struct r5conf *conf)
 
 static void free_conf(struct r5conf *conf)
 {
+       if (conf->log)
+               r5l_exit_log(conf->log);
        if (conf->shrinker.seeks)
                unregister_shrinker(&conf->shrinker);
+
        free_thread_groups(conf);
        shrink_stripes(conf);
        raid5_free_percpu(conf);
@@ -6530,7 +6562,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        rdev_for_each(rdev, mddev) {
                raid_disk = rdev->raid_disk;
                if (raid_disk >= max_disks
-                   || raid_disk < 0)
+                   || raid_disk < 0 || test_bit(Journal, &rdev->flags))
                        continue;
                disk = conf->disks + raid_disk;
 
@@ -6650,6 +6682,7 @@ static int run(struct mddev *mddev)
        int working_disks = 0;
        int dirty_parity_disks = 0;
        struct md_rdev *rdev;
+       struct md_rdev *journal_dev = NULL;
        sector_t reshape_offset = 0;
        int i;
        long long min_offset_diff = 0;
@@ -6662,6 +6695,11 @@ static int run(struct mddev *mddev)
 
        rdev_for_each(rdev, mddev) {
                long long diff;
+
+               if (test_bit(Journal, &rdev->flags)) {
+                       journal_dev = rdev;
+                       continue;
+               }
                if (rdev->raid_disk < 0)
                        continue;
                diff = (rdev->new_data_offset - rdev->data_offset);
@@ -6695,6 +6733,12 @@ static int run(struct mddev *mddev)
                int chunk_sectors;
                int new_data_disks;
 
+               if (journal_dev) {
+                       printk(KERN_ERR "md/raid:%s: don't support reshape with journal - aborting.\n",
+                              mdname(mddev));
+                       return -EINVAL;
+               }
+
                if (mddev->new_level != mddev->level) {
                        printk(KERN_ERR "md/raid:%s: unsupported reshape "
                               "required - aborting.\n",
@@ -6770,6 +6814,13 @@ static int run(struct mddev *mddev)
        if (IS_ERR(conf))
                return PTR_ERR(conf);
 
+       if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) {
+               printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n",
+                      mdname(mddev));
+               mddev->ro = 1;
+               set_disk_ro(mddev->gendisk, 1);
+       }
+
        conf->min_offset_diff = min_offset_diff;
        mddev->thread = conf->thread;
        conf->thread = NULL;
@@ -6973,6 +7024,14 @@ static int run(struct mddev *mddev)
                                                mddev->queue);
        }
 
+       if (journal_dev) {
+               char b[BDEVNAME_SIZE];
+
+               printk(KERN_INFO"md/raid:%s: using device %s as journal\n",
+                      mdname(mddev), bdevname(journal_dev->bdev, b));
+               r5l_init_log(conf, journal_dev);
+       }
+
        return 0;
 abort:
        md_unregister_thread(&mddev->thread);
@@ -7082,6 +7141,15 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
        struct disk_info *p = conf->disks + number;
 
        print_raid5_conf(conf);
+       if (test_bit(Journal, &rdev->flags)) {
+               /*
+                * journal disk is not removable, but we need give a chance to
+                * update superblock of other disks. Otherwise journal disk
+                * will be considered as 'fresh'
+                */
+               set_bit(MD_CHANGE_DEVS, &mddev->flags);
+               return -EINVAL;
+       }
        if (rdev == p->rdev)
                rdevp = &p->rdev;
        else if (rdev == p->replacement)
@@ -7144,6 +7212,8 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
        int first = 0;
        int last = conf->raid_disks - 1;
 
+       if (test_bit(Journal, &rdev->flags))
+               return -EINVAL;
        if (mddev->recovery_disabled == conf->recovery_disabled)
                return -EBUSY;
 
@@ -7205,6 +7275,8 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
        sector_t newsize;
        struct r5conf *conf = mddev->private;
 
+       if (conf->log)
+               return -EINVAL;
        sectors &= ~((sector_t)conf->chunk_sectors - 1);
        newsize = raid5_size(mddev, sectors, mddev->raid_disks);
        if (mddev->external_size &&
@@ -7256,6 +7328,8 @@ static int check_reshape(struct mddev *mddev)
 {
        struct r5conf *conf = mddev->private;
 
+       if (conf->log)
+               return -EINVAL;
        if (mddev->delta_disks == 0 &&
            mddev->new_layout == mddev->layout &&
            mddev->new_chunk_sectors == mddev->chunk_sectors)
@@ -7532,6 +7606,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
                unlock_all_device_hash_locks_irq(conf);
                break;
        }
+       r5l_quiesce(conf->log, state);
 }
 
 static void *raid45_takeover_raid0(struct mddev *mddev, int level)