Merge remote-tracking branch 'tip/auto-latest'
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2afef4ec9312d222bb51e76fbce26f254e564cf0..3c0007ef85edad1fb7ac760576a76013586d9f5b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -479,6 +479,7 @@ static void shrink_buffers(struct stripe_head *sh)
        int num = sh->raid_conf->pool_size;
 
        for (i = 0; i < num ; i++) {
+               WARN_ON(sh->dev[i].page != sh->dev[i].orig_page);
                p = sh->dev[i].page;
                if (!p)
                        continue;
@@ -499,6 +500,7 @@ static int grow_buffers(struct stripe_head *sh)
                        return 1;
                }
                sh->dev[i].page = page;
+               sh->dev[i].orig_page = page;
        }
        return 0;
 }
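
Note: these first two hunks establish the invariant the rest of the patch relies on: dev->page and dev->orig_page start out pointing at the same preallocated page. grow_buffers() records the new page in both fields, and shrink_buffers() now warns if a stripe is torn down while dev->page still points at a borrowed bio page instead of the original.
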
@@ -855,6 +857,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                        if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
                                bi->bi_rw |= REQ_NOMERGE;
 
+                       if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+                               WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+                       sh->dev[i].vec.bv_page = sh->dev[i].page;
                        bi->bi_vcnt = 1;
                        bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
                        bi->bi_io_vec[0].bv_offset = 0;
@@ -899,6 +904,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                        else
                                rbi->bi_iter.bi_sector = (sh->sector
                                                  + rrdev->data_offset);
+                       if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+                               WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+                       sh->dev[i].rvec.bv_page = sh->dev[i].page;
                        rbi->bi_vcnt = 1;
                        rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
                        rbi->bi_io_vec[0].bv_offset = 0;
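
Note: since dev->page may now temporarily point at a bio's page, the single-entry bio_vec of each member request (vec for the normal device, rvec for the replacement) is refreshed at submission time in ops_run_io() instead of once at stripe setup. The WARN_ON flags an inconsistent state: a skip-copy page must never be marked R5_UPTODATE, because the stripe cache never received a copy of the data.
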
@@ -927,8 +935,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 }
 
 static struct dma_async_tx_descriptor *
-async_copy_data(int frombio, struct bio *bio, struct page *page,
-       sector_t sector, struct dma_async_tx_descriptor *tx)
+async_copy_data(int frombio, struct bio *bio, struct page **page,
+       sector_t sector, struct dma_async_tx_descriptor *tx,
+       struct stripe_head *sh)
 {
        struct bio_vec bvl;
        struct bvec_iter iter;
@@ -965,11 +974,16 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
                if (clen > 0) {
                        b_offset += bvl.bv_offset;
                        bio_page = bvl.bv_page;
-                       if (frombio)
-                               tx = async_memcpy(page, bio_page, page_offset,
+                       if (frombio) {
+                               if (sh->raid_conf->skip_copy &&
+                                   b_offset == 0 && page_offset == 0 &&
+                                   clen == STRIPE_SIZE)
+                                       *page = bio_page;
+                               else
+                                       tx = async_memcpy(*page, bio_page, page_offset,
                                                  b_offset, clen, &submit);
-                       else
-                               tx = async_memcpy(bio_page, page, b_offset,
+                       } else
+                               tx = async_memcpy(bio_page, *page, b_offset,
                                                  page_offset, clen, &submit);
                }
                /* chain the operations */
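
This branch in async_copy_data() is the core of the feature: when the skip_copy tunable is set and a bio segment exactly overwrites one whole stripe page (zero offset on both sides and a full STRIPE_SIZE length), the memcpy is elided and *page is simply repointed at the bio's page. A minimal user-space sketch of the eligibility test, assuming the usual 4 KiB stripe page (can_skip_copy() is an illustrative name, not a kernel function):

    /* Sketch only: models the skip_copy test from async_copy_data();
     * STRIPE_SIZE's value and the helper name are assumptions. */
    #include <stdbool.h>
    #include <stdio.h>

    #define STRIPE_SIZE 4096

    static bool can_skip_copy(bool skip_copy, int b_offset,
                              int page_offset, int clen)
    {
            /* Only a segment that overwrites the entire stripe page
             * can donate its page; partial writes must still copy. */
            return skip_copy && b_offset == 0 &&
                   page_offset == 0 && clen == STRIPE_SIZE;
    }

    int main(void)
    {
            printf("%d\n", can_skip_copy(true, 0, 0, STRIPE_SIZE)); /* 1 */
            printf("%d\n", can_skip_copy(true, 0, 512, 3584));      /* 0 */
            return 0;
    }
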
@@ -1045,8 +1059,8 @@ static void ops_run_biofill(struct stripe_head *sh)
                        spin_unlock_irq(&sh->stripe_lock);
                        while (rbi && rbi->bi_iter.bi_sector <
                                dev->sector + STRIPE_SECTORS) {
-                               tx = async_copy_data(0, rbi, dev->page,
-                                       dev->sector, tx);
+                               tx = async_copy_data(0, rbi, &dev->page,
+                                       dev->sector, tx, sh);
                                rbi = r5_next_bio(rbi, dev->sector);
                        }
                }
@@ -1384,6 +1398,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
                        BUG_ON(dev->written);
                        wbi = dev->written = chosen;
                        spin_unlock_irq(&sh->stripe_lock);
+                       WARN_ON(dev->page != dev->orig_page);
 
                        while (wbi && wbi->bi_iter.bi_sector <
                                dev->sector + STRIPE_SECTORS) {
@@ -1393,9 +1408,15 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
                                        set_bit(R5_SyncIO, &dev->flags);
                                if (wbi->bi_rw & REQ_DISCARD)
                                        set_bit(R5_Discard, &dev->flags);
-                               else
-                                       tx = async_copy_data(1, wbi, dev->page,
-                                               dev->sector, tx);
+                               else {
+                                       tx = async_copy_data(1, wbi, &dev->page,
+                                               dev->sector, tx, sh);
+                                       if (dev->page != dev->orig_page) {
+                                               set_bit(R5_SkipCopy, &dev->flags);
+                                               clear_bit(R5_UPTODATE, &dev->flags);
+                                               clear_bit(R5_OVERWRITE, &dev->flags);
+                                       }
+                               }
                                wbi = r5_next_bio(wbi, dev->sector);
                        }
                }
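
Note: in ops_run_biodrain(), dev->page != dev->orig_page after the copy attempt is the tell-tale that async_copy_data() took the zero-copy path. The member is then marked R5_SkipCopy, and R5_UPTODATE and R5_OVERWRITE are deliberately cleared: the cached page does not hold the written data (the bio page does), so nothing downstream may treat the stripe cache as valid for this block.
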
@@ -1426,7 +1447,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
                struct r5dev *dev = &sh->dev[i];
 
                if (dev->written || i == pd_idx || i == qd_idx) {
-                       if (!discard)
+                       if (!discard && !test_bit(R5_SkipCopy, &dev->flags))
                                set_bit(R5_UPTODATE, &dev->flags);
                        if (fua)
                                set_bit(R5_WantFUA, &dev->flags);
@@ -2133,24 +2154,20 @@ static void raid5_end_write_request(struct bio *bi, int error)
 }
 
 static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
-       
+
 static void raid5_build_block(struct stripe_head *sh, int i, int previous)
 {
        struct r5dev *dev = &sh->dev[i];
 
        bio_init(&dev->req);
        dev->req.bi_io_vec = &dev->vec;
-       dev->req.bi_vcnt++;
-       dev->req.bi_max_vecs++;
+       dev->req.bi_max_vecs = 1;
        dev->req.bi_private = sh;
-       dev->vec.bv_page = dev->page;
 
        bio_init(&dev->rreq);
        dev->rreq.bi_io_vec = &dev->rvec;
-       dev->rreq.bi_vcnt++;
-       dev->rreq.bi_max_vecs++;
+       dev->rreq.bi_max_vecs = 1;
        dev->rreq.bi_private = sh;
-       dev->rvec.bv_page = dev->page;
 
        dev->flags = 0;
        dev->sector = compute_blocknr(sh, i, previous);
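
Note: raid5_build_block() previously pinned vec.bv_page/rvec.bv_page to dev->page at stripe construction and grew bi_vcnt/bi_max_vecs by increments; with the page now assigned per request in ops_run_io() (the -855 and -899 hunks above), bi_max_vecs is set to 1 outright, and bi_vcnt is already assigned at submission time, as the context lines in those hunks show.
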
@@ -2750,6 +2767,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                /* and fail all 'written' */
                bi = sh->dev[i].written;
                sh->dev[i].written = NULL;
+               if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) {
+                       WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+                       sh->dev[i].page = sh->dev[i].orig_page;
+               }
+
                if (bi) bitmap_end = 1;
                while (bi && bi->bi_iter.bi_sector <
                       sh->dev[i].sector + STRIPE_SECTORS) {
@@ -2991,12 +3013,17 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                        dev = &sh->dev[i];
                        if (!test_bit(R5_LOCKED, &dev->flags) &&
                            (test_bit(R5_UPTODATE, &dev->flags) ||
-                            test_bit(R5_Discard, &dev->flags))) {
+                            test_bit(R5_Discard, &dev->flags) ||
+                            test_bit(R5_SkipCopy, &dev->flags))) {
                                /* We can return any write requests */
                                struct bio *wbi, *wbi2;
                                pr_debug("Return write for disc %d\n", i);
                                if (test_and_clear_bit(R5_Discard, &dev->flags))
                                        clear_bit(R5_UPTODATE, &dev->flags);
+                               if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) {
+                                       WARN_ON(test_bit(R5_UPTODATE, &dev->flags));
+                                       dev->page = dev->orig_page;
+                               }
                                wbi = dev->written;
                                dev->written = NULL;
                                while (wbi && wbi->bi_iter.bi_sector <
@@ -3015,6 +3042,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                                                0);
                        } else if (test_bit(R5_Discard, &dev->flags))
                                discard_pending = 1;
+                       WARN_ON(test_bit(R5_SkipCopy, &dev->flags));
+                       WARN_ON(dev->page != dev->orig_page);
                }
        if (!discard_pending &&
            test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
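
The preceding hunks unwind the skipped copy at completion or failure: ops_complete_reconstruct() refuses to set R5_UPTODATE on a skip-copy member, while handle_stripe_clean_event() and handle_failed_stripe() clear R5_SkipCopy and point dev->page back at dev->orig_page before the write bios are returned. A compact user-space model of that lifecycle (flag values, struct layout, and function names are illustrative, not the kernel's):

    /* Sketch only: models the R5_SkipCopy set/restore cycle. */
    #include <assert.h>
    #include <stdio.h>

    enum { R5_UPTODATE = 1 << 0, R5_OVERWRITE = 1 << 1, R5_SkipCopy = 1 << 2 };

    struct dev_model {
            unsigned int flags;
            void *page, *orig_page, *bio_page;
    };

    static void drain(struct dev_model *d)          /* ops_run_biodrain() */
    {
            d->page = d->bio_page;                  /* copy skipped */
            d->flags |= R5_SkipCopy;
            d->flags &= ~(R5_UPTODATE | R5_OVERWRITE);
    }

    static void complete_write(struct dev_model *d) /* handle_stripe_clean_event() */
    {
            if (d->flags & R5_SkipCopy) {
                    assert(!(d->flags & R5_UPTODATE));
                    d->flags &= ~R5_SkipCopy;
                    d->page = d->orig_page;         /* hand the bio page back */
            }
    }

    int main(void)
    {
            char stripe_page, bio_page;
            struct dev_model d = { 0, &stripe_page, &stripe_page, &bio_page };

            drain(&d);
            complete_write(&d);
            printf("restored: %d\n", d.page == d.orig_page); /* restored: 1 */
            return 0;
    }
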
@@ -5354,6 +5383,50 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold,
                                        raid5_show_preread_threshold,
                                        raid5_store_preread_threshold);
 
+static ssize_t
+raid5_show_skip_copy(struct mddev *mddev, char *page)
+{
+       struct r5conf *conf = mddev->private;
+       if (conf)
+               return sprintf(page, "%d\n", conf->skip_copy);
+       else
+               return 0;
+}
+
+static ssize_t
+raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
+{
+       struct r5conf *conf = mddev->private;
+       unsigned long new;
+       if (len >= PAGE_SIZE)
+               return -EINVAL;
+       if (!conf)
+               return -ENODEV;
+
+       if (kstrtoul(page, 10, &new))
+               return -EINVAL;
+       new = !!new;
+       if (new == conf->skip_copy)
+               return len;
+
+       mddev_suspend(mddev);
+       conf->skip_copy = new;
+       if (new)
+               mddev->queue->backing_dev_info.capabilities |=
+                                               BDI_CAP_STABLE_WRITES;
+       else
+               mddev->queue->backing_dev_info.capabilities &=
+                                               ~BDI_CAP_STABLE_WRITES;
+       mddev_resume(mddev);
+       return len;
+}
+
+static struct md_sysfs_entry
+raid5_skip_copy = __ATTR(skip_copy, S_IRUGO | S_IWUSR,
+                                       raid5_show_skip_copy,
+                                       raid5_store_skip_copy);
+
+
 static ssize_t
 stripe_cache_active_show(struct mddev *mddev, char *page)
 {
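
Note: the sysfs pair above is the user-facing switch; judging by the standard md sysfs layout it would appear as e.g. /sys/block/md0/md/skip_copy (path assumed, not part of this diff). The store handler suspends the array around the toggle and, crucially, sets BDI_CAP_STABLE_WRITES while skip_copy is enabled: because the bio's own pages are used for parity computation and the disk write, upper layers must not modify them until the write completes.
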
@@ -5439,6 +5512,7 @@ static struct attribute *raid5_attrs[] =  {
        &raid5_stripecache_active.attr,
        &raid5_preread_bypass_threshold.attr,
        &raid5_group_thread_cnt.attr,
+       &raid5_skip_copy.attr,
        NULL,
 };
 static struct attribute_group raid5_attrs_group = {