Merge tag 'md/4.2' of git://neil.brown.name/md
author     Linus Torvalds <torvalds@linux-foundation.org>
           Mon, 29 Jun 2015 18:10:56 +0000 (11:10 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Mon, 29 Jun 2015 18:10:56 +0000 (11:10 -0700)
Pull md updates from Neil Brown:
 "A mixed bag

   - a few bug fixes
   - some performance improvements that decrease lock contention
   - some clean-up

  Nothing major"

* tag 'md/4.2' of git://neil.brown.name/md:
  md: clear Blocked flag on failed devices when array is read-only.
  md: unlock mddev_lock on an error path.
  md: clear mddev->private when it has been freed.
  md: fix a build warning
  md/raid5: ignore released_stripes check
  md/raid5: per hash value and exclusive wait_for_stripe
  md/raid5: split wait_for_stripe and introduce wait_for_quiescent
  wait: introduce wait_event_exclusive_cmd
  md: convert to kstrto*()
  md/raid10: make sync_request_write() call bio_copy_data()
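
The md.c changes below are mostly the kstrto*() conversion: simple_strtoul()/simple_strtoull() silently accept trailing garbage and overflow, while the kstrto*() helpers reject both (allowing only a single trailing newline) and return -EINVAL or -ERANGE. A minimal sketch of the resulting store-handler shape (the attribute and its example_value field are hypothetical, not part of this merge):

static ssize_t
example_store(struct mddev *mddev, const char *buf, size_t len)
{
	unsigned int n;
	int rv;

	/* Strict parse: empty input, trailing garbage and out-of-range
	 * values all fail, unlike simple_strtoul().
	 */
	rv = kstrtouint(buf, 10, &n);
	if (rv < 0)
		return rv;			/* -EINVAL or -ERANGE */
	atomic_set(&mddev->example_value, n);	/* hypothetical field */
	return len;				/* consume the whole write */
}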

drivers/md/md.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/md/raid5.h
include/linux/wait.h

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8d9f89b4519d3c575647dc9e1f67239d2d61abf9..df92d30ca054c68a2af9cc3ee299525d1635a0eb 100644
@@ -2628,13 +2628,14 @@ errors_show(struct md_rdev *rdev, char *page)
 static ssize_t
 errors_store(struct md_rdev *rdev, const char *buf, size_t len)
 {
-       char *e;
-       unsigned long n = simple_strtoul(buf, &e, 10);
-       if (*buf && (*e == 0 || *e == '\n')) {
-               atomic_set(&rdev->corrected_errors, n);
-               return len;
-       }
-       return -EINVAL;
+       unsigned int n;
+       int rv;
+
+       rv = kstrtouint(buf, 10, &n);
+       if (rv < 0)
+               return rv;
+       atomic_set(&rdev->corrected_errors, n);
+       return len;
 }
 static struct rdev_sysfs_entry rdev_errors =
 __ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
@@ -2651,13 +2652,16 @@ slot_show(struct md_rdev *rdev, char *page)
 static ssize_t
 slot_store(struct md_rdev *rdev, const char *buf, size_t len)
 {
-       char *e;
+       int slot;
        int err;
-       int slot = simple_strtoul(buf, &e, 10);
+
        if (strncmp(buf, "none", 4)==0)
                slot = -1;
-       else if (e==buf || (*e && *e!= '\n'))
-               return -EINVAL;
+       else {
+               err = kstrtouint(buf, 10, (unsigned int *)&slot);
+               if (err < 0)
+                       return err;
+       }
        if (rdev->mddev->pers && slot == -1) {
                /* Setting 'slot' on an active array requires also
                 * updating the 'rd%d' link, and communicating
@@ -3542,12 +3546,12 @@ layout_show(struct mddev *mddev, char *page)
 static ssize_t
 layout_store(struct mddev *mddev, const char *buf, size_t len)
 {
-       char *e;
-       unsigned long n = simple_strtoul(buf, &e, 10);
+       unsigned int n;
        int err;
 
-       if (!*buf || (*e && *e != '\n'))
-               return -EINVAL;
+       err = kstrtouint(buf, 10, &n);
+       if (err < 0)
+               return err;
        err = mddev_lock(mddev);
        if (err)
                return err;
@@ -3591,12 +3595,12 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks);
 static ssize_t
 raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
 {
-       char *e;
+       unsigned int n;
        int err;
-       unsigned long n = simple_strtoul(buf, &e, 10);
 
-       if (!*buf || (*e && *e != '\n'))
-               return -EINVAL;
+       err = kstrtouint(buf, 10, &n);
+       if (err < 0)
+               return err;
 
        err = mddev_lock(mddev);
        if (err)
@@ -3643,12 +3647,12 @@ chunk_size_show(struct mddev *mddev, char *page)
 static ssize_t
 chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
 {
+       unsigned long n;
        int err;
-       char *e;
-       unsigned long n = simple_strtoul(buf, &e, 10);
 
-       if (!*buf || (*e && *e != '\n'))
-               return -EINVAL;
+       err = kstrtoul(buf, 10, &n);
+       if (err < 0)
+               return err;
 
        err = mddev_lock(mddev);
        if (err)
@@ -3686,19 +3690,24 @@ resync_start_show(struct mddev *mddev, char *page)
 static ssize_t
 resync_start_store(struct mddev *mddev, const char *buf, size_t len)
 {
+       unsigned long long n;
        int err;
-       char *e;
-       unsigned long long n = simple_strtoull(buf, &e, 10);
+
+       if (cmd_match(buf, "none"))
+               n = MaxSector;
+       else {
+               err = kstrtoull(buf, 10, &n);
+               if (err < 0)
+                       return err;
+               if (n != (sector_t)n)
+                       return -EINVAL;
+       }
 
        err = mddev_lock(mddev);
        if (err)
                return err;
        if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
                err = -EBUSY;
-       else if (cmd_match(buf, "none"))
-               n = MaxSector;
-       else if (!*buf || (*e && *e != '\n'))
-               err = -EINVAL;
 
        if (!err) {
                mddev->recovery_cp = n;
@@ -3934,14 +3943,14 @@ max_corrected_read_errors_show(struct mddev *mddev, char *page) {
 static ssize_t
 max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
 {
-       char *e;
-       unsigned long n = simple_strtoul(buf, &e, 10);
+       unsigned int n;
+       int rv;
 
-       if (*buf && (*e == 0 || *e == '\n')) {
-               atomic_set(&mddev->max_corr_read_errors, n);
-               return len;
-       }
-       return -EINVAL;
+       rv = kstrtouint(buf, 10, &n);
+       if (rv < 0)
+               return rv;
+       atomic_set(&mddev->max_corr_read_errors, n);
+       return len;
 }
 
 static struct md_sysfs_entry max_corr_read_errors =
@@ -4003,8 +4012,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
        else
                rdev = md_import_device(dev, -1, -1);
 
-       if (IS_ERR(rdev))
+       if (IS_ERR(rdev)) {
+               mddev_unlock(mddev);
                return PTR_ERR(rdev);
+       }
        err = bind_rdev_to_array(rdev, mddev);
  out:
        if (err)
@@ -4298,15 +4309,18 @@ sync_min_show(struct mddev *mddev, char *page)
 static ssize_t
 sync_min_store(struct mddev *mddev, const char *buf, size_t len)
 {
-       int min;
-       char *e;
+       unsigned int min;
+       int rv;
+
        if (strncmp(buf, "system", 6)==0) {
-               mddev->sync_speed_min = 0;
-               return len;
+               min = 0;
+       } else {
+               rv = kstrtouint(buf, 10, &min);
+               if (rv < 0)
+                       return rv;
+               if (min == 0)
+                       return -EINVAL;
        }
-       min = simple_strtoul(buf, &e, 10);
-       if (buf == e || (*e && *e != '\n') || min <= 0)
-               return -EINVAL;
        mddev->sync_speed_min = min;
        return len;
 }
@@ -4324,15 +4338,18 @@ sync_max_show(struct mddev *mddev, char *page)
 static ssize_t
 sync_max_store(struct mddev *mddev, const char *buf, size_t len)
 {
-       int max;
-       char *e;
+       unsigned int max;
+       int rv;
+
        if (strncmp(buf, "system", 6)==0) {
-               mddev->sync_speed_max = 0;
-               return len;
+               max = 0;
+       } else {
+               rv = kstrtouint(buf, 10, &max);
+               if (rv < 0)
+                       return rv;
+               if (max == 0)
+                       return -EINVAL;
        }
-       max = simple_strtoul(buf, &e, 10);
-       if (buf == e || (*e && *e != '\n') || max <= 0)
-               return -EINVAL;
        mddev->sync_speed_max = max;
        return len;
 }
@@ -4515,12 +4532,13 @@ suspend_lo_show(struct mddev *mddev, char *page)
 static ssize_t
 suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
 {
-       char *e;
-       unsigned long long new = simple_strtoull(buf, &e, 10);
-       unsigned long long old;
+       unsigned long long old, new;
        int err;
 
-       if (buf == e || (*e && *e != '\n'))
+       err = kstrtoull(buf, 10, &new);
+       if (err < 0)
+               return err;
+       if (new != (sector_t)new)
                return -EINVAL;
 
        err = mddev_lock(mddev);
@@ -4557,12 +4575,13 @@ suspend_hi_show(struct mddev *mddev, char *page)
 static ssize_t
 suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
 {
-       char *e;
-       unsigned long long new = simple_strtoull(buf, &e, 10);
-       unsigned long long old;
+       unsigned long long old, new;
        int err;
 
-       if (buf == e || (*e && *e != '\n'))
+       err = kstrtoull(buf, 10, &new);
+       if (err < 0)
+               return err;
+       if (new != (sector_t)new)
                return -EINVAL;
 
        err = mddev_lock(mddev);
@@ -4604,11 +4623,13 @@ static ssize_t
 reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
 {
        struct md_rdev *rdev;
-       char *e;
+       unsigned long long new;
        int err;
-       unsigned long long new = simple_strtoull(buf, &e, 10);
 
-       if (buf == e || (*e && *e != '\n'))
+       err = kstrtoull(buf, 10, &new);
+       if (err < 0)
+               return err;
+       if (new != (sector_t)new)
                return -EINVAL;
        err = mddev_lock(mddev);
        if (err)
@@ -5157,6 +5178,7 @@ int md_run(struct mddev *mddev)
                mddev_detach(mddev);
                if (mddev->private)
                        pers->free(mddev, mddev->private);
+               mddev->private = NULL;
                module_put(pers->owner);
                bitmap_destroy(mddev);
                return err;
@@ -5292,6 +5314,7 @@ static void md_clean(struct mddev *mddev)
        mddev->changed = 0;
        mddev->degraded = 0;
        mddev->safemode = 0;
+       mddev->private = NULL;
        mddev->merge_check_needed = 0;
        mddev->bitmap_info.offset = 0;
        mddev->bitmap_info.default_offset = 0;
@@ -5364,6 +5387,7 @@ static void __md_stop(struct mddev *mddev)
        mddev->pers = NULL;
        spin_unlock(&mddev->lock);
        pers->free(mddev, mddev->private);
+       mddev->private = NULL;
        if (pers->sync_request && mddev->to_remove == NULL)
                mddev->to_remove = &md_redundancy_group;
        module_put(pers->owner);
@@ -6373,7 +6397,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
            mddev->ctime         != info->ctime         ||
            mddev->level         != info->level         ||
 /*         mddev->layout        != info->layout        || */
-           !mddev->persistent   != info->not_persistent||
+           mddev->persistent    != !info->not_persistent ||
            mddev->chunk_sectors != info->chunk_size >> 9 ||
            /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
            ((state^info->state) & 0xfffffe00)
@@ -8104,6 +8128,15 @@ void md_check_recovery(struct mddev *mddev)
                int spares = 0;
 
                if (mddev->ro) {
+                       struct md_rdev *rdev;
+                       if (!mddev->external && mddev->in_sync)
+                               /* 'Blocked' flag not needed as failed devices
+                                * will be recorded if array switched to read/write.
+                                * Leaving it set will prevent the device
+                                * from being removed.
+                                */
+                               rdev_for_each(rdev, mddev)
+                                       clear_bit(Blocked, &rdev->flags);
                        /* On a read-only array we can:
                         * - remove failed devices
                         * - add already-in_sync devices if the array itself
@@ -9011,13 +9044,7 @@ static int get_ro(char *buffer, struct kernel_param *kp)
 }
 static int set_ro(const char *val, struct kernel_param *kp)
 {
-       char *e;
-       int num = simple_strtoul(val, &e, 10);
-       if (*val && (*e == '\0' || *e == '\n')) {
-               start_readonly = num;
-               return 0;
-       }
-       return -EINVAL;
+       return kstrtouint(val, 10, (unsigned int *)&start_readonly);
 }
 
 module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 188d8e9a6bdcc39e4da54095466f45683d6b2177..940f2f3654617918d8eef951262c3ca120ab83ce 100644
@@ -2099,17 +2099,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                tbio->bi_rw = WRITE;
                tbio->bi_private = r10_bio;
                tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
-
-               for (j=0; j < vcnt ; j++) {
-                       tbio->bi_io_vec[j].bv_offset = 0;
-                       tbio->bi_io_vec[j].bv_len = PAGE_SIZE;
-
-                       memcpy(page_address(tbio->bi_io_vec[j].bv_page),
-                              page_address(fbio->bi_io_vec[j].bv_page),
-                              PAGE_SIZE);
-               }
                tbio->bi_end_io = end_sync_write;
 
+               bio_copy_data(tbio, fbio);
+
                d = r10_bio->devs[i].devnum;
                atomic_inc(&conf->mirrors[d].rdev->nr_pending);
                atomic_inc(&r10_bio->remaining);
@@ -2124,17 +2117,14 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
         * that are active
         */
        for (i = 0; i < conf->copies; i++) {
-               int j, d;
+               int d;
 
                tbio = r10_bio->devs[i].repl_bio;
                if (!tbio || !tbio->bi_end_io)
                        continue;
                if (r10_bio->devs[i].bio->bi_end_io != end_sync_write
                    && r10_bio->devs[i].bio != fbio)
-                       for (j = 0; j < vcnt; j++)
-                               memcpy(page_address(tbio->bi_io_vec[j].bv_page),
-                                      page_address(fbio->bi_io_vec[j].bv_page),
-                                      PAGE_SIZE);
+                       bio_copy_data(tbio, fbio);
                d = r10_bio->devs[i].devnum;
                atomic_inc(&r10_bio->remaining);
                md_sync_acct(conf->mirrors[d].replacement->bdev,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b6793d2e051f3b278405f236e6623980bcdf1d04..59e44e99eef3bacd4703fd6883513688f7c58b09 100644
@@ -344,7 +344,8 @@ static void release_inactive_stripe_list(struct r5conf *conf,
                                         int hash)
 {
        int size;
-       bool do_wakeup = false;
+       unsigned long do_wakeup = 0;
+       int i = 0;
        unsigned long flags;
 
        if (hash == NR_STRIPE_HASH_LOCKS) {
@@ -365,15 +366,21 @@ static void release_inactive_stripe_list(struct r5conf *conf,
                            !list_empty(list))
                                atomic_dec(&conf->empty_inactive_list_nr);
                        list_splice_tail_init(list, conf->inactive_list + hash);
-                       do_wakeup = true;
+                       do_wakeup |= 1 << hash;
                        spin_unlock_irqrestore(conf->hash_locks + hash, flags);
                }
                size--;
                hash--;
        }
 
+       for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
+               if (do_wakeup & (1 << i))
+                       wake_up(&conf->wait_for_stripe[i]);
+       }
+
        if (do_wakeup) {
-               wake_up(&conf->wait_for_stripe);
+               if (atomic_read(&conf->active_stripes) == 0)
+                       wake_up(&conf->wait_for_quiescent);
                if (conf->retry_read_aligned)
                        md_wakeup_thread(conf->mddev->thread);
        }
@@ -667,15 +674,15 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
        spin_lock_irq(conf->hash_locks + hash);
 
        do {
-               wait_event_lock_irq(conf->wait_for_stripe,
+               wait_event_lock_irq(conf->wait_for_quiescent,
                                    conf->quiesce == 0 || noquiesce,
                                    *(conf->hash_locks + hash));
                sh = __find_stripe(conf, sector, conf->generation - previous);
                if (!sh) {
                        if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
                                sh = get_free_stripe(conf, hash);
-                               if (!sh && llist_empty(&conf->released_stripes) &&
-                                   !test_bit(R5_DID_ALLOC, &conf->cache_state))
+                               if (!sh && !test_bit(R5_DID_ALLOC,
+                                                    &conf->cache_state))
                                        set_bit(R5_ALLOC_MORE,
                                                &conf->cache_state);
                        }
@@ -684,14 +691,15 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                        if (!sh) {
                                set_bit(R5_INACTIVE_BLOCKED,
                                        &conf->cache_state);
-                               wait_event_lock_irq(
-                                       conf->wait_for_stripe,
+                               wait_event_exclusive_cmd(
+                                       conf->wait_for_stripe[hash],
                                        !list_empty(conf->inactive_list + hash) &&
                                        (atomic_read(&conf->active_stripes)
                                         < (conf->max_nr_stripes * 3 / 4)
                                         || !test_bit(R5_INACTIVE_BLOCKED,
                                                      &conf->cache_state)),
-                                       *(conf->hash_locks + hash));
+                                       spin_unlock_irq(conf->hash_locks + hash),
+                                       spin_lock_irq(conf->hash_locks + hash));
                                clear_bit(R5_INACTIVE_BLOCKED,
                                          &conf->cache_state);
                        } else {
@@ -716,6 +724,9 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                }
        } while (sh == NULL);
 
+       if (!list_empty(conf->inactive_list + hash))
+               wake_up(&conf->wait_for_stripe[hash]);
+
        spin_unlock_irq(conf->hash_locks + hash);
        return sh;
 }
@@ -2177,7 +2188,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
        cnt = 0;
        list_for_each_entry(nsh, &newstripes, lru) {
                lock_device_hash_lock(conf, hash);
-               wait_event_cmd(conf->wait_for_stripe,
+               wait_event_exclusive_cmd(conf->wait_for_stripe[hash],
                                    !list_empty(conf->inactive_list + hash),
                                    unlock_device_hash_lock(conf, hash),
                                    lock_device_hash_lock(conf, hash));
@@ -4760,7 +4771,7 @@ static void raid5_align_endio(struct bio *bi, int error)
                                         raid_bi, 0);
                bio_endio(raid_bi, 0);
                if (atomic_dec_and_test(&conf->active_aligned_reads))
-                       wake_up(&conf->wait_for_stripe);
+                       wake_up(&conf->wait_for_quiescent);
                return;
        }
 
@@ -4855,7 +4866,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
                align_bi->bi_iter.bi_sector += rdev->data_offset;
 
                spin_lock_irq(&conf->device_lock);
-               wait_event_lock_irq(conf->wait_for_stripe,
+               wait_event_lock_irq(conf->wait_for_quiescent,
                                    conf->quiesce == 0,
                                    conf->device_lock);
                atomic_inc(&conf->active_aligned_reads);
@@ -5699,7 +5710,7 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
                bio_endio(raid_bio, 0);
        }
        if (atomic_dec_and_test(&conf->active_aligned_reads))
-               wake_up(&conf->wait_for_stripe);
+               wake_up(&conf->wait_for_quiescent);
        return handled;
 }
 
@@ -6433,7 +6444,10 @@ static struct r5conf *setup_conf(struct mddev *mddev)
                goto abort;
        spin_lock_init(&conf->device_lock);
        seqcount_init(&conf->gen_lock);
-       init_waitqueue_head(&conf->wait_for_stripe);
+       init_waitqueue_head(&conf->wait_for_quiescent);
+       for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
+               init_waitqueue_head(&conf->wait_for_stripe[i]);
+       }
        init_waitqueue_head(&conf->wait_for_overlap);
        INIT_LIST_HEAD(&conf->handle_list);
        INIT_LIST_HEAD(&conf->hold_list);
@@ -7466,7 +7480,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
                 * active stripes can drain
                 */
                conf->quiesce = 2;
-               wait_event_cmd(conf->wait_for_stripe,
+               wait_event_cmd(conf->wait_for_quiescent,
                                    atomic_read(&conf->active_stripes) == 0 &&
                                    atomic_read(&conf->active_aligned_reads) == 0,
                                    unlock_all_device_hash_locks_irq(conf),
@@ -7480,7 +7494,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
        case 0: /* re-enable writes */
                lock_all_device_hash_locks_irq(conf);
                conf->quiesce = 0;
-               wake_up(&conf->wait_for_stripe);
+               wake_up(&conf->wait_for_quiescent);
                wake_up(&conf->wait_for_overlap);
                unlock_all_device_hash_locks_irq(conf);
                break;
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 896d603ad0da964d2c45f22039d8b733f0bef26e..02c3bf8fbfe7aa1b0a1c568605393dc3769c6b18 100644
@@ -511,7 +511,8 @@ struct r5conf {
        struct list_head        inactive_list[NR_STRIPE_HASH_LOCKS];
        atomic_t                empty_inactive_list_nr;
        struct llist_head       released_stripes;
-       wait_queue_head_t       wait_for_stripe;
+       wait_queue_head_t       wait_for_quiescent;
+       wait_queue_head_t       wait_for_stripe[NR_STRIPE_HASH_LOCKS];
        wait_queue_head_t       wait_for_overlap;
        unsigned long           cache_state;
 #define R5_INACTIVE_BLOCKED    1       /* release of inactive stripes blocked,
diff --git a/include/linux/wait.h b/include/linux/wait.h
index d69ac4ecc88b9c0d6ff4d5f97cf0fa9d89b5fcdc..1e1bf9f963a947fc686125d0a2809ad63b8a13ed 100644
@@ -358,6 +358,19 @@ do {                                                                       \
        __ret;                                                          \
 })
 
+#define __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2)          \
+       (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 1, 0,  \
+                           cmd1; schedule(); cmd2)
+/*
+ * Just like wait_event_cmd(), except it sets exclusive flag
+ */
+#define wait_event_exclusive_cmd(wq, condition, cmd1, cmd2)            \
+do {                                                                   \
+       if (condition)                                                  \
+               break;                                                  \
+       __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2);          \
+} while (0)
+
 #define __wait_event_cmd(wq, condition, cmd1, cmd2)                    \
        (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,  \
                            cmd1; schedule(); cmd2)
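
For reference, a sketch of the new macro's calling convention, using hypothetical names (my_wq, my_lock, my_list): cmd1 runs each time before the task sleeps and cmd2 after it wakes, so a caller can drop and retake a lock around schedule(); the exclusive flag queues the waiter with WQ_FLAG_EXCLUSIVE, so wake_up() wakes at most one exclusive waiter instead of the whole queue, which is how the raid5 changes above avoid waking every task blocked on conf->wait_for_stripe[hash]:

	spin_lock_irq(&my_lock);
	wait_event_exclusive_cmd(my_wq,
				 !list_empty(&my_list),
				 /* cmd1: drop the lock before schedule() */
				 spin_unlock_irq(&my_lock),
				 /* cmd2: retake it before rechecking */
				 spin_lock_irq(&my_lock));
	/* my_lock is held and the condition is true here */
	spin_unlock_irq(&my_lock);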