Merge remote-tracking branch 'block/for-next'
author     Stephen Rothwell <sfr@canb.auug.org.au>
           Thu, 11 Feb 2016 01:27:35 +0000 (12:27 +1100)
committer  Stephen Rothwell <sfr@canb.auug.org.au>
           Thu, 11 Feb 2016 01:27:35 +0000 (12:27 +1100)
block/blk-mq-sysfs.c
block/blk-mq.c
block/blk-mq.h
block/cfq-iosched.c
drivers/nvme/host/Kconfig
drivers/nvme/host/Makefile
drivers/nvme/host/core.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
include/linux/blk-mq.h

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 1cf18784c5cf3c44be94dbd003ca9d7088f883e0..431fdda21737cb91b9a5a0f49fd1c2ccd2cfae7c 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -408,17 +408,18 @@ void blk_mq_unregister_disk(struct gendisk *disk)
        blk_mq_enable_hotplug();
 }
 
+void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
+{
+       kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
+}
+
 static void blk_mq_sysfs_init(struct request_queue *q)
 {
-       struct blk_mq_hw_ctx *hctx;
        struct blk_mq_ctx *ctx;
        int i;
 
        kobject_init(&q->mq_kobj, &blk_mq_ktype);
 
-       queue_for_each_hw_ctx(q, hctx, i)
-               kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
-
        queue_for_each_ctx(q, ctx, i)
                kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
 }
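
Note: blk_mq_hctx_kobj_init() is split out of blk_mq_sysfs_init() so the new
hctx reallocation path in blk-mq.c (below) can initialize the kobject of each
hardware context it (re)creates on its own; the kobject_put() in
blk_mq_realloc_hw_ctxs() is the matching teardown. A sketch of the per-hctx
pattern, mirroring the hunk below:

        hctx = kzalloc_node(sizeof(*hctx), GFP_KERNEL, node);
        ...
        blk_mq_hctx_kobj_init(hctx);    /* previously done for all hctxs at once */
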
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4c0622fae41383d0f5577ea9b8b127d93df33bb6..645eb9e716d0106a5a3582fb1e4516e1d63169ce 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1742,31 +1742,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
        return -1;
 }
 
-static int blk_mq_init_hw_queues(struct request_queue *q,
-               struct blk_mq_tag_set *set)
-{
-       struct blk_mq_hw_ctx *hctx;
-       unsigned int i;
-
-       /*
-        * Initialize hardware queues
-        */
-       queue_for_each_hw_ctx(q, hctx, i) {
-               if (blk_mq_init_hctx(q, set, hctx, i))
-                       break;
-       }
-
-       if (i == q->nr_hw_queues)
-               return 0;
-
-       /*
-        * Init failed
-        */
-       blk_mq_exit_hw_queues(q, set, i);
-
-       return 1;
-}
-
 static void blk_mq_init_cpu_queues(struct request_queue *q,
                                   unsigned int nr_hw_queues)
 {
@@ -1824,6 +1799,7 @@ static void blk_mq_map_swqueue(struct request_queue *q,
                        continue;
 
                hctx = q->mq_ops->map_queue(q, i);
+
                cpumask_set_cpu(i, hctx->cpumask);
                ctx->index_hw = hctx->nr_ctx;
                hctx->ctxs[hctx->nr_ctx++] = ctx;
@@ -1972,54 +1948,89 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_init_queue);
 
-struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
-                                                 struct request_queue *q)
+static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+                                               struct request_queue *q)
 {
-       struct blk_mq_hw_ctx **hctxs;
-       struct blk_mq_ctx __percpu *ctx;
-       unsigned int *map;
-       int i;
-
-       ctx = alloc_percpu(struct blk_mq_ctx);
-       if (!ctx)
-               return ERR_PTR(-ENOMEM);
-
-       hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
-                       set->numa_node);
-
-       if (!hctxs)
-               goto err_percpu;
-
-       map = blk_mq_make_queue_map(set);
-       if (!map)
-               goto err_map;
+       int i, j;
+       struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
 
+       blk_mq_sysfs_unregister(q);
        for (i = 0; i < set->nr_hw_queues; i++) {
-               int node = blk_mq_hw_queue_to_node(map, i);
+               int node;
 
+               if (hctxs[i])
+                       continue;
+
+               node = blk_mq_hw_queue_to_node(q->mq_map, i);
                hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
                                        GFP_KERNEL, node);
                if (!hctxs[i])
-                       goto err_hctxs;
+                       break;
 
                if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL,
-                                               node))
-                       goto err_hctxs;
+                                               node)) {
+                       kfree(hctxs[i]);
+                       hctxs[i] = NULL;
+                       break;
+               }
 
                atomic_set(&hctxs[i]->nr_active, 0);
                hctxs[i]->numa_node = node;
                hctxs[i]->queue_num = i;
+
+               if (blk_mq_init_hctx(q, set, hctxs[i], i)) {
+                       free_cpumask_var(hctxs[i]->cpumask);
+                       kfree(hctxs[i]);
+                       hctxs[i] = NULL;
+                       break;
+               }
+               blk_mq_hctx_kobj_init(hctxs[i]);
        }
+       for (j = i; j < q->nr_hw_queues; j++) {
+               struct blk_mq_hw_ctx *hctx = hctxs[j];
+
+               if (hctx) {
+                       if (hctx->tags) {
+                               blk_mq_free_rq_map(set, hctx->tags, j);
+                               set->tags[j] = NULL;
+                       }
+                       blk_mq_exit_hctx(q, set, hctx, j);
+                       free_cpumask_var(hctx->cpumask);
+                       kobject_put(&hctx->kobj);
+                       kfree(hctx->ctxs);
+                       kfree(hctx);
+                       hctxs[j] = NULL;
+
+               }
+       }
+       q->nr_hw_queues = i;
+       blk_mq_sysfs_register(q);
+}
+
+struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+                                                 struct request_queue *q)
+{
+       q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
+       if (!q->queue_ctx)
+               return ERR_PTR(-ENOMEM);
+
+       q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)),
+                                               GFP_KERNEL, set->numa_node);
+       if (!q->queue_hw_ctx)
+               goto err_percpu;
+
+       q->mq_map = blk_mq_make_queue_map(set);
+       if (!q->mq_map)
+               goto err_map;
+
+       blk_mq_realloc_hw_ctxs(set, q);
+       if (!q->nr_hw_queues)
+               goto err_hctxs;
 
        INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
        blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
 
        q->nr_queues = nr_cpu_ids;
-       q->nr_hw_queues = set->nr_hw_queues;
-       q->mq_map = map;
-
-       q->queue_ctx = ctx;
-       q->queue_hw_ctx = hctxs;
 
        q->mq_ops = set->ops;
        q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
@@ -2048,9 +2059,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
        blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
-       if (blk_mq_init_hw_queues(q, set))
-               goto err_hctxs;
-
        get_online_cpus();
        mutex_lock(&all_q_mutex);
 
@@ -2064,17 +2072,11 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
        return q;
 
 err_hctxs:
-       kfree(map);
-       for (i = 0; i < set->nr_hw_queues; i++) {
-               if (!hctxs[i])
-                       break;
-               free_cpumask_var(hctxs[i]->cpumask);
-               kfree(hctxs[i]);
-       }
+       kfree(q->mq_map);
 err_map:
-       kfree(hctxs);
+       kfree(q->queue_hw_ctx);
 err_percpu:
-       free_percpu(ctx);
+       free_percpu(q->queue_ctx);
        return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL(blk_mq_init_allocated_queue);
@@ -2282,9 +2284,13 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
                set->nr_hw_queues = 1;
                set->queue_depth = min(64U, set->queue_depth);
        }
+       /*
+        * There is no use for more h/w queues than cpus.
+        */
+       if (set->nr_hw_queues > nr_cpu_ids)
+               set->nr_hw_queues = nr_cpu_ids;
 
-       set->tags = kmalloc_node(set->nr_hw_queues *
-                                sizeof(struct blk_mq_tags *),
+       set->tags = kzalloc_node(nr_cpu_ids * sizeof(struct blk_mq_tags *),
                                 GFP_KERNEL, set->numa_node);
        if (!set->tags)
                return -ENOMEM;
@@ -2307,7 +2313,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 {
        int i;
 
-       for (i = 0; i < set->nr_hw_queues; i++) {
+       for (i = 0; i < nr_cpu_ids; i++) {
                if (set->tags[i])
                        blk_mq_free_rq_map(set, set->tags[i], i);
        }
@@ -2339,6 +2345,35 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
        return ret;
 }
 
+void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
+{
+       struct request_queue *q;
+
+       if (nr_hw_queues > nr_cpu_ids)
+               nr_hw_queues = nr_cpu_ids;
+       if (nr_hw_queues < 1 || nr_hw_queues == set->nr_hw_queues)
+               return;
+
+       list_for_each_entry(q, &set->tag_list, tag_set_list)
+               blk_mq_freeze_queue(q);
+
+       set->nr_hw_queues = nr_hw_queues;
+       list_for_each_entry(q, &set->tag_list, tag_set_list) {
+               blk_mq_realloc_hw_ctxs(set, q);
+
+               if (q->nr_hw_queues > 1)
+                       blk_queue_make_request(q, blk_mq_make_request);
+               else
+                       blk_queue_make_request(q, blk_sq_make_request);
+
+               blk_mq_queue_reinit(q, cpu_online_mask);
+       }
+
+       list_for_each_entry(q, &set->tag_list, tag_set_list)
+               blk_mq_unfreeze_queue(q);
+}
+EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
+
 void blk_mq_disable_hotplug(void)
 {
        mutex_lock(&all_q_mutex);
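
Note: blk_mq_update_nr_hw_queues() is the new driver-facing entry point for
resizing a shared tag set at run time. A minimal usage sketch, modeled on the
nvme_dev_add() hunk further down (names are illustrative):

        /* after a controller reset reported a different usable queue count */
        blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
        /* hctxs above the new count were torn down by the realloc path;
         * the driver can now free its own per-queue resources as well */
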
diff --git a/block/blk-mq.h b/block/blk-mq.h
index eaede8e45c9c3e5f2555ea86ec39492e1c042e59..9087b11037b70ae514fd46ea10f659152f291aa2 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -57,6 +57,7 @@ extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
  */
 extern int blk_mq_sysfs_register(struct request_queue *q);
 extern void blk_mq_sysfs_unregister(struct request_queue *q);
+extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
 
 extern void blk_mq_rq_timed_out(struct request *req, bool reserved);
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1f9093e901daed7849a54633be5d80ce96b010f4..e3c591dd8f19d0b46fe42dc842510f9d220af143 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -632,6 +632,13 @@ static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg)
        return pblkg ? blkg_to_cfqg(pblkg) : NULL;
 }
 
+static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
+                                     struct cfq_group *ancestor)
+{
+       return cgroup_is_descendant(cfqg_to_blkg(cfqg)->blkcg->css.cgroup,
+                                   cfqg_to_blkg(ancestor)->blkcg->css.cgroup);
+}
+
 static inline void cfqg_get(struct cfq_group *cfqg)
 {
        return blkg_get(cfqg_to_blkg(cfqg));
@@ -758,6 +765,11 @@ static void cfqg_stats_xfer_dead(struct cfq_group *cfqg)
 #else  /* CONFIG_CFQ_GROUP_IOSCHED */
 
 static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) { return NULL; }
+static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
+                                     struct cfq_group *ancestor)
+{
+       return true;
+}
 static inline void cfqg_get(struct cfq_group *cfqg) { }
 static inline void cfqg_put(struct cfq_group *cfqg) { }
 
@@ -2897,6 +2909,7 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 {
        struct cfq_queue *cfqq = cfqd->active_queue;
+       struct cfq_rb_root *st = cfqq->service_tree;
        struct cfq_io_cq *cic;
        unsigned long sl, group_idle = 0;
 
@@ -2947,8 +2960,13 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
                return;
        }
 
-       /* There are other queues in the group, don't do group idle */
-       if (group_idle && cfqq->cfqg->nr_cfqq > 1)
+       /*
+        * There are other queues in the group or this is the only group and
+        * it has too big thinktime, don't do group idle.
+        */
+       if (group_idle &&
+           (cfqq->cfqg->nr_cfqq > 1 ||
+            cfq_io_thinktime_big(cfqd, &st->ttime, true)))
                return;
 
        cfq_mark_cfqq_wait_request(cfqq);
@@ -3947,16 +3965,27 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
        if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
                return true;
 
-       if (new_cfqq->cfqg != cfqq->cfqg)
+       /*
+        * Treat ancestors of current cgroup the same way as current cgroup.
+        * For anybody else we disallow preemption to guarantee service
+        * fairness among cgroups.
+        */
+       if (!cfqg_is_descendant(cfqq->cfqg, new_cfqq->cfqg))
                return false;
 
        if (cfq_slice_used(cfqq))
                return true;
 
+       /*
+        * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
+        */
+       if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
+               return true;
+
+       WARN_ON_ONCE(cfqq->ioprio_class != new_cfqq->ioprio_class);
        /* Allow preemption only if we are idling on sync-noidle tree */
        if (cfqd->serving_wl_type == SYNC_NOIDLE_WORKLOAD &&
            cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD &&
-           new_cfqq->service_tree->count == 2 &&
            RB_EMPTY_ROOT(&cfqq->sort_list))
                return true;
 
@@ -3967,12 +3996,6 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
        if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
                return true;
 
-       /*
-        * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
-        */
-       if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
-               return true;
-
        /* An idle queue should not be idle now for some reason */
        if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
                return true;
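
Note: the preemption check now permits a new request to preempt when it comes
from the active queue's own cgroup or one of that cgroup's ancestors. Because
cgroup_is_descendant() counts a cgroup as a descendant of itself, the old
same-group case (new_cfqq->cfqg == cfqq->cfqg) still passes; the
!CONFIG_CFQ_GROUP_IOSCHED stub returns true since everything is one group
there. The ancestry walk is roughly (sketch of the cgroup core helper):

        while (cgrp) {
                if (cgrp == ancestor)
                        return true;
                cgrp = cgroup_parent(cgrp);
        }
        return false;
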
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 5d6237391dcd4e3851390abe9b1412217d2428d8..2ed30f063a132973beb6f21b15f30b86da1763a7 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -1,6 +1,10 @@
+config NVME_CORE
+       tristate
+
 config BLK_DEV_NVME
        tristate "NVM Express block device"
        depends on PCI && BLOCK
+       select NVME_CORE
        ---help---
          The NVM Express driver is for solid state drives directly
          connected to the PCI or PCI Express bus.  If you know you
@@ -11,7 +15,7 @@ config BLK_DEV_NVME
 
 config BLK_DEV_NVME_SCSI
        bool "SCSI emulation for NVMe device nodes"
-       depends on BLK_DEV_NVME
+       depends on NVME_CORE
        ---help---
          This adds support for the SG_IO ioctl on the NVMe character
          and block devices nodes, as well a a translation for a small
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 51bf90871549844c7f4db638a7ec26c4b4b9aa51..9a3ca892b4a722e2a6d51aef48d1397985fa5bc4 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -1,6 +1,8 @@
+obj-$(CONFIG_NVME_CORE)                        += nvme-core.o
+obj-$(CONFIG_BLK_DEV_NVME)             += nvme.o
 
-obj-$(CONFIG_BLK_DEV_NVME)     += nvme.o
+nvme-core-y                            := core.o
+nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI)  += scsi.o
+nvme-core-$(CONFIG_NVM)                        += lightnvm.o
 
-lightnvm-$(CONFIG_NVM)                 := lightnvm.o
-nvme-y                                 += core.o pci.o $(lightnvm-y)
-nvme-$(CONFIG_BLK_DEV_NVME_SCSI)        += scsi.o
+nvme-y                                 += pci.o
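
Note: the Kconfig/Makefile split yields two modules: nvme-core.ko (core.o plus
the optional scsi.o and lightnvm.o objects) and nvme.ko (pci.o only). A future
transport driver would reuse the core by selecting it, along these lines
(hypothetical example):

        config BLK_DEV_NVME_FABRIC
                tristate "NVMe over some fabric"
                select NVME_CORE
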
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c5bf001af55954e2c9275d34c65cbb5dfa2a037d..07b7ec699e9213f48c2c9624674e5e4147cb7085 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
 
 #define NVME_MINORS            (1U << MINORBITS)
 
+unsigned char admin_timeout = 60;
+module_param(admin_timeout, byte, 0644);
+MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
+EXPORT_SYMBOL_GPL(admin_timeout);
+
+unsigned char nvme_io_timeout = 30;
+module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
+MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
+EXPORT_SYMBOL_GPL(nvme_io_timeout);
+
+unsigned char shutdown_timeout = 5;
+module_param(shutdown_timeout, byte, 0644);
+MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
+
 static int nvme_major;
 module_param(nvme_major, int, 0);
 
@@ -40,7 +54,7 @@ static int nvme_char_major;
 module_param(nvme_char_major, int, 0);
 
 static LIST_HEAD(nvme_ctrl_list);
-DEFINE_SPINLOCK(dev_list_lock);
+static DEFINE_SPINLOCK(dev_list_lock);
 
 static struct class *nvme_class;
 
@@ -71,11 +85,21 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
 
        spin_lock(&dev_list_lock);
        ns = disk->private_data;
-       if (ns && !kref_get_unless_zero(&ns->kref))
-               ns = NULL;
+       if (ns) {
+               if (!kref_get_unless_zero(&ns->kref))
+                       goto fail;
+               if (!try_module_get(ns->ctrl->ops->module))
+                       goto fail_put_ns;
+       }
        spin_unlock(&dev_list_lock);
 
        return ns;
+
+fail_put_ns:
+       kref_put(&ns->kref, nvme_free_ns);
+fail:
+       spin_unlock(&dev_list_lock);
+       return NULL;
 }
 
 void nvme_requeue_req(struct request *req)
@@ -88,6 +112,7 @@ void nvme_requeue_req(struct request *req)
                blk_mq_kick_requeue_list(req->q);
        spin_unlock_irqrestore(req->q->queue_lock, flags);
 }
+EXPORT_SYMBOL_GPL(nvme_requeue_req);
 
 struct request *nvme_alloc_request(struct request_queue *q,
                struct nvme_command *cmd, unsigned int flags)
@@ -111,6 +136,7 @@ struct request *nvme_alloc_request(struct request_queue *q,
 
        return req;
 }
+EXPORT_SYMBOL_GPL(nvme_alloc_request);
 
 /*
  * Returns 0 on success.  If the result is negative, it's a Linux error code;
@@ -148,6 +174,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 {
        return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
 }
+EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
 
 int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
                void __user *ubuffer, unsigned bufflen,
@@ -363,6 +390,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
        *count = min(*count, nr_io_queues);
        return 0;
 }
+EXPORT_SYMBOL_GPL(nvme_set_queue_count);
 
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
@@ -499,7 +527,10 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
 
 static void nvme_release(struct gendisk *disk, fmode_t mode)
 {
-       nvme_put_ns(disk->private_data);
+       struct nvme_ns *ns = disk->private_data;
+
+       module_put(ns->ctrl->ops->module);
+       nvme_put_ns(ns);
 }
 
 static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -557,8 +588,8 @@ static int nvme_revalidate_disk(struct gendisk *disk)
        unsigned short bs;
 
        if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
-               dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
-                               __func__, ns->ctrl->instance, ns->ns_id);
+               dev_warn(disk_to_dev(ns->disk), "%s: Identify failure\n",
+                               __func__);
                return -ENODEV;
        }
        if (id->ncap == 0) {
@@ -568,7 +599,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 
        if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
                if (nvme_nvm_register(ns->queue, disk->disk_name)) {
-                       dev_warn(ns->ctrl->dev,
+                       dev_warn(disk_to_dev(ns->disk),
                                "%s: LightNVM init failure\n", __func__);
                        kfree(id);
                        return -ENODEV;
@@ -741,7 +772,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
                if (fatal_signal_pending(current))
                        return -EINTR;
                if (time_after(jiffies, timeout)) {
-                       dev_err(ctrl->dev,
+                       dev_err(ctrl->device,
                                "Device not ready; aborting %s\n", enabled ?
                                                "initialisation" : "reset");
                        return -ENODEV;
@@ -769,6 +800,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
                return ret;
        return nvme_wait_ready(ctrl, cap, false);
 }
+EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
 
 int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
 {
@@ -781,7 +813,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
        int ret;
 
        if (page_shift < dev_page_min) {
-               dev_err(ctrl->dev,
+               dev_err(ctrl->device,
                        "Minimum device page size %u too large for host (%u)\n",
                        1 << dev_page_min, 1 << page_shift);
                return -ENODEV;
@@ -800,6 +832,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
                return ret;
        return nvme_wait_ready(ctrl, cap, true);
 }
+EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
 
 int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
 {
@@ -822,7 +855,7 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
                if (fatal_signal_pending(current))
                        return -EINTR;
                if (time_after(jiffies, timeout)) {
-                       dev_err(ctrl->dev,
+                       dev_err(ctrl->device,
                                "Device shutdown incomplete; abort shutdown\n");
                        return -ENODEV;
                }
@@ -830,6 +863,7 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl);
 
 /*
  * Initialize the cached copies of the Identify data and various controller
@@ -844,13 +878,13 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 
        ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
        if (ret) {
-               dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret);
+               dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
                return ret;
        }
 
        ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
        if (ret) {
-               dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret);
+               dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
                return ret;
        }
        page_shift = NVME_CAP_MPSMIN(cap) + 12;
@@ -860,7 +894,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 
        ret = nvme_identify_ctrl(ctrl, &id);
        if (ret) {
-               dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret);
+               dev_err(ctrl->device, "Identify Controller failed (%d)\n", ret);
                return -EIO;
        }
 
@@ -891,6 +925,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        kfree(id);
        return 0;
 }
+EXPORT_SYMBOL_GPL(nvme_init_identify);
 
 static int nvme_dev_open(struct inode *inode, struct file *file)
 {
@@ -937,13 +972,13 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
 
        ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
        if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
-               dev_warn(ctrl->dev,
+               dev_warn(ctrl->device,
                        "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
                ret = -EINVAL;
                goto out_unlock;
        }
 
-       dev_warn(ctrl->dev,
+       dev_warn(ctrl->device,
                "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
        kref_get(&ns->kref);
        mutex_unlock(&ctrl->namespaces_mutex);
@@ -969,7 +1004,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
        case NVME_IOCTL_IO_CMD:
                return nvme_dev_user_cmd(ctrl, argp);
        case NVME_IOCTL_RESET:
-               dev_warn(ctrl->dev, "resetting controller\n");
+               dev_warn(ctrl->device, "resetting controller\n");
                return ctrl->ops->reset_ctrl(ctrl);
        case NVME_IOCTL_SUBSYS_RESET:
                return nvme_reset_subsystem(ctrl);
@@ -1296,6 +1331,7 @@ void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
        mutex_unlock(&ctrl->namespaces_mutex);
        kfree(id);
 }
+EXPORT_SYMBOL_GPL(nvme_scan_namespaces);
 
 void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 {
@@ -1306,6 +1342,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
                nvme_ns_remove(ns);
        mutex_unlock(&ctrl->namespaces_mutex);
 }
+EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
 
 static DEFINE_IDA(nvme_instance_ida);
 
@@ -1337,13 +1374,14 @@ static void nvme_release_instance(struct nvme_ctrl *ctrl)
 }
 
 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
- {
+{
        device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
 
        spin_lock(&dev_list_lock);
        list_del(&ctrl->node);
        spin_unlock(&dev_list_lock);
 }
+EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
 
 static void nvme_free_ctrl(struct kref *kref)
 {
@@ -1359,6 +1397,7 @@ void nvme_put_ctrl(struct nvme_ctrl *ctrl)
 {
        kref_put(&ctrl->kref, nvme_free_ctrl);
 }
+EXPORT_SYMBOL_GPL(nvme_put_ctrl);
 
 /*
  * Initialize a NVMe controller structures.  This needs to be called during
@@ -1383,14 +1422,13 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 
        ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
                                MKDEV(nvme_char_major, ctrl->instance),
-                               dev, nvme_dev_attr_groups,
+                               ctrl, nvme_dev_attr_groups,
                                "nvme%d", ctrl->instance);
        if (IS_ERR(ctrl->device)) {
                ret = PTR_ERR(ctrl->device);
                goto out_release_instance;
        }
        get_device(ctrl->device);
-       dev_set_drvdata(ctrl->device, ctrl);
 
        spin_lock(&dev_list_lock);
        list_add_tail(&ctrl->node, &nvme_ctrl_list);
@@ -1402,6 +1440,7 @@ out_release_instance:
 out:
        return ret;
 }
+EXPORT_SYMBOL_GPL(nvme_init_ctrl);
 
 void nvme_stop_queues(struct nvme_ctrl *ctrl)
 {
@@ -1418,6 +1457,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
        }
        mutex_unlock(&ctrl->namespaces_mutex);
 }
+EXPORT_SYMBOL_GPL(nvme_stop_queues);
 
 void nvme_start_queues(struct nvme_ctrl *ctrl)
 {
@@ -1431,6 +1471,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
        }
        mutex_unlock(&ctrl->namespaces_mutex);
 }
+EXPORT_SYMBOL_GPL(nvme_start_queues);
 
 int __init nvme_core_init(void)
 {
@@ -1470,3 +1511,8 @@ void nvme_core_exit(void)
        class_destroy(nvme_class);
        __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
 }
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
+module_init(nvme_core_init);
+module_exit(nvme_core_exit);
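
Note: with the core in its own module, opening a namespace must also pin the
transport driver's module so it cannot be unloaded while the block device is
open; that is what the new try_module_get(ns->ctrl->ops->module) in
nvme_get_ns_from_disk() does. nvme_release() drops both references in reverse
order:

        module_put(ns->ctrl->ops->module);
        nvme_put_ns(ns);
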
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 4fb5bb737868ce2db7da41700cd238c8804b086c..63ba8a500ee13e13c8285fee73783cfa66020bce 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -117,6 +117,7 @@ struct nvme_ns {
 };
 
 struct nvme_ctrl_ops {
+       struct module *module;
        int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
        int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
        int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
@@ -265,8 +266,6 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
                        dma_addr_t dma_addr, u32 *result);
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
 
-extern spinlock_t dev_list_lock;
-
 struct sg_io_hdr;
 
 int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 72ef8322d32ac7180912e2ae2cf38245f3713d9a..fec747917690665f68e5a6cf341599fb6e958cbc 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
 #define NVME_NR_AEN_COMMANDS   1
 #define NVME_AQ_BLKMQ_DEPTH    (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
 
-unsigned char admin_timeout = 60;
-module_param(admin_timeout, byte, 0644);
-MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
-
-unsigned char nvme_io_timeout = 30;
-module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
-MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
-
-unsigned char shutdown_timeout = 5;
-module_param(shutdown_timeout, byte, 0644);
-MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
-
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0);
 
@@ -77,6 +65,7 @@ module_param(use_cmb_sqes, bool, 0644);
 MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
 
 static LIST_HEAD(dev_list);
+static DEFINE_SPINLOCK(dev_list_lock);
 static struct task_struct *nvme_thread;
 static struct workqueue_struct *nvme_workq;
 static wait_queue_head_t nvme_kthread_wait;
@@ -299,10 +288,10 @@ static void nvme_complete_async_event(struct nvme_dev *dev,
 
        switch (result & 0xff07) {
        case NVME_AER_NOTICE_NS_CHANGED:
-               dev_info(dev->dev, "rescanning\n");
+               dev_info(dev->ctrl.device, "rescanning\n");
                queue_work(nvme_workq, &dev->scan_work);
        default:
-               dev_warn(dev->dev, "async event result %08x\n", result);
+               dev_warn(dev->ctrl.device, "async event result %08x\n", result);
        }
 }
 
@@ -708,7 +697,7 @@ static void nvme_complete_rq(struct request *req)
        }
 
        if (unlikely(iod->aborted)) {
-               dev_warn(dev->dev,
+               dev_warn(dev->ctrl.device,
                        "completing aborted command with status: %04x\n",
                        req->errors);
        }
@@ -740,7 +729,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
                        *tag = -1;
 
                if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
-                       dev_warn(nvmeq->q_dmadev,
+                       dev_warn(nvmeq->dev->ctrl.device,
                                "invalid id %d completed on queue %d\n",
                                cqe.command_id, le16_to_cpu(cqe.sq_id));
                        continue;
@@ -908,7 +897,8 @@ static void abort_endio(struct request *req, int error)
        u32 result = (u32)(uintptr_t)req->special;
        u16 status = req->errors;
 
-       dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
+       dev_warn(nvmeq->dev->ctrl.device,
+               "Abort status:%x result:%x", status, result);
        atomic_inc(&nvmeq->dev->ctrl.abort_limit);
 
        blk_mq_free_request(req);
@@ -929,7 +919,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
         * shutdown, so we return BLK_EH_HANDLED.
         */
        if (test_bit(NVME_CTRL_RESETTING, &dev->flags)) {
-               dev_warn(dev->dev,
+               dev_warn(dev->ctrl.device,
                         "I/O %d QID %d timeout, disable controller\n",
                         req->tag, nvmeq->qid);
                nvme_dev_disable(dev, false);
@@ -943,7 +933,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
         * returned to the driver, or if this is the admin queue.
         */
        if (!nvmeq->qid || iod->aborted) {
-               dev_warn(dev->dev,
+               dev_warn(dev->ctrl.device,
                         "I/O %d QID %d timeout, reset controller\n",
                         req->tag, nvmeq->qid);
                nvme_dev_disable(dev, false);
@@ -969,8 +959,9 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
        cmd.abort.cid = req->tag;
        cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
 
-       dev_warn(nvmeq->q_dmadev, "I/O %d QID %d timeout, aborting\n",
-                                req->tag, nvmeq->qid);
+       dev_warn(nvmeq->dev->ctrl.device,
+               "I/O %d QID %d timeout, aborting\n",
+                req->tag, nvmeq->qid);
 
        abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
                        BLK_MQ_REQ_NOWAIT);
@@ -999,7 +990,7 @@ static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved
        if (!blk_mq_request_started(req))
                return;
 
-       dev_warn(nvmeq->q_dmadev,
+       dev_warn(nvmeq->dev->ctrl.device,
                 "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid);
 
        status = NVME_SC_ABORT_REQ;
@@ -1355,7 +1346,7 @@ static int nvme_kthread(void *data)
                        if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
                                                        csts & NVME_CSTS_CFS) {
                                if (queue_work(nvme_workq, &dev->reset_work)) {
-                                       dev_warn(dev->dev,
+                                       dev_warn(dev->ctrl.device,
                                                "Failed status: %x, reset controller\n",
                                                readl(dev->bar + NVME_REG_CSTS));
                                }
@@ -1381,7 +1372,7 @@ static int nvme_kthread(void *data)
 
 static int nvme_create_io_queues(struct nvme_dev *dev)
 {
-       unsigned i;
+       unsigned i, max;
        int ret = 0;
 
        for (i = dev->queue_count; i <= dev->max_qid; i++) {
@@ -1391,7 +1382,8 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
                }
        }
 
-       for (i = dev->online_queues; i <= dev->queue_count - 1; i++) {
+       max = min(dev->max_qid, dev->queue_count - 1);
+       for (i = dev->online_queues; i <= max; i++) {
                ret = nvme_create_queue(dev->queues[i], i);
                if (ret) {
                        nvme_free_queues(dev, i);
@@ -1482,7 +1474,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
         * access to the admin queue, as that might be only way to fix them up.
         */
        if (result > 0) {
-               dev_err(dev->dev, "Could not set queue count (%d)\n", result);
+               dev_err(dev->ctrl.device,
+                       "Could not set queue count (%d)\n", result);
                nr_io_queues = 0;
                result = 0;
        }
@@ -1548,9 +1541,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
                adminq->cq_vector = -1;
                goto free_queues;
        }
-
-       /* Free previously allocated queues that are no longer usable */
-       nvme_free_queues(dev, nr_io_queues + 1);
        return nvme_create_io_queues(dev);
 
  free_queues:
@@ -1684,7 +1674,13 @@ static int nvme_dev_add(struct nvme_dev *dev)
                if (blk_mq_alloc_tag_set(&dev->tagset))
                        return 0;
                dev->ctrl.tagset = &dev->tagset;
+       } else {
+               blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
+
+               /* Free previously allocated queues that are no longer usable */
+               nvme_free_queues(dev, dev->online_queues);
        }
+
        queue_work(nvme_workq, &dev->scan_work);
        return 0;
 }
@@ -1943,7 +1939,7 @@ static void nvme_reset_work(struct work_struct *work)
         * any working I/O queue.
         */
        if (dev->online_queues < 2) {
-               dev_warn(dev->dev, "IO queues not created\n");
+               dev_warn(dev->ctrl.device, "IO queues not created\n");
                nvme_remove_namespaces(&dev->ctrl);
        } else {
                nvme_start_queues(&dev->ctrl);
@@ -1980,7 +1976,7 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
 
 static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
 {
-       dev_warn(dev->dev, "Removing after probe failure\n");
+       dev_warn(dev->ctrl.device, "Removing after probe failure\n");
        kref_get(&dev->ctrl.kref);
        if (!schedule_work(&dev->remove_work))
                nvme_put_ctrl(&dev->ctrl);
@@ -2029,6 +2025,7 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
 }
 
 static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
+       .module                 = THIS_MODULE,
        .reg_read32             = nvme_pci_reg_read32,
        .reg_write32            = nvme_pci_reg_write32,
        .reg_read64             = nvme_pci_reg_read64,
@@ -2077,6 +2074,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (result)
                goto release_pools;
 
+       dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
+
        queue_work(nvme_workq, &dev->reset_work);
        return 0;
 
@@ -2160,7 +2159,7 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
         * shutdown the controller to quiesce. The controller will be restarted
         * after the slot reset through driver's slot_reset callback.
         */
-       dev_warn(&pdev->dev, "error detected: state:%d\n", state);
+       dev_warn(dev->ctrl.device, "error detected: state:%d\n", state);
        switch (state) {
        case pci_channel_io_normal:
                return PCI_ERS_RESULT_CAN_RECOVER;
@@ -2177,7 +2176,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
 
-       dev_info(&pdev->dev, "restart after slot reset\n");
+       dev_info(dev->ctrl.device, "restart after slot reset\n");
        pci_restore_state(pdev);
        queue_work(nvme_workq, &dev->reset_work);
        return PCI_ERS_RESULT_RECOVERED;
@@ -2231,26 +2230,15 @@ static int __init nvme_init(void)
        if (!nvme_workq)
                return -ENOMEM;
 
-       result = nvme_core_init();
-       if (result < 0)
-               goto kill_workq;
-
        result = pci_register_driver(&nvme_driver);
        if (result)
-               goto core_exit;
-       return 0;
-
- core_exit:
-       nvme_core_exit();
- kill_workq:
-       destroy_workqueue(nvme_workq);
+               destroy_workqueue(nvme_workq);
        return result;
 }
 
 static void __exit nvme_exit(void)
 {
        pci_unregister_driver(&nvme_driver);
-       nvme_core_exit();
        destroy_workqueue(nvme_workq);
        BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
        _nvme_check_size();
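
Note: nvme_init()/nvme_exit() no longer call nvme_core_init()/nvme_core_exit();
those now run from nvme-core's own module_init()/module_exit() (see the core.c
hunk above), leaving only the workqueue for the PCI driver's error path to
clean up.
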
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 7fc9296b574290e768dbb38585e725525802940b..15a73d49fd1d54a4b401bf3a042b466de6f311c8 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -244,6 +244,8 @@ void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_unfreeze_queue(struct request_queue *q);
 void blk_mq_freeze_queue_start(struct request_queue *q);
 
+void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
+
 /*
  * Driver command data is immediately after the request. So subtract request
  * size to get back to the original request, add request size to get the PDU.