blk_mq_enable_hotplug();
}
+void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
+{
+ kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
+}
+
static void blk_mq_sysfs_init(struct request_queue *q)
{
- struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
int i;
kobject_init(&q->mq_kobj, &blk_mq_ktype);
- queue_for_each_hw_ctx(q, hctx, i)
- kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
-
queue_for_each_ctx(q, ctx, i)
kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
}
return -1;
}
-static int blk_mq_init_hw_queues(struct request_queue *q,
- struct blk_mq_tag_set *set)
-{
- struct blk_mq_hw_ctx *hctx;
- unsigned int i;
-
- /*
- * Initialize hardware queues
- */
- queue_for_each_hw_ctx(q, hctx, i) {
- if (blk_mq_init_hctx(q, set, hctx, i))
- break;
- }
-
- if (i == q->nr_hw_queues)
- return 0;
-
- /*
- * Init failed
- */
- blk_mq_exit_hw_queues(q, set, i);
-
- return 1;
-}
-
static void blk_mq_init_cpu_queues(struct request_queue *q,
unsigned int nr_hw_queues)
{
continue;
hctx = q->mq_ops->map_queue(q, i);
+
cpumask_set_cpu(i, hctx->cpumask);
ctx->index_hw = hctx->nr_ctx;
hctx->ctxs[hctx->nr_ctx++] = ctx;
}
EXPORT_SYMBOL(blk_mq_init_queue);
-struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
- struct request_queue *q)
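+/*
+ * Allocate and initialize any hardware contexts that are not set up yet,
+ * up to set->nr_hw_queues.  If setting one up fails, stop there: contexts
+ * from that index through the old q->nr_hw_queues are torn down and
+ * q->nr_hw_queues is left at the number of usable contexts.  The queue's
+ * blk-mq sysfs entries are unregistered across the update.
+ */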
+static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+ struct request_queue *q)
{
- struct blk_mq_hw_ctx **hctxs;
- struct blk_mq_ctx __percpu *ctx;
- unsigned int *map;
- int i;
-
- ctx = alloc_percpu(struct blk_mq_ctx);
- if (!ctx)
- return ERR_PTR(-ENOMEM);
-
- hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
- set->numa_node);
-
- if (!hctxs)
- goto err_percpu;
-
- map = blk_mq_make_queue_map(set);
- if (!map)
- goto err_map;
+ int i, j;
+ struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
+ blk_mq_sysfs_unregister(q);
for (i = 0; i < set->nr_hw_queues; i++) {
- int node = blk_mq_hw_queue_to_node(map, i);
+ int node;
+ if (hctxs[i])
+ continue;
+
+ node = blk_mq_hw_queue_to_node(q->mq_map, i);
hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
GFP_KERNEL, node);
if (!hctxs[i])
- goto err_hctxs;
+ break;
if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL,
- node))
- goto err_hctxs;
+ node)) {
+ kfree(hctxs[i]);
+ hctxs[i] = NULL;
+ break;
+ }
atomic_set(&hctxs[i]->nr_active, 0);
hctxs[i]->numa_node = node;
hctxs[i]->queue_num = i;
+
+ if (blk_mq_init_hctx(q, set, hctxs[i], i)) {
+ free_cpumask_var(hctxs[i]->cpumask);
+ kfree(hctxs[i]);
+ hctxs[i] = NULL;
+ break;
+ }
+ blk_mq_hctx_kobj_init(hctxs[i]);
}
+ for (j = i; j < q->nr_hw_queues; j++) {
+ struct blk_mq_hw_ctx *hctx = hctxs[j];
+
+ if (hctx) {
+ if (hctx->tags) {
+ blk_mq_free_rq_map(set, hctx->tags, j);
+ set->tags[j] = NULL;
+ }
+ blk_mq_exit_hctx(q, set, hctx, j);
+ free_cpumask_var(hctx->cpumask);
+ kobject_put(&hctx->kobj);
+ kfree(hctx->ctxs);
+ kfree(hctx);
+ hctxs[j] = NULL;
+ }
+ }
+ q->nr_hw_queues = i;
+ blk_mq_sysfs_register(q);
+}
+
+struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+ struct request_queue *q)
+{
+ q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
+ if (!q->queue_ctx)
+ return ERR_PTR(-ENOMEM);
+
+ q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)),
+ GFP_KERNEL, set->numa_node);
+ if (!q->queue_hw_ctx)
+ goto err_percpu;
+
+ q->mq_map = blk_mq_make_queue_map(set);
+ if (!q->mq_map)
+ goto err_map;
+
+ blk_mq_realloc_hw_ctxs(set, q);
+ if (!q->nr_hw_queues)
+ goto err_hctxs;
INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
q->nr_queues = nr_cpu_ids;
- q->nr_hw_queues = set->nr_hw_queues;
- q->mq_map = map;
-
- q->queue_ctx = ctx;
- q->queue_hw_ctx = hctxs;
q->mq_ops = set->ops;
q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
- if (blk_mq_init_hw_queues(q, set))
- goto err_hctxs;
-
get_online_cpus();
mutex_lock(&all_q_mutex);
return q;
err_hctxs:
- kfree(map);
- for (i = 0; i < set->nr_hw_queues; i++) {
- if (!hctxs[i])
- break;
- free_cpumask_var(hctxs[i]->cpumask);
- kfree(hctxs[i]);
- }
+ kfree(q->mq_map);
err_map:
- kfree(hctxs);
+ kfree(q->queue_hw_ctx);
err_percpu:
- free_percpu(ctx);
+ free_percpu(q->queue_ctx);
return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(blk_mq_init_allocated_queue);
set->nr_hw_queues = 1;
set->queue_depth = min(64U, set->queue_depth);
}
+ /*
+ * There is no use for more h/w queues than cpus.
+ */
+ if (set->nr_hw_queues > nr_cpu_ids)
+ set->nr_hw_queues = nr_cpu_ids;
- set->tags = kmalloc_node(set->nr_hw_queues *
- sizeof(struct blk_mq_tags *),
+ set->tags = kzalloc_node(nr_cpu_ids * sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node);
if (!set->tags)
return -ENOMEM;
{
int i;
- for (i = 0; i < set->nr_hw_queues; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
if (set->tags[i])
blk_mq_free_rq_map(set, set->tags[i], i);
}
return ret;
}
+void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
+{
+ struct request_queue *q;
+
+ if (nr_hw_queues > nr_cpu_ids)
+ nr_hw_queues = nr_cpu_ids;
+ if (nr_hw_queues < 1 || nr_hw_queues == set->nr_hw_queues)
+ return;
+
+ list_for_each_entry(q, &set->tag_list, tag_set_list)
+ blk_mq_freeze_queue(q);
+
+ set->nr_hw_queues = nr_hw_queues;
+ list_for_each_entry(q, &set->tag_list, tag_set_list) {
+ blk_mq_realloc_hw_ctxs(set, q);
+
+ if (q->nr_hw_queues > 1)
+ blk_queue_make_request(q, blk_mq_make_request);
+ else
+ blk_queue_make_request(q, blk_sq_make_request);
+
+ blk_mq_queue_reinit(q, cpu_online_mask);
+ }
+
+ list_for_each_entry(q, &set->tag_list, tag_set_list)
+ blk_mq_unfreeze_queue(q);
+}
+EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
+
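For reference, a minimal sketch (illustrative, not part of the patch) of how a driver sharing one tag set across its request queues might adopt the new helper once it learns how many hardware queues are actually usable. blk_mq_update_nr_hw_queues() itself clamps the count to nr_cpu_ids and freezes/unfreezes every queue on the tag set, so the caller only supplies the new count; the NVMe PCI hunk later in this series uses it the same way. The helper name below is hypothetical.

#include <linux/blk-mq.h>

/* Hypothetical example, for illustration only. */
static void example_adjust_hw_queues(struct blk_mq_tag_set *set,
				     int usable_hw_queues)
{
	/*
	 * No-op when the count is unchanged or below 1; otherwise this
	 * freezes every queue on the tag set, reallocates the hardware
	 * contexts, switches between the mq/sq make_request paths,
	 * remaps software to hardware contexts, and unfreezes again.
	 */
	blk_mq_update_nr_hw_queues(set, usable_hw_queues);
}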
void blk_mq_disable_hotplug(void)
{
mutex_lock(&all_q_mutex);
*/
extern int blk_mq_sysfs_register(struct request_queue *q);
extern void blk_mq_sysfs_unregister(struct request_queue *q);
+extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
extern void blk_mq_rq_timed_out(struct request *req, bool reserved);
return pblkg ? blkg_to_cfqg(pblkg) : NULL;
}
+static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
+ struct cfq_group *ancestor)
+{
+ return cgroup_is_descendant(cfqg_to_blkg(cfqg)->blkcg->css.cgroup,
+ cfqg_to_blkg(ancestor)->blkcg->css.cgroup);
+}
+
static inline void cfqg_get(struct cfq_group *cfqg)
{
return blkg_get(cfqg_to_blkg(cfqg));
#else /* CONFIG_CFQ_GROUP_IOSCHED */
static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) { return NULL; }
+static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
+ struct cfq_group *ancestor)
+{
+ return true;
+}
static inline void cfqg_get(struct cfq_group *cfqg) { }
static inline void cfqg_put(struct cfq_group *cfqg) { }
static void cfq_arm_slice_timer(struct cfq_data *cfqd)
{
struct cfq_queue *cfqq = cfqd->active_queue;
+ struct cfq_rb_root *st = cfqq->service_tree;
struct cfq_io_cq *cic;
unsigned long sl, group_idle = 0;
return;
}
- /* There are other queues in the group, don't do group idle */
- if (group_idle && cfqq->cfqg->nr_cfqq > 1)
+ /*
+ * There are other queues in the group, or this is the only group and
+ * its thinktime is too big; either way, don't do group idle.
+ */
+ if (group_idle &&
+ (cfqq->cfqg->nr_cfqq > 1 ||
+ cfq_io_thinktime_big(cfqd, &st->ttime, true)))
return;
cfq_mark_cfqq_wait_request(cfqq);
if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
return true;
- if (new_cfqq->cfqg != cfqq->cfqg)
+ /*
+ * Treat ancestors of current cgroup the same way as current cgroup.
+ * For anybody else we disallow preemption to guarantee service
+ * fairness among cgroups.
+ */
+ if (!cfqg_is_descendant(cfqq->cfqg, new_cfqq->cfqg))
return false;
if (cfq_slice_used(cfqq))
return true;
+ /*
+ * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
+ */
+ if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
+ return true;
+
+ WARN_ON_ONCE(cfqq->ioprio_class != new_cfqq->ioprio_class);
/* Allow preemption only if we are idling on sync-noidle tree */
if (cfqd->serving_wl_type == SYNC_NOIDLE_WORKLOAD &&
cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD &&
- new_cfqq->service_tree->count == 2 &&
RB_EMPTY_ROOT(&cfqq->sort_list))
return true;
if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
return true;
- /*
- * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
- */
- if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
- return true;
-
/* An idle queue should not be idle now for some reason */
if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
return true;
+config NVME_CORE
+ tristate
+
config BLK_DEV_NVME
tristate "NVM Express block device"
depends on PCI && BLOCK
+ select NVME_CORE
---help---
The NVM Express driver is for solid state drives directly
connected to the PCI or PCI Express bus. If you know you
config BLK_DEV_NVME_SCSI
bool "SCSI emulation for NVMe device nodes"
- depends on BLK_DEV_NVME
+ depends on NVME_CORE
---help---
This adds support for the SG_IO ioctl on the NVMe character
and block device nodes, as well as a translation for a small
-obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
+obj-$(CONFIG_NVME_CORE) += nvme-core.o
+obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
-lightnvm-$(CONFIG_NVM) := lightnvm.o
-nvme-y += core.o pci.o $(lightnvm-y)
-nvme-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o
+nvme-core-y := core.o
+nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o
+nvme-core-$(CONFIG_NVM) += lightnvm.o
+nvme-y += pci.o
#define NVME_MINORS (1U << MINORBITS)
+unsigned char admin_timeout = 60;
+module_param(admin_timeout, byte, 0644);
+MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
+EXPORT_SYMBOL_GPL(admin_timeout);
+
+unsigned char nvme_io_timeout = 30;
+module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
+MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
+EXPORT_SYMBOL_GPL(nvme_io_timeout);
+
+unsigned char shutdown_timeout = 5;
+module_param(shutdown_timeout, byte, 0644);
+MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
+
static int nvme_major;
module_param(nvme_major, int, 0);
module_param(nvme_char_major, int, 0);
static LIST_HEAD(nvme_ctrl_list);
-DEFINE_SPINLOCK(dev_list_lock);
+static DEFINE_SPINLOCK(dev_list_lock);
static struct class *nvme_class;
spin_lock(&dev_list_lock);
ns = disk->private_data;
- if (ns && !kref_get_unless_zero(&ns->kref))
- ns = NULL;
+ if (ns) {
+ if (!kref_get_unless_zero(&ns->kref))
+ goto fail;
+ if (!try_module_get(ns->ctrl->ops->module))
+ goto fail_put_ns;
+ }
spin_unlock(&dev_list_lock);
return ns;
+
+fail_put_ns:
+ kref_put(&ns->kref, nvme_free_ns);
+fail:
+ spin_unlock(&dev_list_lock);
+ return NULL;
}
void nvme_requeue_req(struct request *req)
blk_mq_kick_requeue_list(req->q);
spin_unlock_irqrestore(req->q->queue_lock, flags);
}
+EXPORT_SYMBOL_GPL(nvme_requeue_req);
struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, unsigned int flags)
return req;
}
+EXPORT_SYMBOL_GPL(nvme_alloc_request);
/*
* Returns 0 on success. If the result is negative, it's a Linux error code;
{
return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
}
+EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
void __user *ubuffer, unsigned bufflen,
*count = min(*count, nr_io_queues);
return 0;
}
+EXPORT_SYMBOL_GPL(nvme_set_queue_count);
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
static void nvme_release(struct gendisk *disk, fmode_t mode)
{
- nvme_put_ns(disk->private_data);
+ struct nvme_ns *ns = disk->private_data;
+
+ module_put(ns->ctrl->ops->module);
+ nvme_put_ns(ns);
}
static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
unsigned short bs;
if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
- dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
- __func__, ns->ctrl->instance, ns->ns_id);
+ dev_warn(disk_to_dev(ns->disk), "%s: Identify failure\n",
+ __func__);
return -ENODEV;
}
if (id->ncap == 0) {
if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
if (nvme_nvm_register(ns->queue, disk->disk_name)) {
- dev_warn(ns->ctrl->dev,
+ dev_warn(disk_to_dev(ns->disk),
"%s: LightNVM init failure\n", __func__);
kfree(id);
return -ENODEV;
if (fatal_signal_pending(current))
return -EINTR;
if (time_after(jiffies, timeout)) {
- dev_err(ctrl->dev,
+ dev_err(ctrl->device,
"Device not ready; aborting %s\n", enabled ?
"initialisation" : "reset");
return -ENODEV;
return ret;
return nvme_wait_ready(ctrl, cap, false);
}
+EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
{
int ret;
if (page_shift < dev_page_min) {
- dev_err(ctrl->dev,
+ dev_err(ctrl->device,
"Minimum device page size %u too large for host (%u)\n",
1 << dev_page_min, 1 << page_shift);
return -ENODEV;
return ret;
return nvme_wait_ready(ctrl, cap, true);
}
+EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
{
if (fatal_signal_pending(current))
return -EINTR;
if (time_after(jiffies, timeout)) {
- dev_err(ctrl->dev,
+ dev_err(ctrl->device,
"Device shutdown incomplete; abort shutdown\n");
return -ENODEV;
}
return ret;
}
+EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl);
/*
* Initialize the cached copies of the Identify data and various controller
ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
if (ret) {
- dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret);
+ dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
return ret;
}
ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
if (ret) {
- dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret);
+ dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
return ret;
}
page_shift = NVME_CAP_MPSMIN(cap) + 12;
ret = nvme_identify_ctrl(ctrl, &id);
if (ret) {
- dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret);
+ dev_err(ctrl->device, "Identify Controller failed (%d)\n", ret);
return -EIO;
}
kfree(id);
return 0;
}
+EXPORT_SYMBOL_GPL(nvme_init_identify);
static int nvme_dev_open(struct inode *inode, struct file *file)
{
ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
- dev_warn(ctrl->dev,
+ dev_warn(ctrl->device,
"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
ret = -EINVAL;
goto out_unlock;
}
- dev_warn(ctrl->dev,
+ dev_warn(ctrl->device,
"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
kref_get(&ns->kref);
mutex_unlock(&ctrl->namespaces_mutex);
case NVME_IOCTL_IO_CMD:
return nvme_dev_user_cmd(ctrl, argp);
case NVME_IOCTL_RESET:
- dev_warn(ctrl->dev, "resetting controller\n");
+ dev_warn(ctrl->device, "resetting controller\n");
return ctrl->ops->reset_ctrl(ctrl);
case NVME_IOCTL_SUBSYS_RESET:
return nvme_reset_subsystem(ctrl);
mutex_unlock(&ctrl->namespaces_mutex);
kfree(id);
}
+EXPORT_SYMBOL_GPL(nvme_scan_namespaces);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
{
nvme_ns_remove(ns);
mutex_unlock(&ctrl->namespaces_mutex);
}
+EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
static DEFINE_IDA(nvme_instance_ida);
}
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
- {
+{
device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
spin_lock(&dev_list_lock);
list_del(&ctrl->node);
spin_unlock(&dev_list_lock);
}
+EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
static void nvme_free_ctrl(struct kref *kref)
{
{
kref_put(&ctrl->kref, nvme_free_ctrl);
}
+EXPORT_SYMBOL_GPL(nvme_put_ctrl);
/*
* Initialize a NVMe controller structures. This needs to be called during
ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
MKDEV(nvme_char_major, ctrl->instance),
- dev, nvme_dev_attr_groups,
+ ctrl, nvme_dev_attr_groups,
"nvme%d", ctrl->instance);
if (IS_ERR(ctrl->device)) {
ret = PTR_ERR(ctrl->device);
goto out_release_instance;
}
get_device(ctrl->device);
- dev_set_drvdata(ctrl->device, ctrl);
spin_lock(&dev_list_lock);
list_add_tail(&ctrl->node, &nvme_ctrl_list);
out:
return ret;
}
+EXPORT_SYMBOL_GPL(nvme_init_ctrl);
void nvme_stop_queues(struct nvme_ctrl *ctrl)
{
}
mutex_unlock(&ctrl->namespaces_mutex);
}
+EXPORT_SYMBOL_GPL(nvme_stop_queues);
void nvme_start_queues(struct nvme_ctrl *ctrl)
{
}
mutex_unlock(&ctrl->namespaces_mutex);
}
+EXPORT_SYMBOL_GPL(nvme_start_queues);
int __init nvme_core_init(void)
{
class_destroy(nvme_class);
__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
}
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
+module_init(nvme_core_init);
+module_exit(nvme_core_exit);
};
struct nvme_ctrl_ops {
+ struct module *module;
int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
dma_addr_t dma_addr, u32 *result);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
-extern spinlock_t dev_list_lock;
-
struct sg_io_hdr;
int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
#define NVME_NR_AEN_COMMANDS 1
#define NVME_AQ_BLKMQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
-unsigned char admin_timeout = 60;
-module_param(admin_timeout, byte, 0644);
-MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
-
-unsigned char nvme_io_timeout = 30;
-module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
-MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
-
-unsigned char shutdown_timeout = 5;
-module_param(shutdown_timeout, byte, 0644);
-MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
-
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);
MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
static LIST_HEAD(dev_list);
+static DEFINE_SPINLOCK(dev_list_lock);
static struct task_struct *nvme_thread;
static struct workqueue_struct *nvme_workq;
static wait_queue_head_t nvme_kthread_wait;
switch (result & 0xff07) {
case NVME_AER_NOTICE_NS_CHANGED:
- dev_info(dev->dev, "rescanning\n");
+ dev_info(dev->ctrl.device, "rescanning\n");
queue_work(nvme_workq, &dev->scan_work);
default:
- dev_warn(dev->dev, "async event result %08x\n", result);
+ dev_warn(dev->ctrl.device, "async event result %08x\n", result);
}
}
}
if (unlikely(iod->aborted)) {
- dev_warn(dev->dev,
+ dev_warn(dev->ctrl.device,
"completing aborted command with status: %04x\n",
req->errors);
}
*tag = -1;
if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
- dev_warn(nvmeq->q_dmadev,
+ dev_warn(nvmeq->dev->ctrl.device,
"invalid id %d completed on queue %d\n",
cqe.command_id, le16_to_cpu(cqe.sq_id));
continue;
u32 result = (u32)(uintptr_t)req->special;
u16 status = req->errors;
- dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
+ dev_warn(nvmeq->dev->ctrl.device,
+ "Abort status:%x result:%x", status, result);
atomic_inc(&nvmeq->dev->ctrl.abort_limit);
blk_mq_free_request(req);
* shutdown, so we return BLK_EH_HANDLED.
*/
if (test_bit(NVME_CTRL_RESETTING, &dev->flags)) {
- dev_warn(dev->dev,
+ dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
* returned to the driver, or if this is the admin queue.
*/
if (!nvmeq->qid || iod->aborted) {
- dev_warn(dev->dev,
+ dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
cmd.abort.cid = req->tag;
cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
- dev_warn(nvmeq->q_dmadev, "I/O %d QID %d timeout, aborting\n",
- req->tag, nvmeq->qid);
+ dev_warn(nvmeq->dev->ctrl.device,
+ "I/O %d QID %d timeout, aborting\n",
+ req->tag, nvmeq->qid);
abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
BLK_MQ_REQ_NOWAIT);
if (!blk_mq_request_started(req))
return;
- dev_warn(nvmeq->q_dmadev,
+ dev_warn(nvmeq->dev->ctrl.device,
"Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid);
status = NVME_SC_ABORT_REQ;
if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
csts & NVME_CSTS_CFS) {
if (queue_work(nvme_workq, &dev->reset_work)) {
- dev_warn(dev->dev,
+ dev_warn(dev->ctrl.device,
"Failed status: %x, reset controller\n",
readl(dev->bar + NVME_REG_CSTS));
}
static int nvme_create_io_queues(struct nvme_dev *dev)
{
- unsigned i;
+ unsigned i, max;
int ret = 0;
for (i = dev->queue_count; i <= dev->max_qid; i++) {
}
}
- for (i = dev->online_queues; i <= dev->queue_count - 1; i++) {
+ max = min(dev->max_qid, dev->queue_count - 1);
+ for (i = dev->online_queues; i <= max; i++) {
ret = nvme_create_queue(dev->queues[i], i);
if (ret) {
nvme_free_queues(dev, i);
* access to the admin queue, as that might be only way to fix them up.
*/
if (result > 0) {
- dev_err(dev->dev, "Could not set queue count (%d)\n", result);
+ dev_err(dev->ctrl.device,
+ "Could not set queue count (%d)\n", result);
nr_io_queues = 0;
result = 0;
}
adminq->cq_vector = -1;
goto free_queues;
}
-
- /* Free previously allocated queues that are no longer usable */
- nvme_free_queues(dev, nr_io_queues + 1);
return nvme_create_io_queues(dev);
free_queues:
if (blk_mq_alloc_tag_set(&dev->tagset))
return 0;
dev->ctrl.tagset = &dev->tagset;
+ } else {
+ blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
+
+ /* Free previously allocated queues that are no longer usable */
+ nvme_free_queues(dev, dev->online_queues);
}
+
queue_work(nvme_workq, &dev->scan_work);
return 0;
}
* any working I/O queue.
*/
if (dev->online_queues < 2) {
- dev_warn(dev->dev, "IO queues not created\n");
+ dev_warn(dev->ctrl.device, "IO queues not created\n");
nvme_remove_namespaces(&dev->ctrl);
} else {
nvme_start_queues(&dev->ctrl);
static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
{
- dev_warn(dev->dev, "Removing after probe failure\n");
+ dev_warn(dev->ctrl.device, "Removing after probe failure\n");
kref_get(&dev->ctrl.kref);
if (!schedule_work(&dev->remove_work))
nvme_put_ctrl(&dev->ctrl);
}
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
+ .module = THIS_MODULE,
.reg_read32 = nvme_pci_reg_read32,
.reg_write32 = nvme_pci_reg_write32,
.reg_read64 = nvme_pci_reg_read64,
if (result)
goto release_pools;
+ dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
+
queue_work(nvme_workq, &dev->reset_work);
return 0;
* shutdown the controller to quiesce. The controller will be restarted
* after the slot reset through driver's slot_reset callback.
*/
- dev_warn(&pdev->dev, "error detected: state:%d\n", state);
+ dev_warn(dev->ctrl.device, "error detected: state:%d\n", state);
switch (state) {
case pci_channel_io_normal:
return PCI_ERS_RESULT_CAN_RECOVER;
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- dev_info(&pdev->dev, "restart after slot reset\n");
+ dev_info(dev->ctrl.device, "restart after slot reset\n");
pci_restore_state(pdev);
queue_work(nvme_workq, &dev->reset_work);
return PCI_ERS_RESULT_RECOVERED;
if (!nvme_workq)
return -ENOMEM;
- result = nvme_core_init();
- if (result < 0)
- goto kill_workq;
-
result = pci_register_driver(&nvme_driver);
if (result)
- goto core_exit;
- return 0;
-
- core_exit:
- nvme_core_exit();
- kill_workq:
- destroy_workqueue(nvme_workq);
+ destroy_workqueue(nvme_workq);
return result;
}
static void __exit nvme_exit(void)
{
pci_unregister_driver(&nvme_driver);
- nvme_core_exit();
destroy_workqueue(nvme_workq);
BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
_nvme_check_size();
void blk_mq_unfreeze_queue(struct request_queue *q);
void blk_mq_freeze_queue_start(struct request_queue *q);
+void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
+
/*
* Driver command data is immediately after the request. So subtract request
* size to get back to the original request, add request size to get the PDU.