};
enum nvme_rdma_queue_flags {
- NVME_RDMA_Q_CONNECTED = (1 << 0),
- NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
- NVME_RDMA_Q_DELETING = (1 << 2),
- NVME_RDMA_Q_LIVE = (1 << 3),
+ NVME_RDMA_Q_CONNECTED = 0,
+ NVME_RDMA_IB_QUEUE_ALLOCATED = 1,
+ NVME_RDMA_Q_DELETING = 2,
+ NVME_RDMA_Q_LIVE = 3,
};
struct nvme_rdma_queue {
};
struct nvme_rdma_ctrl {
- /* read and written in the hot path */
- spinlock_t lock;
-
/* read only in the hot path */
struct nvme_rdma_queue *queues;
u32 queue_count;
if (ret)
goto requeue;
- blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
-
ret = nvmf_connect_admin_queue(&ctrl->ctrl);
if (ret)
- goto stop_admin_q;
+ goto requeue;
set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
if (ret)
- goto stop_admin_q;
+ goto requeue;
nvme_start_keep_alive(&ctrl->ctrl);
if (ctrl->queue_count > 1) {
ret = nvme_rdma_init_io_queues(ctrl);
if (ret)
- goto stop_admin_q;
+ goto requeue;
ret = nvme_rdma_connect_io_queues(ctrl);
if (ret)
- goto stop_admin_q;
+ goto requeue;
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
ctrl->ctrl.opts->nr_reconnects = 0;
if (ctrl->queue_count > 1) {
- nvme_start_queues(&ctrl->ctrl);
nvme_queue_scan(&ctrl->ctrl);
nvme_queue_async_events(&ctrl->ctrl);
}
return;
-stop_admin_q:
- blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
requeue:
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
ctrl->ctrl.opts->nr_reconnects);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_cancel_request, &ctrl->ctrl);
+ /*
+ * queues are not a live anymore, so restart the queues to fail fast
+ * new IO
+ */
+ blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+ nvme_start_queues(&ctrl->ctrl);
+
nvme_rdma_reconnect_or_remove(ctrl);
}
/*
* We cannot accept any other command until the Connect command has completed.
*/
-static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
- struct request *rq)
+static inline blk_status_t
+nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
{
if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
struct nvme_command *cmd = nvme_req(rq)->cmd;
if (!blk_rq_is_passthrough(rq) ||
cmd->common.opcode != nvme_fabrics_command ||
- cmd->fabrics.fctype != nvme_fabrics_type_connect)
- return false;
+ cmd->fabrics.fctype != nvme_fabrics_type_connect) {
+ /*
+ * reconnecting state means transport disruption, which
+ * can take a long time and even might fail permanently,
+ * so we can't let incoming I/O be requeued forever.
+ * fail it fast to allow upper layers a chance to
+ * failover.
+ */
+ if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
+ return BLK_STS_IOERR;
+ return BLK_STS_RESOURCE; /* try again later */
+ }
}
- return true;
+ return 0;
}
-static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nvme_ns *ns = hctx->queue->queuedata;
struct nvme_command *c = sqe->data;
bool flush = false;
struct ib_device *dev;
- int ret;
+ blk_status_t ret;
+ int err;
WARN_ON_ONCE(rq->tag < 0);
- if (!nvme_rdma_queue_is_ready(queue, rq))
- return BLK_MQ_RQ_QUEUE_BUSY;
+ ret = nvme_rdma_queue_is_ready(queue, rq);
+ if (unlikely(ret))
+ return ret;
dev = queue->device->dev;
ib_dma_sync_single_for_cpu(dev, sqe->dma,
sizeof(struct nvme_command), DMA_TO_DEVICE);
ret = nvme_setup_cmd(ns, rq, c);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
return ret;
blk_mq_start_request(rq);
- ret = nvme_rdma_map_data(queue, rq, c);
- if (ret < 0) {
+ err = nvme_rdma_map_data(queue, rq, c);
+ if (err < 0) {
dev_err(queue->ctrl->ctrl.device,
- "Failed to map data (%d)\n", ret);
+ "Failed to map data (%d)\n", err);
nvme_cleanup_cmd(rq);
goto err;
}
if (req_op(rq) == REQ_OP_FLUSH)
flush = true;
- ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
+ err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
- if (ret) {
+ if (err) {
nvme_rdma_unmap_data(queue, rq);
goto err;
}
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
err:
- return (ret == -ENOMEM || ret == -EAGAIN) ?
- BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+ if (err == -ENOMEM || err == -EAGAIN)
+ return BLK_STS_RESOURCE;
+ return BLK_STS_IOERR;
}
static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
INIT_WORK(&ctrl->reset_work, nvme_rdma_reset_ctrl_work);
- spin_lock_init(&ctrl->lock);
ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
ctrl->ctrl.sqsize = opts->queue_size - 1;