nvme-rdma: Make queue flags bit numbers and not shifts
index 28bd255c144dcca10aa60cede2c9a51cd101426a..c4fd9d50b27b8d6ed57708bb1b2a85d6e2579039 100644 (file)
@@ -80,10 +80,10 @@ struct nvme_rdma_request {
 };
 
 enum nvme_rdma_queue_flags {
-       NVME_RDMA_Q_CONNECTED = (1 << 0),
-       NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
-       NVME_RDMA_Q_DELETING = (1 << 2),
-       NVME_RDMA_Q_LIVE = (1 << 3),
+       NVME_RDMA_Q_CONNECTED           = 0,
+       NVME_RDMA_IB_QUEUE_ALLOCATED    = 1,
+       NVME_RDMA_Q_DELETING            = 2,
+       NVME_RDMA_Q_LIVE                = 3,
 };
 
 struct nvme_rdma_queue {
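
The queue flags are only ever manipulated with the kernel's atomic bitops (the set_bit()/test_bit() calls later in this diff), and set_bit()/test_bit()/clear_bit() take a bit number, not a mask, so the enum values become plain bit indices. With the old shift-style values, set_bit(NVME_RDMA_Q_LIVE, ...) would have addressed bit 8 (1 << 3) rather than bit 3. A minimal sketch of the intended usage; the wrapper function is made up purely for illustration:

	#include <linux/bitops.h>

	/* Sketch only: atomic bitops address individual bits by number. */
	static void example_queue_flag_usage(unsigned long *flags)
	{
		set_bit(NVME_RDMA_Q_LIVE, flags);			/* atomically set bit 3   */
		if (test_bit(NVME_RDMA_Q_CONNECTED, flags))		/* test bit 0             */
			clear_bit(NVME_RDMA_Q_DELETING, flags);		/* atomically clear bit 2 */
	}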
@@ -103,9 +103,6 @@ struct nvme_rdma_queue {
 };
 
 struct nvme_rdma_ctrl {
-       /* read and written in the hot path */
-       spinlock_t              lock;
-
        /* read only in the hot path */
        struct nvme_rdma_queue  *queues;
        u32                     queue_count;
@@ -753,28 +750,26 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
        if (ret)
                goto requeue;
 
-       blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
-
        ret = nvmf_connect_admin_queue(&ctrl->ctrl);
        if (ret)
-               goto stop_admin_q;
+               goto requeue;
 
        set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
        ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
        if (ret)
-               goto stop_admin_q;
+               goto requeue;
 
        nvme_start_keep_alive(&ctrl->ctrl);
 
        if (ctrl->queue_count > 1) {
                ret = nvme_rdma_init_io_queues(ctrl);
                if (ret)
-                       goto stop_admin_q;
+                       goto requeue;
 
                ret = nvme_rdma_connect_io_queues(ctrl);
                if (ret)
-                       goto stop_admin_q;
+                       goto requeue;
        }
 
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -782,7 +777,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
        ctrl->ctrl.opts->nr_reconnects = 0;
 
        if (ctrl->queue_count > 1) {
-               nvme_start_queues(&ctrl->ctrl);
                nvme_queue_scan(&ctrl->ctrl);
                nvme_queue_async_events(&ctrl->ctrl);
        }
@@ -791,8 +785,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
        return;
 
-stop_admin_q:
-       blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
 requeue:
        dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
                        ctrl->ctrl.opts->nr_reconnects);
@@ -823,6 +815,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_cancel_request, &ctrl->ctrl);
 
+       /*
+        * queues are not live anymore, so restart them so that new I/O
+        * fails fast
+        */
+       blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+       nvme_start_queues(&ctrl->ctrl);
+
        nvme_rdma_reconnect_or_remove(ctrl);
 }
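
Restarting the previously stopped queues here is what makes the recovery path fail fast: new requests are dispatched to .queue_rq() again instead of sitting on a stopped hw queue, and the not-LIVE check in nvme_rdma_queue_is_ready() (further down in this diff) rejects them immediately while the controller is reconnecting. A condensed sketch of the ordering, using only the calls visible in this hunk:

	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
				nvme_cancel_request, &ctrl->ctrl);	/* fail requests already in flight */
	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);	/* un-stop the admin queue         */
	nvme_start_queues(&ctrl->ctrl);					/* un-stop the I/O queues          */
	nvme_rdma_reconnect_or_remove(ctrl);				/* then schedule reconnect/removal */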
 
@@ -1433,22 +1432,32 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
 /*
  * We cannot accept any other command until the Connect command has completed.
  */
-static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
-               struct request *rq)
+static inline blk_status_t
+nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
 {
        if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
                struct nvme_command *cmd = nvme_req(rq)->cmd;
 
                if (!blk_rq_is_passthrough(rq) ||
                    cmd->common.opcode != nvme_fabrics_command ||
-                   cmd->fabrics.fctype != nvme_fabrics_type_connect)
-                       return false;
+                   cmd->fabrics.fctype != nvme_fabrics_type_connect) {
+                       /*
+                        * reconnecting state means transport disruption, which
+                        * can take a long time and might even fail permanently,
+                        * so we can't let incoming I/O be requeued forever.
+                        * Fail it fast to give upper layers a chance to
+                        * fail over.
+                        */
+                       if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
+                               return BLK_STS_IOERR;
+                       return BLK_STS_RESOURCE; /* try again later */
+               }
        }
 
-       return true;
+       return 0;
 }
 
-static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
                const struct blk_mq_queue_data *bd)
 {
        struct nvme_ns *ns = hctx->queue->queuedata;
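
The two error statuses behave very differently once they reach blk-mq: BLK_STS_RESOURCE makes the request be requeued and retried later, while BLK_STS_IOERR completes it with an error right away so an upper layer (e.g. multipath) gets a chance to fail over, which is the trade-off the new comment describes. The caller simply propagates any non-zero status, roughly:

	ret = nvme_rdma_queue_is_ready(queue, rq);
	if (unlikely(ret))
		return ret;	/* BLK_STS_RESOURCE: blk-mq retries later,
				 * BLK_STS_IOERR: failed up the stack now  */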
@@ -1459,27 +1468,29 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
        struct nvme_command *c = sqe->data;
        bool flush = false;
        struct ib_device *dev;
-       int ret;
+       blk_status_t ret;
+       int err;
 
        WARN_ON_ONCE(rq->tag < 0);
 
-       if (!nvme_rdma_queue_is_ready(queue, rq))
-               return BLK_MQ_RQ_QUEUE_BUSY;
+       ret = nvme_rdma_queue_is_ready(queue, rq);
+       if (unlikely(ret))
+               return ret;
 
        dev = queue->device->dev;
        ib_dma_sync_single_for_cpu(dev, sqe->dma,
                        sizeof(struct nvme_command), DMA_TO_DEVICE);
 
        ret = nvme_setup_cmd(ns, rq, c);
-       if (ret != BLK_MQ_RQ_QUEUE_OK)
+       if (ret)
                return ret;
 
        blk_mq_start_request(rq);
 
-       ret = nvme_rdma_map_data(queue, rq, c);
-       if (ret < 0) {
+       err = nvme_rdma_map_data(queue, rq, c);
+       if (err < 0) {
                dev_err(queue->ctrl->ctrl.device,
-                            "Failed to map data (%d)\n", ret);
+                            "Failed to map data (%d)\n", err);
                nvme_cleanup_cmd(rq);
                goto err;
        }
@@ -1489,17 +1500,18 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
        if (req_op(rq) == REQ_OP_FLUSH)
                flush = true;
-       ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
+       err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
                        req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
-       if (ret) {
+       if (err) {
                nvme_rdma_unmap_data(queue, rq);
                goto err;
        }
 
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 err:
-       return (ret == -ENOMEM || ret == -EAGAIN) ?
-               BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+       if (err == -ENOMEM || err == -EAGAIN)
+               return BLK_STS_RESOURCE;
+       return BLK_STS_IOERR;
 }
 
 static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
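
The err: label now boils the internal errno down to a blk_status_t: -ENOMEM and -EAGAIN are transient resource shortages that blk-mq should retry later, anything else is a hard I/O error. The same mapping, factored into a hypothetical helper purely for illustration (not part of this patch):

	#include <linux/blk_types.h>
	#include <linux/errno.h>

	/* Illustrative only: errno from the RDMA helpers -> blk_status_t. */
	static blk_status_t nvme_rdma_errno_to_status(int err)
	{
		if (err == -ENOMEM || err == -EAGAIN)
			return BLK_STS_RESOURCE;	/* transient: blk-mq requeues and retries */
		return BLK_STS_IOERR;			/* hard failure: complete with an error   */
	}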
@@ -1906,7 +1918,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
        INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
        INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
        INIT_WORK(&ctrl->reset_work, nvme_rdma_reset_ctrl_work);
-       spin_lock_init(&ctrl->lock);
 
        ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
        ctrl->ctrl.sqsize = opts->queue_size - 1;