nvmet_fc: add defer_req callback for deferment of cmd buffer return

author James Smart <jsmart2021@gmail.com>

Tue, 1 Aug 2017 22:12:39 +0000 (15:12 -0700)

committer Christoph Hellwig <hch@lst.de>

Thu, 10 Aug 2017 09:06:38 +0000 (11:06 +0200)
author James Smart <jsmart2021@gmail.com>
Tue, 1 Aug 2017 22:12:39 +0000 (15:12 -0700)
committer Christoph Hellwig <hch@lst.de>
Thu, 10 Aug 2017 09:06:38 +0000 (11:06 +0200)
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c

index 31ca55dfcb1d49f3a1d88f7c6f5d0e7f0ee1e1ea..1b7f2520a20db7e151afe4a85a0e488fe0c85005 100644 (file)
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -114,6 +114,11 @@ struct nvmet_fc_tgtport {
         struct kref                     ref;
  };
  
+struct nvmet_fc_defer_fcp_req {
+       struct list_head                req_list;       /* link on a queue's pending_cmd_list or avail_defer_list */
+       struct nvmefc_tgt_fcp_req       *fcp_req;       /* LLDD request deferred until a fod frees up */
+};
+
  struct nvmet_fc_tgt_queue {
         bool                            ninetypercent;
         u16                             qid;
@@ -132,6 +137,8 @@ struct nvmet_fc_tgt_queue {
         struct nvmet_fc_tgt_assoc       *assoc;
         struct nvmet_fc_fcp_iod         *fod;           /* array of fcp_iods */
         struct list_head                fod_list;
+       struct list_head                pending_cmd_list;
+       struct list_head                avail_defer_list;
         struct workqueue_struct         *work_q;
         struct kref                     ref;
  } __aligned(sizeof(unsigned long long));
@@ -223,6 +230,8 @@ static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
  static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue);
  static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport);
  static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport);
+static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
+                                       struct nvmet_fc_fcp_iod *fod);
  
  
  /* *********************** FC-NVME DMA Handling **************************** */
@@ -463,9 +472,9 @@ static struct nvmet_fc_fcp_iod *
  nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue)
  {
         static struct nvmet_fc_fcp_iod *fod;
-       unsigned long flags;
  
-       spin_lock_irqsave(&queue->qlock, flags);
+       lockdep_assert_held(&queue->qlock);
+
         fod = list_first_entry_or_null(&queue->fod_list,
                                         struct nvmet_fc_fcp_iod, fcp_list);
         if (fod) {
@@ -477,17 +486,37 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue)
                  * will "inherit" that reference.
                  */
         }
-       spin_unlock_irqrestore(&queue->qlock, flags);
         return fod;
  }
  
  
+static void
+nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport,
+                      struct nvmet_fc_tgt_queue *queue,
+                      struct nvmefc_tgt_fcp_req *fcpreq)
+{
+       struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private;
+       u32 nr_hwq = tgtport->ops->max_hw_queues;
+
+       /* admin cmds (qid 0) use hw queue 0; io queues map by modulo */
+       fcpreq->hwqid = 0;
+       if (queue->qid)
+               fcpreq->hwqid = (queue->qid - 1) % nr_hwq;
+
+       /* CMD_IN_ISR feature set: hand the fod to the queue's work item */
+       if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
+               queue_work_on(queue->cpu, queue->work_q, &fod->work);
+       else
+               nvmet_fc_handle_fcp_rqst(tgtport, fod);
+}
+
  static void
  nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
                         struct nvmet_fc_fcp_iod *fod)
  {
         struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
         struct nvmet_fc_tgtport *tgtport = fod->tgtport;
+       struct nvmet_fc_defer_fcp_req *deferfcp;
         unsigned long flags;
  
         fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma,
@@ -495,21 +524,56 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
  
         fcpreq->nvmet_fc_private = NULL;
  
-       spin_lock_irqsave(&queue->qlock, flags);
-       list_add_tail(&fod->fcp_list, &fod->queue->fod_list);
         fod->active = false;
         fod->abort = false;
         fod->aborted = false;
         fod->writedataactive = false;
         fod->fcpreq = NULL;
+
+       tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq);
+
+       /* release the queue lookup reference on the completed IO */
+       nvmet_fc_tgt_q_put(queue);
+
+       spin_lock_irqsave(&queue->qlock, flags);
+       deferfcp = list_first_entry_or_null(&queue->pending_cmd_list,
+                               struct nvmet_fc_defer_fcp_req, req_list);
+       if (!deferfcp) {
+               list_add_tail(&fod->fcp_list, &fod->queue->fod_list);
+               spin_unlock_irqrestore(&queue->qlock, flags);
+               return;
+       }
+
+       /* Re-use the fod for the next pending cmd that was deferred */
+       list_del(&deferfcp->req_list);
+
+       fcpreq = deferfcp->fcp_req;
+
+       /* deferfcp can be reused for another IO at a later date */
+       list_add_tail(&deferfcp->req_list, &queue->avail_defer_list);
+
         spin_unlock_irqrestore(&queue->qlock, flags);
  
+       /* Save NVME CMD IO in fod */
+       memcpy(&fod->cmdiubuf, fcpreq->rspaddr, fcpreq->rsplen);
+
+       /* Setup new fcpreq to be processed */
+       fcpreq->rspaddr = NULL;
+       fcpreq->rsplen  = 0;
+       fcpreq->nvmet_fc_private = fod;
+       fod->fcpreq = fcpreq;
+       fod->active = true;
+
+       /* inform LLDD IO is now being processed */
+       tgtport->ops->defer_rcv(&tgtport->fc_target_port, fcpreq);
+
+       /* Submit deferred IO for processing */
+       nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq);
+
         /*
-        * release the reference taken at queue lookup and fod allocation
+        * The fod now carries the queue lookup reference the deferred cmd
+        * took when it was received; it is dropped when the fod is freed.
          */
-       nvmet_fc_tgt_q_put(queue);
-
-       tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq);
  }
  
  static int
@@ -569,6 +633,8 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
         queue->port = assoc->tgtport->port;
         queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid);
         INIT_LIST_HEAD(&queue->fod_list);
+       INIT_LIST_HEAD(&queue->avail_defer_list);
+       INIT_LIST_HEAD(&queue->pending_cmd_list);
         atomic_set(&queue->connected, 0);
         atomic_set(&queue->sqtail, 0);
         atomic_set(&queue->rsn, 1);
@@ -638,6 +704,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
  {
         struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport;
         struct nvmet_fc_fcp_iod *fod = queue->fod;
+       struct nvmet_fc_defer_fcp_req *deferfcp;
         unsigned long flags;
         int i, writedataactive;
         bool disconnect;
@@ -666,6 +733,39 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
                         }
                 }
         }
+
+       /*
+        * Cleanup defer'ed IOs in queue. Entries are freed as we go, so
+        * pop from the head instead of iterating over freed memory.
+        */
+       while ((deferfcp = list_first_entry_or_null(&queue->avail_defer_list,
+                       struct nvmet_fc_defer_fcp_req, req_list))) {
+               list_del(&deferfcp->req_list);
+               kfree(deferfcp);
+       }
+
+       for (;;) {
+               deferfcp = list_first_entry_or_null(&queue->pending_cmd_list,
+                               struct nvmet_fc_defer_fcp_req, req_list);
+               if (!deferfcp)
+                       break;
+
+               list_del(&deferfcp->req_list);
+               spin_unlock_irqrestore(&queue->qlock, flags);
+
+               tgtport->ops->defer_rcv(&tgtport->fc_target_port,
+                               deferfcp->fcp_req);
+
+               tgtport->ops->fcp_abort(&tgtport->fc_target_port,
+                               deferfcp->fcp_req);
+
+               tgtport->ops->fcp_req_release(&tgtport->fc_target_port,
+                               deferfcp->fcp_req);
+
+               kfree(deferfcp);
+
+               spin_lock_irqsave(&queue->qlock, flags);
+       }
         spin_unlock_irqrestore(&queue->qlock, flags);
  
         flush_workqueue(queue->work_q);
@@ -2172,11 +2268,38 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work)
   * Pass a FC-NVME FCP CMD IU received from the FC link to the nvmet-fc
   * layer for processing.
   *
- * The nvmet-fc layer will copy cmd payload to an internal structure for
- * processing.  As such, upon completion of the routine, the LLDD may
- * immediately free/reuse the CMD IU buffer passed in the call.
+ * The nvmet_fc layer allocates a local job structure (struct
+ * nvmet_fc_fcp_iod) from the queue for the io and copies the
+ * CMD IU buffer to the job structure. As such, on a successful
+ * completion (returns 0), the LLDD may immediately free/reuse
+ * the CMD IU buffer passed in the call.
+ *
+ * However, due to the packetized nature of FC and the api of the FC
+ * LLDD, the LLDD may issue a hw command to send a response but not yet
+ * have received the hw completion for it (and upcalled the nvmet_fc
+ * layer) when a new command is asynchronously received. It's therefore
+ * possible for a command to be received before the LLDD and nvmet_fc
+ * have recycled the job structure. It gives the appearance of more
+ * commands received than fit in the sq.
+ * To alleviate this scenario, a temporary queue is maintained in the
+ * transport for pending LLDD requests waiting for a queue job structure.
+ * In these "overrun" cases, a temporary queue element is allocated,
+ * the LLDD request and CMD IU buffer information is remembered, and the
+ * routine returns a -EOVERFLOW status. Subsequently, when a queue job
+ * structure is freed, it is immediately reallocated for anything on the
+ * pending request list. The LLDD's defer_rcv() callback is called,
+ * informing the LLDD that it may reuse the CMD IU buffer, and the io
+ * is then started normally with the transport.
   *
- * If this routine returns error, the lldd should abort the exchange.
+ * The LLDD, when receiving an -EOVERFLOW completion status, is to treat
+ * the completion as successful but must not reuse the CMD IU buffer
+ * until the LLDD's defer_rcv() callback has been called for the
+ * corresponding struct nvmefc_tgt_fcp_req pointer.
+ *
+ * If there is any other condition in which an error occurs, the
+ * transport will return a non-zero status indicating the error.
+ * In all cases other than -EOVERFLOW, the transport has not accepted the
+ * request and the LLDD should abort the exchange.
   *
   * @target_port: pointer to the (registered) target port the FCP CMD IU
   *              was received on.
@@ -2194,6 +2317,8 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
         struct nvme_fc_cmd_iu *cmdiu = cmdiubuf;
         struct nvmet_fc_tgt_queue *queue;
         struct nvmet_fc_fcp_iod *fod;
+       struct nvmet_fc_defer_fcp_req *deferfcp;
+       unsigned long flags;
  
         /* validate iu, so the connection id can be used to find the queue */
         if ((cmdiubuf_len != sizeof(*cmdiu)) ||
@@ -2214,29 +2339,60 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
          * when the fod is freed.
          */
  
+       spin_lock_irqsave(&queue->qlock, flags);
+
         fod = nvmet_fc_alloc_fcp_iod(queue);
-       if (!fod) {
+       if (fod) {
+               spin_unlock_irqrestore(&queue->qlock, flags);
+
+               fcpreq->nvmet_fc_private = fod;
+               fod->fcpreq = fcpreq;
+
+               memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len);
+
+               nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq);
+
+               return 0;
+       }
+
+       if (!tgtport->ops->defer_rcv) {
+               spin_unlock_irqrestore(&queue->qlock, flags);
                 /* release the queue lookup reference */
                 nvmet_fc_tgt_q_put(queue);
                 return -ENOENT;
         }
  
-       fcpreq->nvmet_fc_private = fod;
-       fod->fcpreq = fcpreq;
-       /*
-        * put all admin cmds on hw queue id 0. All io commands go to
-        * the respective hw queue based on a modulo basis
-        */
-       fcpreq->hwqid = queue->qid ?
-                       ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
-       memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len);
+       deferfcp = list_first_entry_or_null(&queue->avail_defer_list,
+                       struct nvmet_fc_defer_fcp_req, req_list);
+       if (deferfcp) {
+               /* Just re-use one that was previously allocated */
+               list_del(&deferfcp->req_list);
+       } else {
+               spin_unlock_irqrestore(&queue->qlock, flags);
  
-       if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
-               queue_work_on(queue->cpu, queue->work_q, &fod->work);
-       else
-               nvmet_fc_handle_fcp_rqst(tgtport, fod);
+               /* Now we need to dynamically allocate one */
+               deferfcp = kmalloc(sizeof(*deferfcp), GFP_KERNEL);
+               if (!deferfcp) {
+                       /* release the queue lookup reference */
+                       nvmet_fc_tgt_q_put(queue);
+                       return -ENOMEM;
+               }
+               spin_lock_irqsave(&queue->qlock, flags);
+       }
  
-       return 0;
+       /* For now, use rspaddr / rsplen to save payload information */
+       fcpreq->rspaddr = cmdiubuf;
+       fcpreq->rsplen  = cmdiubuf_len;
+       deferfcp->fcp_req = fcpreq;
+
+       /* defer processing till a fod becomes available */
+       list_add_tail(&deferfcp->req_list, &queue->pending_cmd_list);
+
+       /* NOTE: the queue lookup reference is still valid */
+
+       spin_unlock_irqrestore(&queue->qlock, flags);
+
+       return -EOVERFLOW;
  }
  EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req);
  
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h

index 6c8c5d8041b72ec01097d1c0563b793ea7449f1f..2591878c1d4804d374d39491c2a3f64d1b43a214 100644 (file)
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -346,6 +346,11 @@ struct nvme_fc_remote_port {
   *       indicating an FC transport Aborted status.
   *       Entrypoint is Mandatory.
   *
+ * @defer_rcv:  Called by the transport to signal the LLDD that it has
+ *       begun processing of a previously received NVME CMD IU. The LLDD
+ *       is now free to re-use the rcv buffer associated with the
+ *       nvmefc_tgt_fcp_req.
+ *
   * @max_hw_queues:  indicates the maximum number of hw queues the LLDD
   *       supports for cpu affinitization.
   *       Value is Mandatory. Must be at least 1.
@@ -846,6 +851,8 @@ struct nvmet_fc_target_template {
                                 struct nvmefc_tgt_fcp_req *fcpreq);
         void (*fcp_req_release)(struct nvmet_fc_target_port *tgtport,
                                 struct nvmefc_tgt_fcp_req *fcpreq);
+       void (*defer_rcv)(struct nvmet_fc_target_port *tgtport,
+                               struct nvmefc_tgt_fcp_req *fcpreq);
  
         u32     max_hw_queues;
         u16     max_sgl_segments;
author	James Smart <jsmart2021@gmail.com>
	Tue, 1 Aug 2017 22:12:39 +0000 (15:12 -0700)
committer	Christoph Hellwig <hch@lst.de>
	Thu, 10 Aug 2017 09:06:38 +0000 (11:06 +0200)
drivers/nvme/target/fc.c		patch \| blob \| history
include/linux/nvme-fc-driver.h		patch \| blob \| history