2 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <linux/kernel.h>
34 #include <linux/slab.h>
36 #include <linux/scatterlist.h>
37 #include <linux/kfifo.h>
38 #include <scsi/scsi_cmnd.h>
39 #include <scsi/scsi_host.h>
41 #include "iscsi_iser.h"
43 /* Register user buffer memory and initialize passive rdma
44 * dto descriptor. Data size is stored in
45 * task->data[ISER_DIR_IN].data_len, Protection size
46 * is stored in task->prot[ISER_DIR_IN].data_len
48 static int iser_prepare_read_cmd(struct iscsi_task *task)
/*
 * Data-IN setup for @task: calls iser_dma_map_task_data() for the task
 * buffers, registers the ISER_DIR_IN memory with iser_reg_rdma_mem() and
 * stores the resulting rkey/address in the iSER header of the task's
 * descriptor.
 * NOTE(review): how a non-zero 'err' is handled after each call is not
 * visible at this point - confirm the unwind/return path.
 */
51 struct iscsi_iser_task *iser_task = task->dd_data;
52 struct iser_mem_reg *mem_reg;
54 struct iser_hdr *hdr = &iser_task->desc.iser_header;
55 struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
57 err = iser_dma_map_task_data(iser_task,
/* the protection buffer is only touched when the command has protection SG entries */
64 if (scsi_prot_sg_count(iser_task->sc)) {
65 struct iser_data_buf *pbuf_in = &iser_task->prot[ISER_DIR_IN];
67 err = iser_dma_map_task_data(iser_task,
75 err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
77 iser_err("Failed to set up Data-IN RDMA\n");
/* rdma_reg[ISER_DIR_IN] is read right after the iser_reg_rdma_mem() call above */
80 mem_reg = &iser_task->rdma_reg[ISER_DIR_IN];
/*
 * Set ISER_RSV and fill read_stag/read_va from the registration; both
 * values are converted to big-endian before being stored in the header.
 */
82 hdr->flags |= ISER_RSV;
83 hdr->read_stag = cpu_to_be32(mem_reg->rkey);
84 hdr->read_va = cpu_to_be64(mem_reg->sge.addr);
86 iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
87 task->itt, mem_reg->rkey,
88 (unsigned long long)mem_reg->sge.addr);
93 /* Register user buffer memory and initialize passive rdma
94 * dto descriptor. Data size is stored in
95 * task->data[ISER_DIR_OUT].data_len, Protection size
96 * is stored at task->prot[ISER_DIR_OUT].data_len
99 iser_prepare_write_cmd(struct iscsi_task *task,
101 unsigned int unsol_sz,
/*
 * Data-OUT setup for @task: calls iser_dma_map_task_data() for the task
 * buffers, registers the ISER_DIR_OUT memory with iser_reg_rdma_mem(), then
 * fills the write fields of the iSER header and the second send SGE
 * (desc.tx_sg[1]) from the resulting registration.
 * NOTE(review): 'imm_sz' and 'edtl' are used below but their declarations,
 * and the handling of a non-zero 'err', are not visible here - confirm.
 */
104 struct iscsi_iser_task *iser_task = task->dd_data;
105 struct iser_mem_reg *mem_reg;
107 struct iser_hdr *hdr = &iser_task->desc.iser_header;
108 struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
109 struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
111 err = iser_dma_map_task_data(iser_task,
/* the protection buffer is only touched when the command has protection SG entries */
118 if (scsi_prot_sg_count(iser_task->sc)) {
119 struct iser_data_buf *pbuf_out = &iser_task->prot[ISER_DIR_OUT];
121 err = iser_dma_map_task_data(iser_task,
129 err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
131 iser_err("Failed to register write cmd RDMA mem\n");
135 mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
/*
 * Only when unsol_sz is smaller than edtl: set ISER_WSV and store the rkey
 * plus an address that starts unsol_sz bytes into the registered region
 * (both converted to big-endian).
 */
137 if (unsol_sz < edtl) {
138 hdr->flags |= ISER_WSV;
139 hdr->write_stag = cpu_to_be32(mem_reg->rkey);
140 hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
142 iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
143 "VA:%#llX + unsol:%d\n",
144 task->itt, mem_reg->rkey,
145 (unsigned long long)mem_reg->sge.addr, unsol_sz);
/*
 * Second SGE covers the first imm_sz bytes of the registered region and
 * num_sge becomes 2.
 * NOTE(review): presumably guarded by a check on imm_sz - the condition is
 * not visible here, confirm.
 */
149 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
151 tx_dsg->addr = mem_reg->sge.addr;
152 tx_dsg->length = imm_sz;
153 tx_dsg->lkey = mem_reg->sge.lkey;
154 iser_task->desc.num_sge = 2;
160 /* creates a new tx descriptor and adds header regd buffer */
161 static void iser_create_send_desc(struct iser_conn *iser_conn,
162 struct iser_tx_desc *tx_desc)
/*
 * Resets @tx_desc for a new send. Does not allocate anything: it syncs the
 * header area for CPU access, zeroes the iSER header, sets the ISER_VER flag
 * and sets the SGE count to one.
 */
164 struct iser_device *device = iser_conn->ib_conn.device;
/* make ISER_HEADERS_LEN bytes at tx_desc->dma_addr CPU-accessible before the header is rewritten */
166 ib_dma_sync_single_for_cpu(device->ib_device,
167 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
/* start from a zeroed iSER header whose flags hold only ISER_VER */
169 memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
170 tx_desc->iser_header.flags = ISER_VER;
/* one SGE by default; iser_prepare_write_cmd() raises this to 2 when it fills tx_sg[1] */
171 tx_desc->num_sge = 1;
/*
 * iser_free_login_buf - tear down the DMA mappings of the connection's
 * login descriptor (iser_conn->login_desc).
 * NOTE(review): only the two unmap calls are visible here; any early-return
 * guard, the freeing of desc->req/desc->rsp and the pointer resets announced
 * by the trailing comment should be confirmed against the full function.
 */
174 static void iser_free_login_buf(struct iser_conn *iser_conn)
176 struct iser_device *device = iser_conn->ib_conn.device;
177 struct iser_login_desc *desc = &iser_conn->login_desc;
/* request buffer mapping: ISCSI_DEF_MAX_RECV_SEG_LEN bytes, DMA_TO_DEVICE */
182 ib_dma_unmap_single(device->ib_device, desc->req_dma,
183 ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
/* response buffer mapping: ISER_RX_LOGIN_SIZE bytes, DMA_FROM_DEVICE */
185 ib_dma_unmap_single(device->ib_device, desc->rsp_dma,
186 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
191 /* make sure we never redo any unmapping */
/*
 * iser_alloc_login_buf - allocate and DMA-map the login request/response
 * buffers of iser_conn->login_desc.
 * The request buffer is ISCSI_DEF_MAX_RECV_SEG_LEN bytes and the response
 * buffer ISER_RX_LOGIN_SIZE bytes, both from kmalloc(GFP_KERNEL); each is
 * mapped with ib_dma_map_single() and the mapping is checked with
 * ib_dma_mapping_error().
 * NOTE(review): the NULL checks after kmalloc(), the error labels and the
 * return values are not visible here - confirm the unwind order.
 */
196 static int iser_alloc_login_buf(struct iser_conn *iser_conn)
198 struct iser_device *device = iser_conn->ib_conn.device;
199 struct iser_login_desc *desc = &iser_conn->login_desc;
/* login request buffer */
201 desc->req = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL);
205 desc->req_dma = ib_dma_map_single(device->ib_device, desc->req,
206 ISCSI_DEF_MAX_RECV_SEG_LEN,
208 if (ib_dma_mapping_error(device->ib_device,
/* login response buffer */
212 desc->rsp = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
216 desc->rsp_dma = ib_dma_map_single(device->ib_device, desc->rsp,
219 if (ib_dma_mapping_error(device->ib_device,
/* unwind: drop the request mapping created above */
228 ib_dma_unmap_single(device->ib_device, desc->req_dma,
229 ISCSI_DEF_MAX_RECV_SEG_LEN,
/*
 * iser_alloc_rx_descriptors - set up the receive side of @iser_conn.
 * Sizes the receive queue from session->cmds_max, allocates registration
 * resources through the device's reg_ops, allocates the login buffers, then
 * allocates and DMA-maps one iser_rx_desc per receive slot. On failure the
 * labels at the bottom release what was already set up and an error is
 * logged.
 * NOTE(review): the return statements and the 'rx_desc_alloc_fail' label
 * targeted below are not visible here - confirm.
 */
237 int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
238 struct iscsi_session *session)
242 struct iser_rx_desc *rx_desc;
243 struct ib_sge *rx_sg;
244 struct ib_conn *ib_conn = &iser_conn->ib_conn;
245 struct iser_device *device = ib_conn->device;
/* queue depth, its index mask, and a refill batch of one quarter of the depth */
247 iser_conn->qp_max_recv_dtos = session->cmds_max;
248 iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
249 iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;
251 if (device->reg_ops->alloc_reg_res(ib_conn, session->scsi_cmds_max,
252 iser_conn->scsi_sg_tablesize))
253 goto create_rdma_reg_res_failed;
255 if (iser_alloc_login_buf(iser_conn))
256 goto alloc_login_buf_fail;
/*
 * NOTE(review): the array size is an open-coded multiplication;
 * kmalloc_array() would add an overflow check - consider switching.
 */
258 iser_conn->num_rx_descs = session->cmds_max;
259 iser_conn->rx_descs = kmalloc(iser_conn->num_rx_descs *
260 sizeof(struct iser_rx_desc), GFP_KERNEL);
261 if (!iser_conn->rx_descs)
262 goto rx_desc_alloc_fail;
264 rx_desc = iser_conn->rx_descs;
/*
 * The loop bound is qp_max_recv_dtos while the array was sized with
 * num_rx_descs; both were assigned session->cmds_max above.
 */
266 for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) {
267 dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
268 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
269 if (ib_dma_mapping_error(device->ib_device, dma_addr))
270 goto rx_desc_dma_map_failed;
272 rx_desc->dma_addr = dma_addr;
/* each descriptor carries its own receive SGE describing the mapped area */
274 rx_sg = &rx_desc->rx_sg;
275 rx_sg->addr = rx_desc->dma_addr;
276 rx_sg->length = ISER_RX_PAYLOAD_SIZE;
277 rx_sg->lkey = device->pd->local_dma_lkey;
280 iser_conn->rx_desc_head = 0;
/* unmap only the first i descriptors, i.e. those mapped before the failure */
283 rx_desc_dma_map_failed:
284 rx_desc = iser_conn->rx_descs;
285 for (j = 0; j < i; j++, rx_desc++)
286 ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
287 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
288 kfree(iser_conn->rx_descs);
289 iser_conn->rx_descs = NULL;
291 iser_free_login_buf(iser_conn);
/*
 * NOTE(review): free_reg_res is called unconditionally here, whereas
 * iser_free_rx_descriptors() tests the hook for NULL first - confirm which
 * behaviour is intended.
 */
292 alloc_login_buf_fail:
293 device->reg_ops->free_reg_res(ib_conn);
294 create_rdma_reg_res_failed:
295 iser_err("failed allocating rx descriptors / data buffers\n");
/*
 * iser_free_rx_descriptors - release the receive-side resources of
 * @iser_conn: calls the device's free_reg_res hook when one is set, unmaps
 * every RX descriptor, frees the descriptor array and finally calls
 * iser_free_login_buf().
 */
299 void iser_free_rx_descriptors(struct iser_conn *iser_conn)
302 struct iser_rx_desc *rx_desc;
303 struct ib_conn *ib_conn = &iser_conn->ib_conn;
304 struct iser_device *device = ib_conn->device;
/* the hook is optional on this path */
306 if (device->reg_ops->free_reg_res)
307 device->reg_ops->free_reg_res(ib_conn);
/*
 * NOTE(review): rx_descs is walked without a NULL test - assumes it is
 * non-NULL whenever qp_max_recv_dtos is non-zero; confirm against callers.
 */
309 rx_desc = iser_conn->rx_descs;
310 for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)
311 ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
312 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
313 kfree(iser_conn->rx_descs);
314 /* make sure we never redo any unmapping */
315 iser_conn->rx_descs = NULL;
317 iser_free_login_buf(iser_conn);
/*
 * iser_post_rx_bufs - post the initial batch of receive buffers once the
 * login request @req carries both bits of ISCSI_FULL_FEATURE_PHASE.
 * For a discovery session only a message is logged (the login RX buffer is
 * re-used); otherwise iser_conn->min_posted_rx buffers are posted through
 * iser_post_recvm().
 * NOTE(review): the statements taken when the flag test fails, and the
 * return values, are not visible here - confirm.
 */
320 static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
322 struct iser_conn *iser_conn = conn->dd_data;
323 struct ib_conn *ib_conn = &iser_conn->ib_conn;
324 struct iscsi_session *session = conn->session;
326 iser_dbg("req op %x flags %x\n", req->opcode, req->flags);
327 /* check if this is the last login - going to full feature phase */
328 if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != ISCSI_FULL_FEATURE_PHASE)
332 * Check that there is one posted recv buffer
333 * (for the last login response).
/* a count other than 1 only triggers a warning; execution continues */
335 WARN_ON(ib_conn->post_recv_buf_count != 1);
337 if (session->discovery_sess) {
338 iser_info("Discovery session, re-using login RX buffer\n");
341 iser_info("Normal session, posting batch of RX %d buffers\n",
342 iser_conn->min_posted_rx);
344 /* Initial post receive buffers */
345 if (iser_post_recvm(iser_conn, iser_conn->min_posted_rx))
/*
 * iser_signal_comp - returns true when @sig_count is an exact multiple of
 * ISER_SIGNAL_CMD_COUNT. iser_send_command() passes the result to
 * iser_post_send() as its last argument.
 */
351 static inline bool iser_signal_comp(u8 sig_count)
353 return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
357 * iser_send_command - send command PDU
/*
 * Builds and posts the send for a SCSI command PDU: resets the task's TX
 * descriptor, describes the command's data (and protection) scatterlists in
 * the task's iser_data_buf entries, runs the read and/or write preparation
 * according to the PDU flags, then posts the descriptor with
 * iser_post_send().
 * NOTE(review): the success return and the exact layout of the error label
 * are not visible here - confirm.
 */
359 int iser_send_command(struct iscsi_conn *conn,
360 struct iscsi_task *task)
362 struct iser_conn *iser_conn = conn->dd_data;
363 struct iscsi_iser_task *iser_task = task->dd_data;
366 struct iser_data_buf *data_buf, *prot_buf;
367 struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
368 struct scsi_cmnd *sc = task->sc;
369 struct iser_tx_desc *tx_desc = &iser_task->desc;
/* the per-connection counter is incremented for every command, before anything can fail */
370 u8 sig_count = ++iser_conn->ib_conn.sig_count;
/* data_length is converted from network byte order */
372 edtl = ntohl(hdr->data_length);
374 /* build the tx desc regd header and add it to the tx desc dto */
375 tx_desc->type = ISCSI_TX_SCSI_COMMAND;
376 iser_create_send_desc(iser_conn, tx_desc);
/* pick the IN or OUT buffer pair depending on the READ flag */
378 if (hdr->flags & ISCSI_FLAG_CMD_READ) {
379 data_buf = &iser_task->data[ISER_DIR_IN];
380 prot_buf = &iser_task->prot[ISER_DIR_IN];
382 data_buf = &iser_task->data[ISER_DIR_OUT];
383 prot_buf = &iser_task->prot[ISER_DIR_OUT];
386 if (scsi_sg_count(sc)) { /* using a scatter list */
387 data_buf->sg = scsi_sglist(sc);
388 data_buf->size = scsi_sg_count(sc);
390 data_buf->data_len = scsi_bufflen(sc);
/*
 * Protection length: 8 bytes for every sector_size-sized unit of data_len
 * (the division is done as a shift by ilog2(sector_size)).
 */
392 if (scsi_prot_sg_count(sc)) {
393 prot_buf->sg = scsi_prot_sglist(sc);
394 prot_buf->size = scsi_prot_sg_count(sc);
395 prot_buf->data_len = (data_buf->data_len >>
396 ilog2(sc->device->sector_size)) * 8;
/* READ and WRITE flags are tested independently, so both preparations can run */
399 if (hdr->flags & ISCSI_FLAG_CMD_READ) {
400 err = iser_prepare_read_cmd(task);
402 goto send_command_error;
404 if (hdr->flags & ISCSI_FLAG_CMD_WRITE) {
405 err = iser_prepare_write_cmd(task,
408 task->unsol_r2t.data_length,
411 goto send_command_error;
414 iser_task->status = ISER_TASK_STATUS_STARTED;
/* last argument comes from iser_signal_comp(): true on every ISER_SIGNAL_CMD_COUNT-th count */
416 err = iser_post_send(&iser_conn->ib_conn, tx_desc,
417 iser_signal_comp(sig_count));
422 iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err);
427 * iser_send_data_out - send data out PDU
/*
 * Posts one Data-Out PDU for @task. A fresh TX descriptor is taken from
 * ig.desc_cache, the iSCSI header @hdr is copied into it, and a second SGE is
 * pointed at the already registered ISER_DIR_OUT memory at the offset given
 * in @hdr. The descriptor is returned to the cache on the error path below;
 * iser_snd_completion() frees ISCSI_TX_DATAOUT descriptors on completion.
 */
429 int iser_send_data_out(struct iscsi_conn *conn,
430 struct iscsi_task *task,
431 struct iscsi_data *hdr)
433 struct iser_conn *iser_conn = conn->dd_data;
434 struct iscsi_iser_task *iser_task = task->dd_data;
435 struct iser_tx_desc *tx_desc = NULL;
436 struct iser_mem_reg *mem_reg;
437 unsigned long buf_offset;
438 unsigned long data_seg_len;
441 struct ib_sge *tx_dsg;
/* itt is used as-is (only for logging below); length and offset are converted from wire order */
443 itt = (__force uint32_t)hdr->itt;
444 data_seg_len = ntoh24(hdr->dlength);
445 buf_offset = ntohl(hdr->offset);
447 iser_dbg("%s itt %d dseg_len %d offset %d\n",
448 __func__,(int)itt,(int)data_seg_len,(int)buf_offset);
/* zeroed descriptor, allocated with GFP_ATOMIC */
450 tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC);
451 if (tx_desc == NULL) {
452 iser_err("Failed to alloc desc for post dataout\n");
456 tx_desc->type = ISCSI_TX_DATAOUT;
457 tx_desc->iser_header.flags = ISER_VER;
458 memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
460 /* build the tx desc */
461 err = iser_initialize_task_headers(task, tx_desc);
463 goto send_data_out_error;
/* data SGE: registered OUT region shifted by the PDU's buffer offset */
465 mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
466 tx_dsg = &tx_desc->tx_sg[1];
467 tx_dsg->addr = mem_reg->sge.addr + buf_offset;
468 tx_dsg->length = data_seg_len;
469 tx_dsg->lkey = mem_reg->sge.lkey;
470 tx_desc->num_sge = 2;
/* reject a segment that would run past the end of the task's OUT data */
472 if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
473 iser_err("Offset:%ld & DSL:%ld in Data-Out "
474 "inconsistent with total len:%ld, itt:%d\n",
475 buf_offset, data_seg_len,
476 iser_task->data[ISER_DIR_OUT].data_len, itt);
478 goto send_data_out_error;
480 iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n",
481 itt, buf_offset, data_seg_len);
484 err = iser_post_send(&iser_conn->ib_conn, tx_desc, true);
/*
 * NOTE(review): the descriptor is freed here without an unmap, while
 * iser_snd_completion() unmaps tx_desc->dma_addr before freeing a DATAOUT
 * descriptor. If iser_initialize_task_headers() creates that mapping, this
 * path may leave it behind - verify.
 */
489 kmem_cache_free(ig.desc_cache, tx_desc);
490 iser_err("conn %p failed err %d\n", conn, err);
/*
 * iser_send_control - post the send for a control-type PDU using the task's
 * own TX descriptor. A data segment is only accepted on the connection's
 * login task; it is copied into the login request buffer and attached as a
 * second SGE. For the login task the login receive buffer is posted first
 * (iser_post_recvl()), followed by iser_post_rx_bufs().
 * NOTE(review): the success return and the layout of the error label are
 * not visible here - confirm.
 */
494 int iser_send_control(struct iscsi_conn *conn,
495 struct iscsi_task *task)
497 struct iser_conn *iser_conn = conn->dd_data;
498 struct iscsi_iser_task *iser_task = task->dd_data;
499 struct iser_tx_desc *mdesc = &iser_task->desc;
500 unsigned long data_seg_len;
502 struct iser_device *device;
504 /* build the tx desc regd header and add it to the tx desc dto */
505 mdesc->type = ISCSI_TX_CONTROL;
506 iser_create_send_desc(iser_conn, mdesc);
508 device = iser_conn->ib_conn.device;
510 data_seg_len = ntoh24(task->hdr->dlength);
512 if (data_seg_len > 0) {
513 struct iser_login_desc *desc = &iser_conn->login_desc;
514 struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
516 if (task != conn->login_task) {
517 iser_err("data present on non login task!!!\n");
518 goto send_control_error;
/*
 * Copy the payload into the DMA-mapped login request buffer, bracketed by
 * sync-for-cpu / sync-for-device calls on the same range.
 * NOTE(review): the copy length is task->data_count while desc->req is
 * allocated with ISCSI_DEF_MAX_RECV_SEG_LEN bytes in iser_alloc_login_buf();
 * no bound check is visible here - confirm the caller guarantees it.
 */
521 ib_dma_sync_single_for_cpu(device->ib_device, desc->req_dma,
522 task->data_count, DMA_TO_DEVICE);
524 memcpy(desc->req, task->data, task->data_count);
526 ib_dma_sync_single_for_device(device->ib_device, desc->req_dma,
527 task->data_count, DMA_TO_DEVICE);
529 tx_dsg->addr = desc->req_dma;
530 tx_dsg->length = task->data_count;
531 tx_dsg->lkey = device->pd->local_dma_lkey;
/* receive buffers are posted before the send itself is posted */
535 if (task == conn->login_task) {
536 iser_dbg("op %x dsl %lx, posting login rx buffer\n",
537 task->hdr->opcode, data_seg_len);
538 err = iser_post_recvl(iser_conn);
540 goto send_control_error;
541 err = iser_post_rx_bufs(conn, task->hdr);
543 goto send_control_error;
546 err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
551 iser_err("conn %p failed err %d\n",conn, err);
556 * iser_rcv_completion - recv DTO completion
/*
 * Handles one completed receive: locates the iSCSI header and data inside
 * either the login response buffer or a regular RX descriptor, syncs the
 * buffer for CPU access, hands the PDU to iscsi_iser_recv(), syncs it back
 * for the device, decrements the posted-receive count and, for non-login
 * descriptors, posts more receive buffers when enough slots are free.
 */
558 void iser_rcv_completion(struct iser_rx_desc *rx_desc,
559 unsigned long rx_xfer_len,
560 struct ib_conn *ib_conn)
562 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
564 struct iscsi_hdr *hdr;
567 int rx_buflen, outstanding, count, err;
569 /* differentiate between login to all other PDUs */
570 if (rx_desc == (void *)&iser_conn->login_desc) {
571 rx_dma = iser_conn->login_desc.rsp_dma;
572 rx_buflen = ISER_RX_LOGIN_SIZE;
/* login response: iSCSI header follows the iSER header, data starts at ISER_HEADERS_LEN */
573 hdr = iser_conn->login_desc.rsp + sizeof(struct iser_hdr);
574 data = iser_conn->login_desc.rsp + ISER_HEADERS_LEN;
576 rx_dma = rx_desc->dma_addr;
577 rx_buflen = ISER_RX_PAYLOAD_SIZE;
578 hdr = &rx_desc->iscsi_header;
579 data = rx_desc->data;
582 ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
583 rx_buflen, DMA_FROM_DEVICE);
/*
 * NOTE(review): rx_xfer_len is unsigned long, so the subtraction below
 * wraps if rx_xfer_len is smaller than ISER_HEADERS_LEN; no lower-bound
 * check is visible here - confirm one exists upstream of this call.
 */
586 iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
587 hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
589 iscsi_iser_recv(iser_conn->iscsi_conn, hdr, data,
590 rx_xfer_len - ISER_HEADERS_LEN);
592 ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
593 rx_buflen, DMA_FROM_DEVICE);
595 /* decrementing conn->post_recv_buf_count only --after-- freeing the *
596 * task eliminates the need to worry on tasks which are completed in *
597 * parallel to the execution of iser_conn_term. So the code that waits *
598 * for the posted rx bufs refcount to become zero handles everything */
599 ib_conn->post_recv_buf_count--;
601 if (rx_desc == (void *)&iser_conn->login_desc)
/*
 * Refill when at least min_posted_rx slots are free; the batch is capped at
 * min_posted_rx. A posting failure is only logged.
 */
604 outstanding = ib_conn->post_recv_buf_count;
605 if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
606 count = min(iser_conn->qp_max_recv_dtos - outstanding,
607 iser_conn->min_posted_rx);
608 err = iser_post_recvm(iser_conn, count);
610 iser_err("posting %d rx bufs err %d\n", count, err);
/*
 * iser_snd_completion - handle a completed send of @tx_desc.
 * ISCSI_TX_DATAOUT descriptors are unmapped and returned to ig.desc_cache.
 * For ISCSI_TX_CONTROL descriptors the owning iscsi_task is recovered from
 * the descriptor address and released with iscsi_put_task() when its itt is
 * RESERVED_ITT.
 */
614 void iser_snd_completion(struct iser_tx_desc *tx_desc,
615 struct ib_conn *ib_conn)
617 struct iscsi_task *task;
618 struct iser_device *device = ib_conn->device;
620 if (tx_desc->type == ISCSI_TX_DATAOUT) {
621 ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
622 ISER_HEADERS_LEN, DMA_TO_DEVICE);
623 kmem_cache_free(ig.desc_cache, tx_desc);
/*
 * NOTE(review): the NULL test below only makes sense if tx_desc is cleared
 * after being freed above; that assignment is not visible here - confirm,
 * otherwise the type read would touch freed memory.
 */
627 if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) {
628 /* this arithmetic is legal by libiscsi dd_data allocation */
629 task = (void *) ((long)(void *)tx_desc -
630 sizeof(struct iscsi_task));
631 if (task->hdr->itt == RESERVED_ITT)
632 iscsi_put_task(task);
/*
 * iser_task_rdma_init - reset the RDMA state of @iser_task: status goes to
 * ISER_TASK_STATUS_INIT, both direction flags and all data/protection
 * lengths are cleared, and both rdma_reg entries are zeroed.
 */
636 void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
639 iser_task->status = ISER_TASK_STATUS_INIT;
/* dir[] is what iser_task_rdma_finalize() tests to decide which direction to clean up */
641 iser_task->dir[ISER_DIR_IN] = 0;
642 iser_task->dir[ISER_DIR_OUT] = 0;
644 iser_task->data[ISER_DIR_IN].data_len = 0;
645 iser_task->data[ISER_DIR_OUT].data_len = 0;
647 iser_task->prot[ISER_DIR_IN].data_len = 0;
648 iser_task->prot[ISER_DIR_OUT].data_len = 0;
/* wipe both memory-registration records in full */
650 memset(&iser_task->rdma_reg[ISER_DIR_IN], 0,
651 sizeof(struct iser_mem_reg));
652 memset(&iser_task->rdma_reg[ISER_DIR_OUT], 0,
653 sizeof(struct iser_mem_reg));
656 void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
658 int prot_count = scsi_prot_sg_count(iser_task->sc);
660 if (iser_task->dir[ISER_DIR_IN]) {
661 iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
662 iser_dma_unmap_task_data(iser_task,
663 &iser_task->data[ISER_DIR_IN],
666 iser_dma_unmap_task_data(iser_task,
667 &iser_task->prot[ISER_DIR_IN],
671 if (iser_task->dir[ISER_DIR_OUT]) {
672 iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
673 iser_dma_unmap_task_data(iser_task,
674 &iser_task->data[ISER_DIR_OUT],
677 iser_dma_unmap_task_data(iser_task,
678 &iser_task->prot[ISER_DIR_OUT],