drivers/infiniband/ulp/srp/ib_srp.c
1 /*
2  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
44
45 #include <linux/atomic.h>
46
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
51 #include <scsi/srp.h>
52 #include <scsi/scsi_transport_srp.h>
53
54 #include "ib_srp.h"
55
56 #define DRV_NAME        "ib_srp"
57 #define PFX             DRV_NAME ": "
58 #define DRV_VERSION     "1.0"
59 #define DRV_RELDATE     "July 1, 2013"
60
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
66
67 static unsigned int srp_sg_tablesize;
68 static unsigned int cmd_sg_entries;
69 static unsigned int indirect_sg_entries;
70 static bool allow_ext_sg;
71 static bool prefer_fr;
72 static bool register_always;
73 static int topspin_workarounds = 1;
74
75 module_param(srp_sg_tablesize, uint, 0444);
76 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
77
78 module_param(cmd_sg_entries, uint, 0444);
79 MODULE_PARM_DESC(cmd_sg_entries,
80                  "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
81
82 module_param(indirect_sg_entries, uint, 0444);
83 MODULE_PARM_DESC(indirect_sg_entries,
84                  "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");
85
86 module_param(allow_ext_sg, bool, 0444);
87 MODULE_PARM_DESC(allow_ext_sg,
88                   "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
89
90 module_param(topspin_workarounds, int, 0444);
91 MODULE_PARM_DESC(topspin_workarounds,
92                  "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
93
94 module_param(prefer_fr, bool, 0444);
95 MODULE_PARM_DESC(prefer_fr,
96 "Whether to use fast registration if both FMR and fast registration are supported");
97
98 module_param(register_always, bool, 0444);
99 MODULE_PARM_DESC(register_always,
100                  "Use memory registration even for contiguous memory regions");
101
102 static struct kernel_param_ops srp_tmo_ops;
103
104 static int srp_reconnect_delay = 10;
105 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
106                 S_IRUGO | S_IWUSR);
107 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
108
109 static int srp_fast_io_fail_tmo = 15;
110 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
111                 S_IRUGO | S_IWUSR);
112 MODULE_PARM_DESC(fast_io_fail_tmo,
113                  "Number of seconds between the observation of a transport"
114                  " layer error and failing all I/O. \"off\" means that this"
115                  " functionality is disabled.");
116
117 static int srp_dev_loss_tmo = 600;
118 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
119                 S_IRUGO | S_IWUSR);
120 MODULE_PARM_DESC(dev_loss_tmo,
121                  "Maximum number of seconds that the SRP transport should"
122                  " insulate transport layer errors. After this time has been"
123                  " exceeded the SCSI host is removed. Should be"
124                  " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
125                  " if fast_io_fail_tmo has not been set. \"off\" means that"
126                  " this functionality is disabled.");
127
128 static unsigned ch_count;
129 module_param(ch_count, uint, 0444);
130 MODULE_PARM_DESC(ch_count,
131                  "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
132
133 static void srp_add_one(struct ib_device *device);
134 static void srp_remove_one(struct ib_device *device);
135 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
136 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
137 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
138
139 static struct scsi_transport_template *ib_srp_transport_template;
140 static struct workqueue_struct *srp_remove_wq;
141
142 static struct ib_client srp_client = {
143         .name   = "srp",
144         .add    = srp_add_one,
145         .remove = srp_remove_one
146 };
147
148 static struct ib_sa_client srp_sa_client;
149
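/**
 * srp_tmo_get() - show an SRP timeout module parameter
 * @buffer: Output buffer.
 * @kp:     Kernel parameter descriptor; kp->arg points to the timeout value.
 *
 * Prints the timeout in seconds, or "off" if the timeout is negative.
 */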
150 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
151 {
152         int tmo = *(int *)kp->arg;
153
154         if (tmo >= 0)
155                 return sprintf(buffer, "%d", tmo);
156         else
157                 return sprintf(buffer, "off");
158 }
159
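/**
 * srp_tmo_set() - set an SRP timeout module parameter
 * @val: New value; either a number of seconds or "off".
 * @kp:  Kernel parameter descriptor; kp->arg points to the timeout value.
 *
 * "off" is stored as -1. The new value is only accepted if srp_tmo_valid()
 * approves the resulting combination of reconnect_delay, fast_io_fail_tmo
 * and dev_loss_tmo.
 */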
160 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
161 {
162         int tmo, res;
163
164         if (strncmp(val, "off", 3) != 0) {
165                 res = kstrtoint(val, 0, &tmo);
166                 if (res)
167                         goto out;
168         } else {
169                 tmo = -1;
170         }
171         if (kp->arg == &srp_reconnect_delay)
172                 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
173                                     srp_dev_loss_tmo);
174         else if (kp->arg == &srp_fast_io_fail_tmo)
175                 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
176         else
177                 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
178                                     tmo);
179         if (res)
180                 goto out;
181         *(int *)kp->arg = tmo;
182
183 out:
184         return res;
185 }
186
187 static struct kernel_param_ops srp_tmo_ops = {
188         .get = srp_tmo_get,
189         .set = srp_tmo_set,
190 };
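
/*
 * All three timeouts above are writable at runtime (S_IWUSR). As an example,
 * assuming the driver has been loaded as the ib_srp module, the following
 * would shorten the reconnect delay and disable the fast_io_fail mechanism:
 *
 *	echo 5   > /sys/module/ib_srp/parameters/reconnect_delay
 *	echo off > /sys/module/ib_srp/parameters/fast_io_fail_tmo
 */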
191
192 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
193 {
194         return (struct srp_target_port *) host->hostdata;
195 }
196
197 static const char *srp_target_info(struct Scsi_Host *host)
198 {
199         return host_to_target(host)->target_name;
200 }
201
202 static int srp_target_is_topspin(struct srp_target_port *target)
203 {
204         static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
205         static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };
206
207         return topspin_workarounds &&
208                 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
209                  !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
210 }
211
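/**
 * srp_alloc_iu() - allocate an information unit buffer and DMA map it
 * @host:      SRP host that owns the IB device used for the DMA mapping.
 * @size:      Size of the data buffer in bytes.
 * @gfp_mask:  Allocation flags.
 * @direction: DMA data direction the buffer will be used for.
 *
 * Returns NULL if allocation or DMA mapping fails.
 */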
212 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
213                                    gfp_t gfp_mask,
214                                    enum dma_data_direction direction)
215 {
216         struct srp_iu *iu;
217
218         iu = kmalloc(sizeof *iu, gfp_mask);
219         if (!iu)
220                 goto out;
221
222         iu->buf = kzalloc(size, gfp_mask);
223         if (!iu->buf)
224                 goto out_free_iu;
225
226         iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
227                                     direction);
228         if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
229                 goto out_free_buf;
230
231         iu->size      = size;
232         iu->direction = direction;
233
234         return iu;
235
236 out_free_buf:
237         kfree(iu->buf);
238 out_free_iu:
239         kfree(iu);
240 out:
241         return NULL;
242 }
243
244 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
245 {
246         if (!iu)
247                 return;
248
249         ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
250                             iu->direction);
251         kfree(iu->buf);
252         kfree(iu);
253 }
254
255 static void srp_qp_event(struct ib_event *event, void *context)
256 {
257         pr_debug("QP event %s (%d)\n",
258                  ib_event_msg(event->event), event->event);
259 }
260
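/**
 * srp_init_qp() - move a freshly created QP into the INIT state
 * @target: SRP target port the QP belongs to.
 * @qp:     Queue pair to initialize.
 *
 * Looks up the P_Key index for the target and sets the QP state, P_Key
 * index, access flags and port number.
 */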
261 static int srp_init_qp(struct srp_target_port *target,
262                        struct ib_qp *qp)
263 {
264         struct ib_qp_attr *attr;
265         int ret;
266
267         attr = kmalloc(sizeof *attr, GFP_KERNEL);
268         if (!attr)
269                 return -ENOMEM;
270
271         ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
272                                   target->srp_host->port,
273                                   be16_to_cpu(target->pkey),
274                                   &attr->pkey_index);
275         if (ret)
276                 goto out;
277
278         attr->qp_state        = IB_QPS_INIT;
279         attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
280                                     IB_ACCESS_REMOTE_WRITE);
281         attr->port_num        = target->srp_host->port;
282
283         ret = ib_modify_qp(qp, attr,
284                            IB_QP_STATE          |
285                            IB_QP_PKEY_INDEX     |
286                            IB_QP_ACCESS_FLAGS   |
287                            IB_QP_PORT);
288
289 out:
290         kfree(attr);
291         return ret;
292 }
293
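/**
 * srp_new_cm_id() - allocate a new CM ID for an RDMA channel
 * @ch: SRP RDMA channel.
 *
 * Any previous CM ID of @ch is destroyed and the path record query
 * parameters (SGID, DGID, P_Key and service ID) are reinitialized from the
 * target port information.
 */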
294 static int srp_new_cm_id(struct srp_rdma_ch *ch)
295 {
296         struct srp_target_port *target = ch->target;
297         struct ib_cm_id *new_cm_id;
298
299         new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
300                                     srp_cm_handler, ch);
301         if (IS_ERR(new_cm_id))
302                 return PTR_ERR(new_cm_id);
303
304         if (ch->cm_id)
305                 ib_destroy_cm_id(ch->cm_id);
306         ch->cm_id = new_cm_id;
307         ch->path.sgid = target->sgid;
308         ch->path.dgid = target->orig_dgid;
309         ch->path.pkey = target->pkey;
310         ch->path.service_id = target->service_id;
311
312         return 0;
313 }
314
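/**
 * srp_alloc_fmr_pool() - allocate an FMR pool for a target port
 * @target: SRP target port.
 *
 * The pool is sized to the SCSI host queue depth (can_queue).
 */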
315 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
316 {
317         struct srp_device *dev = target->srp_host->srp_dev;
318         struct ib_fmr_pool_param fmr_param;
319
320         memset(&fmr_param, 0, sizeof(fmr_param));
321         fmr_param.pool_size         = target->scsi_host->can_queue;
322         fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
323         fmr_param.cache             = 1;
324         fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
325         fmr_param.page_shift        = ilog2(dev->mr_page_size);
326         fmr_param.access            = (IB_ACCESS_LOCAL_WRITE |
327                                        IB_ACCESS_REMOTE_WRITE |
328                                        IB_ACCESS_REMOTE_READ);
329
330         return ib_create_fmr_pool(dev->pd, &fmr_param);
331 }
332
333 /**
334  * srp_destroy_fr_pool() - free the resources owned by a pool
335  * @pool: Fast registration pool to be destroyed.
336  */
337 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
338 {
339         int i;
340         struct srp_fr_desc *d;
341
342         if (!pool)
343                 return;
344
345         for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
346                 if (d->frpl)
347                         ib_free_fast_reg_page_list(d->frpl);
348                 if (d->mr)
349                         ib_dereg_mr(d->mr);
350         }
351         kfree(pool);
352 }
353
354 /**
355  * srp_create_fr_pool() - allocate and initialize a pool for fast registration
356  * @device:            IB device to allocate fast registration descriptors for.
357  * @pd:                Protection domain associated with the FR descriptors.
358  * @pool_size:         Number of descriptors to allocate.
359  * @max_page_list_len: Maximum fast registration work request page list length.
360  */
361 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
362                                               struct ib_pd *pd, int pool_size,
363                                               int max_page_list_len)
364 {
365         struct srp_fr_pool *pool;
366         struct srp_fr_desc *d;
367         struct ib_mr *mr;
368         struct ib_fast_reg_page_list *frpl;
369         int i, ret = -EINVAL;
370
371         if (pool_size <= 0)
372                 goto err;
373         ret = -ENOMEM;
374         pool = kzalloc(sizeof(struct srp_fr_pool) +
375                        pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
376         if (!pool)
377                 goto err;
378         pool->size = pool_size;
379         pool->max_page_list_len = max_page_list_len;
380         spin_lock_init(&pool->lock);
381         INIT_LIST_HEAD(&pool->free_list);
382
383         for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
384                 mr = ib_alloc_fast_reg_mr(pd, max_page_list_len);
385                 if (IS_ERR(mr)) {
386                         ret = PTR_ERR(mr);
387                         goto destroy_pool;
388                 }
389                 d->mr = mr;
390                 frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
391                 if (IS_ERR(frpl)) {
392                         ret = PTR_ERR(frpl);
393                         goto destroy_pool;
394                 }
395                 d->frpl = frpl;
396                 list_add_tail(&d->entry, &pool->free_list);
397         }
398
399 out:
400         return pool;
401
402 destroy_pool:
403         srp_destroy_fr_pool(pool);
404
405 err:
406         pool = ERR_PTR(ret);
407         goto out;
408 }
409
410 /**
411  * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
412  * @pool: Pool to obtain descriptor from.
413  */
414 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
415 {
416         struct srp_fr_desc *d = NULL;
417         unsigned long flags;
418
419         spin_lock_irqsave(&pool->lock, flags);
420         if (!list_empty(&pool->free_list)) {
421                 d = list_first_entry(&pool->free_list, typeof(*d), entry);
422                 list_del(&d->entry);
423         }
424         spin_unlock_irqrestore(&pool->lock, flags);
425
426         return d;
427 }
428
429 /**
430  * srp_fr_pool_put() - put an FR descriptor back in the free list
431  * @pool: Pool the descriptor was allocated from.
432  * @desc: Pointer to an array of fast registration descriptor pointers.
433  * @n:    Number of descriptors to put back.
434  *
435  * Note: The caller must already have queued an invalidation request for
436  * desc->mr->rkey before calling this function.
437  */
438 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
439                             int n)
440 {
441         unsigned long flags;
442         int i;
443
444         spin_lock_irqsave(&pool->lock, flags);
445         for (i = 0; i < n; i++)
446                 list_add(&desc[i]->entry, &pool->free_list);
447         spin_unlock_irqrestore(&pool->lock, flags);
448 }
449
450 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
451 {
452         struct srp_device *dev = target->srp_host->srp_dev;
453
454         return srp_create_fr_pool(dev->dev, dev->pd,
455                                   target->scsi_host->can_queue,
456                                   dev->max_pages_per_mr);
457 }
458
459 /**
460  * srp_destroy_qp() - destroy an RDMA queue pair
461  * @ch: SRP RDMA channel.
462  *
463  * Change a queue pair into the error state and wait until all receive
464  * completions have been processed before destroying it. This prevents the
465  * receive completion handler from accessing the queue pair while it is
466  * being destroyed.
467  */
468 static void srp_destroy_qp(struct srp_rdma_ch *ch)
469 {
470         static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
471         static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
472         struct ib_recv_wr *bad_wr;
473         int ret;
474
475         /* Destroying a QP and reusing ch->done is only safe if not connected */
476         WARN_ON_ONCE(ch->connected);
477
478         ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
479         WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret);
480         if (ret)
481                 goto out;
482
483         init_completion(&ch->done);
484         ret = ib_post_recv(ch->qp, &wr, &bad_wr);
485         WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
486         if (ret == 0)
487                 wait_for_completion(&ch->done);
488
489 out:
490         ib_destroy_qp(ch->qp);
491 }
492
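/**
 * srp_create_ch_ib() - create the IB resources of an RDMA channel
 * @ch: SRP RDMA channel.
 *
 * Creates new completion queues, a queue pair and, if supported, an FR or
 * FMR pool. Previously allocated resources of @ch, if any, are only
 * destroyed after their replacements have been created successfully.
 */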
493 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
494 {
495         struct srp_target_port *target = ch->target;
496         struct srp_device *dev = target->srp_host->srp_dev;
497         struct ib_qp_init_attr *init_attr;
498         struct ib_cq *recv_cq, *send_cq;
499         struct ib_qp *qp;
500         struct ib_fmr_pool *fmr_pool = NULL;
501         struct srp_fr_pool *fr_pool = NULL;
502         const int m = 1 + dev->use_fast_reg;
503         int ret;
504
505         init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
506         if (!init_attr)
507                 return -ENOMEM;
508
509         /* + 1 for SRP_LAST_WR_ID */
510         recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
511                                target->queue_size + 1, ch->comp_vector);
512         if (IS_ERR(recv_cq)) {
513                 ret = PTR_ERR(recv_cq);
514                 goto err;
515         }
516
517         send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
518                                m * target->queue_size, ch->comp_vector);
519         if (IS_ERR(send_cq)) {
520                 ret = PTR_ERR(send_cq);
521                 goto err_recv_cq;
522         }
523
524         ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
525
526         init_attr->event_handler       = srp_qp_event;
527         init_attr->cap.max_send_wr     = m * target->queue_size;
528         init_attr->cap.max_recv_wr     = target->queue_size + 1;
529         init_attr->cap.max_recv_sge    = 1;
530         init_attr->cap.max_send_sge    = 1;
531         init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
532         init_attr->qp_type             = IB_QPT_RC;
533         init_attr->send_cq             = send_cq;
534         init_attr->recv_cq             = recv_cq;
535
536         qp = ib_create_qp(dev->pd, init_attr);
537         if (IS_ERR(qp)) {
538                 ret = PTR_ERR(qp);
539                 goto err_send_cq;
540         }
541
542         ret = srp_init_qp(target, qp);
543         if (ret)
544                 goto err_qp;
545
546         if (dev->use_fast_reg && dev->has_fr) {
547                 fr_pool = srp_alloc_fr_pool(target);
548                 if (IS_ERR(fr_pool)) {
549                         ret = PTR_ERR(fr_pool);
550                         shost_printk(KERN_WARNING, target->scsi_host, PFX
551                                      "FR pool allocation failed (%d)\n", ret);
552                         goto err_qp;
553                 }
554                 if (ch->fr_pool)
555                         srp_destroy_fr_pool(ch->fr_pool);
556                 ch->fr_pool = fr_pool;
557         } else if (!dev->use_fast_reg && dev->has_fmr) {
558                 fmr_pool = srp_alloc_fmr_pool(target);
559                 if (IS_ERR(fmr_pool)) {
560                         ret = PTR_ERR(fmr_pool);
561                         shost_printk(KERN_WARNING, target->scsi_host, PFX
562                                      "FMR pool allocation failed (%d)\n", ret);
563                         goto err_qp;
564                 }
565                 if (ch->fmr_pool)
566                         ib_destroy_fmr_pool(ch->fmr_pool);
567                 ch->fmr_pool = fmr_pool;
568         }
569
570         if (ch->qp)
571                 srp_destroy_qp(ch);
572         if (ch->recv_cq)
573                 ib_destroy_cq(ch->recv_cq);
574         if (ch->send_cq)
575                 ib_destroy_cq(ch->send_cq);
576
577         ch->qp = qp;
578         ch->recv_cq = recv_cq;
579         ch->send_cq = send_cq;
580
581         kfree(init_attr);
582         return 0;
583
584 err_qp:
585         ib_destroy_qp(qp);
586
587 err_send_cq:
588         ib_destroy_cq(send_cq);
589
590 err_recv_cq:
591         ib_destroy_cq(recv_cq);
592
593 err:
594         kfree(init_attr);
595         return ret;
596 }
597
598 /*
599  * Note: this function may be called without srp_alloc_iu_bufs() having been
600  * invoked. Hence the ch->[rt]x_ring checks.
601  */
602 static void srp_free_ch_ib(struct srp_target_port *target,
603                            struct srp_rdma_ch *ch)
604 {
605         struct srp_device *dev = target->srp_host->srp_dev;
606         int i;
607
608         if (!ch->target)
609                 return;
610
611         if (ch->cm_id) {
612                 ib_destroy_cm_id(ch->cm_id);
613                 ch->cm_id = NULL;
614         }
615
616         /* Return if srp_new_cm_id() succeeded but srp_create_ch_ib() did not. */
617         if (!ch->qp)
618                 return;
619
620         if (dev->use_fast_reg) {
621                 if (ch->fr_pool)
622                         srp_destroy_fr_pool(ch->fr_pool);
623         } else {
624                 if (ch->fmr_pool)
625                         ib_destroy_fmr_pool(ch->fmr_pool);
626         }
627         srp_destroy_qp(ch);
628         ib_destroy_cq(ch->send_cq);
629         ib_destroy_cq(ch->recv_cq);
630
631         /*
632          * Prevent the SCSI error handler from using this channel after it
633          * has been freed: the SCSI error handler may continue trying to
634          * perform recovery actions even after scsi_remove_host() has
635          * returned.
636          */
637         ch->target = NULL;
638
639         ch->qp = NULL;
640         ch->send_cq = ch->recv_cq = NULL;
641
642         if (ch->rx_ring) {
643                 for (i = 0; i < target->queue_size; ++i)
644                         srp_free_iu(target->srp_host, ch->rx_ring[i]);
645                 kfree(ch->rx_ring);
646                 ch->rx_ring = NULL;
647         }
648         if (ch->tx_ring) {
649                 for (i = 0; i < target->queue_size; ++i)
650                         srp_free_iu(target->srp_host, ch->tx_ring[i]);
651                 kfree(ch->tx_ring);
652                 ch->tx_ring = NULL;
653         }
654 }
655
656 static void srp_path_rec_completion(int status,
657                                     struct ib_sa_path_rec *pathrec,
658                                     void *ch_ptr)
659 {
660         struct srp_rdma_ch *ch = ch_ptr;
661         struct srp_target_port *target = ch->target;
662
663         ch->status = status;
664         if (status)
665                 shost_printk(KERN_ERR, target->scsi_host,
666                              PFX "Got failed path rec status %d\n", status);
667         else
668                 ch->path = *pathrec;
669         complete(&ch->done);
670 }
671
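/**
 * srp_lookup_path() - look up the path record for an RDMA channel
 * @ch: SRP RDMA channel.
 *
 * Issues an SA path record query and waits (interruptibly) for its
 * completion. Returns a negative value on failure or interruption.
 */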
672 static int srp_lookup_path(struct srp_rdma_ch *ch)
673 {
674         struct srp_target_port *target = ch->target;
675         int ret;
676
677         ch->path.numb_path = 1;
678
679         init_completion(&ch->done);
680
681         ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
682                                                target->srp_host->srp_dev->dev,
683                                                target->srp_host->port,
684                                                &ch->path,
685                                                IB_SA_PATH_REC_SERVICE_ID |
686                                                IB_SA_PATH_REC_DGID       |
687                                                IB_SA_PATH_REC_SGID       |
688                                                IB_SA_PATH_REC_NUMB_PATH  |
689                                                IB_SA_PATH_REC_PKEY,
690                                                SRP_PATH_REC_TIMEOUT_MS,
691                                                GFP_KERNEL,
692                                                srp_path_rec_completion,
693                                                ch, &ch->path_query);
694         if (ch->path_query_id < 0)
695                 return ch->path_query_id;
696
697         ret = wait_for_completion_interruptible(&ch->done);
698         if (ret < 0)
699                 return ret;
700
701         if (ch->status < 0)
702                 shost_printk(KERN_WARNING, target->scsi_host,
703                              PFX "Path record query failed\n");
704
705         return ch->status;
706 }
707
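/**
 * srp_send_req() - send an SRP_LOGIN_REQ to the target
 * @ch:      SRP RDMA channel to log in.
 * @multich: Whether to set SRP_MULTICHAN_MULTI instead of
 *           SRP_MULTICHAN_SINGLE in the login request.
 */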
708 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
709 {
710         struct srp_target_port *target = ch->target;
711         struct {
712                 struct ib_cm_req_param param;
713                 struct srp_login_req   priv;
714         } *req = NULL;
715         int status;
716
717         req = kzalloc(sizeof *req, GFP_KERNEL);
718         if (!req)
719                 return -ENOMEM;
720
721         req->param.primary_path               = &ch->path;
722         req->param.alternate_path             = NULL;
723         req->param.service_id                 = target->service_id;
724         req->param.qp_num                     = ch->qp->qp_num;
725         req->param.qp_type                    = ch->qp->qp_type;
726         req->param.private_data               = &req->priv;
727         req->param.private_data_len           = sizeof req->priv;
728         req->param.flow_control               = 1;
729
730         get_random_bytes(&req->param.starting_psn, 4);
731         req->param.starting_psn              &= 0xffffff;
732
733         /*
734          * Pick some arbitrary defaults here; we could make these
735          * module parameters if anyone cared about setting them.
736          */
737         req->param.responder_resources        = 4;
738         req->param.remote_cm_response_timeout = 20;
739         req->param.local_cm_response_timeout  = 20;
740         req->param.retry_count                = target->tl_retry_count;
741         req->param.rnr_retry_count            = 7;
742         req->param.max_cm_retries             = 15;
743
744         req->priv.opcode        = SRP_LOGIN_REQ;
745         req->priv.tag           = 0;
746         req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
747         req->priv.req_buf_fmt   = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
748                                               SRP_BUF_FORMAT_INDIRECT);
749         req->priv.req_flags     = (multich ? SRP_MULTICHAN_MULTI :
750                                    SRP_MULTICHAN_SINGLE);
751         /*
752          * In the published SRP specification (draft rev. 16a), the
753          * port identifier format is 8 bytes of ID extension followed
754          * by 8 bytes of GUID.  Older drafts put the two halves in the
755          * opposite order, so that the GUID comes first.
756          *
757          * Targets conforming to these obsolete drafts can be
758          * recognized by the I/O Class they report.
759          */
760         if (target->io_class == SRP_REV10_IB_IO_CLASS) {
761                 memcpy(req->priv.initiator_port_id,
762                        &target->sgid.global.interface_id, 8);
763                 memcpy(req->priv.initiator_port_id + 8,
764                        &target->initiator_ext, 8);
765                 memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
766                 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
767         } else {
768                 memcpy(req->priv.initiator_port_id,
769                        &target->initiator_ext, 8);
770                 memcpy(req->priv.initiator_port_id + 8,
771                        &target->sgid.global.interface_id, 8);
772                 memcpy(req->priv.target_port_id,     &target->id_ext, 8);
773                 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
774         }
775
776         /*
777          * Topspin/Cisco SRP targets will reject our login unless we
778          * zero out the first 8 bytes of our initiator port ID and set
779          * the second 8 bytes to the local node GUID.
780          */
781         if (srp_target_is_topspin(target)) {
782                 shost_printk(KERN_DEBUG, target->scsi_host,
783                              PFX "Topspin/Cisco initiator port ID workaround "
784                              "activated for target GUID %016llx\n",
785                              be64_to_cpu(target->ioc_guid));
786                 memset(req->priv.initiator_port_id, 0, 8);
787                 memcpy(req->priv.initiator_port_id + 8,
788                        &target->srp_host->srp_dev->dev->node_guid, 8);
789         }
790
791         status = ib_send_cm_req(ch->cm_id, &req->param);
792
793         kfree(req);
794
795         return status;
796 }
797
798 static bool srp_queue_remove_work(struct srp_target_port *target)
799 {
800         bool changed = false;
801
802         spin_lock_irq(&target->lock);
803         if (target->state != SRP_TARGET_REMOVED) {
804                 target->state = SRP_TARGET_REMOVED;
805                 changed = true;
806         }
807         spin_unlock_irq(&target->lock);
808
809         if (changed)
810                 queue_work(srp_remove_wq, &target->remove_work);
811
812         return changed;
813 }
814
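/**
 * srp_disconnect_target() - disconnect all RDMA channels of a target port
 * @target: SRP target port.
 *
 * Marks every channel as disconnected and sends a CM DREQ on each one.
 */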
815 static void srp_disconnect_target(struct srp_target_port *target)
816 {
817         struct srp_rdma_ch *ch;
818         int i;
819
820         /* XXX should send SRP_I_LOGOUT request */
821
822         for (i = 0; i < target->ch_count; i++) {
823                 ch = &target->ch[i];
824                 ch->connected = false;
825                 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
826                         shost_printk(KERN_DEBUG, target->scsi_host,
827                                      PFX "Sending CM DREQ failed\n");
828                 }
829         }
830 }
831
832 static void srp_free_req_data(struct srp_target_port *target,
833                               struct srp_rdma_ch *ch)
834 {
835         struct srp_device *dev = target->srp_host->srp_dev;
836         struct ib_device *ibdev = dev->dev;
837         struct srp_request *req;
838         int i;
839
840         if (!ch->req_ring)
841                 return;
842
843         for (i = 0; i < target->req_ring_size; ++i) {
844                 req = &ch->req_ring[i];
845                 if (dev->use_fast_reg)
846                         kfree(req->fr_list);
847                 else
848                         kfree(req->fmr_list);
849                 kfree(req->map_page);
850                 if (req->indirect_dma_addr) {
851                         ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
852                                             target->indirect_size,
853                                             DMA_TO_DEVICE);
854                 }
855                 kfree(req->indirect_desc);
856         }
857
858         kfree(ch->req_ring);
859         ch->req_ring = NULL;
860 }
861
862 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
863 {
864         struct srp_target_port *target = ch->target;
865         struct srp_device *srp_dev = target->srp_host->srp_dev;
866         struct ib_device *ibdev = srp_dev->dev;
867         struct srp_request *req;
868         void *mr_list;
869         dma_addr_t dma_addr;
870         int i, ret = -ENOMEM;
871
872         ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
873                                GFP_KERNEL);
874         if (!ch->req_ring)
875                 goto out;
876
877         for (i = 0; i < target->req_ring_size; ++i) {
878                 req = &ch->req_ring[i];
879                 mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
880                                   GFP_KERNEL);
881                 if (!mr_list)
882                         goto out;
883                 if (srp_dev->use_fast_reg)
884                         req->fr_list = mr_list;
885                 else
886                         req->fmr_list = mr_list;
887                 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
888                                         sizeof(void *), GFP_KERNEL);
889                 if (!req->map_page)
890                         goto out;
891                 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
892                 if (!req->indirect_desc)
893                         goto out;
894
895                 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
896                                              target->indirect_size,
897                                              DMA_TO_DEVICE);
898                 if (ib_dma_mapping_error(ibdev, dma_addr))
899                         goto out;
900
901                 req->indirect_dma_addr = dma_addr;
902         }
903         ret = 0;
904
905 out:
906         return ret;
907 }
908
909 /**
910  * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
911  * @shost: SCSI host whose attributes to remove from sysfs.
912  *
913  * Note: Any attributes defined in the host template that did not exist
914  * before invocation of this function will be ignored.
915  */
916 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
917 {
918         struct device_attribute **attr;
919
920         for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
921                 device_remove_file(&shost->shost_dev, *attr);
922 }
923
924 static void srp_remove_target(struct srp_target_port *target)
925 {
926         struct srp_rdma_ch *ch;
927         int i;
928
929         WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
930
931         srp_del_scsi_host_attr(target->scsi_host);
932         srp_rport_get(target->rport);
933         srp_remove_host(target->scsi_host);
934         scsi_remove_host(target->scsi_host);
935         srp_stop_rport_timers(target->rport);
936         srp_disconnect_target(target);
937         for (i = 0; i < target->ch_count; i++) {
938                 ch = &target->ch[i];
939                 srp_free_ch_ib(target, ch);
940         }
941         cancel_work_sync(&target->tl_err_work);
942         srp_rport_put(target->rport);
943         for (i = 0; i < target->ch_count; i++) {
944                 ch = &target->ch[i];
945                 srp_free_req_data(target, ch);
946         }
947         kfree(target->ch);
948         target->ch = NULL;
949
950         spin_lock(&target->srp_host->target_lock);
951         list_del(&target->list);
952         spin_unlock(&target->srp_host->target_lock);
953
954         scsi_host_put(target->scsi_host);
955 }
956
957 static void srp_remove_work(struct work_struct *work)
958 {
959         struct srp_target_port *target =
960                 container_of(work, struct srp_target_port, remove_work);
961
962         WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
963
964         srp_remove_target(target);
965 }
966
967 static void srp_rport_delete(struct srp_rport *rport)
968 {
969         struct srp_target_port *target = rport->lld_data;
970
971         srp_queue_remove_work(target);
972 }
973
974 /**
975  * srp_connected_ch() - number of connected channels
976  * @target: SRP target port.
977  */
978 static int srp_connected_ch(struct srp_target_port *target)
979 {
980         int i, c = 0;
981
982         for (i = 0; i < target->ch_count; i++)
983                 c += target->ch[i].connected;
984
985         return c;
986 }
987
988 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
989 {
990         struct srp_target_port *target = ch->target;
991         int ret;
992
993         WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
994
995         ret = srp_lookup_path(ch);
996         if (ret)
997                 return ret;
998
999         while (1) {
1000                 init_completion(&ch->done);
1001                 ret = srp_send_req(ch, multich);
1002                 if (ret)
1003                         return ret;
1004                 ret = wait_for_completion_interruptible(&ch->done);
1005                 if (ret < 0)
1006                         return ret;
1007
1008                 /*
1009                  * The CM event handling code will set status to
1010                  * SRP_PORT_REDIRECT if we get a port redirect REJ
1011                  * back, or SRP_DLID_REDIRECT if we get a lid/qp
1012                  * redirect REJ back.
1013                  */
1014                 switch (ch->status) {
1015                 case 0:
1016                         ch->connected = true;
1017                         return 0;
1018
1019                 case SRP_PORT_REDIRECT:
1020                         ret = srp_lookup_path(ch);
1021                         if (ret)
1022                                 return ret;
1023                         break;
1024
1025                 case SRP_DLID_REDIRECT:
1026                         break;
1027
1028                 case SRP_STALE_CONN:
1029                         shost_printk(KERN_ERR, target->scsi_host, PFX
1030                                      "giving up on stale connection\n");
1031                         ch->status = -ECONNRESET;
1032                         return ch->status;
1033
1034                 default:
1035                         return ch->status;
1036                 }
1037         }
1038 }
1039
1040 static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
1041 {
1042         struct ib_send_wr *bad_wr;
1043         struct ib_send_wr wr = {
1044                 .opcode             = IB_WR_LOCAL_INV,
1045                 .wr_id              = LOCAL_INV_WR_ID_MASK,
1046                 .next               = NULL,
1047                 .num_sge            = 0,
1048                 .send_flags         = 0,
1049                 .ex.invalidate_rkey = rkey,
1050         };
1051
1052         return ib_post_send(ch->qp, &wr, &bad_wr);
1053 }
1054
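/**
 * srp_unmap_data() - undo the memory registrations and DMA mapping of a request
 * @scmnd: SCSI command the request was issued for.
 * @ch:    SRP RDMA channel the request was submitted on.
 * @req:   SRP request to unmap.
 *
 * Invalidates the FR rkeys or unmaps the FMRs used by @req and DMA unmaps
 * the scatterlist of @scmnd.
 */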
1055 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1056                            struct srp_rdma_ch *ch,
1057                            struct srp_request *req)
1058 {
1059         struct srp_target_port *target = ch->target;
1060         struct srp_device *dev = target->srp_host->srp_dev;
1061         struct ib_device *ibdev = dev->dev;
1062         int i, res;
1063
1064         if (!scsi_sglist(scmnd) ||
1065             (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1066              scmnd->sc_data_direction != DMA_FROM_DEVICE))
1067                 return;
1068
1069         if (dev->use_fast_reg) {
1070                 struct srp_fr_desc **pfr;
1071
1072                 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1073                         res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
1074                         if (res < 0) {
1075                                 shost_printk(KERN_ERR, target->scsi_host, PFX
1076                                   "Queueing INV WR for rkey %#x failed (%d)\n",
1077                                   (*pfr)->mr->rkey, res);
1078                                 queue_work(system_long_wq,
1079                                            &target->tl_err_work);
1080                         }
1081                 }
1082                 if (req->nmdesc)
1083                         srp_fr_pool_put(ch->fr_pool, req->fr_list,
1084                                         req->nmdesc);
1085         } else {
1086                 struct ib_pool_fmr **pfmr;
1087
1088                 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1089                         ib_fmr_pool_unmap(*pfmr);
1090         }
1091
1092         ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1093                         scmnd->sc_data_direction);
1094 }
1095
1096 /**
1097  * srp_claim_req - Take ownership of the scmnd associated with a request.
1098  * @ch: SRP RDMA channel.
1099  * @req: SRP request.
1100  * @sdev: If not NULL, only take ownership for this SCSI device.
1101  * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1102  *         ownership of @req->scmnd if it equals @scmnd.
1103  *
1104  * Return value:
1105  * Either NULL or a pointer to the SCSI command the caller became owner of.
1106  */
1107 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1108                                        struct srp_request *req,
1109                                        struct scsi_device *sdev,
1110                                        struct scsi_cmnd *scmnd)
1111 {
1112         unsigned long flags;
1113
1114         spin_lock_irqsave(&ch->lock, flags);
1115         if (req->scmnd &&
1116             (!sdev || req->scmnd->device == sdev) &&
1117             (!scmnd || req->scmnd == scmnd)) {
1118                 scmnd = req->scmnd;
1119                 req->scmnd = NULL;
1120         } else {
1121                 scmnd = NULL;
1122         }
1123         spin_unlock_irqrestore(&ch->lock, flags);
1124
1125         return scmnd;
1126 }
1127
1128 /**
1129  * srp_free_req() - Unmap data and add request to the free request list.
1130  * @ch:     SRP RDMA channel.
1131  * @req:    Request to be freed.
1132  * @scmnd:  SCSI command associated with @req.
1133  * @req_lim_delta: Amount to be added to @ch->req_lim.
1134  */
1135 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1136                          struct scsi_cmnd *scmnd, s32 req_lim_delta)
1137 {
1138         unsigned long flags;
1139
1140         srp_unmap_data(scmnd, ch, req);
1141
1142         spin_lock_irqsave(&ch->lock, flags);
1143         ch->req_lim += req_lim_delta;
1144         spin_unlock_irqrestore(&ch->lock, flags);
1145 }
1146
1147 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1148                            struct scsi_device *sdev, int result)
1149 {
1150         struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1151
1152         if (scmnd) {
1153                 srp_free_req(ch, req, scmnd, 0);
1154                 scmnd->result = result;
1155                 scmnd->scsi_done(scmnd);
1156         }
1157 }
1158
1159 static void srp_terminate_io(struct srp_rport *rport)
1160 {
1161         struct srp_target_port *target = rport->lld_data;
1162         struct srp_rdma_ch *ch;
1163         struct Scsi_Host *shost = target->scsi_host;
1164         struct scsi_device *sdev;
1165         int i, j;
1166
1167         /*
1168          * Invoking srp_terminate_io() while srp_queuecommand() is running
1169          * is not safe. Hence the warning statement below.
1170          */
1171         shost_for_each_device(sdev, shost)
1172                 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1173
1174         for (i = 0; i < target->ch_count; i++) {
1175                 ch = &target->ch[i];
1176
1177                 for (j = 0; j < target->req_ring_size; ++j) {
1178                         struct srp_request *req = &ch->req_ring[j];
1179
1180                         srp_finish_req(ch, req, NULL,
1181                                        DID_TRANSPORT_FAILFAST << 16);
1182                 }
1183         }
1184 }
1185
1186 /*
1187  * It is up to the caller to ensure that srp_rport_reconnect() calls are
1188  * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1189  * srp_reset_device() or srp_reset_host() calls will occur while this function
1190  * is in progress. One way to achieve this is not to call this function
1191  * directly but to call srp_reconnect_rport() instead, since that function
1192  * serializes calls of this function via rport->mutex and also blocks
1193  * srp_queuecommand() calls before invoking this function.
1194  */
1195 static int srp_rport_reconnect(struct srp_rport *rport)
1196 {
1197         struct srp_target_port *target = rport->lld_data;
1198         struct srp_rdma_ch *ch;
1199         int i, j, ret = 0;
1200         bool multich = false;
1201
1202         srp_disconnect_target(target);
1203
1204         if (target->state == SRP_TARGET_SCANNING)
1205                 return -ENODEV;
1206
1207         /*
1208          * Now get a new local CM ID so that we avoid confusing the target in
1209          * case things are really fouled up. Doing so also ensures that all CM
1210          * callbacks will have finished before a new QP is allocated.
1211          */
1212         for (i = 0; i < target->ch_count; i++) {
1213                 ch = &target->ch[i];
1214                 ret += srp_new_cm_id(ch);
1215         }
1216         for (i = 0; i < target->ch_count; i++) {
1217                 ch = &target->ch[i];
1218                 for (j = 0; j < target->req_ring_size; ++j) {
1219                         struct srp_request *req = &ch->req_ring[j];
1220
1221                         srp_finish_req(ch, req, NULL, DID_RESET << 16);
1222                 }
1223         }
1224         for (i = 0; i < target->ch_count; i++) {
1225                 ch = &target->ch[i];
1226                 /*
1227                  * Whether or not creating a new CM ID succeeded, create a new
1228                  * QP. This guarantees that all completion callback function
1229                  * invocations have finished before request resetting starts.
1230                  */
1231                 ret += srp_create_ch_ib(ch);
1232
1233                 INIT_LIST_HEAD(&ch->free_tx);
1234                 for (j = 0; j < target->queue_size; ++j)
1235                         list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1236         }
1237
1238         target->qp_in_error = false;
1239
1240         for (i = 0; i < target->ch_count; i++) {
1241                 ch = &target->ch[i];
1242                 if (ret)
1243                         break;
1244                 ret = srp_connect_ch(ch, multich);
1245                 multich = true;
1246         }
1247
1248         if (ret == 0)
1249                 shost_printk(KERN_INFO, target->scsi_host,
1250                              PFX "reconnect succeeded\n");
1251
1252         return ret;
1253 }
1254
1255 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1256                          unsigned int dma_len, u32 rkey)
1257 {
1258         struct srp_direct_buf *desc = state->desc;
1259
1260         desc->va = cpu_to_be64(dma_addr);
1261         desc->key = cpu_to_be32(rkey);
1262         desc->len = cpu_to_be32(dma_len);
1263
1264         state->total_len += dma_len;
1265         state->desc++;
1266         state->ndesc++;
1267 }
1268
1269 static int srp_map_finish_fmr(struct srp_map_state *state,
1270                               struct srp_rdma_ch *ch)
1271 {
1272         struct ib_pool_fmr *fmr;
1273         u64 io_addr = 0;
1274
1275         fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1276                                    state->npages, io_addr);
1277         if (IS_ERR(fmr))
1278                 return PTR_ERR(fmr);
1279
1280         *state->next_fmr++ = fmr;
1281         state->nmdesc++;
1282
1283         srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey);
1284
1285         return 0;
1286 }
1287
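/**
 * srp_map_finish_fr() - register the pages gathered in a map state via FR
 * @state: SRP mapping state.
 * @ch:    SRP RDMA channel.
 *
 * Posts a fast registration work request for the pages collected in @state
 * and adds a memory descriptor for the newly registered range.
 */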
1288 static int srp_map_finish_fr(struct srp_map_state *state,
1289                              struct srp_rdma_ch *ch)
1290 {
1291         struct srp_target_port *target = ch->target;
1292         struct srp_device *dev = target->srp_host->srp_dev;
1293         struct ib_send_wr *bad_wr;
1294         struct ib_send_wr wr;
1295         struct srp_fr_desc *desc;
1296         u32 rkey;
1297
1298         desc = srp_fr_pool_get(ch->fr_pool);
1299         if (!desc)
1300                 return -ENOMEM;
1301
1302         rkey = ib_inc_rkey(desc->mr->rkey);
1303         ib_update_fast_reg_key(desc->mr, rkey);
1304
1305         memcpy(desc->frpl->page_list, state->pages,
1306                sizeof(state->pages[0]) * state->npages);
1307
1308         memset(&wr, 0, sizeof(wr));
1309         wr.opcode = IB_WR_FAST_REG_MR;
1310         wr.wr_id = FAST_REG_WR_ID_MASK;
1311         wr.wr.fast_reg.iova_start = state->base_dma_addr;
1312         wr.wr.fast_reg.page_list = desc->frpl;
1313         wr.wr.fast_reg.page_list_len = state->npages;
1314         wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size);
1315         wr.wr.fast_reg.length = state->dma_len;
1316         wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
1317                                        IB_ACCESS_REMOTE_READ |
1318                                        IB_ACCESS_REMOTE_WRITE);
1319         wr.wr.fast_reg.rkey = desc->mr->lkey;
1320
1321         *state->next_fr++ = desc;
1322         state->nmdesc++;
1323
1324         srp_map_desc(state, state->base_dma_addr, state->dma_len,
1325                      desc->mr->rkey);
1326
1327         return ib_post_send(ch->qp, &wr, &bad_wr);
1328 }
1329
1330 static int srp_finish_mapping(struct srp_map_state *state,
1331                               struct srp_rdma_ch *ch)
1332 {
1333         struct srp_target_port *target = ch->target;
1334         int ret = 0;
1335
1336         if (state->npages == 0)
1337                 return 0;
1338
1339         if (state->npages == 1 && !register_always)
1340                 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1341                              target->rkey);
1342         else
1343                 ret = target->srp_host->srp_dev->use_fast_reg ?
1344                         srp_map_finish_fr(state, ch) :
1345                         srp_map_finish_fmr(state, ch);
1346
1347         if (ret == 0) {
1348                 state->npages = 0;
1349                 state->dma_len = 0;
1350         }
1351
1352         return ret;
1353 }
1354
1355 static void srp_map_update_start(struct srp_map_state *state,
1356                                  struct scatterlist *sg, int sg_index,
1357                                  dma_addr_t dma_addr)
1358 {
1359         state->unmapped_sg = sg;
1360         state->unmapped_index = sg_index;
1361         state->unmapped_addr = dma_addr;
1362 }
1363
1364 static int srp_map_sg_entry(struct srp_map_state *state,
1365                             struct srp_rdma_ch *ch,
1366                             struct scatterlist *sg, int sg_index,
1367                             bool use_mr)
1368 {
1369         struct srp_target_port *target = ch->target;
1370         struct srp_device *dev = target->srp_host->srp_dev;
1371         struct ib_device *ibdev = dev->dev;
1372         dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1373         unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1374         unsigned int len;
1375         int ret;
1376
1377         if (!dma_len)
1378                 return 0;
1379
1380         if (!use_mr) {
1381                 /*
1382                  * Once we're in direct map mode for a request, we don't
1383                  * go back to FMR or FR mode, so no need to update anything
1384                  * other than the descriptor.
1385                  */
1386                 srp_map_desc(state, dma_addr, dma_len, target->rkey);
1387                 return 0;
1388         }
1389
1390         /*
1391          * Since not all RDMA HW drivers support non-zero page offsets for
1392          * FMR, if we start at an offset into a page, don't merge into the
1393          * current FMR mapping. Finish it out, and use the kernel's MR for
1394          * this sg entry.
1395          */
1396         if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) ||
1397             dma_len > dev->mr_max_size) {
1398                 ret = srp_finish_mapping(state, ch);
1399                 if (ret)
1400                         return ret;
1401
1402                 srp_map_desc(state, dma_addr, dma_len, target->rkey);
1403                 srp_map_update_start(state, NULL, 0, 0);
1404                 return 0;
1405         }
1406
1407         /*
1408          * If this is the first sg that will be mapped via FMR or via FR, save
1409          * our position. We need to know the first unmapped entry, its index,
1410          * and the first unmapped address within that entry to be able to
1411          * restart mapping after an error.
1412          */
1413         if (!state->unmapped_sg)
1414                 srp_map_update_start(state, sg, sg_index, dma_addr);
1415
1416         while (dma_len) {
1417                 unsigned offset = dma_addr & ~dev->mr_page_mask;
1418                 if (state->npages == dev->max_pages_per_mr || offset != 0) {
1419                         ret = srp_finish_mapping(state, ch);
1420                         if (ret)
1421                                 return ret;
1422
1423                         srp_map_update_start(state, sg, sg_index, dma_addr);
1424                 }
1425
1426                 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1427
1428                 if (!state->npages)
1429                         state->base_dma_addr = dma_addr;
1430                 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1431                 state->dma_len += len;
1432                 dma_addr += len;
1433                 dma_len -= len;
1434         }
1435
1436         /*
1437          * If the last entry of the MR wasn't a full page, then we need to
1438          * close it out and start a new one -- we can only merge at page
1439  * boundaries.
1440          */
1441         ret = 0;
1442         if (len != dev->mr_page_size) {
1443                 ret = srp_finish_mapping(state, ch);
1444                 if (!ret)
1445                         srp_map_update_start(state, NULL, 0, 0);
1446         }
1447         return ret;
1448 }
1449
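/**
 * srp_map_sg() - map a scatterlist into SRP memory descriptors
 * @state: SRP mapping state.
 * @ch:    SRP RDMA channel.
 * @req:   SRP request the scatterlist belongs to.
 * @scat:  Scatterlist to map.
 * @count: Number of DMA-mapped scatterlist entries.
 *
 * Uses FR or FMR memory registration when available; if a registration
 * attempt fails, the remaining entries are mapped as direct descriptors
 * using target->rkey.
 */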
1450 static int srp_map_sg(struct srp_map_state *state, struct srp_rdma_ch *ch,
1451                       struct srp_request *req, struct scatterlist *scat,
1452                       int count)
1453 {
1454         struct srp_target_port *target = ch->target;
1455         struct srp_device *dev = target->srp_host->srp_dev;
1456         struct ib_device *ibdev = dev->dev;
1457         struct scatterlist *sg;
1458         int i;
1459         bool use_mr;
1460
1461         state->desc     = req->indirect_desc;
1462         state->pages    = req->map_page;
1463         if (dev->use_fast_reg) {
1464                 state->next_fr = req->fr_list;
1465                 use_mr = !!ch->fr_pool;
1466         } else {
1467                 state->next_fmr = req->fmr_list;
1468                 use_mr = !!ch->fmr_pool;
1469         }
1470
1471         for_each_sg(scat, sg, count, i) {
1472                 if (srp_map_sg_entry(state, ch, sg, i, use_mr)) {
1473                         /*
1474                          * Memory registration failed, so backtrack to the
1475                          * first unmapped entry and continue on without using
1476                          * memory registration.
1477                          */
1478                         dma_addr_t dma_addr;
1479                         unsigned int dma_len;
1480
1481 backtrack:
1482                         sg = state->unmapped_sg;
1483                         i = state->unmapped_index;
1484
1485                         dma_addr = ib_sg_dma_address(ibdev, sg);
1486                         dma_len = ib_sg_dma_len(ibdev, sg);
1487                         dma_len -= (state->unmapped_addr - dma_addr);
1488                         dma_addr = state->unmapped_addr;
1489                         use_mr = false;
1490                         srp_map_desc(state, dma_addr, dma_len, target->rkey);
1491                 }
1492         }
1493
1494         if (use_mr && srp_finish_mapping(state, ch))
1495                 goto backtrack;
1496
1497         req->nmdesc = state->nmdesc;
1498
1499         return 0;
1500 }
1501
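/*
 * Build the data descriptors for @scmnd inside the SRP_CMD information unit.
 * A direct descriptor is used when a single entry suffices; otherwise an
 * indirect descriptor table is built. Returns the resulting SRP_CMD IU
 * length or a negative error code.
 */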
1502 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1503                         struct srp_request *req)
1504 {
1505         struct srp_target_port *target = ch->target;
1506         struct scatterlist *scat;
1507         struct srp_cmd *cmd = req->cmd->buf;
1508         int len, nents, count;
1509         struct srp_device *dev;
1510         struct ib_device *ibdev;
1511         struct srp_map_state state;
1512         struct srp_indirect_buf *indirect_hdr;
1513         u32 table_len;
1514         u8 fmt;
1515
1516         if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1517                 return sizeof (struct srp_cmd);
1518
1519         if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1520             scmnd->sc_data_direction != DMA_TO_DEVICE) {
1521                 shost_printk(KERN_WARNING, target->scsi_host,
1522                              PFX "Unhandled data direction %d\n",
1523                              scmnd->sc_data_direction);
1524                 return -EINVAL;
1525         }
1526
1527         nents = scsi_sg_count(scmnd);
1528         scat  = scsi_sglist(scmnd);
1529
1530         dev = target->srp_host->srp_dev;
1531         ibdev = dev->dev;
1532
1533         count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1534         if (unlikely(count == 0))
1535                 return -EIO;
1536
1537         fmt = SRP_DATA_DESC_DIRECT;
1538         len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1539
1540         if (count == 1 && !register_always) {
1541                 /*
1542                  * The midlayer only generated a single gather/scatter
1543                  * entry, or DMA mapping coalesced everything to a
1544                  * single entry.  So a direct descriptor along with
1545                  * the DMA MR suffices.
1546                  */
1547                 struct srp_direct_buf *buf = (void *) cmd->add_data;
1548
1549                 buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1550                 buf->key = cpu_to_be32(target->rkey);
1551                 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1552
1553                 req->nmdesc = 0;
1554                 goto map_complete;
1555         }
1556
1557         /*
1558          * We have more than one scatter/gather entry, so build our indirect
1559          * descriptor table, trying to merge as many entries as we can.
1560          */
1561         indirect_hdr = (void *) cmd->add_data;
1562
1563         ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1564                                    target->indirect_size, DMA_TO_DEVICE);
1565
1566         memset(&state, 0, sizeof(state));
1567         srp_map_sg(&state, ch, req, scat, count);
1568
1569         /* We've mapped the request, now pull as much of the indirect
1570          * descriptor table as we can into the command buffer. If this
1571          * target is not using an external indirect table, we are
1572          * guaranteed to fit into the command, as the SCSI layer won't
1573          * give us more S/G entries than we allow.
1574          */
1575         if (state.ndesc == 1) {
1576                 /*
1577                  * Memory registration collapsed the sg-list into one entry,
1578                  * so use a direct descriptor.
1579                  */
1580                 struct srp_direct_buf *buf = (void *) cmd->add_data;
1581
1582                 *buf = req->indirect_desc[0];
1583                 goto map_complete;
1584         }
1585
1586         if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1587                                                 !target->allow_ext_sg)) {
1588                 shost_printk(KERN_ERR, target->scsi_host,
1589                              "Could not fit S/G list into SRP_CMD\n");
1590                 return -EIO;
1591         }
1592
1593         count = min(state.ndesc, target->cmd_sg_cnt);
1594         table_len = state.ndesc * sizeof (struct srp_direct_buf);
1595
1596         fmt = SRP_DATA_DESC_INDIRECT;
1597         len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1598         len += count * sizeof (struct srp_direct_buf);
1599
1600         memcpy(indirect_hdr->desc_list, req->indirect_desc,
1601                count * sizeof (struct srp_direct_buf));
1602
1603         indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1604         indirect_hdr->table_desc.key = cpu_to_be32(target->rkey);
1605         indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1606         indirect_hdr->len = cpu_to_be32(state.total_len);
1607
1608         if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1609                 cmd->data_out_desc_cnt = count;
1610         else
1611                 cmd->data_in_desc_cnt = count;
1612
1613         ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1614                                       DMA_TO_DEVICE);
1615
1616 map_complete:
1617         if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1618                 cmd->buf_fmt = fmt << 4;
1619         else
1620                 cmd->buf_fmt = fmt;
1621
1622         return len;
1623 }
1624
1625 /*
1626  * Return an IU and possible credit to the free pool
1627  */
1628 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1629                           enum srp_iu_type iu_type)
1630 {
1631         unsigned long flags;
1632
1633         spin_lock_irqsave(&ch->lock, flags);
1634         list_add(&iu->list, &ch->free_tx);
1635         if (iu_type != SRP_IU_RSP)
1636                 ++ch->req_lim;
1637         spin_unlock_irqrestore(&ch->lock, flags);
1638 }
1639
1640 /*
1641  * Must be called with ch->lock held to protect req_lim and free_tx.
1642  * If IU is not sent, it must be returned using srp_put_tx_iu().
1643  *
1644  * Note:
1645  * An upper limit for the number of allocated information units for each
1646  * request type is:
1647  * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1648  *   more than Scsi_Host.can_queue requests.
1649  * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1650  * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1651  *   one unanswered SRP request to an initiator.
1652  */
1653 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1654                                       enum srp_iu_type iu_type)
1655 {
1656         struct srp_target_port *target = ch->target;
1657         s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1658         struct srp_iu *iu;
1659
1660         srp_send_completion(ch->send_cq, ch);
1661
1662         if (list_empty(&ch->free_tx))
1663                 return NULL;
1664
1665         /* Initiator responses to target requests do not consume credits */
1666         if (iu_type != SRP_IU_RSP) {
1667                 if (ch->req_lim <= rsv) {
1668                         ++target->zero_req_lim;
1669                         return NULL;
1670                 }
1671
1672                 --ch->req_lim;
1673         }
1674
1675         iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1676         list_del(&iu->list);
1677         return iu;
1678 }
1679
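/* Post a send work request for @iu with payload length @len on @ch. */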
1680 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1681 {
1682         struct srp_target_port *target = ch->target;
1683         struct ib_sge list;
1684         struct ib_send_wr wr, *bad_wr;
1685
1686         list.addr   = iu->dma;
1687         list.length = len;
1688         list.lkey   = target->lkey;
1689
1690         wr.next       = NULL;
1691         wr.wr_id      = (uintptr_t) iu;
1692         wr.sg_list    = &list;
1693         wr.num_sge    = 1;
1694         wr.opcode     = IB_WR_SEND;
1695         wr.send_flags = IB_SEND_SIGNALED;
1696
1697         return ib_post_send(ch->qp, &wr, &bad_wr);
1698 }
1699
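/* Post (or repost) the receive buffer @iu on the receive queue of @ch. */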
1700 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1701 {
1702         struct srp_target_port *target = ch->target;
1703         struct ib_recv_wr wr, *bad_wr;
1704         struct ib_sge list;
1705
1706         list.addr   = iu->dma;
1707         list.length = iu->size;
1708         list.lkey   = target->lkey;
1709
1710         wr.next     = NULL;
1711         wr.wr_id    = (uintptr_t) iu;
1712         wr.sg_list  = &list;
1713         wr.num_sge  = 1;
1714
1715         return ib_post_recv(ch->qp, &wr, &bad_wr);
1716 }
1717
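/*
 * Process an SRP_RSP information unit: either complete a pending task
 * management request or finish the SCSI command identified by the response
 * tag, copying sense data and residual counts if present.
 */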
1718 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1719 {
1720         struct srp_target_port *target = ch->target;
1721         struct srp_request *req;
1722         struct scsi_cmnd *scmnd;
1723         unsigned long flags;
1724
1725         if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1726                 spin_lock_irqsave(&ch->lock, flags);
1727                 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1728                 spin_unlock_irqrestore(&ch->lock, flags);
1729
1730                 ch->tsk_mgmt_status = -1;
1731                 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1732                         ch->tsk_mgmt_status = rsp->data[3];
1733                 complete(&ch->tsk_mgmt_done);
1734         } else {
1735                 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1736                 if (scmnd) {
1737                         req = (void *)scmnd->host_scribble;
1738                         scmnd = srp_claim_req(ch, req, NULL, scmnd);
1739                 }
1740                 if (!scmnd) {
1741                         shost_printk(KERN_ERR, target->scsi_host,
1742                                      "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1743                                      rsp->tag, ch - target->ch, ch->qp->qp_num);
1744
1745                         spin_lock_irqsave(&ch->lock, flags);
1746                         ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1747                         spin_unlock_irqrestore(&ch->lock, flags);
1748
1749                         return;
1750                 }
1751                 scmnd->result = rsp->status;
1752
1753                 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1754                         memcpy(scmnd->sense_buffer, rsp->data +
1755                                be32_to_cpu(rsp->resp_data_len),
1756                                min_t(int, be32_to_cpu(rsp->sense_data_len),
1757                                      SCSI_SENSE_BUFFERSIZE));
1758                 }
1759
1760                 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1761                         scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1762                 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1763                         scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1764                 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1765                         scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1766                 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1767                         scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1768
1769                 srp_free_req(ch, req, scmnd,
1770                              be32_to_cpu(rsp->req_lim_delta));
1771
1772                 scmnd->host_scribble = NULL;
1773                 scmnd->scsi_done(scmnd);
1774         }
1775 }
1776
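/*
 * Send a response IU of @len bytes back to the target and add @req_delta to
 * the request limit. Returns zero on success and a nonzero value if no IU
 * was available or posting the send failed.
 */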
1777 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1778                                void *rsp, int len)
1779 {
1780         struct srp_target_port *target = ch->target;
1781         struct ib_device *dev = target->srp_host->srp_dev->dev;
1782         unsigned long flags;
1783         struct srp_iu *iu;
1784         int err;
1785
1786         spin_lock_irqsave(&ch->lock, flags);
1787         ch->req_lim += req_delta;
1788         iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1789         spin_unlock_irqrestore(&ch->lock, flags);
1790
1791         if (!iu) {
1792                 shost_printk(KERN_ERR, target->scsi_host, PFX
1793                              "no IU available to send response\n");
1794                 return 1;
1795         }
1796
1797         ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1798         memcpy(iu->buf, rsp, len);
1799         ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1800
1801         err = srp_post_send(ch, iu, len);
1802         if (err) {
1803                 shost_printk(KERN_ERR, target->scsi_host, PFX
1804                              "unable to post response: %d\n", err);
1805                 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1806         }
1807
1808         return err;
1809 }
1810
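/* Answer an SRP_CRED_REQ from the target with an SRP_CRED_RSP. */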
1811 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1812                                  struct srp_cred_req *req)
1813 {
1814         struct srp_cred_rsp rsp = {
1815                 .opcode = SRP_CRED_RSP,
1816                 .tag = req->tag,
1817         };
1818         s32 delta = be32_to_cpu(req->req_lim_delta);
1819
1820         if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1821                 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1822                              "problems processing SRP_CRED_REQ\n");
1823 }
1824
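/* Acknowledge an SRP_AER_REQ (asynchronous event) with an SRP_AER_RSP. */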
1825 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1826                                 struct srp_aer_req *req)
1827 {
1828         struct srp_target_port *target = ch->target;
1829         struct srp_aer_rsp rsp = {
1830                 .opcode = SRP_AER_RSP,
1831                 .tag = req->tag,
1832         };
1833         s32 delta = be32_to_cpu(req->req_lim_delta);
1834
1835         shost_printk(KERN_ERR, target->scsi_host, PFX
1836                      "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
1837
1838         if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1839                 shost_printk(KERN_ERR, target->scsi_host, PFX
1840                              "problems processing SRP_AER_REQ\n");
1841 }
1842
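/*
 * Handle a receive completion: dispatch the received information unit by
 * opcode and repost the receive buffer afterwards.
 */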
1843 static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
1844 {
1845         struct srp_target_port *target = ch->target;
1846         struct ib_device *dev = target->srp_host->srp_dev->dev;
1847         struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
1848         int res;
1849         u8 opcode;
1850
1851         ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
1852                                    DMA_FROM_DEVICE);
1853
1854         opcode = *(u8 *) iu->buf;
1855
1856         if (0) {
1857                 shost_printk(KERN_ERR, target->scsi_host,
1858                              PFX "recv completion, opcode 0x%02x\n", opcode);
1859                 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
1860                                iu->buf, wc->byte_len, true);
1861         }
1862
1863         switch (opcode) {
1864         case SRP_RSP:
1865                 srp_process_rsp(ch, iu->buf);
1866                 break;
1867
1868         case SRP_CRED_REQ:
1869                 srp_process_cred_req(ch, iu->buf);
1870                 break;
1871
1872         case SRP_AER_REQ:
1873                 srp_process_aer_req(ch, iu->buf);
1874                 break;
1875
1876         case SRP_T_LOGOUT:
1877                 /* XXX Handle target logout */
1878                 shost_printk(KERN_WARNING, target->scsi_host,
1879                              PFX "Got target logout request\n");
1880                 break;
1881
1882         default:
1883                 shost_printk(KERN_WARNING, target->scsi_host,
1884                              PFX "Unhandled SRP opcode 0x%02x\n", opcode);
1885                 break;
1886         }
1887
1888         ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
1889                                       DMA_FROM_DEVICE);
1890
1891         res = srp_post_recv(ch, iu);
1892         if (res != 0)
1893                 shost_printk(KERN_ERR, target->scsi_host,
1894                              PFX "Recv failed with error code %d\n", res);
1895 }
1896
1897 /**
1898  * srp_tl_err_work() - handle a transport layer error
1899  * @work: Work structure embedded in an SRP target port.
1900  *
1901  * Note: This function may get invoked before the rport has been created,
1902  * hence the target->rport test.
1903  */
1904 static void srp_tl_err_work(struct work_struct *work)
1905 {
1906         struct srp_target_port *target;
1907
1908         target = container_of(work, struct srp_target_port, tl_err_work);
1909         if (target->rport)
1910                 srp_start_tl_fail_timers(target->rport);
1911 }
1912
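/*
 * Handle a failed work completion: log the failure, schedule the transport
 * layer error work and mark the QP as being in the error state. The
 * SRP_LAST_WR_ID completion posted while draining a QP only wakes up the
 * waiter.
 */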
1913 static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
1914                               bool send_err, struct srp_rdma_ch *ch)
1915 {
1916         struct srp_target_port *target = ch->target;
1917
1918         if (wr_id == SRP_LAST_WR_ID) {
1919                 complete(&ch->done);
1920                 return;
1921         }
1922
1923         if (ch->connected && !target->qp_in_error) {
1924                 if (wr_id & LOCAL_INV_WR_ID_MASK) {
1925                         shost_printk(KERN_ERR, target->scsi_host, PFX
1926                                      "LOCAL_INV failed with status %s (%d)\n",
1927                                      ib_wc_status_msg(wc_status), wc_status);
1928                 } else if (wr_id & FAST_REG_WR_ID_MASK) {
1929                         shost_printk(KERN_ERR, target->scsi_host, PFX
1930                                      "FAST_REG_MR failed status %s (%d)\n",
1931                                      ib_wc_status_msg(wc_status), wc_status);
1932                 } else {
1933                         shost_printk(KERN_ERR, target->scsi_host,
1934                                      PFX "failed %s status %s (%d) for iu %p\n",
1935                                      send_err ? "send" : "receive",
1936                                      ib_wc_status_msg(wc_status), wc_status,
1937                                      (void *)(uintptr_t)wr_id);
1938                 }
1939                 queue_work(system_long_wq, &target->tl_err_work);
1940         }
1941         target->qp_in_error = true;
1942 }
1943
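/* Receive completion handler: poll the CQ and process each received IU. */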
1944 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
1945 {
1946         struct srp_rdma_ch *ch = ch_ptr;
1947         struct ib_wc wc;
1948
1949         ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1950         while (ib_poll_cq(cq, 1, &wc) > 0) {
1951                 if (likely(wc.status == IB_WC_SUCCESS)) {
1952                         srp_handle_recv(ch, &wc);
1953                 } else {
1954                         srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
1955                 }
1956         }
1957 }
1958
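/* Send completion handler: return completed send IUs to the free list. */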
1959 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
1960 {
1961         struct srp_rdma_ch *ch = ch_ptr;
1962         struct ib_wc wc;
1963         struct srp_iu *iu;
1964
1965         while (ib_poll_cq(cq, 1, &wc) > 0) {
1966                 if (likely(wc.status == IB_WC_SUCCESS)) {
1967                         iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
1968                         list_add(&iu->list, &ch->free_tx);
1969                 } else {
1970                         srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
1971                 }
1972         }
1973 }
1974
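/*
 * SCSI queuecommand callback: build an SRP_CMD information unit for @scmnd,
 * map its data buffer and post the IU on the RDMA channel selected via the
 * block layer tag.
 */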
1975 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1976 {
1977         struct srp_target_port *target = host_to_target(shost);
1978         struct srp_rport *rport = target->rport;
1979         struct srp_rdma_ch *ch;
1980         struct srp_request *req;
1981         struct srp_iu *iu;
1982         struct srp_cmd *cmd;
1983         struct ib_device *dev;
1984         unsigned long flags;
1985         u32 tag;
1986         u16 idx;
1987         int len, ret;
1988         const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
1989
1990         /*
1991          * The SCSI EH thread is the only context from which srp_queuecommand()
1992          * can get invoked for blocked devices (SDEV_BLOCK /
1993          * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
1994          * locking the rport mutex if invoked from inside the SCSI EH.
1995          */
1996         if (in_scsi_eh)
1997                 mutex_lock(&rport->mutex);
1998
1999         scmnd->result = srp_chkready(target->rport);
2000         if (unlikely(scmnd->result))
2001                 goto err;
2002
2003         WARN_ON_ONCE(scmnd->request->tag < 0);
2004         tag = blk_mq_unique_tag(scmnd->request);
2005         ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2006         idx = blk_mq_unique_tag_to_tag(tag);
2007         WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2008                   dev_name(&shost->shost_gendev), tag, idx,
2009                   target->req_ring_size);
2010
2011         spin_lock_irqsave(&ch->lock, flags);
2012         iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2013         spin_unlock_irqrestore(&ch->lock, flags);
2014
2015         if (!iu)
2016                 goto err;
2017
2018         req = &ch->req_ring[idx];
2019         dev = target->srp_host->srp_dev->dev;
2020         ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2021                                    DMA_TO_DEVICE);
2022
2023         scmnd->host_scribble = (void *) req;
2024
2025         cmd = iu->buf;
2026         memset(cmd, 0, sizeof *cmd);
2027
2028         cmd->opcode = SRP_CMD;
2029         int_to_scsilun(scmnd->device->lun, &cmd->lun);
2030         cmd->tag    = tag;
2031         memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2032
2033         req->scmnd    = scmnd;
2034         req->cmd      = iu;
2035
2036         len = srp_map_data(scmnd, ch, req);
2037         if (len < 0) {
2038                 shost_printk(KERN_ERR, target->scsi_host,
2039                              PFX "Failed to map data (%d)\n", len);
2040                 /*
2041                  * If we ran out of memory descriptors (-ENOMEM) because an
2042                  * application is queuing many requests with more than
2043                  * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2044                  * to reduce queue depth temporarily.
2045                  */
2046                 scmnd->result = len == -ENOMEM ?
2047                         DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2048                 goto err_iu;
2049         }
2050
2051         ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2052                                       DMA_TO_DEVICE);
2053
2054         if (srp_post_send(ch, iu, len)) {
2055                 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2056                 goto err_unmap;
2057         }
2058
2059         ret = 0;
2060
2061 unlock_rport:
2062         if (in_scsi_eh)
2063                 mutex_unlock(&rport->mutex);
2064
2065         return ret;
2066
2067 err_unmap:
2068         srp_unmap_data(scmnd, ch, req);
2069
2070 err_iu:
2071         srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2072
2073         /*
2074          * Make sure that the loops that iterate over the request ring
2075          * cannot encounter a dangling SCSI command pointer.
2076          */
2077         req->scmnd = NULL;
2078
2079 err:
2080         if (scmnd->result) {
2081                 scmnd->scsi_done(scmnd);
2082                 ret = 0;
2083         } else {
2084                 ret = SCSI_MLQUEUE_HOST_BUSY;
2085         }
2086
2087         goto unlock_rport;
2088 }
2089
2090 /*
2091  * Note: the resources allocated in this function are freed in
2092  * srp_free_ch_ib().
2093  */
2094 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2095 {
2096         struct srp_target_port *target = ch->target;
2097         int i;
2098
2099         ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2100                               GFP_KERNEL);
2101         if (!ch->rx_ring)
2102                 goto err_no_ring;
2103         ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2104                               GFP_KERNEL);
2105         if (!ch->tx_ring)
2106                 goto err_no_ring;
2107
2108         for (i = 0; i < target->queue_size; ++i) {
2109                 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2110                                               ch->max_ti_iu_len,
2111                                               GFP_KERNEL, DMA_FROM_DEVICE);
2112                 if (!ch->rx_ring[i])
2113                         goto err;
2114         }
2115
2116         for (i = 0; i < target->queue_size; ++i) {
2117                 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2118                                               target->max_iu_len,
2119                                               GFP_KERNEL, DMA_TO_DEVICE);
2120                 if (!ch->tx_ring[i])
2121                         goto err;
2122
2123                 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2124         }
2125
2126         return 0;
2127
2128 err:
2129         for (i = 0; i < target->queue_size; ++i) {
2130                 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2131                 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2132         }
2133
2134
2135 err_no_ring:
2136         kfree(ch->tx_ring);
2137         ch->tx_ring = NULL;
2138         kfree(ch->rx_ring);
2139         ch->rx_ring = NULL;
2140
2141         return -ENOMEM;
2142 }
2143
2144 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2145 {
2146         uint64_t T_tr_ns, max_compl_time_ms;
2147         uint32_t rq_tmo_jiffies;
2148
2149         /*
2150          * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2151          * table 91), both the QP timeout and the retry count have to be set
2152          * for RC QP's during the RTR to RTS transition.
2153          */
2154         WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2155                      (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2156
2157         /*
2158          * Set target->rq_tmo_jiffies to one second more than the largest time
2159          * it can take before an error completion is generated. See also
2160          * C9-140..142 in the IBTA spec for more information about how to
2161          * convert the QP Local ACK Timeout value to nanoseconds.
2162          */
2163         T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2164         max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2165         do_div(max_compl_time_ms, NSEC_PER_MSEC);
2166         rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2167
2168         return rq_tmo_jiffies;
2169 }
2170
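/*
 * Handle a connection manager REP: process the SRP login response, allocate
 * the IU rings if necessary, transition the QP to RTR and RTS, post the
 * receive buffers and send an RTU.
 */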
2171 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2172                                struct srp_login_rsp *lrsp,
2173                                struct srp_rdma_ch *ch)
2174 {
2175         struct srp_target_port *target = ch->target;
2176         struct ib_qp_attr *qp_attr = NULL;
2177         int attr_mask = 0;
2178         int ret;
2179         int i;
2180
2181         if (lrsp->opcode == SRP_LOGIN_RSP) {
2182                 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2183                 ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2184
2185                 /*
2186                  * Reserve credits for task management so we don't
2187                  * bounce requests back to the SCSI mid-layer.
2188                  */
2189                 target->scsi_host->can_queue
2190                         = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2191                               target->scsi_host->can_queue);
2192                 target->scsi_host->cmd_per_lun
2193                         = min_t(int, target->scsi_host->can_queue,
2194                                 target->scsi_host->cmd_per_lun);
2195         } else {
2196                 shost_printk(KERN_WARNING, target->scsi_host,
2197                              PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2198                 ret = -ECONNRESET;
2199                 goto error;
2200         }
2201
2202         if (!ch->rx_ring) {
2203                 ret = srp_alloc_iu_bufs(ch);
2204                 if (ret)
2205                         goto error;
2206         }
2207
2208         ret = -ENOMEM;
2209         qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2210         if (!qp_attr)
2211                 goto error;
2212
2213         qp_attr->qp_state = IB_QPS_RTR;
2214         ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2215         if (ret)
2216                 goto error_free;
2217
2218         ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2219         if (ret)
2220                 goto error_free;
2221
2222         for (i = 0; i < target->queue_size; i++) {
2223                 struct srp_iu *iu = ch->rx_ring[i];
2224
2225                 ret = srp_post_recv(ch, iu);
2226                 if (ret)
2227                         goto error_free;
2228         }
2229
2230         qp_attr->qp_state = IB_QPS_RTS;
2231         ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2232         if (ret)
2233                 goto error_free;
2234
2235         target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2236
2237         ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2238         if (ret)
2239                 goto error_free;
2240
2241         ret = ib_send_cm_rtu(cm_id, NULL, 0);
2242
2243 error_free:
2244         kfree(qp_attr);
2245
2246 error:
2247         ch->status = ret;
2248 }
2249
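/*
 * Handle a connection manager REJ by translating the rejection reason into
 * a channel status code (redirect, stale connection or -ECONNRESET).
 */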
2250 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2251                                struct ib_cm_event *event,
2252                                struct srp_rdma_ch *ch)
2253 {
2254         struct srp_target_port *target = ch->target;
2255         struct Scsi_Host *shost = target->scsi_host;
2256         struct ib_class_port_info *cpi;
2257         int opcode;
2258
2259         switch (event->param.rej_rcvd.reason) {
2260         case IB_CM_REJ_PORT_CM_REDIRECT:
2261                 cpi = event->param.rej_rcvd.ari;
2262                 ch->path.dlid = cpi->redirect_lid;
2263                 ch->path.pkey = cpi->redirect_pkey;
2264                 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2265                 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2266
2267                 ch->status = ch->path.dlid ?
2268                         SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2269                 break;
2270
2271         case IB_CM_REJ_PORT_REDIRECT:
2272                 if (srp_target_is_topspin(target)) {
2273                         /*
2274                          * Topspin/Cisco SRP gateways incorrectly send
2275                          * reject reason code 25 when they mean 24
2276                          * (port redirect).
2277                          */
2278                         memcpy(ch->path.dgid.raw,
2279                                event->param.rej_rcvd.ari, 16);
2280
2281                         shost_printk(KERN_DEBUG, shost,
2282                                      PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2283                                      be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2284                                      be64_to_cpu(ch->path.dgid.global.interface_id));
2285
2286                         ch->status = SRP_PORT_REDIRECT;
2287                 } else {
2288                         shost_printk(KERN_WARNING, shost,
2289                                      "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2290                         ch->status = -ECONNRESET;
2291                 }
2292                 break;
2293
2294         case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2295                 shost_printk(KERN_WARNING, shost,
2296                             "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2297                 ch->status = -ECONNRESET;
2298                 break;
2299
2300         case IB_CM_REJ_CONSUMER_DEFINED:
2301                 opcode = *(u8 *) event->private_data;
2302                 if (opcode == SRP_LOGIN_REJ) {
2303                         struct srp_login_rej *rej = event->private_data;
2304                         u32 reason = be32_to_cpu(rej->reason);
2305
2306                         if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2307                                 shost_printk(KERN_WARNING, shost,
2308                                              PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2309                         else
2310                                 shost_printk(KERN_WARNING, shost, PFX
2311                                              "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2312                                              target->sgid.raw,
2313                                              target->orig_dgid.raw, reason);
2314                 } else
2315                         shost_printk(KERN_WARNING, shost,
2316                                      "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2317                                      " opcode 0x%02x\n", opcode);
2318                 ch->status = -ECONNRESET;
2319                 break;
2320
2321         case IB_CM_REJ_STALE_CONN:
2322                 shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2323                 ch->status = SRP_STALE_CONN;
2324                 break;
2325
2326         default:
2327                 shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2328                              event->param.rej_rcvd.reason);
2329                 ch->status = -ECONNRESET;
2330         }
2331 }
2332
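/* InfiniBand CM event handler for SRP channel connections. */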
2333 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2334 {
2335         struct srp_rdma_ch *ch = cm_id->context;
2336         struct srp_target_port *target = ch->target;
2337         int comp = 0;
2338
2339         switch (event->event) {
2340         case IB_CM_REQ_ERROR:
2341                 shost_printk(KERN_DEBUG, target->scsi_host,
2342                              PFX "Sending CM REQ failed\n");
2343                 comp = 1;
2344                 ch->status = -ECONNRESET;
2345                 break;
2346
2347         case IB_CM_REP_RECEIVED:
2348                 comp = 1;
2349                 srp_cm_rep_handler(cm_id, event->private_data, ch);
2350                 break;
2351
2352         case IB_CM_REJ_RECEIVED:
2353                 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2354                 comp = 1;
2355
2356                 srp_cm_rej_handler(cm_id, event, ch);
2357                 break;
2358
2359         case IB_CM_DREQ_RECEIVED:
2360                 shost_printk(KERN_WARNING, target->scsi_host,
2361                              PFX "DREQ received - connection closed\n");
2362                 ch->connected = false;
2363                 if (ib_send_cm_drep(cm_id, NULL, 0))
2364                         shost_printk(KERN_ERR, target->scsi_host,
2365                                      PFX "Sending CM DREP failed\n");
2366                 queue_work(system_long_wq, &target->tl_err_work);
2367                 break;
2368
2369         case IB_CM_TIMEWAIT_EXIT:
2370                 shost_printk(KERN_ERR, target->scsi_host,
2371                              PFX "connection closed\n");
2372                 comp = 1;
2373
2374                 ch->status = 0;
2375                 break;
2376
2377         case IB_CM_MRA_RECEIVED:
2378         case IB_CM_DREQ_ERROR:
2379         case IB_CM_DREP_RECEIVED:
2380                 break;
2381
2382         default:
2383                 shost_printk(KERN_WARNING, target->scsi_host,
2384                              PFX "Unhandled CM event %d\n", event->event);
2385                 break;
2386         }
2387
2388         if (comp)
2389                 complete(&ch->done);
2390
2391         return 0;
2392 }
2393
2394 /**
2395  * srp_change_queue_depth - set the device queue depth
2396  * @sdev: scsi device struct
2397  * @qdepth: requested queue depth
2398  *
2399  * Returns queue depth.
2400  */
2401 static int
2402 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2403 {
2404         if (!sdev->tagged_supported)
2405                 qdepth = 1;
2406         return scsi_change_queue_depth(sdev, qdepth);
2407 }
2408
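/*
 * Send an SRP task management request (@func) for @req_tag / @lun and wait
 * up to SRP_ABORT_TIMEOUT_MS for the response. Returns 0 on success and -1
 * on failure or timeout.
 */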
2409 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2410                              u8 func)
2411 {
2412         struct srp_target_port *target = ch->target;
2413         struct srp_rport *rport = target->rport;
2414         struct ib_device *dev = target->srp_host->srp_dev->dev;
2415         struct srp_iu *iu;
2416         struct srp_tsk_mgmt *tsk_mgmt;
2417
2418         if (!ch->connected || target->qp_in_error)
2419                 return -1;
2420
2421         init_completion(&ch->tsk_mgmt_done);
2422
2423         /*
2424          * Lock the rport mutex to prevent srp_create_ch_ib() from being
2425          * invoked while a task management function is being sent.
2426          */
2427         mutex_lock(&rport->mutex);
2428         spin_lock_irq(&ch->lock);
2429         iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2430         spin_unlock_irq(&ch->lock);
2431
2432         if (!iu) {
2433                 mutex_unlock(&rport->mutex);
2434
2435                 return -1;
2436         }
2437
2438         ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2439                                    DMA_TO_DEVICE);
2440         tsk_mgmt = iu->buf;
2441         memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2442
2443         tsk_mgmt->opcode        = SRP_TSK_MGMT;
2444         int_to_scsilun(lun, &tsk_mgmt->lun);
2445         tsk_mgmt->tag           = req_tag | SRP_TAG_TSK_MGMT;
2446         tsk_mgmt->tsk_mgmt_func = func;
2447         tsk_mgmt->task_tag      = req_tag;
2448
2449         ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2450                                       DMA_TO_DEVICE);
2451         if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2452                 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2453                 mutex_unlock(&rport->mutex);
2454
2455                 return -1;
2456         }
2457         mutex_unlock(&rport->mutex);
2458
2459         if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2460                                          msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2461                 return -1;
2462
2463         return 0;
2464 }
2465
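/* SCSI abort handler: send SRP_TSK_ABORT_TASK and complete the command. */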
2466 static int srp_abort(struct scsi_cmnd *scmnd)
2467 {
2468         struct srp_target_port *target = host_to_target(scmnd->device->host);
2469         struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2470         u32 tag;
2471         u16 ch_idx;
2472         struct srp_rdma_ch *ch;
2473         int ret;
2474
2475         shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2476
2477         if (!req)
2478                 return SUCCESS;
2479         tag = blk_mq_unique_tag(scmnd->request);
2480         ch_idx = blk_mq_unique_tag_to_hwq(tag);
2481         if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2482                 return SUCCESS;
2483         ch = &target->ch[ch_idx];
2484         if (!srp_claim_req(ch, req, NULL, scmnd))
2485                 return SUCCESS;
2486         shost_printk(KERN_ERR, target->scsi_host,
2487                      "Sending SRP abort for tag %#x\n", tag);
2488         if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2489                               SRP_TSK_ABORT_TASK) == 0)
2490                 ret = SUCCESS;
2491         else if (target->rport->state == SRP_RPORT_LOST)
2492                 ret = FAST_IO_FAIL;
2493         else
2494                 ret = FAILED;
2495         srp_free_req(ch, req, scmnd, 0);
2496         scmnd->result = DID_ABORT << 16;
2497         scmnd->scsi_done(scmnd);
2498
2499         return ret;
2500 }
2501
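/*
 * SCSI device reset handler: send an SRP LUN reset and finish all
 * outstanding requests for the affected device on every channel.
 */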
2502 static int srp_reset_device(struct scsi_cmnd *scmnd)
2503 {
2504         struct srp_target_port *target = host_to_target(scmnd->device->host);
2505         struct srp_rdma_ch *ch;
2506         int i, j;
2507
2508         shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2509
2510         ch = &target->ch[0];
2511         if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2512                               SRP_TSK_LUN_RESET))
2513                 return FAILED;
2514         if (ch->tsk_mgmt_status)
2515                 return FAILED;
2516
2517         for (i = 0; i < target->ch_count; i++) {
2518                 ch = &target->ch[i];
2519                 for (j = 0; j < target->req_ring_size; ++j) {
2520                         struct srp_request *req = &ch->req_ring[j];
2521
2522                         srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2523                 }
2524         }
2525
2526         return SUCCESS;
2527 }
2528
2529 static int srp_reset_host(struct scsi_cmnd *scmnd)
2530 {
2531         struct srp_target_port *target = host_to_target(scmnd->device->host);
2532
2533         shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2534
2535         return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2536 }
2537
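/*
 * For disks, make the block layer request timeout at least as large as the
 * time needed for the HCA to report an error completion (rq_tmo_jiffies).
 */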
2538 static int srp_slave_configure(struct scsi_device *sdev)
2539 {
2540         struct Scsi_Host *shost = sdev->host;
2541         struct srp_target_port *target = host_to_target(shost);
2542         struct request_queue *q = sdev->request_queue;
2543         unsigned long timeout;
2544
2545         if (sdev->type == TYPE_DISK) {
2546                 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2547                 blk_queue_rq_timeout(q, timeout);
2548         }
2549
2550         return 0;
2551 }
2552
2553 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2554                            char *buf)
2555 {
2556         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2557
2558         return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2559 }
2560
2561 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2562                              char *buf)
2563 {
2564         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2565
2566         return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2567 }
2568
2569 static ssize_t show_service_id(struct device *dev,
2570                                struct device_attribute *attr, char *buf)
2571 {
2572         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2573
2574         return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2575 }
2576
2577 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2578                          char *buf)
2579 {
2580         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2581
2582         return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2583 }
2584
2585 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2586                          char *buf)
2587 {
2588         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2589
2590         return sprintf(buf, "%pI6\n", target->sgid.raw);
2591 }
2592
2593 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2594                          char *buf)
2595 {
2596         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2597         struct srp_rdma_ch *ch = &target->ch[0];
2598
2599         return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2600 }
2601
2602 static ssize_t show_orig_dgid(struct device *dev,
2603                               struct device_attribute *attr, char *buf)
2604 {
2605         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2606
2607         return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2608 }
2609
2610 static ssize_t show_req_lim(struct device *dev,
2611                             struct device_attribute *attr, char *buf)
2612 {
2613         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2614         struct srp_rdma_ch *ch;
2615         int i, req_lim = INT_MAX;
2616
2617         for (i = 0; i < target->ch_count; i++) {
2618                 ch = &target->ch[i];
2619                 req_lim = min(req_lim, ch->req_lim);
2620         }
2621         return sprintf(buf, "%d\n", req_lim);
2622 }
2623
2624 static ssize_t show_zero_req_lim(struct device *dev,
2625                                  struct device_attribute *attr, char *buf)
2626 {
2627         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2628
2629         return sprintf(buf, "%d\n", target->zero_req_lim);
2630 }
2631
2632 static ssize_t show_local_ib_port(struct device *dev,
2633                                   struct device_attribute *attr, char *buf)
2634 {
2635         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2636
2637         return sprintf(buf, "%d\n", target->srp_host->port);
2638 }
2639
2640 static ssize_t show_local_ib_device(struct device *dev,
2641                                     struct device_attribute *attr, char *buf)
2642 {
2643         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2644
2645         return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2646 }
2647
2648 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2649                              char *buf)
2650 {
2651         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2652
2653         return sprintf(buf, "%d\n", target->ch_count);
2654 }
2655
2656 static ssize_t show_comp_vector(struct device *dev,
2657                                 struct device_attribute *attr, char *buf)
2658 {
2659         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2660
2661         return sprintf(buf, "%d\n", target->comp_vector);
2662 }
2663
2664 static ssize_t show_tl_retry_count(struct device *dev,
2665                                    struct device_attribute *attr, char *buf)
2666 {
2667         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2668
2669         return sprintf(buf, "%d\n", target->tl_retry_count);
2670 }
2671
2672 static ssize_t show_cmd_sg_entries(struct device *dev,
2673                                    struct device_attribute *attr, char *buf)
2674 {
2675         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2676
2677         return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2678 }
2679
2680 static ssize_t show_allow_ext_sg(struct device *dev,
2681                                  struct device_attribute *attr, char *buf)
2682 {
2683         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2684
2685         return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2686 }
2687
2688 static DEVICE_ATTR(id_ext,          S_IRUGO, show_id_ext,          NULL);
2689 static DEVICE_ATTR(ioc_guid,        S_IRUGO, show_ioc_guid,        NULL);
2690 static DEVICE_ATTR(service_id,      S_IRUGO, show_service_id,      NULL);
2691 static DEVICE_ATTR(pkey,            S_IRUGO, show_pkey,            NULL);
2692 static DEVICE_ATTR(sgid,            S_IRUGO, show_sgid,            NULL);
2693 static DEVICE_ATTR(dgid,            S_IRUGO, show_dgid,            NULL);
2694 static DEVICE_ATTR(orig_dgid,       S_IRUGO, show_orig_dgid,       NULL);
2695 static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
2696 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,    NULL);
2697 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
2698 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2699 static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
2700 static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
2701 static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
2702 static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
2703 static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
2704
2705 static struct device_attribute *srp_host_attrs[] = {
2706         &dev_attr_id_ext,
2707         &dev_attr_ioc_guid,
2708         &dev_attr_service_id,
2709         &dev_attr_pkey,
2710         &dev_attr_sgid,
2711         &dev_attr_dgid,
2712         &dev_attr_orig_dgid,
2713         &dev_attr_req_lim,
2714         &dev_attr_zero_req_lim,
2715         &dev_attr_local_ib_port,
2716         &dev_attr_local_ib_device,
2717         &dev_attr_ch_count,
2718         &dev_attr_comp_vector,
2719         &dev_attr_tl_retry_count,
2720         &dev_attr_cmd_sg_entries,
2721         &dev_attr_allow_ext_sg,
2722         NULL
2723 };
2724
2725 static struct scsi_host_template srp_template = {
2726         .module                         = THIS_MODULE,
2727         .name                           = "InfiniBand SRP initiator",
2728         .proc_name                      = DRV_NAME,
2729         .slave_configure                = srp_slave_configure,
2730         .info                           = srp_target_info,
2731         .queuecommand                   = srp_queuecommand,
2732         .change_queue_depth             = srp_change_queue_depth,
2733         .eh_abort_handler               = srp_abort,
2734         .eh_device_reset_handler        = srp_reset_device,
2735         .eh_host_reset_handler          = srp_reset_host,
2736         .skip_settle_delay              = true,
2737         .sg_tablesize                   = SRP_DEF_SG_TABLESIZE,
2738         .can_queue                      = SRP_DEFAULT_CMD_SQ_SIZE,
2739         .this_id                        = -1,
2740         .cmd_per_lun                    = SRP_DEFAULT_CMD_SQ_SIZE,
2741         .use_clustering                 = ENABLE_CLUSTERING,
2742         .shost_attrs                    = srp_host_attrs,
2743         .use_blk_tags                   = 1,
2744         .track_queue_depth              = 1,
2745 };
2746
2747 static int srp_sdev_count(struct Scsi_Host *host)
2748 {
2749         struct scsi_device *sdev;
2750         int c = 0;
2751
2752         shost_for_each_device(sdev, host)
2753                 c++;
2754
2755         return c;
2756 }
2757
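/*
 * Register the SCSI host and SRP remote port for @target and scan it.
 * Removal is scheduled if the connection is lost before the initial scan
 * finishes.
 */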
2758 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2759 {
2760         struct srp_rport_identifiers ids;
2761         struct srp_rport *rport;
2762
2763         target->state = SRP_TARGET_SCANNING;
2764         sprintf(target->target_name, "SRP.T10:%016llX",
2765                 be64_to_cpu(target->id_ext));
2766
2767         if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2768                 return -ENODEV;
2769
2770         memcpy(ids.port_id, &target->id_ext, 8);
2771         memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2772         ids.roles = SRP_RPORT_ROLE_TARGET;
2773         rport = srp_rport_add(target->scsi_host, &ids);
2774         if (IS_ERR(rport)) {
2775                 scsi_remove_host(target->scsi_host);
2776                 return PTR_ERR(rport);
2777         }
2778
2779         rport->lld_data = target;
2780         target->rport = rport;
2781
2782         spin_lock(&host->target_lock);
2783         list_add_tail(&target->list, &host->target_list);
2784         spin_unlock(&host->target_lock);
2785
2786         scsi_scan_target(&target->scsi_host->shost_gendev,
2787                          0, target->scsi_id, SCAN_WILD_CARD, 0);
2788
2789         if (srp_connected_ch(target) < target->ch_count ||
2790             target->qp_in_error) {
2791                 shost_printk(KERN_INFO, target->scsi_host,
2792                              PFX "SCSI scan failed - removing SCSI host\n");
2793                 srp_queue_remove_work(target);
2794                 goto out;
2795         }
2796
2797         pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2798                  dev_name(&target->scsi_host->shost_gendev),
2799                  srp_sdev_count(target->scsi_host));
2800
2801         spin_lock_irq(&target->lock);
2802         if (target->state == SRP_TARGET_SCANNING)
2803                 target->state = SRP_TARGET_LIVE;
2804         spin_unlock_irq(&target->lock);
2805
2806 out:
2807         return 0;
2808 }
2809
2810 static void srp_release_dev(struct device *dev)
2811 {
2812         struct srp_host *host =
2813                 container_of(dev, struct srp_host, dev);
2814
2815         complete(&host->released);
2816 }
2817
2818 static struct class srp_class = {
2819         .name    = "infiniband_srp",
2820         .dev_release = srp_release_dev
2821 };
2822
2823 /**
2824  * srp_conn_unique() - check whether the connection to a target is unique
2825  * @host:   SRP host.
2826  * @target: SRP target port.
2827  */
2828 static bool srp_conn_unique(struct srp_host *host,
2829                             struct srp_target_port *target)
2830 {
2831         struct srp_target_port *t;
2832         bool ret = false;
2833
2834         if (target->state == SRP_TARGET_REMOVED)
2835                 goto out;
2836
2837         ret = true;
2838
2839         spin_lock(&host->target_lock);
2840         list_for_each_entry(t, &host->target_list, list) {
2841                 if (t != target &&
2842                     target->id_ext == t->id_ext &&
2843                     target->ioc_guid == t->ioc_guid &&
2844                     target->initiator_ext == t->initiator_ext) {
2845                         ret = false;
2846                         break;
2847                 }
2848         }
2849         spin_unlock(&host->target_lock);
2850
2851 out:
2852         return ret;
2853 }
2854
2855 /*
2856  * Target ports are added by writing
2857  *
2858  *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
2859  *     pkey=<P_Key>,service_id=<service ID>
2860  *
2861  * to the add_target sysfs attribute.
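 *
 * For example (illustrative identifiers; the srp-<hca>-<port> part of the
 * sysfs path depends on the local HCA and port):
 *
 *     echo "id_ext=200100e08b000000,ioc_guid=0002c90200402bd4,dgid=fe800000000000000002c90200402bd5,pkey=ffff,service_id=0002c90200402bd4" \
 *         > /sys/class/infiniband_srp/srp-<hca>-<port>/add_target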
2862  */
2863 enum {
2864         SRP_OPT_ERR             = 0,
2865         SRP_OPT_ID_EXT          = 1 << 0,
2866         SRP_OPT_IOC_GUID        = 1 << 1,
2867         SRP_OPT_DGID            = 1 << 2,
2868         SRP_OPT_PKEY            = 1 << 3,
2869         SRP_OPT_SERVICE_ID      = 1 << 4,
2870         SRP_OPT_MAX_SECT        = 1 << 5,
2871         SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
2872         SRP_OPT_IO_CLASS        = 1 << 7,
2873         SRP_OPT_INITIATOR_EXT   = 1 << 8,
2874         SRP_OPT_CMD_SG_ENTRIES  = 1 << 9,
2875         SRP_OPT_ALLOW_EXT_SG    = 1 << 10,
2876         SRP_OPT_SG_TABLESIZE    = 1 << 11,
2877         SRP_OPT_COMP_VECTOR     = 1 << 12,
2878         SRP_OPT_TL_RETRY_COUNT  = 1 << 13,
2879         SRP_OPT_QUEUE_SIZE      = 1 << 14,
2880         SRP_OPT_ALL             = (SRP_OPT_ID_EXT       |
2881                                    SRP_OPT_IOC_GUID     |
2882                                    SRP_OPT_DGID         |
2883                                    SRP_OPT_PKEY         |
2884                                    SRP_OPT_SERVICE_ID),
2885 };
2886
2887 static const match_table_t srp_opt_tokens = {
2888         { SRP_OPT_ID_EXT,               "id_ext=%s"             },
2889         { SRP_OPT_IOC_GUID,             "ioc_guid=%s"           },
2890         { SRP_OPT_DGID,                 "dgid=%s"               },
2891         { SRP_OPT_PKEY,                 "pkey=%x"               },
2892         { SRP_OPT_SERVICE_ID,           "service_id=%s"         },
2893         { SRP_OPT_MAX_SECT,             "max_sect=%d"           },
2894         { SRP_OPT_MAX_CMD_PER_LUN,      "max_cmd_per_lun=%d"    },
2895         { SRP_OPT_IO_CLASS,             "io_class=%x"           },
2896         { SRP_OPT_INITIATOR_EXT,        "initiator_ext=%s"      },
2897         { SRP_OPT_CMD_SG_ENTRIES,       "cmd_sg_entries=%u"     },
2898         { SRP_OPT_ALLOW_EXT_SG,         "allow_ext_sg=%u"       },
2899         { SRP_OPT_SG_TABLESIZE,         "sg_tablesize=%u"       },
2900         { SRP_OPT_COMP_VECTOR,          "comp_vector=%u"        },
2901         { SRP_OPT_TL_RETRY_COUNT,       "tl_retry_count=%u"     },
2902         { SRP_OPT_QUEUE_SIZE,           "queue_size=%d"         },
2903         { SRP_OPT_ERR,                  NULL                    }
2904 };
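     /*
      * Sketch of how this table is consumed (values illustrative): for an
      * option such as "pkey=7fff", match_token() in srp_parse_options()
      * returns SRP_OPT_PKEY and captures the substring "7fff" in args[0],
      * which match_hex() then converts to the numeric P_Key value.
      */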
2905
2906 static int srp_parse_options(const char *buf, struct srp_target_port *target)
2907 {
2908         char *options, *sep_opt;
2909         char *p;
2910         char dgid[3];
2911         substring_t args[MAX_OPT_ARGS];
2912         int opt_mask = 0;
2913         int token;
2914         int ret = -EINVAL;
2915         int i;
2916
2917         options = kstrdup(buf, GFP_KERNEL);
2918         if (!options)
2919                 return -ENOMEM;
2920
2921         sep_opt = options;
2922         while ((p = strsep(&sep_opt, ",\n")) != NULL) {
2923                 if (!*p)
2924                         continue;
2925
2926                 token = match_token(p, srp_opt_tokens, args);
2927                 opt_mask |= token;
2928
2929                 switch (token) {
2930                 case SRP_OPT_ID_EXT:
2931                         p = match_strdup(args);
2932                         if (!p) {
2933                                 ret = -ENOMEM;
2934                                 goto out;
2935                         }
2936                         target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
2937                         kfree(p);
2938                         break;
2939
2940                 case SRP_OPT_IOC_GUID:
2941                         p = match_strdup(args);
2942                         if (!p) {
2943                                 ret = -ENOMEM;
2944                                 goto out;
2945                         }
2946                         target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
2947                         kfree(p);
2948                         break;
2949
2950                 case SRP_OPT_DGID:
2951                         p = match_strdup(args);
2952                         if (!p) {
2953                                 ret = -ENOMEM;
2954                                 goto out;
2955                         }
2956                         if (strlen(p) != 32) {
2957                                 pr_warn("bad dest GID parameter '%s'\n", p);
2958                                 kfree(p);
2959                                 goto out;
2960                         }
2961
2962                         for (i = 0; i < 16; ++i) {
2963                                 strlcpy(dgid, p + i * 2, sizeof(dgid));
2964                                 if (sscanf(dgid, "%hhx",
2965                                            &target->orig_dgid.raw[i]) < 1) {
2966                                         ret = -EINVAL;
2967                                         kfree(p);
2968                                         goto out;
2969                                 }
2970                         }
2971                         kfree(p);
2972                         break;
2973
2974                 case SRP_OPT_PKEY:
2975                         if (match_hex(args, &token)) {
2976                                 pr_warn("bad P_Key parameter '%s'\n", p);
2977                                 goto out;
2978                         }
2979                         target->pkey = cpu_to_be16(token);
2980                         break;
2981
2982                 case SRP_OPT_SERVICE_ID:
2983                         p = match_strdup(args);
2984                         if (!p) {
2985                                 ret = -ENOMEM;
2986                                 goto out;
2987                         }
2988                         target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
2989                         kfree(p);
2990                         break;
2991
2992                 case SRP_OPT_MAX_SECT:
2993                         if (match_int(args, &token)) {
2994                                 pr_warn("bad max sect parameter '%s'\n", p);
2995                                 goto out;
2996                         }
2997                         target->scsi_host->max_sectors = token;
2998                         break;
2999
3000                 case SRP_OPT_QUEUE_SIZE:
3001                         if (match_int(args, &token) || token < 1) {
3002                                 pr_warn("bad queue_size parameter '%s'\n", p);
3003                                 goto out;
3004                         }
3005                         target->scsi_host->can_queue = token;
3006                         target->queue_size = token + SRP_RSP_SQ_SIZE +
3007                                              SRP_TSK_MGMT_SQ_SIZE;
3008                         if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3009                                 target->scsi_host->cmd_per_lun = token;
3010                         break;
3011
3012                 case SRP_OPT_MAX_CMD_PER_LUN:
3013                         if (match_int(args, &token) || token < 1) {
3014                                 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3015                                         p);
3016                                 goto out;
3017                         }
3018                         target->scsi_host->cmd_per_lun = token;
3019                         break;
3020
3021                 case SRP_OPT_IO_CLASS:
3022                         if (match_hex(args, &token)) {
3023                                 pr_warn("bad IO class parameter '%s'\n", p);
3024                                 goto out;
3025                         }
3026                         if (token != SRP_REV10_IB_IO_CLASS &&
3027                             token != SRP_REV16A_IB_IO_CLASS) {
3028                                 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3029                                         token, SRP_REV10_IB_IO_CLASS,
3030                                         SRP_REV16A_IB_IO_CLASS);
3031                                 goto out;
3032                         }
3033                         target->io_class = token;
3034                         break;
3035
3036                 case SRP_OPT_INITIATOR_EXT:
3037                         p = match_strdup(args);
3038                         if (!p) {
3039                                 ret = -ENOMEM;
3040                                 goto out;
3041                         }
3042                         target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3043                         kfree(p);
3044                         break;
3045
3046                 case SRP_OPT_CMD_SG_ENTRIES:
3047                         if (match_int(args, &token) || token < 1 || token > 255) {
3048                                 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3049                                         p);
3050                                 goto out;
3051                         }
3052                         target->cmd_sg_cnt = token;
3053                         break;
3054
3055                 case SRP_OPT_ALLOW_EXT_SG:
3056                         if (match_int(args, &token)) {
3057                                 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3058                                 goto out;
3059                         }
3060                         target->allow_ext_sg = !!token;
3061                         break;
3062
3063                 case SRP_OPT_SG_TABLESIZE:
3064                         if (match_int(args, &token) || token < 1 ||
3065                                         token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
3066                                 pr_warn("bad max sg_tablesize parameter '%s'\n",
3067                                         p);
3068                                 goto out;
3069                         }
3070                         target->sg_tablesize = token;
3071                         break;
3072
3073                 case SRP_OPT_COMP_VECTOR:
3074                         if (match_int(args, &token) || token < 0) {
3075                                 pr_warn("bad comp_vector parameter '%s'\n", p);
3076                                 goto out;
3077                         }
3078                         target->comp_vector = token;
3079                         break;
3080
3081                 case SRP_OPT_TL_RETRY_COUNT:
3082                         if (match_int(args, &token) || token < 2 || token > 7) {
3083                                 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3084                                         p);
3085                                 goto out;
3086                         }
3087                         target->tl_retry_count = token;
3088                         break;
3089
3090                 default:
3091                         pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3092                                 p);
3093                         goto out;
3094                 }
3095         }
3096
3097         if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3098                 ret = 0;
3099         else
3100                 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3101                         if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3102                             !(srp_opt_tokens[i].token & opt_mask))
3103                                 pr_warn("target creation request is missing parameter '%s'\n",
3104                                         srp_opt_tokens[i].pattern);
3105
3106         if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3107             && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3108                 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3109                         target->scsi_host->cmd_per_lun,
3110                         target->scsi_host->can_queue);
3111
3112 out:
3113         kfree(options);
3114         return ret;
3115 }
3116
3117 static ssize_t srp_create_target(struct device *dev,
3118                                  struct device_attribute *attr,
3119                                  const char *buf, size_t count)
3120 {
3121         struct srp_host *host =
3122                 container_of(dev, struct srp_host, dev);
3123         struct Scsi_Host *target_host;
3124         struct srp_target_port *target;
3125         struct srp_rdma_ch *ch;
3126         struct srp_device *srp_dev = host->srp_dev;
3127         struct ib_device *ibdev = srp_dev->dev;
3128         int ret, node_idx, node, cpu, i;
3129         bool multich = false;
3130
3131         target_host = scsi_host_alloc(&srp_template,
3132                                       sizeof (struct srp_target_port));
3133         if (!target_host)
3134                 return -ENOMEM;
3135
3136         target_host->transportt  = ib_srp_transport_template;
3137         target_host->max_channel = 0;
3138         target_host->max_id      = 1;
3139         target_host->max_lun     = -1LL;
3140         target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3141
3142         target = host_to_target(target_host);
3143
3144         target->io_class        = SRP_REV16A_IB_IO_CLASS;
3145         target->scsi_host       = target_host;
3146         target->srp_host        = host;
3147         target->lkey            = host->srp_dev->mr->lkey;
3148         target->rkey            = host->srp_dev->mr->rkey;
3149         target->cmd_sg_cnt      = cmd_sg_entries;
3150         target->sg_tablesize    = indirect_sg_entries ? : cmd_sg_entries;
3151         target->allow_ext_sg    = allow_ext_sg;
3152         target->tl_retry_count  = 7;
3153         target->queue_size      = SRP_DEFAULT_QUEUE_SIZE;
3154
3155         /*
3156          * Prevent the SCSI host from being removed by srp_remove_target()
3157          * before this function returns.
3158          */
3159         scsi_host_get(target->scsi_host);
3160
3161         mutex_lock(&host->add_target_mutex);
3162
3163         ret = srp_parse_options(buf, target);
3164         if (ret)
3165                 goto out;
3166
3167         ret = scsi_init_shared_tag_map(target_host, target_host->can_queue);
3168         if (ret)
3169                 goto out;
3170
3171         target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3172
3173         if (!srp_conn_unique(target->srp_host, target)) {
3174                 shost_printk(KERN_INFO, target->scsi_host,
3175                              PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3176                              be64_to_cpu(target->id_ext),
3177                              be64_to_cpu(target->ioc_guid),
3178                              be64_to_cpu(target->initiator_ext));
3179                 ret = -EEXIST;
3180                 goto out;
3181         }
3182
3183         if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3184             target->cmd_sg_cnt < target->sg_tablesize) {
3185                 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3186                 target->sg_tablesize = target->cmd_sg_cnt;
3187         }
3188
3189         target_host->sg_tablesize = target->sg_tablesize;
3190         target->indirect_size = target->sg_tablesize *
3191                                 sizeof (struct srp_direct_buf);
3192         target->max_iu_len = sizeof (struct srp_cmd) +
3193                              sizeof (struct srp_indirect_buf) +
3194                              target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
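             /*
              * Rough worked example (exact sizes depend on the SRP header
              * definitions in <scsi/srp.h>): with the default cmd_sg_cnt of
              * 12 and 16-byte srp_direct_buf descriptors, the in-IU
              * descriptor table accounts for 12 * 16 = 192 bytes of the
              * maximum information unit length computed above.
              */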
3195
3196         INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3197         INIT_WORK(&target->remove_work, srp_remove_work);
3198         spin_lock_init(&target->lock);
3199         ret = ib_query_gid(ibdev, host->port, 0, &target->sgid);
3200         if (ret)
3201                 goto out;
3202
3203         ret = -ENOMEM;
3204         target->ch_count = max_t(unsigned, num_online_nodes(),
3205                                  min(ch_count ? :
3206                                      min(4 * num_online_nodes(),
3207                                          ibdev->num_comp_vectors),
3208                                      num_online_cpus()));
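             /*
              * Illustrative numbers only: on a hypothetical system with two
              * online NUMA nodes, 16 online CPUs and an HCA exposing 8
              * completion vectors, and with ch_count left at 0, this
              * evaluates to max(2, min(min(4 * 2, 8), 16)) = 8 RDMA channels.
              */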
3209         target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3210                              GFP_KERNEL);
3211         if (!target->ch)
3212                 goto out;
3213
3214         node_idx = 0;
3215         for_each_online_node(node) {
3216                 const int ch_start = (node_idx * target->ch_count /
3217                                       num_online_nodes());
3218                 const int ch_end = ((node_idx + 1) * target->ch_count /
3219                                     num_online_nodes());
3220                 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3221                                       num_online_nodes() + target->comp_vector)
3222                                      % ibdev->num_comp_vectors;
3223                 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3224                                     num_online_nodes() + target->comp_vector)
3225                                    % ibdev->num_comp_vectors;
3226                 int cpu_idx = 0;
3227
3228                 for_each_online_cpu(cpu) {
3229                         if (cpu_to_node(cpu) != node)
3230                                 continue;
3231                         if (ch_start + cpu_idx >= ch_end)
3232                                 continue;
3233                         ch = &target->ch[ch_start + cpu_idx];
3234                         ch->target = target;
3235                         ch->comp_vector = cv_start == cv_end ? cv_start :
3236                                 cv_start + cpu_idx % (cv_end - cv_start);
3237                         spin_lock_init(&ch->lock);
3238                         INIT_LIST_HEAD(&ch->free_tx);
3239                         ret = srp_new_cm_id(ch);
3240                         if (ret)
3241                                 goto err_disconnect;
3242
3243                         ret = srp_create_ch_ib(ch);
3244                         if (ret)
3245                                 goto err_disconnect;
3246
3247                         ret = srp_alloc_req_data(ch);
3248                         if (ret)
3249                                 goto err_disconnect;
3250
3251                         ret = srp_connect_ch(ch, multich);
3252                         if (ret) {
3253                                 shost_printk(KERN_ERR, target->scsi_host,
3254                                              PFX "Connection %d/%d failed\n",
3255                                              ch_start + cpu_idx,
3256                                              target->ch_count);
3257                                 if (node_idx == 0 && cpu_idx == 0) {
3258                                         goto err_disconnect;
3259                                 } else {
3260                                         srp_free_ch_ib(target, ch);
3261                                         srp_free_req_data(target, ch);
3262                                         target->ch_count = ch - target->ch;
3263                                         break;
3264                                 }
3265                         }
3266
3267                         multich = true;
3268                         cpu_idx++;
3269                 }
3270                 node_idx++;
3271         }
3272
3273         target->scsi_host->nr_hw_queues = target->ch_count;
3274
3275         ret = srp_add_target(host, target);
3276         if (ret)
3277                 goto err_disconnect;
3278
3279         if (target->state != SRP_TARGET_REMOVED) {
3280                 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3281                              "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3282                              be64_to_cpu(target->id_ext),
3283                              be64_to_cpu(target->ioc_guid),
3284                              be16_to_cpu(target->pkey),
3285                              be64_to_cpu(target->service_id),
3286                              target->sgid.raw, target->orig_dgid.raw);
3287         }
3288
3289         ret = count;
3290
3291 out:
3292         mutex_unlock(&host->add_target_mutex);
3293
3294         scsi_host_put(target->scsi_host);
3295
3296         return ret;
3297
3298 err_disconnect:
3299         srp_disconnect_target(target);
3300
3301         for (i = 0; i < target->ch_count; i++) {
3302                 ch = &target->ch[i];
3303                 srp_free_ch_ib(target, ch);
3304                 srp_free_req_data(target, ch);
3305         }
3306
3307         kfree(target->ch);
3308         goto out;
3309 }
3310
3311 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3312
3313 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3314                           char *buf)
3315 {
3316         struct srp_host *host = container_of(dev, struct srp_host, dev);
3317
3318         return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3319 }
3320
3321 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3322
3323 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3324                          char *buf)
3325 {
3326         struct srp_host *host = container_of(dev, struct srp_host, dev);
3327
3328         return sprintf(buf, "%d\n", host->port);
3329 }
3330
3331 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3332
3333 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3334 {
3335         struct srp_host *host;
3336
3337         host = kzalloc(sizeof *host, GFP_KERNEL);
3338         if (!host)
3339                 return NULL;
3340
3341         INIT_LIST_HEAD(&host->target_list);
3342         spin_lock_init(&host->target_lock);
3343         init_completion(&host->released);
3344         mutex_init(&host->add_target_mutex);
3345         host->srp_dev = device;
3346         host->port = port;
3347
3348         host->dev.class = &srp_class;
3349         host->dev.parent = device->dev->dma_device;
3350         dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3351
3352         if (device_register(&host->dev))
3353                 goto free_host;
3354         if (device_create_file(&host->dev, &dev_attr_add_target))
3355                 goto err_class;
3356         if (device_create_file(&host->dev, &dev_attr_ibdev))
3357                 goto err_class;
3358         if (device_create_file(&host->dev, &dev_attr_port))
3359                 goto err_class;
3360
3361         return host;
3362
3363 err_class:
3364         device_unregister(&host->dev);
3365
3366 free_host:
3367         kfree(host);
3368
3369         return NULL;
3370 }
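     /*
      * On success srp_add_port() therefore creates a sysfs directory such as
      * /sys/class/infiniband_srp/srp-<ibdev>-<port>/ (names depend on the
      * HCA) containing the add_target, ibdev and port attributes defined
      * above.
      */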
3371
3372 static void srp_add_one(struct ib_device *device)
3373 {
3374         struct srp_device *srp_dev;
3375         struct ib_device_attr *dev_attr;
3376         struct srp_host *host;
3377         int mr_page_shift, s, e, p;
3378         u64 max_pages_per_mr;
3379
3380         dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
3381         if (!dev_attr)
3382                 return;
3383
3384         if (ib_query_device(device, dev_attr)) {
3385                 pr_warn("Query device failed for %s\n", device->name);
3386                 goto free_attr;
3387         }
3388
3389         srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
3390         if (!srp_dev)
3391                 goto free_attr;
3392
3393         srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3394                             device->map_phys_fmr && device->unmap_fmr);
3395         srp_dev->has_fr = (dev_attr->device_cap_flags &
3396                            IB_DEVICE_MEM_MGT_EXTENSIONS);
3397         if (!srp_dev->has_fmr && !srp_dev->has_fr)
3398                 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3399
3400         srp_dev->use_fast_reg = (srp_dev->has_fr &&
3401                                  (!srp_dev->has_fmr || prefer_fr));
3402
3403         /*
3404          * Use the smallest page size supported by the HCA, down to a
3405          * minimum of 4096 bytes. We're unlikely to build large sglists
3406          * out of smaller entries.
3407          */
3408         mr_page_shift           = max(12, ffs(dev_attr->page_size_cap) - 1);
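             /*
              * Example with a made-up capability mask: if page_size_cap is
              * 0x40201000 (4 KiB, 2 MiB and 1 GiB pages supported), then
              * ffs() - 1 = 12 and mr_page_size below becomes 4096 bytes.
              */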
3409         srp_dev->mr_page_size   = 1 << mr_page_shift;
3410         srp_dev->mr_page_mask   = ~((u64) srp_dev->mr_page_size - 1);
3411         max_pages_per_mr        = dev_attr->max_mr_size;
3412         do_div(max_pages_per_mr, srp_dev->mr_page_size);
3413         srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3414                                           max_pages_per_mr);
3415         if (srp_dev->use_fast_reg) {
3416                 srp_dev->max_pages_per_mr =
3417                         min_t(u32, srp_dev->max_pages_per_mr,
3418                               dev_attr->max_fast_reg_page_list_len);
3419         }
3420         srp_dev->mr_max_size    = srp_dev->mr_page_size *
3421                                    srp_dev->max_pages_per_mr;
3422         pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3423                  device->name, mr_page_shift, dev_attr->max_mr_size,
3424                  dev_attr->max_fast_reg_page_list_len,
3425                  srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3426
3427         INIT_LIST_HEAD(&srp_dev->dev_list);
3428
3429         srp_dev->dev = device;
3430         srp_dev->pd  = ib_alloc_pd(device);
3431         if (IS_ERR(srp_dev->pd))
3432                 goto free_dev;
3433
3434         srp_dev->mr = ib_get_dma_mr(srp_dev->pd,
3435                                     IB_ACCESS_LOCAL_WRITE |
3436                                     IB_ACCESS_REMOTE_READ |
3437                                     IB_ACCESS_REMOTE_WRITE);
3438         if (IS_ERR(srp_dev->mr))
3439                 goto err_pd;
3440
3441         if (device->node_type == RDMA_NODE_IB_SWITCH) {
3442                 s = 0;
3443                 e = 0;
3444         } else {
3445                 s = 1;
3446                 e = device->phys_port_cnt;
3447         }
3448
3449         for (p = s; p <= e; ++p) {
3450                 host = srp_add_port(srp_dev, p);
3451                 if (host)
3452                         list_add_tail(&host->list, &srp_dev->dev_list);
3453         }
3454
3455         ib_set_client_data(device, &srp_client, srp_dev);
3456
3457         goto free_attr;
3458
3459 err_pd:
3460         ib_dealloc_pd(srp_dev->pd);
3461
3462 free_dev:
3463         kfree(srp_dev);
3464
3465 free_attr:
3466         kfree(dev_attr);
3467 }
3468
3469 static void srp_remove_one(struct ib_device *device)
3470 {
3471         struct srp_device *srp_dev;
3472         struct srp_host *host, *tmp_host;
3473         struct srp_target_port *target;
3474
3475         srp_dev = ib_get_client_data(device, &srp_client);
3476         if (!srp_dev)
3477                 return;
3478
3479         list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3480                 device_unregister(&host->dev);
3481                 /*
3482                  * Wait for the sysfs entry to go away, so that no new
3483                  * target ports can be created.
3484                  */
3485                 wait_for_completion(&host->released);
3486
3487                 /*
3488                  * Remove all target ports.
3489                  */
3490                 spin_lock(&host->target_lock);
3491                 list_for_each_entry(target, &host->target_list, list)
3492                         srp_queue_remove_work(target);
3493                 spin_unlock(&host->target_lock);
3494
3495                 /*
3496                  * Wait for tl_err and target port removal tasks.
3497                  */
3498                 flush_workqueue(system_long_wq);
3499                 flush_workqueue(srp_remove_wq);
3500
3501                 kfree(host);
3502         }
3503
3504         ib_dereg_mr(srp_dev->mr);
3505         ib_dealloc_pd(srp_dev->pd);
3506
3507         kfree(srp_dev);
3508 }
3509
3510 static struct srp_function_template ib_srp_transport_functions = {
3511         .has_rport_state         = true,
3512         .reset_timer_if_blocked  = true,
3513         .reconnect_delay         = &srp_reconnect_delay,
3514         .fast_io_fail_tmo        = &srp_fast_io_fail_tmo,
3515         .dev_loss_tmo            = &srp_dev_loss_tmo,
3516         .reconnect               = srp_rport_reconnect,
3517         .rport_delete            = srp_rport_delete,
3518         .terminate_rport_io      = srp_terminate_io,
3519 };
3520
3521 static int __init srp_init_module(void)
3522 {
3523         int ret;
3524
3525         BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
3526
3527         if (srp_sg_tablesize) {
3528                 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3529                 if (!cmd_sg_entries)
3530                         cmd_sg_entries = srp_sg_tablesize;
3531         }
3532
3533         if (!cmd_sg_entries)
3534                 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3535
3536         if (cmd_sg_entries > 255) {
3537                 pr_warn("Clamping cmd_sg_entries to 255\n");
3538                 cmd_sg_entries = 255;
3539         }
3540
3541         if (!indirect_sg_entries)
3542                 indirect_sg_entries = cmd_sg_entries;
3543         else if (indirect_sg_entries < cmd_sg_entries) {
3544                 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3545                         cmd_sg_entries);
3546                 indirect_sg_entries = cmd_sg_entries;
3547         }
3548
3549         srp_remove_wq = create_workqueue("srp_remove");
3550         if (!srp_remove_wq) {
3551                 ret = -ENOMEM;
3552                 goto out;
3553         }
3554
3555         ret = -ENOMEM;
3556         ib_srp_transport_template =
3557                 srp_attach_transport(&ib_srp_transport_functions);
3558         if (!ib_srp_transport_template)
3559                 goto destroy_wq;
3560
3561         ret = class_register(&srp_class);
3562         if (ret) {
3563                 pr_err("couldn't register class infiniband_srp\n");
3564                 goto release_tr;
3565         }
3566
3567         ib_sa_register_client(&srp_sa_client);
3568
3569         ret = ib_register_client(&srp_client);
3570         if (ret) {
3571                 pr_err("couldn't register IB client\n");
3572                 goto unreg_sa;
3573         }
3574
3575 out:
3576         return ret;
3577
3578 unreg_sa:
3579         ib_sa_unregister_client(&srp_sa_client);
3580         class_unregister(&srp_class);
3581
3582 release_tr:
3583         srp_release_transport(ib_srp_transport_template);
3584
3585 destroy_wq:
3586         destroy_workqueue(srp_remove_wq);
3587         goto out;
3588 }
3589
3590 static void __exit srp_cleanup_module(void)
3591 {
3592         ib_unregister_client(&srp_client);
3593         ib_sa_unregister_client(&srp_sa_client);
3594         class_unregister(&srp_class);
3595         srp_release_transport(ib_srp_transport_template);
3596         destroy_workqueue(srp_remove_wq);
3597 }
3598
3599 module_init(srp_init_module);
3600 module_exit(srp_cleanup_module);