1 /*
2  * Copyright(c) 2016, 2017 Intel Corporation.
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of version 2 of the GNU General Public License as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * BSD LICENSE
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  *
24  *  - Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  *  - Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in
28  *    the documentation and/or other materials provided with the
29  *    distribution.
30  *  - Neither the name of Intel Corporation nor the names of its
31  *    contributors may be used to endorse or promote products derived
32  *    from this software without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  *
46  */
47
48 #include <linux/hash.h>
49 #include <linux/bitops.h>
50 #include <linux/lockdep.h>
51 #include <linux/vmalloc.h>
52 #include <linux/slab.h>
53 #include <rdma/ib_verbs.h>
54 #include <rdma/ib_hdrs.h>
55 #include "qp.h"
56 #include "vt.h"
57 #include "trace.h"
58
59 static void rvt_rc_timeout(unsigned long arg);
60
61 /*
62  * Convert the AETH RNR timeout code into the number of microseconds.
63  */
64 static const u32 ib_rvt_rnr_table[32] = {
65         655360, /* 00: 655.36 */
66         10,     /* 01:    .01 */
67         20,     /* 02:    .02 */
68         30,     /* 03:    .03 */
69         40,     /* 04:    .04 */
70         60,     /* 05:    .06 */
71         80,     /* 06:    .08 */
72         120,    /* 07:    .12 */
73         160,    /* 08:    .16 */
74         240,    /* 09:    .24 */
75         320,    /* 0A:    .32 */
76         480,    /* 0B:    .48 */
77         640,    /* 0C:    .64 */
78         960,    /* 0D:    .96 */
79         1280,   /* 0E:   1.28 */
80         1920,   /* 0F:   1.92 */
81         2560,   /* 10:   2.56 */
82         3840,   /* 11:   3.84 */
83         5120,   /* 12:   5.12 */
84         7680,   /* 13:   7.68 */
85         10240,  /* 14:  10.24 */
86         15360,  /* 15:  15.36 */
87         20480,  /* 16:  20.48 */
88         30720,  /* 17:  30.72 */
89         40960,  /* 18:  40.96 */
90         61440,  /* 19:  61.44 */
91         81920,  /* 1A:  81.92 */
92         122880, /* 1B: 122.88 */
93         163840, /* 1C: 163.84 */
94         245760, /* 1D: 245.76 */
95         327680, /* 1E: 327.68 */
96         491520  /* 1F: 491.52 */
97 };
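
/*
 * Illustrative note, not part of the driver: a caller converting the
 * 5-bit RNR NAK timer code carried in an AETH would simply mask and
 * index the table above, e.g.
 *
 *      u32 rnr_usec = ib_rvt_rnr_table[aeth_code & 0x1f];
 *
 * so code 0x0d yields 960 (0.96 ms) and code 0x00 yields 655360
 * (655.36 ms). "aeth_code" is a placeholder name.
 */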
98
99 /*
100  * Note that it is OK to post send work requests in the SQE and ERR
101  * states; rvt_do_send() will process them and generate error
102  * completions as per IB 1.2 C10-96.
103  */
104 const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
105         [IB_QPS_RESET] = 0,
106         [IB_QPS_INIT] = RVT_POST_RECV_OK,
107         [IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK,
108         [IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
109             RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK |
110             RVT_PROCESS_NEXT_SEND_OK,
111         [IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
112             RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK,
113         [IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
114             RVT_POST_SEND_OK | RVT_FLUSH_SEND,
115         [IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV |
116             RVT_POST_SEND_OK | RVT_FLUSH_SEND,
117 };
118 EXPORT_SYMBOL(ib_rvt_state_ops);
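
/*
 * Example of how the table above is consulted; the same pattern appears
 * further down in this file (e.g. in rvt_post_recv()):
 *
 *      if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK))
 *              return -EINVAL;
 */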
119
120 static void get_map_page(struct rvt_qpn_table *qpt,
121                          struct rvt_qpn_map *map)
122 {
123         unsigned long page = get_zeroed_page(GFP_KERNEL);
124
125         /*
126          * Free the page if someone raced with us installing it.
127          */
128
129         spin_lock(&qpt->lock);
130         if (map->page)
131                 free_page(page);
132         else
133                 map->page = (void *)page;
134         spin_unlock(&qpt->lock);
135 }
136
137 /**
138  * init_qpn_table - initialize the QP number table for a device
139  * @qpt: the QPN table
140  */
141 static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
142 {
143         u32 offset, i;
144         struct rvt_qpn_map *map;
145         int ret = 0;
146
147         if (!(rdi->dparms.qpn_res_end >= rdi->dparms.qpn_res_start))
148                 return -EINVAL;
149
150         spin_lock_init(&qpt->lock);
151
152         qpt->last = rdi->dparms.qpn_start;
153         qpt->incr = rdi->dparms.qpn_inc << rdi->dparms.qos_shift;
154
155         /*
156  * Drivers may want some QPs beyond what we need for verbs; let them use
157  * our qpn table rather than keep a second one. Go ahead and mark the
158  * bitmaps for those. The reserved range must be *after* the range which
159  * verbs will pick from.
160          */
161
162         /* Figure out number of bit maps needed before reserved range */
163         qpt->nmaps = rdi->dparms.qpn_res_start / RVT_BITS_PER_PAGE;
164
165         /* This should always be zero */
166         offset = rdi->dparms.qpn_res_start & RVT_BITS_PER_PAGE_MASK;
167
168         /* Starting with the first reserved bit map */
169         map = &qpt->map[qpt->nmaps];
170
171         rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n",
172                     rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end);
173         for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) {
174                 if (!map->page) {
175                         get_map_page(qpt, map);
176                         if (!map->page) {
177                                 ret = -ENOMEM;
178                                 break;
179                         }
180                 }
181                 set_bit(offset, map->page);
182                 offset++;
183                 if (offset == RVT_BITS_PER_PAGE) {
184                         /* next page */
185                         qpt->nmaps++;
186                         map++;
187                         offset = 0;
188                 }
189         }
190         return ret;
191 }
192
193 /**
194  * free_qpn_table - free the QP number table for a device
195  * @qpt: the QPN table
196  */
197 static void free_qpn_table(struct rvt_qpn_table *qpt)
198 {
199         int i;
200
201         for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
202                 free_page((unsigned long)qpt->map[i].page);
203 }
204
205 /**
206  * rvt_driver_qp_init - Init driver qp resources
207  * @rdi: rvt dev structure
208  *
209  * Return: 0 on success
210  */
211 int rvt_driver_qp_init(struct rvt_dev_info *rdi)
212 {
213         int i;
214         int ret = -ENOMEM;
215
216         if (!rdi->dparms.qp_table_size)
217                 return -EINVAL;
218
219         /*
220          * If driver is not doing any QP allocation then make sure it is
221          * providing the necessary QP functions.
222          */
223         if (!rdi->driver_f.free_all_qps ||
224             !rdi->driver_f.qp_priv_alloc ||
225             !rdi->driver_f.qp_priv_free ||
226             !rdi->driver_f.notify_qp_reset ||
227             !rdi->driver_f.notify_restart_rc)
228                 return -EINVAL;
229
230         /* allocate parent object */
231         rdi->qp_dev = kzalloc_node(sizeof(*rdi->qp_dev), GFP_KERNEL,
232                                    rdi->dparms.node);
233         if (!rdi->qp_dev)
234                 return -ENOMEM;
235
236         /* allocate hash table */
237         rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size;
238         rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size);
239         rdi->qp_dev->qp_table =
240                 kmalloc_node(rdi->qp_dev->qp_table_size *
241                              sizeof(*rdi->qp_dev->qp_table),
242                              GFP_KERNEL, rdi->dparms.node);
243         if (!rdi->qp_dev->qp_table)
244                 goto no_qp_table;
245
246         for (i = 0; i < rdi->qp_dev->qp_table_size; i++)
247                 RCU_INIT_POINTER(rdi->qp_dev->qp_table[i], NULL);
248
249         spin_lock_init(&rdi->qp_dev->qpt_lock);
250
251         /* initialize qpn map */
252         if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table))
253                 goto fail_table;
254
255         spin_lock_init(&rdi->n_qps_lock);
256
257         return 0;
258
259 fail_table:
260         kfree(rdi->qp_dev->qp_table);
261         free_qpn_table(&rdi->qp_dev->qpn_table);
262
263 no_qp_table:
264         kfree(rdi->qp_dev);
265
266         return ret;
267 }
268
269 /**
270  * rvt_free_all_qps - check for QPs still in use
271  * @rdi: the rvt device structure whose QP table to empty
272  *
273  * There should not be any QPs still in use.
274  * Free memory for table.
275  */
276 static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
277 {
278         unsigned long flags;
279         struct rvt_qp *qp;
280         unsigned n, qp_inuse = 0;
281         spinlock_t *ql; /* work around too long line below */
282
283         if (rdi->driver_f.free_all_qps)
284                 qp_inuse = rdi->driver_f.free_all_qps(rdi);
285
286         qp_inuse += rvt_mcast_tree_empty(rdi);
287
288         if (!rdi->qp_dev)
289                 return qp_inuse;
290
291         ql = &rdi->qp_dev->qpt_lock;
292         spin_lock_irqsave(ql, flags);
293         for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
294                 qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
295                                                lockdep_is_held(ql));
296                 RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
297
298                 for (; qp; qp = rcu_dereference_protected(qp->next,
299                                                           lockdep_is_held(ql)))
300                         qp_inuse++;
301         }
302         spin_unlock_irqrestore(ql, flags);
303         synchronize_rcu();
304         return qp_inuse;
305 }
306
307 /**
308  * rvt_qp_exit - clean up qps on device exit
309  * @rdi: rvt dev structure
310  *
311  * Check for qp leaks and free resources.
312  */
313 void rvt_qp_exit(struct rvt_dev_info *rdi)
314 {
315         u32 qps_inuse = rvt_free_all_qps(rdi);
316
317         if (qps_inuse)
318                 rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
319                            qps_inuse);
320         if (!rdi->qp_dev)
321                 return;
322
323         kfree(rdi->qp_dev->qp_table);
324         free_qpn_table(&rdi->qp_dev->qpn_table);
325         kfree(rdi->qp_dev);
326 }
327
328 static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
329                               struct rvt_qpn_map *map, unsigned off)
330 {
331         return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
332 }
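
/*
 * For example, with map pointing at qpt->map[2] and off == 5, mk_qpn()
 * returns 2 * RVT_BITS_PER_PAGE + 5, i.e. the QPN whose bit lives at
 * offset 5 of the third bitmap page.
 */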
333
334 /**
335  * alloc_qpn - Allocate the next available qpn or zero/one for QP type
336  *             IB_QPT_SMI/IB_QPT_GSI
337  * @rdi: rvt device info structure
338  * @qpt: queue pair number table pointer
339  * @port_num: IB port number, 1 based, comes from core
340  *
341  * Return: The queue pair number
342  */
343 static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
344                      enum ib_qp_type type, u8 port_num)
345 {
346         u32 i, offset, max_scan, qpn;
347         struct rvt_qpn_map *map;
348         u32 ret;
349
350         if (rdi->driver_f.alloc_qpn)
351                 return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num);
352
353         if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
354                 unsigned n;
355
356                 ret = type == IB_QPT_GSI;
357                 n = 1 << (ret + 2 * (port_num - 1));
358                 spin_lock(&qpt->lock);
359                 if (qpt->flags & n)
360                         ret = -EINVAL;
361                 else
362                         qpt->flags |= n;
363                 spin_unlock(&qpt->lock);
364                 goto bail;
365         }
366
367         qpn = qpt->last + qpt->incr;
368         if (qpn >= RVT_QPN_MAX)
369                 qpn = qpt->incr | ((qpt->last & 1) ^ 1);
370         /* offset carries bit 0 */
371         offset = qpn & RVT_BITS_PER_PAGE_MASK;
372         map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
373         max_scan = qpt->nmaps - !offset;
374         for (i = 0;;) {
375                 if (unlikely(!map->page)) {
376                         get_map_page(qpt, map);
377                         if (unlikely(!map->page))
378                                 break;
379                 }
380                 do {
381                         if (!test_and_set_bit(offset, map->page)) {
382                                 qpt->last = qpn;
383                                 ret = qpn;
384                                 goto bail;
385                         }
386                         offset += qpt->incr;
387                         /*
388                          * This qpn might be bogus if offset >= BITS_PER_PAGE.
389                          * That is OK.   It gets re-assigned below
390                          */
391                         qpn = mk_qpn(qpt, map, offset);
392                 } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
393                 /*
394                  * In order to keep the number of pages allocated to a
395                  * minimum, we scan all of the existing pages before increasing
396                  * the size of the bitmap table.
397                  */
398                 if (++i > max_scan) {
399                         if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
400                                 break;
401                         map = &qpt->map[qpt->nmaps++];
402                         /* start at incr with current bit 0 */
403                         offset = qpt->incr | (offset & 1);
404                 } else if (map < &qpt->map[qpt->nmaps]) {
405                         ++map;
406                         /* start at incr with current bit 0 */
407                         offset = qpt->incr | (offset & 1);
408                 } else {
409                         map = &qpt->map[0];
410                         /* wrap to first map page, invert bit 0 */
411                         offset = qpt->incr | ((offset & 1) ^ 1);
412                 }
413                 /* there can be no set bits in low-order QoS bits */
414                 WARN_ON(offset & (BIT(rdi->dparms.qos_shift) - 1));
415                 qpn = mk_qpn(qpt, map, offset);
416         }
417
418         ret = -ENOMEM;
419
420 bail:
421         return ret;
422 }
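
/*
 * Note on the allocation stride: qpt->incr is qpn_inc << qos_shift (see
 * init_qpn_table()), so QPNs are handed out in steps that never disturb
 * the low-order qos_shift bits; the WARN_ON() above enforces this. The
 * intent, presumably, is to leave those bits free for the driver's QoS
 * mapping.
 */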
423
424 static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
425 {
426         struct rvt_qpn_map *map;
427
428         map = qpt->map + qpn / RVT_BITS_PER_PAGE;
429         if (map->page)
430                 clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
431 }
432
433 /**
434  * rvt_clear_mr_refs - Drop held mr refs
435  * @qp: rvt qp data structure
436  * @clr_sends: Whether to clear the send side or not
437  */
438 static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
439 {
440         unsigned n;
441         struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
442
443         if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
444                 rvt_put_ss(&qp->s_rdma_read_sge);
445
446         rvt_put_ss(&qp->r_sge);
447
448         if (clr_sends) {
449                 while (qp->s_last != qp->s_head) {
450                         struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
451                         unsigned i;
452
453                         for (i = 0; i < wqe->wr.num_sge; i++) {
454                                 struct rvt_sge *sge = &wqe->sg_list[i];
455
456                                 rvt_put_mr(sge->mr);
457                         }
458                         if (qp->ibqp.qp_type == IB_QPT_UD ||
459                             qp->ibqp.qp_type == IB_QPT_SMI ||
460                             qp->ibqp.qp_type == IB_QPT_GSI)
461                                 atomic_dec(&ibah_to_rvtah(
462                                                 wqe->ud_wr.ah)->refcount);
463                         if (++qp->s_last >= qp->s_size)
464                                 qp->s_last = 0;
465                         smp_wmb(); /* see qp_set_savail */
466                 }
467                 if (qp->s_rdma_mr) {
468                         rvt_put_mr(qp->s_rdma_mr);
469                         qp->s_rdma_mr = NULL;
470                 }
471         }
472
473         if (qp->ibqp.qp_type != IB_QPT_RC)
474                 return;
475
476         for (n = 0; n < rvt_max_atomic(rdi); n++) {
477                 struct rvt_ack_entry *e = &qp->s_ack_queue[n];
478
479                 if (e->rdma_sge.mr) {
480                         rvt_put_mr(e->rdma_sge.mr);
481                         e->rdma_sge.mr = NULL;
482                 }
483         }
484 }
485
486 /**
487  * rvt_remove_qp - remove qp from table
488  * @rdi: rvt dev struct
489  * @qp: qp to remove
490  *
491  * Remove the QP from the table so it can't be found asynchronously by
492  * the receive routine.
493  */
494 static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
495 {
496         struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
497         u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
498         unsigned long flags;
499         int removed = 1;
500
501         spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
502
503         if (rcu_dereference_protected(rvp->qp[0],
504                         lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
505                 RCU_INIT_POINTER(rvp->qp[0], NULL);
506         } else if (rcu_dereference_protected(rvp->qp[1],
507                         lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
508                 RCU_INIT_POINTER(rvp->qp[1], NULL);
509         } else {
510                 struct rvt_qp *q;
511                 struct rvt_qp __rcu **qpp;
512
513                 removed = 0;
514                 qpp = &rdi->qp_dev->qp_table[n];
515                 for (; (q = rcu_dereference_protected(*qpp,
516                         lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL;
517                         qpp = &q->next) {
518                         if (q == qp) {
519                                 RCU_INIT_POINTER(*qpp,
520                                      rcu_dereference_protected(qp->next,
521                                      lockdep_is_held(&rdi->qp_dev->qpt_lock)));
522                                 removed = 1;
523                                 trace_rvt_qpremove(qp, n);
524                                 break;
525                         }
526                 }
527         }
528
529         spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
530         if (removed) {
531                 synchronize_rcu();
532                 rvt_put_qp(qp);
533         }
534 }
535
536 /**
537  * rvt_init_qp - initialize the QP state to the reset state
538  * @qp: the QP to init or reinit
539  * @type: the QP type
540  *
541  * This function is called from both rvt_create_qp() and
542  * rvt_reset_qp().   The difference is that the reset
543  * path takes the necessary locks to protect against concurrent
544  * access.
545  */
546 static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
547                         enum ib_qp_type type)
548 {
549         qp->remote_qpn = 0;
550         qp->qkey = 0;
551         qp->qp_access_flags = 0;
552         qp->s_flags &= RVT_S_SIGNAL_REQ_WR;
553         qp->s_hdrwords = 0;
554         qp->s_wqe = NULL;
555         qp->s_draining = 0;
556         qp->s_next_psn = 0;
557         qp->s_last_psn = 0;
558         qp->s_sending_psn = 0;
559         qp->s_sending_hpsn = 0;
560         qp->s_psn = 0;
561         qp->r_psn = 0;
562         qp->r_msn = 0;
563         if (type == IB_QPT_RC) {
564                 qp->s_state = IB_OPCODE_RC_SEND_LAST;
565                 qp->r_state = IB_OPCODE_RC_SEND_LAST;
566         } else {
567                 qp->s_state = IB_OPCODE_UC_SEND_LAST;
568                 qp->r_state = IB_OPCODE_UC_SEND_LAST;
569         }
570         qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
571         qp->r_nak_state = 0;
572         qp->r_aflags = 0;
573         qp->r_flags = 0;
574         qp->s_head = 0;
575         qp->s_tail = 0;
576         qp->s_cur = 0;
577         qp->s_acked = 0;
578         qp->s_last = 0;
579         qp->s_ssn = 1;
580         qp->s_lsn = 0;
581         qp->s_mig_state = IB_MIG_MIGRATED;
582         qp->r_head_ack_queue = 0;
583         qp->s_tail_ack_queue = 0;
584         qp->s_num_rd_atomic = 0;
585         if (qp->r_rq.wq) {
586                 qp->r_rq.wq->head = 0;
587                 qp->r_rq.wq->tail = 0;
588         }
589         qp->r_sge.num_sge = 0;
590         atomic_set(&qp->s_reserved_used, 0);
591 }
592
593 /**
594  * rvt_reset_qp - initialize the QP state to the reset state
595  * @qp: the QP to reset
596  * @type: the QP type
597  *
598  * r_lock, s_hlock, and s_lock are required to be held by the caller
599  */
600 static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
601                          enum ib_qp_type type)
602         __must_hold(&qp->s_lock)
603         __must_hold(&qp->s_hlock)
604         __must_hold(&qp->r_lock)
605 {
606         lockdep_assert_held(&qp->r_lock);
607         lockdep_assert_held(&qp->s_hlock);
608         lockdep_assert_held(&qp->s_lock);
609         if (qp->state != IB_QPS_RESET) {
610                 qp->state = IB_QPS_RESET;
611
612                 /* Let drivers flush their waitlist */
613                 rdi->driver_f.flush_qp_waiters(qp);
614                 rvt_stop_rc_timers(qp);
615                 qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
616                 spin_unlock(&qp->s_lock);
617                 spin_unlock(&qp->s_hlock);
618                 spin_unlock_irq(&qp->r_lock);
619
620                 /* Stop the send queue and the retry timer */
621                 rdi->driver_f.stop_send_queue(qp);
622                 rvt_del_timers_sync(qp);
623                 /* Wait for things to stop */
624                 rdi->driver_f.quiesce_qp(qp);
625
626                 /* take the qp out of the hash and wait for it to be unused */
627                 rvt_remove_qp(rdi, qp);
628                 wait_event(qp->wait, !atomic_read(&qp->refcount));
629
630                 /* grab the lock b/c it was locked at call time */
631                 spin_lock_irq(&qp->r_lock);
632                 spin_lock(&qp->s_hlock);
633                 spin_lock(&qp->s_lock);
634
635                 rvt_clear_mr_refs(qp, 1);
636                 /*
637                  * Let the driver do any tear down or re-init it needs to for
638                  * a qp that has been reset
639                  */
640                 rdi->driver_f.notify_qp_reset(qp);
641         }
642         rvt_init_qp(rdi, qp, type);
643         lockdep_assert_held(&qp->r_lock);
644         lockdep_assert_held(&qp->s_hlock);
645         lockdep_assert_held(&qp->s_lock);
646 }
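
/*
 * Callers of rvt_reset_qp() (see rvt_modify_qp() and rvt_destroy_qp()
 * below) take the three locks in the order implied by the __must_hold()
 * annotations:
 *
 *      spin_lock_irq(&qp->r_lock);
 *      spin_lock(&qp->s_hlock);
 *      spin_lock(&qp->s_lock);
 *      rvt_reset_qp(rdi, qp, type);
 *      spin_unlock(&qp->s_lock);
 *      spin_unlock(&qp->s_hlock);
 *      spin_unlock_irq(&qp->r_lock);
 */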
647
648 /**
649  * rvt_create_qp - create a queue pair for a device
650  * @ibpd: the protection domain whose device we create the queue pair for
651  * @init_attr: the attributes of the queue pair
652  * @udata: user data for libibverbs.so
653  *
654  * Queue pair creation is mostly an rvt issue. However, drivers have their own
655  * unique idea of what queue pair numbers mean. For instance there is a reserved
656  * range for PSM.
657  *
658  * Return: the queue pair on success, otherwise returns an errno.
659  *
660  * Called by the ib_create_qp() core verbs function.
661  */
662 struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
663                             struct ib_qp_init_attr *init_attr,
664                             struct ib_udata *udata)
665 {
666         struct rvt_qp *qp;
667         int err;
668         struct rvt_swqe *swq = NULL;
669         size_t sz;
670         size_t sg_list_sz;
671         struct ib_qp *ret = ERR_PTR(-ENOMEM);
672         struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
673         void *priv = NULL;
674         size_t sqsize;
675
676         if (!rdi)
677                 return ERR_PTR(-EINVAL);
678
679         if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
680             init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
681             init_attr->create_flags)
682                 return ERR_PTR(-EINVAL);
683
684         /* Check receive queue parameters if no SRQ is specified. */
685         if (!init_attr->srq) {
686                 if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
687                     init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
688                         return ERR_PTR(-EINVAL);
689
690                 if (init_attr->cap.max_send_sge +
691                     init_attr->cap.max_send_wr +
692                     init_attr->cap.max_recv_sge +
693                     init_attr->cap.max_recv_wr == 0)
694                         return ERR_PTR(-EINVAL);
695         }
696         sqsize =
697                 init_attr->cap.max_send_wr + 1 +
698                 rdi->dparms.reserved_operations;
699         switch (init_attr->qp_type) {
700         case IB_QPT_SMI:
701         case IB_QPT_GSI:
702                 if (init_attr->port_num == 0 ||
703                     init_attr->port_num > ibpd->device->phys_port_cnt)
704                         return ERR_PTR(-EINVAL);
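                /* SMI/GSI share the allocation below: intentional fall-through */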
705         case IB_QPT_UC:
706         case IB_QPT_RC:
707         case IB_QPT_UD:
708                 sz = sizeof(struct rvt_sge) *
709                         init_attr->cap.max_send_sge +
710                         sizeof(struct rvt_swqe);
711                 swq = vzalloc_node(sqsize * sz, rdi->dparms.node);
712                 if (!swq)
713                         return ERR_PTR(-ENOMEM);
714
715                 sz = sizeof(*qp);
716                 sg_list_sz = 0;
717                 if (init_attr->srq) {
718                         struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq);
719
720                         if (srq->rq.max_sge > 1)
721                                 sg_list_sz = sizeof(*qp->r_sg_list) *
722                                         (srq->rq.max_sge - 1);
723                 } else if (init_attr->cap.max_recv_sge > 1)
724                         sg_list_sz = sizeof(*qp->r_sg_list) *
725                                 (init_attr->cap.max_recv_sge - 1);
726                 qp = kzalloc_node(sz + sg_list_sz, GFP_KERNEL,
727                                   rdi->dparms.node);
728                 if (!qp)
729                         goto bail_swq;
730
731                 RCU_INIT_POINTER(qp->next, NULL);
732                 if (init_attr->qp_type == IB_QPT_RC) {
733                         qp->s_ack_queue =
734                                 kzalloc_node(
735                                         sizeof(*qp->s_ack_queue) *
736                                          rvt_max_atomic(rdi),
737                                         GFP_KERNEL,
738                                         rdi->dparms.node);
739                         if (!qp->s_ack_queue)
740                                 goto bail_qp;
741                 }
742                 /* initialize timers needed for rc qp */
743                 setup_timer(&qp->s_timer, rvt_rc_timeout, (unsigned long)qp);
744                 hrtimer_init(&qp->s_rnr_timer, CLOCK_MONOTONIC,
745                              HRTIMER_MODE_REL);
746                 qp->s_rnr_timer.function = rvt_rc_rnr_retry;
747
748                 /*
749                  * Driver needs to set up its private QP structure and do any
750                  * initialization that is needed.
751                  */
752                 priv = rdi->driver_f.qp_priv_alloc(rdi, qp);
753                 if (IS_ERR(priv)) {
754                         ret = priv;
755                         goto bail_qp;
756                 }
757                 qp->priv = priv;
758                 qp->timeout_jiffies =
759                         usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
760                                 1000UL);
761                 if (init_attr->srq) {
762                         sz = 0;
763                 } else {
764                         qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
765                         qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
766                         sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
767                                 sizeof(struct rvt_rwqe);
768                         if (udata)
769                                 qp->r_rq.wq = vmalloc_user(
770                                                 sizeof(struct rvt_rwq) +
771                                                 qp->r_rq.size * sz);
772                         else
773                                 qp->r_rq.wq = vzalloc_node(
774                                                 sizeof(struct rvt_rwq) +
775                                                 qp->r_rq.size * sz,
776                                                 rdi->dparms.node);
777                         if (!qp->r_rq.wq)
778                                 goto bail_driver_priv;
779                 }
780
781                 /*
782                  * ib_create_qp() will initialize qp->ibqp
783                  * except for qp->ibqp.qp_num.
784                  */
785                 spin_lock_init(&qp->r_lock);
786                 spin_lock_init(&qp->s_hlock);
787                 spin_lock_init(&qp->s_lock);
788                 spin_lock_init(&qp->r_rq.lock);
789                 atomic_set(&qp->refcount, 0);
790                 atomic_set(&qp->local_ops_pending, 0);
791                 init_waitqueue_head(&qp->wait);
792                 init_timer(&qp->s_timer);
793                 qp->s_timer.data = (unsigned long)qp;
794                 INIT_LIST_HEAD(&qp->rspwait);
795                 qp->state = IB_QPS_RESET;
796                 qp->s_wq = swq;
797                 qp->s_size = sqsize;
798                 qp->s_avail = init_attr->cap.max_send_wr;
799                 qp->s_max_sge = init_attr->cap.max_send_sge;
800                 if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
801                         qp->s_flags = RVT_S_SIGNAL_REQ_WR;
802
803                 err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
804                                 init_attr->qp_type,
805                                 init_attr->port_num);
806                 if (err < 0) {
807                         ret = ERR_PTR(err);
808                         goto bail_rq_wq;
809                 }
810                 qp->ibqp.qp_num = err;
811                 qp->port_num = init_attr->port_num;
812                 rvt_init_qp(rdi, qp, init_attr->qp_type);
813                 break;
814
815         default:
816                 /* Don't support raw QPs */
817                 return ERR_PTR(-EINVAL);
818         }
819
820         init_attr->cap.max_inline_data = 0;
821
822         /*
823          * Return the address of the RWQ as the offset to mmap.
824          * See rvt_mmap() for details.
825          */
826         if (udata && udata->outlen >= sizeof(__u64)) {
827                 if (!qp->r_rq.wq) {
828                         __u64 offset = 0;
829
830                         err = ib_copy_to_udata(udata, &offset,
831                                                sizeof(offset));
832                         if (err) {
833                                 ret = ERR_PTR(err);
834                                 goto bail_qpn;
835                         }
836                 } else {
837                         u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
838
839                         qp->ip = rvt_create_mmap_info(rdi, s,
840                                                       ibpd->uobject->context,
841                                                       qp->r_rq.wq);
842                         if (!qp->ip) {
843                                 ret = ERR_PTR(-ENOMEM);
844                                 goto bail_qpn;
845                         }
846
847                         err = ib_copy_to_udata(udata, &qp->ip->offset,
848                                                sizeof(qp->ip->offset));
849                         if (err) {
850                                 ret = ERR_PTR(err);
851                                 goto bail_ip;
852                         }
853                 }
854                 qp->pid = current->pid;
855         }
856
857         spin_lock(&rdi->n_qps_lock);
858         if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
859                 spin_unlock(&rdi->n_qps_lock);
860                 ret = ERR_PTR(-ENOMEM);
861                 goto bail_ip;
862         }
863
864         rdi->n_qps_allocated++;
865         /*
866          * Maintain a busy_jiffies variable that will be added to the timeout
867          * period in mod_retry_timer and add_retry_timer. This busy jiffies
868          * is scaled by the number of rc qps created for the device to reduce
869          * the number of timeouts occurring when there is a large number of
870          * qps. busy_jiffies is incremented every rc qp scaling interval.
871          * The scaling interval is selected based on extensive performance
872          * evaluation of targeted workloads.
873          */
874         if (init_attr->qp_type == IB_QPT_RC) {
875                 rdi->n_rc_qps++;
876                 rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
877         }
878         spin_unlock(&rdi->n_qps_lock);
879
880         if (qp->ip) {
881                 spin_lock_irq(&rdi->pending_lock);
882                 list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps);
883                 spin_unlock_irq(&rdi->pending_lock);
884         }
885
886         ret = &qp->ibqp;
887
888         /*
889          * We have our QP and it's good; now keep track of what types of opcodes
890          * can be processed on this QP. We do this by keeping track of what the
891          * 3 high order bits of the opcode are.
892          */
893         switch (init_attr->qp_type) {
894         case IB_QPT_SMI:
895         case IB_QPT_GSI:
896         case IB_QPT_UD:
897                 qp->allowed_ops = IB_OPCODE_UD;
898                 break;
899         case IB_QPT_RC:
900                 qp->allowed_ops = IB_OPCODE_RC;
901                 break;
902         case IB_QPT_UC:
903                 qp->allowed_ops = IB_OPCODE_UC;
904                 break;
905         default:
906                 ret = ERR_PTR(-EINVAL);
907                 goto bail_ip;
908         }
909
910         return ret;
911
912 bail_ip:
913         if (qp->ip)
914                 kref_put(&qp->ip->ref, rvt_release_mmap_info);
915
916 bail_qpn:
917         free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
918
919 bail_rq_wq:
920         if (!qp->ip)
921                 vfree(qp->r_rq.wq);
922
923 bail_driver_priv:
924         rdi->driver_f.qp_priv_free(rdi, qp);
925
926 bail_qp:
927         kfree(qp->s_ack_queue);
928         kfree(qp);
929
930 bail_swq:
931         vfree(swq);
932
933         return ret;
934 }
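
/*
 * Usage sketch (illustrative only, not taken from this file): a kernel
 * ULP reaches rvt_create_qp() through the core verbs entry point,
 * roughly as follows. The capability values are made-up examples; they
 * are only checked against the rdi->dparms.props limits as shown above.
 *
 *      struct ib_qp_init_attr init_attr = {
 *              .qp_type        = IB_QPT_RC,
 *              .sq_sig_type    = IB_SIGNAL_REQ_WR,
 *              .send_cq        = send_cq,
 *              .recv_cq        = recv_cq,
 *              .cap = {
 *                      .max_send_wr    = 64,
 *                      .max_recv_wr    = 64,
 *                      .max_send_sge   = 1,
 *                      .max_recv_sge   = 1,
 *              },
 *      };
 *      struct ib_qp *qp = ib_create_qp(pd, &init_attr);
 */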
935
936 /**
937  * rvt_error_qp - put a QP into the error state
938  * @qp: the QP to put into the error state
939  * @err: the receive completion error to signal if a RWQE is active
940  *
941  * Flushes both send and receive work queues.
942  *
943  * Return: true if last WQE event should be generated.
944  * The QP r_lock and s_lock should be held and interrupts disabled.
945  * If we are already in error state, just return.
946  */
947 int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
948 {
949         struct ib_wc wc;
950         int ret = 0;
951         struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
952
953         lockdep_assert_held(&qp->r_lock);
954         lockdep_assert_held(&qp->s_lock);
955         if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
956                 goto bail;
957
958         qp->state = IB_QPS_ERR;
959
960         if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
961                 qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
962                 del_timer(&qp->s_timer);
963         }
964
965         if (qp->s_flags & RVT_S_ANY_WAIT_SEND)
966                 qp->s_flags &= ~RVT_S_ANY_WAIT_SEND;
967
968         rdi->driver_f.notify_error_qp(qp);
969
970         /* Schedule the sending tasklet to drain the send work queue. */
971         if (ACCESS_ONCE(qp->s_last) != qp->s_head)
972                 rdi->driver_f.schedule_send(qp);
973
974         rvt_clear_mr_refs(qp, 0);
975
976         memset(&wc, 0, sizeof(wc));
977         wc.qp = &qp->ibqp;
978         wc.opcode = IB_WC_RECV;
979
980         if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) {
981                 wc.wr_id = qp->r_wr_id;
982                 wc.status = err;
983                 rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
984         }
985         wc.status = IB_WC_WR_FLUSH_ERR;
986
987         if (qp->r_rq.wq) {
988                 struct rvt_rwq *wq;
989                 u32 head;
990                 u32 tail;
991
992                 spin_lock(&qp->r_rq.lock);
993
994                 /* sanity check pointers before trusting them */
995                 wq = qp->r_rq.wq;
996                 head = wq->head;
997                 if (head >= qp->r_rq.size)
998                         head = 0;
999                 tail = wq->tail;
1000                 if (tail >= qp->r_rq.size)
1001                         tail = 0;
1002                 while (tail != head) {
1003                         wc.wr_id = rvt_get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
1004                         if (++tail >= qp->r_rq.size)
1005                                 tail = 0;
1006                         rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
1007                 }
1008                 wq->tail = tail;
1009
1010                 spin_unlock(&qp->r_rq.lock);
1011         } else if (qp->ibqp.event_handler) {
1012                 ret = 1;
1013         }
1014
1015 bail:
1016         return ret;
1017 }
1018 EXPORT_SYMBOL(rvt_error_qp);
1019
1020 /*
1021  * Put the QP into the hash table.
1022  * The hash table holds a reference to the QP.
1023  */
1024 static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
1025 {
1026         struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
1027         unsigned long flags;
1028
1029         rvt_get_qp(qp);
1030         spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
1031
1032         if (qp->ibqp.qp_num <= 1) {
1033                 rcu_assign_pointer(rvp->qp[qp->ibqp.qp_num], qp);
1034         } else {
1035                 u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
1036
1037                 qp->next = rdi->qp_dev->qp_table[n];
1038                 rcu_assign_pointer(rdi->qp_dev->qp_table[n], qp);
1039                 trace_rvt_qpinsert(qp, n);
1040         }
1041
1042         spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
1043 }
1044
1045 /**
1046  * rvt_modify_qp - modify the attributes of a queue pair
1047  * @ibqp: the queue pair whose attributes we're modifying
1048  * @attr: the new attributes
1049  * @attr_mask: the mask of attributes to modify
1050  * @udata: user data for libibverbs.so
1051  *
1052  * Return: 0 on success, otherwise returns an errno.
1053  */
1054 int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1055                   int attr_mask, struct ib_udata *udata)
1056 {
1057         struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1058         struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1059         enum ib_qp_state cur_state, new_state;
1060         struct ib_event ev;
1061         int lastwqe = 0;
1062         int mig = 0;
1063         int pmtu = 0; /* for gcc warning only */
1064         enum rdma_link_layer link;
1065
1066         link = rdma_port_get_link_layer(ibqp->device, qp->port_num);
1067
1068         spin_lock_irq(&qp->r_lock);
1069         spin_lock(&qp->s_hlock);
1070         spin_lock(&qp->s_lock);
1071
1072         cur_state = attr_mask & IB_QP_CUR_STATE ?
1073                 attr->cur_qp_state : qp->state;
1074         new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1075
1076         if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
1077                                 attr_mask, link))
1078                 goto inval;
1079
1080         if (rdi->driver_f.check_modify_qp &&
1081             rdi->driver_f.check_modify_qp(qp, attr, attr_mask, udata))
1082                 goto inval;
1083
1084         if (attr_mask & IB_QP_AV) {
1085                 if (rdma_ah_get_dlid(&attr->ah_attr) >=
1086                     be16_to_cpu(IB_MULTICAST_LID_BASE))
1087                         goto inval;
1088                 if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr))
1089                         goto inval;
1090         }
1091
1092         if (attr_mask & IB_QP_ALT_PATH) {
1093                 if (rdma_ah_get_dlid(&attr->alt_ah_attr) >=
1094                     be16_to_cpu(IB_MULTICAST_LID_BASE))
1095                         goto inval;
1096                 if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
1097                         goto inval;
1098                 if (attr->alt_pkey_index >= rvt_get_npkeys(rdi))
1099                         goto inval;
1100         }
1101
1102         if (attr_mask & IB_QP_PKEY_INDEX)
1103                 if (attr->pkey_index >= rvt_get_npkeys(rdi))
1104                         goto inval;
1105
1106         if (attr_mask & IB_QP_MIN_RNR_TIMER)
1107                 if (attr->min_rnr_timer > 31)
1108                         goto inval;
1109
1110         if (attr_mask & IB_QP_PORT)
1111                 if (qp->ibqp.qp_type == IB_QPT_SMI ||
1112                     qp->ibqp.qp_type == IB_QPT_GSI ||
1113                     attr->port_num == 0 ||
1114                     attr->port_num > ibqp->device->phys_port_cnt)
1115                         goto inval;
1116
1117         if (attr_mask & IB_QP_DEST_QPN)
1118                 if (attr->dest_qp_num > RVT_QPN_MASK)
1119                         goto inval;
1120
1121         if (attr_mask & IB_QP_RETRY_CNT)
1122                 if (attr->retry_cnt > 7)
1123                         goto inval;
1124
1125         if (attr_mask & IB_QP_RNR_RETRY)
1126                 if (attr->rnr_retry > 7)
1127                         goto inval;
1128
1129         /*
1130          * Don't allow invalid path_mtu values.  OK to set greater
1131          * than the active mtu (or even the max_cap, if we have tuned
1132          * that to a small mtu).  We'll set qp->path_mtu
1133          * to the lesser of requested attribute mtu and active,
1134          * for packetizing messages.
1135          * Note that the QP port has to be set in INIT and MTU in RTR.
1136          */
1137         if (attr_mask & IB_QP_PATH_MTU) {
1138                 pmtu = rdi->driver_f.get_pmtu_from_attr(rdi, qp, attr);
1139                 if (pmtu < 0)
1140                         goto inval;
1141         }
1142
1143         if (attr_mask & IB_QP_PATH_MIG_STATE) {
1144                 if (attr->path_mig_state == IB_MIG_REARM) {
1145                         if (qp->s_mig_state == IB_MIG_ARMED)
1146                                 goto inval;
1147                         if (new_state != IB_QPS_RTS)
1148                                 goto inval;
1149                 } else if (attr->path_mig_state == IB_MIG_MIGRATED) {
1150                         if (qp->s_mig_state == IB_MIG_REARM)
1151                                 goto inval;
1152                         if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
1153                                 goto inval;
1154                         if (qp->s_mig_state == IB_MIG_ARMED)
1155                                 mig = 1;
1156                 } else {
1157                         goto inval;
1158                 }
1159         }
1160
1161         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1162                 if (attr->max_dest_rd_atomic > rdi->dparms.max_rdma_atomic)
1163                         goto inval;
1164
1165         switch (new_state) {
1166         case IB_QPS_RESET:
1167                 if (qp->state != IB_QPS_RESET)
1168                         rvt_reset_qp(rdi, qp, ibqp->qp_type);
1169                 break;
1170
1171         case IB_QPS_RTR:
1172                 /* Allow event to re-trigger if QP set to RTR more than once */
1173                 qp->r_flags &= ~RVT_R_COMM_EST;
1174                 qp->state = new_state;
1175                 break;
1176
1177         case IB_QPS_SQD:
1178                 qp->s_draining = qp->s_last != qp->s_cur;
1179                 qp->state = new_state;
1180                 break;
1181
1182         case IB_QPS_SQE:
1183                 if (qp->ibqp.qp_type == IB_QPT_RC)
1184                         goto inval;
1185                 qp->state = new_state;
1186                 break;
1187
1188         case IB_QPS_ERR:
1189                 lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1190                 break;
1191
1192         default:
1193                 qp->state = new_state;
1194                 break;
1195         }
1196
1197         if (attr_mask & IB_QP_PKEY_INDEX)
1198                 qp->s_pkey_index = attr->pkey_index;
1199
1200         if (attr_mask & IB_QP_PORT)
1201                 qp->port_num = attr->port_num;
1202
1203         if (attr_mask & IB_QP_DEST_QPN)
1204                 qp->remote_qpn = attr->dest_qp_num;
1205
1206         if (attr_mask & IB_QP_SQ_PSN) {
1207                 qp->s_next_psn = attr->sq_psn & rdi->dparms.psn_modify_mask;
1208                 qp->s_psn = qp->s_next_psn;
1209                 qp->s_sending_psn = qp->s_next_psn;
1210                 qp->s_last_psn = qp->s_next_psn - 1;
1211                 qp->s_sending_hpsn = qp->s_last_psn;
1212         }
1213
1214         if (attr_mask & IB_QP_RQ_PSN)
1215                 qp->r_psn = attr->rq_psn & rdi->dparms.psn_modify_mask;
1216
1217         if (attr_mask & IB_QP_ACCESS_FLAGS)
1218                 qp->qp_access_flags = attr->qp_access_flags;
1219
1220         if (attr_mask & IB_QP_AV) {
1221                 qp->remote_ah_attr = attr->ah_attr;
1222                 qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr);
1223                 qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
1224         }
1225
1226         if (attr_mask & IB_QP_ALT_PATH) {
1227                 qp->alt_ah_attr = attr->alt_ah_attr;
1228                 qp->s_alt_pkey_index = attr->alt_pkey_index;
1229         }
1230
1231         if (attr_mask & IB_QP_PATH_MIG_STATE) {
1232                 qp->s_mig_state = attr->path_mig_state;
1233                 if (mig) {
1234                         qp->remote_ah_attr = qp->alt_ah_attr;
1235                         qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
1236                         qp->s_pkey_index = qp->s_alt_pkey_index;
1237                 }
1238         }
1239
1240         if (attr_mask & IB_QP_PATH_MTU) {
1241                 qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu);
1242                 qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu);
1243                 qp->log_pmtu = ilog2(qp->pmtu);
1244         }
1245
1246         if (attr_mask & IB_QP_RETRY_CNT) {
1247                 qp->s_retry_cnt = attr->retry_cnt;
1248                 qp->s_retry = attr->retry_cnt;
1249         }
1250
1251         if (attr_mask & IB_QP_RNR_RETRY) {
1252                 qp->s_rnr_retry_cnt = attr->rnr_retry;
1253                 qp->s_rnr_retry = attr->rnr_retry;
1254         }
1255
1256         if (attr_mask & IB_QP_MIN_RNR_TIMER)
1257                 qp->r_min_rnr_timer = attr->min_rnr_timer;
1258
1259         if (attr_mask & IB_QP_TIMEOUT) {
1260                 qp->timeout = attr->timeout;
1261                 qp->timeout_jiffies =
1262                         usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
1263                                 1000UL);
1264         }
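        /*
         * The expression above implements the IBTA local ACK timeout of
         * 4.096 us * 2^timeout: e.g. attr->timeout == 14 gives
         * (4096 << 14) / 1000 ~= 67108 us, roughly 67 ms.
         */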
1265
1266         if (attr_mask & IB_QP_QKEY)
1267                 qp->qkey = attr->qkey;
1268
1269         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1270                 qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
1271
1272         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
1273                 qp->s_max_rd_atomic = attr->max_rd_atomic;
1274
1275         if (rdi->driver_f.modify_qp)
1276                 rdi->driver_f.modify_qp(qp, attr, attr_mask, udata);
1277
1278         spin_unlock(&qp->s_lock);
1279         spin_unlock(&qp->s_hlock);
1280         spin_unlock_irq(&qp->r_lock);
1281
1282         if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1283                 rvt_insert_qp(rdi, qp);
1284
1285         if (lastwqe) {
1286                 ev.device = qp->ibqp.device;
1287                 ev.element.qp = &qp->ibqp;
1288                 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1289                 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1290         }
1291         if (mig) {
1292                 ev.device = qp->ibqp.device;
1293                 ev.element.qp = &qp->ibqp;
1294                 ev.event = IB_EVENT_PATH_MIG;
1295                 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1296         }
1297         return 0;
1298
1299 inval:
1300         spin_unlock(&qp->s_lock);
1301         spin_unlock(&qp->s_hlock);
1302         spin_unlock_irq(&qp->r_lock);
1303         return -EINVAL;
1304 }
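
/*
 * Usage sketch (illustrative only, not taken from this file): a typical
 * INIT transition driven from a ULP looks roughly like the following;
 * the attribute values are examples, not requirements of rdmavt.
 *
 *      struct ib_qp_attr attr = {
 *              .qp_state         = IB_QPS_INIT,
 *              .pkey_index       = 0,
 *              .port_num         = 1,
 *              .qp_access_flags  = IB_ACCESS_REMOTE_WRITE,
 *      };
 *      ret = ib_modify_qp(qp, &attr,
 *                         IB_QP_STATE | IB_QP_PKEY_INDEX |
 *                         IB_QP_PORT | IB_QP_ACCESS_FLAGS);
 *
 * ib_modify_qp() lands here via the verbs registered by the driver.
 */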
1305
1306 /**
 * rvt_free_qpn - Free a qpn from the bit map
1307  * @qpt: QP table
1308  * @qpn: queue pair number to free
1309  */
1310 static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
1311 {
1312         struct rvt_qpn_map *map;
1313
1314         map = qpt->map + qpn / RVT_BITS_PER_PAGE;
1315         if (map->page)
1316                 clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
1317 }
1318
1319 /**
1320  * rvt_destroy_qp - destroy a queue pair
1321  * @ibqp: the queue pair to destroy
1322  *
1323  * Note that this can be called while the QP is actively sending or
1324  * receiving!
1325  *
1326  * Return: 0 on success.
1327  */
1328 int rvt_destroy_qp(struct ib_qp *ibqp)
1329 {
1330         struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1331         struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1332
1333         spin_lock_irq(&qp->r_lock);
1334         spin_lock(&qp->s_hlock);
1335         spin_lock(&qp->s_lock);
1336         rvt_reset_qp(rdi, qp, ibqp->qp_type);
1337         spin_unlock(&qp->s_lock);
1338         spin_unlock(&qp->s_hlock);
1339         spin_unlock_irq(&qp->r_lock);
1340
1341         /* qpn is now available for use again */
1342         rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
1343
1344         spin_lock(&rdi->n_qps_lock);
1345         rdi->n_qps_allocated--;
1346         if (qp->ibqp.qp_type == IB_QPT_RC) {
1347                 rdi->n_rc_qps--;
1348                 rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
1349         }
1350         spin_unlock(&rdi->n_qps_lock);
1351
1352         if (qp->ip)
1353                 kref_put(&qp->ip->ref, rvt_release_mmap_info);
1354         else
1355                 vfree(qp->r_rq.wq);
1356         vfree(qp->s_wq);
1357         rdi->driver_f.qp_priv_free(rdi, qp);
1358         kfree(qp->s_ack_queue);
1359         kfree(qp);
1360         return 0;
1361 }
1362
1363 /**
1364  * rvt_query_qp - query an ibqp
1365  * @ibqp: IB qp to query
1366  * @attr: attr struct to fill in
1367  * @attr_mask: attr mask ignored
1368  * @init_attr: struct to fill in
1369  *
1370  * Return: always 0
1371  */
1372 int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1373                  int attr_mask, struct ib_qp_init_attr *init_attr)
1374 {
1375         struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1376         struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1377
1378         attr->qp_state = qp->state;
1379         attr->cur_qp_state = attr->qp_state;
1380         attr->path_mtu = qp->path_mtu;
1381         attr->path_mig_state = qp->s_mig_state;
1382         attr->qkey = qp->qkey;
1383         attr->rq_psn = qp->r_psn & rdi->dparms.psn_mask;
1384         attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask;
1385         attr->dest_qp_num = qp->remote_qpn;
1386         attr->qp_access_flags = qp->qp_access_flags;
1387         attr->cap.max_send_wr = qp->s_size - 1 -
1388                 rdi->dparms.reserved_operations;
1389         attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
1390         attr->cap.max_send_sge = qp->s_max_sge;
1391         attr->cap.max_recv_sge = qp->r_rq.max_sge;
1392         attr->cap.max_inline_data = 0;
1393         attr->ah_attr = qp->remote_ah_attr;
1394         attr->alt_ah_attr = qp->alt_ah_attr;
1395         attr->pkey_index = qp->s_pkey_index;
1396         attr->alt_pkey_index = qp->s_alt_pkey_index;
1397         attr->en_sqd_async_notify = 0;
1398         attr->sq_draining = qp->s_draining;
1399         attr->max_rd_atomic = qp->s_max_rd_atomic;
1400         attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
1401         attr->min_rnr_timer = qp->r_min_rnr_timer;
1402         attr->port_num = qp->port_num;
1403         attr->timeout = qp->timeout;
1404         attr->retry_cnt = qp->s_retry_cnt;
1405         attr->rnr_retry = qp->s_rnr_retry_cnt;
1406         attr->alt_port_num =
1407                 rdma_ah_get_port_num(&qp->alt_ah_attr);
1408         attr->alt_timeout = qp->alt_timeout;
1409
1410         init_attr->event_handler = qp->ibqp.event_handler;
1411         init_attr->qp_context = qp->ibqp.qp_context;
1412         init_attr->send_cq = qp->ibqp.send_cq;
1413         init_attr->recv_cq = qp->ibqp.recv_cq;
1414         init_attr->srq = qp->ibqp.srq;
1415         init_attr->cap = attr->cap;
1416         if (qp->s_flags & RVT_S_SIGNAL_REQ_WR)
1417                 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
1418         else
1419                 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
1420         init_attr->qp_type = qp->ibqp.qp_type;
1421         init_attr->port_num = qp->port_num;
1422         return 0;
1423 }
1424
1425 /**
1426  * rvt_post_recv - post a receive on a QP
1427  * @ibqp: the QP to post the receive on
1428  * @wr: the WR to post
1429  * @bad_wr: the first bad WR is put here
1430  *
1431  * This may be called from interrupt context.
1432  *
1433  * Return: 0 on success otherwise errno
1434  */
1435 int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1436                   struct ib_recv_wr **bad_wr)
1437 {
1438         struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1439         struct rvt_rwq *wq = qp->r_rq.wq;
1440         unsigned long flags;
1441         int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) &&
1442                                 !qp->ibqp.srq;
1443
1444         /* Check that state is OK to post receive. */
1445         if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) {
1446                 *bad_wr = wr;
1447                 return -EINVAL;
1448         }
1449
1450         for (; wr; wr = wr->next) {
1451                 struct rvt_rwqe *wqe;
1452                 u32 next;
1453                 int i;
1454
1455                 if ((unsigned)wr->num_sge > qp->r_rq.max_sge) {
1456                         *bad_wr = wr;
1457                         return -EINVAL;
1458                 }
1459
1460                 spin_lock_irqsave(&qp->r_rq.lock, flags);
1461                 next = wq->head + 1;
1462                 if (next >= qp->r_rq.size)
1463                         next = 0;
1464                 if (next == wq->tail) {
1465                         spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1466                         *bad_wr = wr;
1467                         return -ENOMEM;
1468                 }
1469                 if (unlikely(qp_err_flush)) {
1470                         struct ib_wc wc;
1471
1472                         memset(&wc, 0, sizeof(wc));
1473                         wc.qp = &qp->ibqp;
1474                         wc.opcode = IB_WC_RECV;
1475                         wc.wr_id = wr->wr_id;
1476                         wc.status = IB_WC_WR_FLUSH_ERR;
1477                         rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
1478                 } else {
1479                         wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head);
1480                         wqe->wr_id = wr->wr_id;
1481                         wqe->num_sge = wr->num_sge;
1482                         for (i = 0; i < wr->num_sge; i++)
1483                                 wqe->sg_list[i] = wr->sg_list[i];
1484                         /*
1485                          * Make sure queue entry is written
1486                          * before the head index.
1487                          */
1488                         smp_wmb();
1489                         wq->head = next;
1490                 }
1491                 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1492         }
1493         return 0;
1494 }
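
/*
 * Editor's illustrative sketch (not upstream rdmavt code): posting a single
 * receive WR from a consumer; it ends up in the ring handled above.  The
 * buffer address, length, lkey and context pointer are hypothetical, and
 * memory registration is assumed to have been done already.
 */
#if 0
	struct ib_sge sge = {
		.addr   = buf_dma,	/* DMA address of the receive buffer */
		.length = buf_len,
		.lkey   = lkey,
	};
	struct ib_recv_wr wr = {
		.wr_id   = (u64)(uintptr_t)ctx,	/* echoed back in the CQE */
		.sg_list = &sge,
		.num_sge = 1,
	};
	struct ib_recv_wr *bad_wr;

	if (ib_post_recv(qp, &wr, &bad_wr))
		pr_err("receive queue full or QP not ready\n");
#endif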
1495
1496 /**
1497  * rvt_qp_valid_operation - validate post send wr request
1498  * @qp: the qp
1499  * @post_parms: the post send table for the driver
1500  * @wr: the work request
1501  *
1502  * The routine validates the operation based on the
1503  * validation table and returns the length of the operation,
1504  * which can extend beyond the ib_send_wr.  Operation-dependent
1505  * flags gate the extra validation done for atomic operations.
1506  *
1507  * There is an exception for UD qps that validates the pd and
1508  * overrides the length to include the additional UD specific
1509  * length.
1510  *
1511  * Return: a negative error or the length of the work request
1512  * for building the swqe.
1513  */
1514 static inline int rvt_qp_valid_operation(
1515         struct rvt_qp *qp,
1516         const struct rvt_operation_params *post_parms,
1517         struct ib_send_wr *wr)
1518 {
1519         int len;
1520
1521         if (wr->opcode >= RVT_OPERATION_MAX || !post_parms[wr->opcode].length)
1522                 return -EINVAL;
1523         if (!(post_parms[wr->opcode].qpt_support & BIT(qp->ibqp.qp_type)))
1524                 return -EINVAL;
1525         if ((post_parms[wr->opcode].flags & RVT_OPERATION_PRIV) &&
1526             ibpd_to_rvtpd(qp->ibqp.pd)->user)
1527                 return -EINVAL;
1528         if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC_SGE &&
1529             (wr->num_sge == 0 ||
1530              wr->sg_list[0].length < sizeof(u64) ||
1531              wr->sg_list[0].addr & (sizeof(u64) - 1)))
1532                 return -EINVAL;
1533         if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC &&
1534             !qp->s_max_rd_atomic)
1535                 return -EINVAL;
1536         len = post_parms[wr->opcode].length;
1537         /* UD specific */
1538         if (qp->ibqp.qp_type != IB_QPT_UC &&
1539             qp->ibqp.qp_type != IB_QPT_RC) {
1540                 if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
1541                         return -EINVAL;
1542                 len = sizeof(struct ib_ud_wr);
1543         }
1544         return len;
1545 }
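
/*
 * Editor's illustrative sketch (not upstream rdmavt code): the shape of the
 * per-driver table consulted above via rdi->post_parms.  The entries below
 * only approximate what a driver such as qib or hfi1 registers; the exact
 * contents are driver specific.
 */
#if 0
static const struct rvt_operation_params example_post_parms[RVT_OPERATION_MAX] = {
	[IB_WR_SEND] = {
		.length = sizeof(struct ib_send_wr),
		.qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
	},
	[IB_WR_RDMA_WRITE] = {
		.length = sizeof(struct ib_rdma_wr),
		.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
	},
	[IB_WR_ATOMIC_CMP_AND_SWP] = {
		.length = sizeof(struct ib_atomic_wr),
		.qpt_support = BIT(IB_QPT_RC),
		.flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
	},
	[IB_WR_REG_MR] = {
		.length = sizeof(struct ib_reg_wr),
		.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
		.flags = RVT_OPERATION_LOCAL,
	},
};
#endif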
1546
1547 /**
1548  * rvt_qp_is_avail - determine queue capacity
1549  * @qp: the qp
1550  * @rdi: the rdmavt device
1551  * @reserved_op: true when posting a reserved operation
1552  *
1553  * This assumes the s_hlock is held but the s_last
1554  * qp variable is uncontrolled (it may be updated concurrently).
1555  *
1556  * For non-reserved operations, qp->s_avail
1557  * may be updated.
1558  *
1559  * Return: 0 on success or -ENOMEM.
1560  */
1561 static inline int rvt_qp_is_avail(
1562         struct rvt_qp *qp,
1563         struct rvt_dev_info *rdi,
1564         bool reserved_op)
1565 {
1566         u32 slast;
1567         u32 avail;
1568         u32 reserved_used;
1569
1570         /* see rvt_qp_wqe_unreserve() */
1571         smp_mb__before_atomic();
1572         reserved_used = atomic_read(&qp->s_reserved_used);
1573         if (unlikely(reserved_op)) {
1574                 /* see rvt_qp_wqe_unreserve() */
1575                 smp_mb__before_atomic();
1576                 if (reserved_used >= rdi->dparms.reserved_operations)
1577                         return -ENOMEM;
1578                 return 0;
1579         }
1580         /* non-reserved operations */
1581         if (likely(qp->s_avail))
1582                 return 0;
1583         smp_read_barrier_depends(); /* see rc.c */
1584         slast = ACCESS_ONCE(qp->s_last);
1585         if (qp->s_head >= slast)
1586                 avail = qp->s_size - (qp->s_head - slast);
1587         else
1588                 avail = slast - qp->s_head;
1589
1590         /* see rvt_qp_wqe_unreserve() */
1591         smp_mb__before_atomic();
1592         reserved_used = atomic_read(&qp->s_reserved_used);
1593         avail = avail - 1 -
1594                 (rdi->dparms.reserved_operations - reserved_used);
1595         /* ensure we don't assign a negative s_avail */
1596         if ((s32)avail <= 0)
1597                 return -ENOMEM;
1598         qp->s_avail = avail;
1599         if (WARN_ON(qp->s_avail >
1600                     (qp->s_size - 1 - rdi->dparms.reserved_operations)))
1601                 rvt_pr_err(rdi,
1602                            "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u",
1603                            qp->ibqp.qp_num, qp->s_size, qp->s_avail,
1604                            qp->s_head, qp->s_tail, qp->s_cur,
1605                            qp->s_acked, qp->s_last);
1606         return 0;
1607 }
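
/*
 * Editor's worked example for the computation above (illustrative numbers):
 * with s_size = 32, reserved_operations = 1, s_head = 10 and s_last = 5,
 * the ring has 32 - (10 - 5) = 27 free entries.  One entry is never handed
 * out (the "- 1") and 1 - 0 = 1 entry is held back for reserved operations,
 * so qp->s_avail becomes 27 - 1 - 1 = 25.
 */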
1608
1609 /**
1610  * rvt_post_one_wr - post one RC, UC, or UD send work request
1611  * @qp: the QP to post on
1612  * @wr: the work request to send
1613  */
1614 static int rvt_post_one_wr(struct rvt_qp *qp,
1615                            struct ib_send_wr *wr,
1616                            int *call_send)
1617 {
1618         struct rvt_swqe *wqe;
1619         u32 next;
1620         int i;
1621         int j;
1622         int acc;
1623         struct rvt_lkey_table *rkt;
1624         struct rvt_pd *pd;
1625         struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
1626         u8 log_pmtu;
1627         int ret;
1628         size_t cplen;
1629         bool reserved_op;
1630         int local_ops_delayed = 0;
1631
1632         BUILD_BUG_ON(IB_QPT_MAX >= (sizeof(u32) * BITS_PER_BYTE));
1633
1634         /* IB spec says that num_sge == 0 is OK. */
1635         if (unlikely(wr->num_sge > qp->s_max_sge))
1636                 return -EINVAL;
1637
1638         ret = rvt_qp_valid_operation(qp, rdi->post_parms, wr);
1639         if (ret < 0)
1640                 return ret;
1641         cplen = ret;
1642
1643         /*
1644          * Local operations include fast register and local invalidate.
1645          * Fast register needs to be processed immediately because the
1646          * registered lkey may be used by following work requests and the
1647          * lkey needs to be valid at the time those requests are posted.
1648          * Local invalidate can be processed immediately if fencing is
1649          * not required and no previous local invalidate ops are pending.
1650          * Signaled local operations that have been processed immediately
1651          * need to have requests with "completion only" flags set posted
1652          * to the send queue in order to generate completions.
1653          */
1654         if ((rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL)) {
1655                 switch (wr->opcode) {
1656                 case IB_WR_REG_MR:
1657                         ret = rvt_fast_reg_mr(qp,
1658                                               reg_wr(wr)->mr,
1659                                               reg_wr(wr)->key,
1660                                               reg_wr(wr)->access);
1661                         if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
1662                                 return ret;
1663                         break;
1664                 case IB_WR_LOCAL_INV:
1665                         if ((wr->send_flags & IB_SEND_FENCE) ||
1666                             atomic_read(&qp->local_ops_pending)) {
1667                                 local_ops_delayed = 1;
1668                         } else {
1669                                 ret = rvt_invalidate_rkey(
1670                                         qp, wr->ex.invalidate_rkey);
1671                                 if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
1672                                         return ret;
1673                         }
1674                         break;
1675                 default:
1676                         return -EINVAL;
1677                 }
1678         }
1679
1680         reserved_op = rdi->post_parms[wr->opcode].flags &
1681                         RVT_OPERATION_USE_RESERVE;
1682         /* check for avail */
1683         ret = rvt_qp_is_avail(qp, rdi, reserved_op);
1684         if (ret)
1685                 return ret;
1686         next = qp->s_head + 1;
1687         if (next >= qp->s_size)
1688                 next = 0;
1689
1690         rkt = &rdi->lkey_table;
1691         pd = ibpd_to_rvtpd(qp->ibqp.pd);
1692         wqe = rvt_get_swqe_ptr(qp, qp->s_head);
1693
1694         /* cplen has length from above */
1695         memcpy(&wqe->wr, wr, cplen);
1696
1697         wqe->length = 0;
1698         j = 0;
1699         if (wr->num_sge) {
1700                 acc = wr->opcode >= IB_WR_RDMA_READ ?
1701                         IB_ACCESS_LOCAL_WRITE : 0;
1702                 for (i = 0; i < wr->num_sge; i++) {
1703                         u32 length = wr->sg_list[i].length;
1704                         int ok;
1705
1706                         if (length == 0)
1707                                 continue;
1708                         ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
1709                                          &wr->sg_list[i], acc);
1710                         if (!ok) {
1711                                 ret = -EINVAL;
1712                                 goto bail_inval_free;
1713                         }
1714                         wqe->length += length;
1715                         j++;
1716                 }
1717                 wqe->wr.num_sge = j;
1718         }
1719
1720         /* general part of wqe valid - allow for driver checks */
1721         if (rdi->driver_f.check_send_wqe) {
1722                 ret = rdi->driver_f.check_send_wqe(qp, wqe);
1723                 if (ret < 0)
1724                         goto bail_inval_free;
1725                 if (ret)
1726                         *call_send = ret;
1727         }
1728
1729         log_pmtu = qp->log_pmtu;
1730         if (qp->ibqp.qp_type != IB_QPT_UC &&
1731             qp->ibqp.qp_type != IB_QPT_RC) {
1732                 struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah);
1733
1734                 log_pmtu = ah->log_pmtu;
1735                 atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
1736         }
1737
1738         if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) {
1739                 if (local_ops_delayed)
1740                         atomic_inc(&qp->local_ops_pending);
1741                 else
1742                         wqe->wr.send_flags |= RVT_SEND_COMPLETION_ONLY;
1743                 wqe->ssn = 0;
1744                 wqe->psn = 0;
1745                 wqe->lpsn = 0;
1746         } else {
1747                 wqe->ssn = qp->s_ssn++;
1748                 wqe->psn = qp->s_next_psn;
1749                 wqe->lpsn = wqe->psn +
1750                                 (wqe->length ?
1751                                         ((wqe->length - 1) >> log_pmtu) :
1752                                         0);
1753                 qp->s_next_psn = wqe->lpsn + 1;
1754         }
1755         if (unlikely(reserved_op)) {
1756                 wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
1757                 rvt_qp_wqe_reserve(qp, wqe);
1758         } else {
1759                 wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
1760                 qp->s_avail--;
1761         }
1762         trace_rvt_post_one_wr(qp, wqe);
1763         smp_wmb(); /* see request builders */
1764         qp->s_head = next;
1765
1766         return 0;
1767
1768 bail_inval_free:
1769         /* release mr holds */
1770         while (j) {
1771                 struct rvt_sge *sge = &wqe->sg_list[--j];
1772
1773                 rvt_put_mr(sge->mr);
1774         }
1775         return ret;
1776 }
1777
1778 /**
1779  * rvt_post_send - post a send on a QP
1780  * @ibqp: the QP to post the send on
1781  * @wr: the list of work requests to post
1782  * @bad_wr: the first bad WR is put here
1783  *
1784  * This may be called from interrupt context.
1785  *
1786  * Return: 0 on success else errno
1787  */
1788 int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1789                   struct ib_send_wr **bad_wr)
1790 {
1791         struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1792         struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1793         unsigned long flags = 0;
1794         int call_send;
1795         unsigned nreq = 0;
1796         int err = 0;
1797
1798         spin_lock_irqsave(&qp->s_hlock, flags);
1799
1800         /*
1801          * Ensure QP state is such that we can send. If not bail out early,
1802          * there is no need to do this every time we post a send.
1803          */
1804         if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) {
1805                 spin_unlock_irqrestore(&qp->s_hlock, flags);
1806                 return -EINVAL;
1807         }
1808
1809         /*
1810          * If the send queue is empty, and we only have a single WR then just go
1811          * ahead and kick the send engine into gear. Otherwise we will always
1812          * just schedule the send to happen later.
1813          */
1814         call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next;
1815
1816         for (; wr; wr = wr->next) {
1817                 err = rvt_post_one_wr(qp, wr, &call_send);
1818                 if (unlikely(err)) {
1819                         *bad_wr = wr;
1820                         goto bail;
1821                 }
1822                 nreq++;
1823         }
1824 bail:
1825         spin_unlock_irqrestore(&qp->s_hlock, flags);
1826         if (nreq) {
1827                 if (call_send)
1828                         rdi->driver_f.do_send(qp);
1829                 else
1830                         rdi->driver_f.schedule_send_no_lock(qp);
1831         }
1832         return err;
1833 }
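
/*
 * Editor's illustrative sketch (not upstream rdmavt code): posting one
 * signaled SEND from a consumer.  With an otherwise empty send queue this
 * takes the call_send fast path above.  The buffer, lkey and context names
 * are hypothetical; the buffer is assumed to be registered already.
 */
#if 0
	struct ib_sge sge = {
		.addr   = msg_dma,
		.length = msg_len,
		.lkey   = lkey,
	};
	struct ib_send_wr wr = {
		.wr_id      = (u64)(uintptr_t)ctx,
		.sg_list    = &sge,
		.num_sge    = 1,
		.opcode     = IB_WR_SEND,
		.send_flags = IB_SEND_SIGNALED,
	};
	struct ib_send_wr *bad_wr;

	if (ib_post_send(qp, &wr, &bad_wr))
		pr_err("failed to post send\n");
#endif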
1834
1835 /**
1836  * rvt_post_srq_recv - post a receive on a shared receive queue
1837  * @ibsrq: the SRQ to post the receive on
1838  * @wr: the list of work requests to post
1839  * @bad_wr: A pointer to the first WR to cause a problem is put here
1840  *
1841  * This may be called from interrupt context.
1842  *
1843  * Return: 0 on success else errno
1844  */
1845 int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
1846                       struct ib_recv_wr **bad_wr)
1847 {
1848         struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
1849         struct rvt_rwq *wq;
1850         unsigned long flags;
1851
1852         for (; wr; wr = wr->next) {
1853                 struct rvt_rwqe *wqe;
1854                 u32 next;
1855                 int i;
1856
1857                 if ((unsigned)wr->num_sge > srq->rq.max_sge) {
1858                         *bad_wr = wr;
1859                         return -EINVAL;
1860                 }
1861
1862                 spin_lock_irqsave(&srq->rq.lock, flags);
1863                 wq = srq->rq.wq;
1864                 next = wq->head + 1;
1865                 if (next >= srq->rq.size)
1866                         next = 0;
1867                 if (next == wq->tail) {
1868                         spin_unlock_irqrestore(&srq->rq.lock, flags);
1869                         *bad_wr = wr;
1870                         return -ENOMEM;
1871                 }
1872
1873                 wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head);
1874                 wqe->wr_id = wr->wr_id;
1875                 wqe->num_sge = wr->num_sge;
1876                 for (i = 0; i < wr->num_sge; i++)
1877                         wqe->sg_list[i] = wr->sg_list[i];
1878                 /* Make sure queue entry is written before the head index. */
1879                 smp_wmb();
1880                 wq->head = next;
1881                 spin_unlock_irqrestore(&srq->rq.lock, flags);
1882         }
1883         return 0;
1884 }
1885
1886 /**
1887  * rvt_comm_est - signal that communication is established on the QP
1888  * @qp: the QP
1889  */
1890 void rvt_comm_est(struct rvt_qp *qp)
1891 {
1892         qp->r_flags |= RVT_R_COMM_EST;
1893         if (qp->ibqp.event_handler) {
1894                 struct ib_event ev;
1895
1896                 ev.device = qp->ibqp.device;
1897                 ev.element.qp = &qp->ibqp;
1898                 ev.event = IB_EVENT_COMM_EST;
1899                 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1900         }
1901 }
1902 EXPORT_SYMBOL(rvt_comm_est);
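
/*
 * Editor's illustrative sketch (not upstream rdmavt code): a consumer event
 * handler that would observe the IB_EVENT_COMM_EST generated above.  The
 * handler is installed through ib_qp_init_attr.event_handler at QP creation;
 * the function name is hypothetical.
 */
#if 0
static void example_qp_event_handler(struct ib_event *event, void *context)
{
	switch (event->event) {
	case IB_EVENT_COMM_EST:
		/* a packet arrived while the QP was still waiting in RTR */
		pr_info("QP %u: communication established\n",
			event->element.qp->qp_num);
		break;
	default:
		break;
	}
}
#endif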
1903
1904 void rvt_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
1905 {
1906         unsigned long flags;
1907         int lastwqe;
1908
1909         spin_lock_irqsave(&qp->s_lock, flags);
1910         lastwqe = rvt_error_qp(qp, err);
1911         spin_unlock_irqrestore(&qp->s_lock, flags);
1912
1913         if (lastwqe) {
1914                 struct ib_event ev;
1915
1916                 ev.device = qp->ibqp.device;
1917                 ev.element.qp = &qp->ibqp;
1918                 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1919                 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1920         }
1921 }
1922 EXPORT_SYMBOL(rvt_rc_error);
1923
1924 /*
1925  *  rvt_rnr_tbl_to_usec - convert an RNR timeout index to microseconds
1926  *  @index: the index into ib_rvt_rnr_table
1927  *  Return: the value of ib_rvt_rnr_table at that index, in microseconds
1928  */
1929 unsigned long rvt_rnr_tbl_to_usec(u32 index)
1930 {
1931         return ib_rvt_rnr_table[(index & IB_AETH_CREDIT_MASK)];
1932 }
1933 EXPORT_SYMBOL(rvt_rnr_tbl_to_usec);
1934
1935 static inline unsigned long rvt_aeth_to_usec(u32 aeth)
1936 {
1937         return ib_rvt_rnr_table[(aeth >> IB_AETH_CREDIT_SHIFT) &
1938                                   IB_AETH_CREDIT_MASK];
1939 }
1940
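/*
 * Editor's worked example (illustrative): an RNR NAK whose AETH credit
 * field decodes to 0x0C, i.e. ((aeth >> IB_AETH_CREDIT_SHIFT) &
 * IB_AETH_CREDIT_MASK) == 0x0C, maps to ib_rvt_rnr_table[0x0C] = 640, so
 * the requester backs off for roughly 0.64 ms before retrying.
 */
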
1941 /*
1942  *  rvt_add_retry_timer - add/start a retry timer
1943  *  @qp - the QP
1944  *  add a retry timer on the QP
1945  */
1946 void rvt_add_retry_timer(struct rvt_qp *qp)
1947 {
1948         struct ib_qp *ibqp = &qp->ibqp;
1949         struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1950
1951         lockdep_assert_held(&qp->s_lock);
1952         qp->s_flags |= RVT_S_TIMER;
1953         /* 4.096 usec. * (1 << qp->timeout) */
1954         qp->s_timer.expires = jiffies + qp->timeout_jiffies +
1955                              rdi->busy_jiffies;
1956         add_timer(&qp->s_timer);
1957 }
1958 EXPORT_SYMBOL(rvt_add_retry_timer);
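
/*
 * Editor's worked example (illustrative): the IBTA local ACK timeout is
 * 4.096 usec * 2^qp->timeout, so qp->timeout = 14 gives roughly 67 ms.
 * qp->timeout_jiffies is derived from that value when the QP timeout
 * attribute is modified (see rvt_modify_qp), and rdi->busy_jiffies is
 * added on top above while the device is busy.
 */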
1959
1960 /**
1961  * rvt_add_rnr_timer - add/start an rnr timer
1962  * @qp: the QP
1963  * @aeth: aeth of RNR timeout, or a simulated aeth for loopback
1964  * Add an RNR timer on the QP.
1965  */
1966 void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth)
1967 {
1968         u32 to;
1969
1970         lockdep_assert_held(&qp->s_lock);
1971         qp->s_flags |= RVT_S_WAIT_RNR;
1972         to = rvt_aeth_to_usec(aeth);
1973         hrtimer_start(&qp->s_rnr_timer,
1974                       ns_to_ktime(1000 * to), HRTIMER_MODE_REL);
1975 }
1976 EXPORT_SYMBOL(rvt_add_rnr_timer);
1977
1978 /**
1979  * rvt_stop_rc_timers - stop all timers
1980  * @qp: the QP
1981  * Stop any pending timers.
1982  */
1983 void rvt_stop_rc_timers(struct rvt_qp *qp)
1984 {
1985         lockdep_assert_held(&qp->s_lock);
1986         /* Remove QP from all timers */
1987         if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
1988                 qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
1989                 del_timer(&qp->s_timer);
1990                 hrtimer_try_to_cancel(&qp->s_rnr_timer);
1991         }
1992 }
1993 EXPORT_SYMBOL(rvt_stop_rc_timers);
1994
1995 /**
1996  * rvt_stop_rnr_timer - stop an rnr timer
1997  * @qp: the QP
1998  *
1999  * Stop an RNR timer and return whether the timer
2000  * was pending.
2001  */
2002 static int rvt_stop_rnr_timer(struct rvt_qp *qp)
2003 {
2004         int rval = 0;
2005
2006         lockdep_assert_held(&qp->s_lock);
2007         /* Remove QP from rnr timer */
2008         if (qp->s_flags & RVT_S_WAIT_RNR) {
2009                 qp->s_flags &= ~RVT_S_WAIT_RNR;
2010                 rval = hrtimer_try_to_cancel(&qp->s_rnr_timer);
2011         }
2012         return rval;
2013 }
2014
2015 /**
2016  * rvt_del_timers_sync - wait for any timeout routines to exit
2017  * @qp: the QP
2018  */
2019 void rvt_del_timers_sync(struct rvt_qp *qp)
2020 {
2021         del_timer_sync(&qp->s_timer);
2022         hrtimer_cancel(&qp->s_rnr_timer);
2023 }
2024 EXPORT_SYMBOL(rvt_del_timers_sync);
2025
2026 /**
2027  * rvt_rc_timeout - handle a retry timeout (called from s_timer for missing responses)
2028  */
2029 static void rvt_rc_timeout(unsigned long arg)
2030 {
2031         struct rvt_qp *qp = (struct rvt_qp *)arg;
2032         struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2033         unsigned long flags;
2034
2035         spin_lock_irqsave(&qp->r_lock, flags);
2036         spin_lock(&qp->s_lock);
2037         if (qp->s_flags & RVT_S_TIMER) {
2038                 struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
2039
2040                 qp->s_flags &= ~RVT_S_TIMER;
2041                 rvp->n_rc_timeouts++;
2042                 del_timer(&qp->s_timer);
2043                 trace_rvt_rc_timeout(qp, qp->s_last_psn + 1);
2044                 if (rdi->driver_f.notify_restart_rc)
2045                         rdi->driver_f.notify_restart_rc(qp,
2046                                                         qp->s_last_psn + 1,
2047                                                         1);
2048                 rdi->driver_f.schedule_send(qp);
2049         }
2050         spin_unlock(&qp->s_lock);
2051         spin_unlock_irqrestore(&qp->r_lock, flags);
2052 }
2053
2054 /*
2055  * This is called from s_timer for RNR timeouts.
2056  */
2057 enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t)
2058 {
2059         struct rvt_qp *qp = container_of(t, struct rvt_qp, s_rnr_timer);
2060         struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2061         unsigned long flags;
2062
2063         spin_lock_irqsave(&qp->s_lock, flags);
2064         rvt_stop_rnr_timer(qp);
2065         rdi->driver_f.schedule_send(qp);
2066         spin_unlock_irqrestore(&qp->s_lock, flags);
2067         return HRTIMER_NORESTART;
2068 }
2069 EXPORT_SYMBOL(rvt_rc_rnr_retry);