IB/cm: cm_init_av_by_path should find a GID by its netdevice
drivers/infiniband/core/cm.c
/*
 * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
#include <linux/etherdevice.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include "cm_msgs.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");

static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);

static struct ib_client cm_client = {
        .name   = "cm",
        .add    = cm_add_one,
        .remove = cm_remove_one
};

static struct ib_cm {
        spinlock_t lock;
        struct list_head device_list;
        rwlock_t device_lock;
        struct rb_root listen_service_table;
        u64 listen_service_id;
        /* struct rb_root peer_service_table; todo: fix peer to peer */
        struct rb_root remote_qp_table;
        struct rb_root remote_id_table;
        struct rb_root remote_sidr_table;
        struct idr local_id_table;
        __be32 random_id_operand;
        struct list_head timewait_list;
        struct workqueue_struct *wq;
} cm;

/* Counter indexes ordered by attribute ID */
enum {
        CM_REQ_COUNTER,
        CM_MRA_COUNTER,
        CM_REJ_COUNTER,
        CM_REP_COUNTER,
        CM_RTU_COUNTER,
        CM_DREQ_COUNTER,
        CM_DREP_COUNTER,
        CM_SIDR_REQ_COUNTER,
        CM_SIDR_REP_COUNTER,
        CM_LAP_COUNTER,
        CM_APR_COUNTER,
        CM_ATTR_COUNT,
        CM_ATTR_ID_OFFSET = 0x0010,
};

enum {
        CM_XMIT,
        CM_XMIT_RETRIES,
        CM_RECV,
        CM_RECV_DUPLICATES,
        CM_COUNTER_GROUPS
};

static char const counter_group_names[CM_COUNTER_GROUPS]
                                     [sizeof("cm_rx_duplicates")] = {
        "cm_tx_msgs", "cm_tx_retries",
        "cm_rx_msgs", "cm_rx_duplicates"
};

struct cm_counter_group {
        struct kobject obj;
        atomic_long_t counter[CM_ATTR_COUNT];
};

struct cm_counter_attribute {
        struct attribute attr;
        int index;
};

#define CM_COUNTER_ATTR(_name, _index) \
struct cm_counter_attribute cm_##_name##_counter_attr = { \
        .attr = { .name = __stringify(_name), .mode = 0444 }, \
        .index = _index \
}

static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);

static struct attribute *cm_counter_default_attrs[] = {
        &cm_req_counter_attr.attr,
        &cm_mra_counter_attr.attr,
        &cm_rej_counter_attr.attr,
        &cm_rep_counter_attr.attr,
        &cm_rtu_counter_attr.attr,
        &cm_dreq_counter_attr.attr,
        &cm_drep_counter_attr.attr,
        &cm_sidr_req_counter_attr.attr,
        &cm_sidr_rep_counter_attr.attr,
        &cm_lap_counter_attr.attr,
        &cm_apr_counter_attr.attr,
        NULL
};

struct cm_port {
        struct cm_device *cm_dev;
        struct ib_mad_agent *mad_agent;
        struct kobject port_obj;
        u8 port_num;
        struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};

struct cm_device {
        struct list_head list;
        struct ib_device *ib_device;
        struct device *device;
        u8 ack_delay;
        int going_down;
        struct cm_port *port[0];
};

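/*
 * Address vector describing how to reach the remote CM: the resolved
 * local port, destination GID, AH attributes, and P_Key index.  Filled
 * in from a path record (cm_init_av_by_path) or from a received work
 * completion (cm_init_av_for_response).
 */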
struct cm_av {
        struct cm_port *port;
        union ib_gid dgid;
        struct ib_ah_attr ah_attr;
        u16 pkey_index;
        u8 timeout;
        u8  valid;
        u8  smac[ETH_ALEN];
};

struct cm_work {
        struct delayed_work work;
        struct list_head list;
        struct cm_port *port;
        struct ib_mad_recv_wc *mad_recv_wc;     /* Received MADs */
        __be32 local_id;                        /* Established / timewait */
        __be32 remote_id;
        struct ib_cm_event cm_event;
        struct ib_sa_path_rec path[0];
};

struct cm_timewait_info {
        struct cm_work work;                    /* Must be first. */
        struct list_head list;
        struct rb_node remote_qp_node;
        struct rb_node remote_id_node;
        __be64 remote_ca_guid;
        __be32 remote_qpn;
        u8 inserted_remote_qp;
        u8 inserted_remote_id;
};

struct cm_id_private {
        struct ib_cm_id id;

        struct rb_node service_node;
        struct rb_node sidr_id_node;
        spinlock_t lock;        /* Do not acquire inside cm.lock */
        struct completion comp;
        atomic_t refcount;
        /* Number of clients sharing this ib_cm_id. Only valid for listeners.
         * Protected by the cm.lock spinlock. */
        int listen_sharecount;

        struct ib_mad_send_buf *msg;
        struct cm_timewait_info *timewait_info;
        /* todo: use alternate port on send failure */
        struct cm_av av;
        struct cm_av alt_av;

        void *private_data;
        __be64 tid;
        __be32 local_qpn;
        __be32 remote_qpn;
        enum ib_qp_type qp_type;
        __be32 sq_psn;
        __be32 rq_psn;
        int timeout_ms;
        enum ib_mtu path_mtu;
        __be16 pkey;
        u8 private_data_len;
        u8 max_cm_retries;
        u8 peer_to_peer;
        u8 responder_resources;
        u8 initiator_depth;
        u8 retry_count;
        u8 rnr_retry_count;
        u8 service_timeout;
        u8 target_ack_delay;

        struct list_head work_list;
        atomic_t work_count;
};

static void cm_work_handler(struct work_struct *work);

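/*
 * Drop a reference on a cm_id.  The final put completes ->comp, which
 * cm_destroy_id() waits on before freeing the structure.
 */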
static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
        if (atomic_dec_and_test(&cm_id_priv->refcount))
                complete(&cm_id_priv->comp);
}

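/*
 * Allocate a MAD send buffer addressed through the cm_id's primary
 * address vector.  The buffer takes a reference on the cm_id (stashed
 * in context[0]) that cm_free_msg() drops.
 */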
static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
                        struct ib_mad_send_buf **msg)
{
        struct ib_mad_agent *mad_agent;
        struct ib_mad_send_buf *m;
        struct ib_ah *ah;

        mad_agent = cm_id_priv->av.port->mad_agent;
        ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
        if (IS_ERR(ah))
                return PTR_ERR(ah);

        m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
                               cm_id_priv->av.pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC,
                               IB_MGMT_BASE_VERSION);
        if (IS_ERR(m)) {
                ib_destroy_ah(ah);
                return PTR_ERR(m);
        }

        /* Timeout set by caller if response is expected. */
        m->ah = ah;
        m->retries = cm_id_priv->max_cm_retries;

        atomic_inc(&cm_id_priv->refcount);
        m->context[0] = cm_id_priv;
        *msg = m;
        return 0;
}

static int cm_alloc_response_msg(struct cm_port *port,
                                 struct ib_mad_recv_wc *mad_recv_wc,
                                 struct ib_mad_send_buf **msg)
{
        struct ib_mad_send_buf *m;
        struct ib_ah *ah;

        ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
                                  mad_recv_wc->recv_buf.grh, port->port_num);
        if (IS_ERR(ah))
                return PTR_ERR(ah);

        m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC,
                               IB_MGMT_BASE_VERSION);
        if (IS_ERR(m)) {
                ib_destroy_ah(ah);
                return PTR_ERR(m);
        }
        m->ah = ah;
        *msg = m;
        return 0;
}

static void cm_free_msg(struct ib_mad_send_buf *msg)
{
        ib_destroy_ah(msg->ah);
        if (msg->context[0])
                cm_deref_id(msg->context[0]);
        ib_free_send_mad(msg);
}

static void * cm_copy_private_data(const void *private_data,
                                   u8 private_data_len)
{
        void *data;

        if (!private_data || !private_data_len)
                return NULL;

        data = kmemdup(private_data, private_data_len, GFP_KERNEL);
        if (!data)
                return ERR_PTR(-ENOMEM);

        return data;
}

static void cm_set_private_data(struct cm_id_private *cm_id_priv,
                                 void *private_data, u8 private_data_len)
{
        if (cm_id_priv->private_data && cm_id_priv->private_data_len)
                kfree(cm_id_priv->private_data);

        cm_id_priv->private_data = private_data;
        cm_id_priv->private_data_len = private_data_len;
}

static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
                                    struct ib_grh *grh, struct cm_av *av)
{
        av->port = port;
        av->pkey_index = wc->pkey_index;
        ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
                           grh, &av->ah_attr);
}

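/*
 * Resolve a path record to a local CM port.  The GID lookup is filtered
 * by the net_device associated with the path (if any), so a GID that is
 * tied to a particular netdevice resolves to the matching port rather
 * than to whichever port happens to carry the same GID.
 */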
static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
{
        struct cm_device *cm_dev;
        struct cm_port *port = NULL;
        unsigned long flags;
        int ret;
        u8 p;
        struct net_device *ndev = ib_get_ndev_from_path(path);

        read_lock_irqsave(&cm.device_lock, flags);
        list_for_each_entry(cm_dev, &cm.device_list, list) {
                if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
                                        ndev, &p, NULL)) {
                        port = cm_dev->port[p-1];
                        break;
                }
        }
        read_unlock_irqrestore(&cm.device_lock, flags);

        if (ndev)
                dev_put(ndev);

        if (!port)
                return -EINVAL;

        ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
                                  be16_to_cpu(path->pkey), &av->pkey_index);
        if (ret)
                return ret;

        av->port = port;
        ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
                             &av->ah_attr);
        av->timeout = path->packet_life_time + 1;

        av->valid = 1;
        return 0;
}

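/*
 * Allocate a local communication ID.  The idr value is XORed with a
 * random operand so the IDs visible on the wire are not simply
 * sequential; cm_free_id() and cm_get_id() apply the same XOR to map
 * back to the idr entry.
 */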
static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
        unsigned long flags;
        int id;

        idr_preload(GFP_KERNEL);
        spin_lock_irqsave(&cm.lock, flags);

        id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);

        spin_unlock_irqrestore(&cm.lock, flags);
        idr_preload_end();

        cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
        return id < 0 ? id : 0;
}

static void cm_free_id(__be32 local_id)
{
        spin_lock_irq(&cm.lock);
        idr_remove(&cm.local_id_table,
                   (__force int) (local_id ^ cm.random_id_operand));
        spin_unlock_irq(&cm.lock);
}

static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
{
        struct cm_id_private *cm_id_priv;

        cm_id_priv = idr_find(&cm.local_id_table,
                              (__force int) (local_id ^ cm.random_id_operand));
        if (cm_id_priv) {
                if (cm_id_priv->id.remote_id == remote_id)
                        atomic_inc(&cm_id_priv->refcount);
                else
                        cm_id_priv = NULL;
        }

        return cm_id_priv;
}

static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
{
        struct cm_id_private *cm_id_priv;

        spin_lock_irq(&cm.lock);
        cm_id_priv = cm_get_id(local_id, remote_id);
        spin_unlock_irq(&cm.lock);

        return cm_id_priv;
}

/*
 * Trivial helpers to strip endian annotation and compare; the
 * endianness doesn't actually matter since we just need a stable
 * order for the RB tree.
 */
static int be32_lt(__be32 a, __be32 b)
{
        return (__force u32) a < (__force u32) b;
}

static int be32_gt(__be32 a, __be32 b)
{
        return (__force u32) a > (__force u32) b;
}

static int be64_lt(__be64 a, __be64 b)
{
        return (__force u64) a < (__force u64) b;
}

static int be64_gt(__be64 a, __be64 b)
{
        return (__force u64) a > (__force u64) b;
}

static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
{
        struct rb_node **link = &cm.listen_service_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        __be64 service_id = cm_id_priv->id.service_id;
        __be64 service_mask = cm_id_priv->id.service_mask;

        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          service_node);
                if ((cur_cm_id_priv->id.service_mask & service_id) ==
                    (service_mask & cur_cm_id_priv->id.service_id) &&
                    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
                        return cur_cm_id_priv;

                if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
                        link = &(*link)->rb_left;
                else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
                        link = &(*link)->rb_right;
                else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
                        link = &(*link)->rb_left;
                else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
                        link = &(*link)->rb_right;
                else
                        link = &(*link)->rb_right;
        }
        rb_link_node(&cm_id_priv->service_node, parent, link);
        rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
        return NULL;
}

static struct cm_id_private * cm_find_listen(struct ib_device *device,
                                             __be64 service_id)
{
        struct rb_node *node = cm.listen_service_table.rb_node;
        struct cm_id_private *cm_id_priv;

        while (node) {
                cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
                if ((cm_id_priv->id.service_mask & service_id) ==
                     cm_id_priv->id.service_id &&
                    (cm_id_priv->id.device == device))
                        return cm_id_priv;

                if (device < cm_id_priv->id.device)
                        node = node->rb_left;
                else if (device > cm_id_priv->id.device)
                        node = node->rb_right;
                else if (be64_lt(service_id, cm_id_priv->id.service_id))
                        node = node->rb_left;
                else if (be64_gt(service_id, cm_id_priv->id.service_id))
                        node = node->rb_right;
                else
                        node = node->rb_right;
        }
        return NULL;
}

static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
                                                     *timewait_info)
{
        struct rb_node **link = &cm.remote_id_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_timewait_info *cur_timewait_info;
        __be64 remote_ca_guid = timewait_info->remote_ca_guid;
        __be32 remote_id = timewait_info->work.remote_id;

        while (*link) {
                parent = *link;
                cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
                                             remote_id_node);
                if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
                        link = &(*link)->rb_right;
                else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_left;
                else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_right;
                else
                        return cur_timewait_info;
        }
        timewait_info->inserted_remote_id = 1;
        rb_link_node(&timewait_info->remote_id_node, parent, link);
        rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
        return NULL;
}

static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
                                                   __be32 remote_id)
{
        struct rb_node *node = cm.remote_id_table.rb_node;
        struct cm_timewait_info *timewait_info;

        while (node) {
                timewait_info = rb_entry(node, struct cm_timewait_info,
                                         remote_id_node);
                if (be32_lt(remote_id, timewait_info->work.remote_id))
                        node = node->rb_left;
                else if (be32_gt(remote_id, timewait_info->work.remote_id))
                        node = node->rb_right;
                else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
                        node = node->rb_left;
                else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
                        node = node->rb_right;
                else
                        return timewait_info;
        }
        return NULL;
}

static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
                                                      *timewait_info)
{
        struct rb_node **link = &cm.remote_qp_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_timewait_info *cur_timewait_info;
        __be64 remote_ca_guid = timewait_info->remote_ca_guid;
        __be32 remote_qpn = timewait_info->remote_qpn;

        while (*link) {
                parent = *link;
                cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
                                             remote_qp_node);
                if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
                        link = &(*link)->rb_right;
                else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_left;
                else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_right;
                else
                        return cur_timewait_info;
        }
        timewait_info->inserted_remote_qp = 1;
        rb_link_node(&timewait_info->remote_qp_node, parent, link);
        rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
        return NULL;
}

static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
                                                    *cm_id_priv)
{
        struct rb_node **link = &cm.remote_sidr_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        union ib_gid *port_gid = &cm_id_priv->av.dgid;
        __be32 remote_id = cm_id_priv->id.remote_id;

        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          sidr_id_node);
                if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
                        link = &(*link)->rb_right;
                else {
                        int cmp;
                        cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
                                     sizeof *port_gid);
                        if (cmp < 0)
                                link = &(*link)->rb_left;
                        else if (cmp > 0)
                                link = &(*link)->rb_right;
                        else
                                return cur_cm_id_priv;
                }
        }
        rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
        rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
        return NULL;
}

static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
                               enum ib_cm_sidr_status status)
{
        struct ib_cm_sidr_rep_param param;

        memset(&param, 0, sizeof param);
        param.status = status;
        ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
}

struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
                                 ib_cm_handler cm_handler,
                                 void *context)
{
        struct cm_id_private *cm_id_priv;
        int ret;

        cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
        if (!cm_id_priv)
                return ERR_PTR(-ENOMEM);

        cm_id_priv->id.state = IB_CM_IDLE;
        cm_id_priv->id.device = device;
        cm_id_priv->id.cm_handler = cm_handler;
        cm_id_priv->id.context = context;
        cm_id_priv->id.remote_cm_qpn = 1;
        ret = cm_alloc_id(cm_id_priv);
        if (ret)
                goto error;

        spin_lock_init(&cm_id_priv->lock);
        init_completion(&cm_id_priv->comp);
        INIT_LIST_HEAD(&cm_id_priv->work_list);
        atomic_set(&cm_id_priv->work_count, -1);
        atomic_set(&cm_id_priv->refcount, 1);
        return &cm_id_priv->id;

error:
        kfree(cm_id_priv);
        return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_create_cm_id);
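
/*
 * Minimal usage sketch (hypothetical consumer; names are illustrative
 * and error handling is elided).  Note that a non-zero return from the
 * handler causes the CM to destroy the id (see cm_process_work()):
 *
 *      static int my_cm_handler(struct ib_cm_id *id, struct ib_cm_event *ev)
 *      {
 *              return 0;
 *      }
 *      ...
 *      struct ib_cm_id *id = ib_create_cm_id(device, my_cm_handler, my_ctx);
 *      if (IS_ERR(id))
 *              return PTR_ERR(id);
 */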

static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
        struct cm_work *work;

        if (list_empty(&cm_id_priv->work_list))
                return NULL;

        work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
        list_del(&work->list);
        return work;
}

static void cm_free_work(struct cm_work *work)
{
        if (work->mad_recv_wc)
                ib_free_recv_mad(work->mad_recv_wc);
        kfree(work);
}

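/*
 * Example: iba_time = 20 encodes 4.096us * 2^20, roughly 4295ms; the
 * approximation below gives 1 << (20 - 8) = 4096ms.
 */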
static inline int cm_convert_to_ms(int iba_time)
{
        /* approximate conversion to ms from 4.096us x 2^iba_time */
        return 1 << max(iba_time - 8, 0);
}

/*
 * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
 * Because of how ack_timeout is stored, adding one doubles the timeout.
 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
 * increment it (round up) only if the other is within 50%.
 */
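/*
 * Example: for ca_ack_delay = 15 and packet_life_time = 14, ack_timeout
 * starts at life_time + 1 = 15 and, since ca_ack_delay >= ack_timeout - 1,
 * is rounded up to 16, which encodes exactly
 * 4.096us x 2^15 + 2 x 4.096us x 2^14.
 */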
static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
{
        int ack_timeout = packet_life_time + 1;

        if (ack_timeout >= ca_ack_delay)
                ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
        else
                ack_timeout = ca_ack_delay +
                              (ack_timeout >= (ca_ack_delay - 1));

        return min(31, ack_timeout);
}

static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
{
        if (timewait_info->inserted_remote_id) {
                rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
                timewait_info->inserted_remote_id = 0;
        }

        if (timewait_info->inserted_remote_qp) {
                rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
                timewait_info->inserted_remote_qp = 0;
        }
}

static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
{
        struct cm_timewait_info *timewait_info;

        timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
        if (!timewait_info)
                return ERR_PTR(-ENOMEM);

        timewait_info->work.local_id = local_id;
        INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
        timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
        return timewait_info;
}

static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
{
        int wait_time;
        unsigned long flags;
        struct cm_device *cm_dev;

        cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
        if (!cm_dev)
                return;

        spin_lock_irqsave(&cm.lock, flags);
        cm_cleanup_timewait(cm_id_priv->timewait_info);
        list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
        spin_unlock_irqrestore(&cm.lock, flags);

        /*
         * The cm_id could be destroyed by the user before we exit timewait.
         * To protect against this, we search for the cm_id after exiting
         * timewait before notifying the user that we've exited timewait.
         */
        cm_id_priv->id.state = IB_CM_TIMEWAIT;
        wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);

        /* Check if the device started its remove_one */
        spin_lock_irq(&cm.lock);
        if (!cm_dev->going_down)
                queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
                                   msecs_to_jiffies(wait_time));
        spin_unlock_irq(&cm.lock);

        cm_id_priv->timewait_info = NULL;
}

static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
        unsigned long flags;

        cm_id_priv->id.state = IB_CM_IDLE;
        if (cm_id_priv->timewait_info) {
                spin_lock_irqsave(&cm.lock, flags);
                cm_cleanup_timewait(cm_id_priv->timewait_info);
                spin_unlock_irqrestore(&cm.lock, flags);
                kfree(cm_id_priv->timewait_info);
                cm_id_priv->timewait_info = NULL;
        }
}

static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
        struct cm_id_private *cm_id_priv;
        struct cm_work *work;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
retest:
        spin_lock_irq(&cm_id_priv->lock);
        switch (cm_id->state) {
        case IB_CM_LISTEN:
                spin_unlock_irq(&cm_id_priv->lock);

                spin_lock_irq(&cm.lock);
                if (--cm_id_priv->listen_sharecount > 0) {
                        /* The id is still shared. */
                        cm_deref_id(cm_id_priv);
                        spin_unlock_irq(&cm.lock);
                        return;
                }
                rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
                spin_unlock_irq(&cm.lock);
                break;
        case IB_CM_SIDR_REQ_SENT:
                cm_id->state = IB_CM_IDLE;
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_SIDR_REQ_RCVD:
                spin_unlock_irq(&cm_id_priv->lock);
                cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
                spin_lock_irq(&cm.lock);
                if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
                        rb_erase(&cm_id_priv->sidr_id_node,
                                 &cm.remote_sidr_table);
                spin_unlock_irq(&cm.lock);
                break;
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
                               &cm_id_priv->id.device->node_guid,
                               sizeof cm_id_priv->id.device->node_guid,
                               NULL, 0);
                break;
        case IB_CM_REQ_RCVD:
                if (err == -ENOMEM) {
                        /* Do not reject to allow future retries. */
                        cm_reset_to_idle(cm_id_priv);
                        spin_unlock_irq(&cm_id_priv->lock);
                } else {
                        spin_unlock_irq(&cm_id_priv->lock);
                        ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                                       NULL, 0, NULL, 0);
                }
                break;
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                /* Fall through */
        case IB_CM_MRA_REQ_SENT:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                               NULL, 0, NULL, 0);
                break;
        case IB_CM_ESTABLISHED:
                spin_unlock_irq(&cm_id_priv->lock);
                if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
                        break;
                ib_send_cm_dreq(cm_id, NULL, 0);
                goto retest;
        case IB_CM_DREQ_SENT:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                cm_enter_timewait(cm_id_priv);
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_DREQ_RCVD:
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_drep(cm_id, NULL, 0);
                break;
        default:
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        }

        cm_free_id(cm_id->local_id);
        cm_deref_id(cm_id_priv);
        wait_for_completion(&cm_id_priv->comp);
        while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
                cm_free_work(work);
        kfree(cm_id_priv->private_data);
        kfree(cm_id_priv);
}

void ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
        cm_destroy_id(cm_id, 0);
}
EXPORT_SYMBOL(ib_destroy_cm_id);

/**
 * __ib_cm_listen - Initiates listening on the specified service ID for
 *   connection and service ID resolution requests.
 * @cm_id: Connection identifier associated with the listen request.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   in network byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 * @service_mask: Mask applied to the service ID used to listen across a
 *   range of service IDs.  If set to 0, the service ID is matched
 *   exactly.  This parameter is ignored if %service_id is set to
 *   IB_CM_ASSIGN_SERVICE_ID.
 */
static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
                          __be64 service_mask)
{
        struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
        int ret = 0;

        service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
        service_id &= service_mask;
        if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
            (service_id != IB_CM_ASSIGN_SERVICE_ID))
                return -EINVAL;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        if (cm_id->state != IB_CM_IDLE)
                return -EINVAL;

        cm_id->state = IB_CM_LISTEN;
        ++cm_id_priv->listen_sharecount;

        if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
                cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
                cm_id->service_mask = ~cpu_to_be64(0);
        } else {
                cm_id->service_id = service_id;
                cm_id->service_mask = service_mask;
        }
        cur_cm_id_priv = cm_insert_listen(cm_id_priv);

        if (cur_cm_id_priv) {
                cm_id->state = IB_CM_IDLE;
                --cm_id_priv->listen_sharecount;
                ret = -EBUSY;
        }
        return ret;
}

int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm.lock, flags);
        ret = __ib_cm_listen(cm_id, service_id, service_mask);
        spin_unlock_irqrestore(&cm.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_cm_listen);

/**
 * Create a new listening ib_cm_id and listen on the given service ID.
 *
 * If there's an existing ID listening on that same device and service ID,
 * return it.
 *
 * @device: Device associated with the cm_id.  All related communication will
 * be associated with the specified device.
 * @cm_handler: Callback invoked to notify the user of CM events.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   in network byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 *
 * Callers should call ib_destroy_cm_id when done with the listener ID.
 */
struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
                                     ib_cm_handler cm_handler,
                                     __be64 service_id)
{
        struct cm_id_private *cm_id_priv;
        struct ib_cm_id *cm_id;
        unsigned long flags;
        int err = 0;

        /* Create an ID in advance, since the creation may sleep */
        cm_id = ib_create_cm_id(device, cm_handler, NULL);
        if (IS_ERR(cm_id))
                return cm_id;

        spin_lock_irqsave(&cm.lock, flags);

        if (service_id == IB_CM_ASSIGN_SERVICE_ID)
                goto new_id;

        /* Find an existing ID */
        cm_id_priv = cm_find_listen(device, service_id);
        if (cm_id_priv) {
                if (cm_id->cm_handler != cm_handler || cm_id->context) {
                        /* Sharing an ib_cm_id with different handlers is not
                         * supported */
                        spin_unlock_irqrestore(&cm.lock, flags);
                        return ERR_PTR(-EINVAL);
                }
                atomic_inc(&cm_id_priv->refcount);
                ++cm_id_priv->listen_sharecount;
                spin_unlock_irqrestore(&cm.lock, flags);

                ib_destroy_cm_id(cm_id);
                cm_id = &cm_id_priv->id;
                return cm_id;
        }

new_id:
        /* Use newly created ID */
        err = __ib_cm_listen(cm_id, service_id, 0);

        spin_unlock_irqrestore(&cm.lock, flags);

        if (err) {
                ib_destroy_cm_id(cm_id);
                return ERR_PTR(err);
        }
        return cm_id;
}
EXPORT_SYMBOL(ib_cm_insert_listen);
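
/*
 * Note on sharing: a second ib_cm_insert_listen() call for the same
 * device and service ID (with the same handler and a NULL context)
 * returns the existing id with its listen_sharecount raised.  Each
 * caller still pairs its call with ib_destroy_cm_id(), which tears the
 * listen down only once the share count drops to zero.
 */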
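/*
 * Build the 64-bit MAD transaction ID: the upper 32 bits are the MAD
 * agent's hi_tid, the lower 32 bits are the local comm ID with the
 * 2-bit message sequence ORed into bits 30-31.
 */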
static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
                          enum cm_msg_sequence msg_seq)
{
        u64 hi_tid, low_tid;

        hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
        low_tid  = (u64) ((__force u32)cm_id_priv->id.local_id |
                          (msg_seq << 30));
        return cpu_to_be64(hi_tid | low_tid);
}

static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
                              __be16 attr_id, __be64 tid)
{
        hdr->base_version  = IB_MGMT_BASE_VERSION;
        hdr->mgmt_class    = IB_MGMT_CLASS_CM;
        hdr->class_version = IB_CM_CLASS_VERSION;
        hdr->method        = IB_MGMT_METHOD_SEND;
        hdr->attr_id       = attr_id;
        hdr->tid           = tid;
}

static void cm_format_req(struct cm_req_msg *req_msg,
                          struct cm_id_private *cm_id_priv,
                          struct ib_cm_req_param *param)
{
        struct ib_sa_path_rec *pri_path = param->primary_path;
        struct ib_sa_path_rec *alt_path = param->alternate_path;

        cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
                          cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));

        req_msg->local_comm_id = cm_id_priv->id.local_id;
        req_msg->service_id = param->service_id;
        req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
        cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
        cm_req_set_init_depth(req_msg, param->initiator_depth);
        cm_req_set_remote_resp_timeout(req_msg,
                                       param->remote_cm_response_timeout);
        cm_req_set_qp_type(req_msg, param->qp_type);
        cm_req_set_flow_ctrl(req_msg, param->flow_control);
        cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
        cm_req_set_local_resp_timeout(req_msg,
                                      param->local_cm_response_timeout);
        req_msg->pkey = param->primary_path->pkey;
        cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
        cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);

        if (param->qp_type != IB_QPT_XRC_INI) {
                cm_req_set_resp_res(req_msg, param->responder_resources);
                cm_req_set_retry_count(req_msg, param->retry_count);
                cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
                cm_req_set_srq(req_msg, param->srq);
        }

        if (pri_path->hop_limit <= 1) {
                req_msg->primary_local_lid = pri_path->slid;
                req_msg->primary_remote_lid = pri_path->dlid;
        } else {
                /* Work-around until there's a way to obtain remote LID info */
                req_msg->primary_local_lid = IB_LID_PERMISSIVE;
                req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
        }
        req_msg->primary_local_gid = pri_path->sgid;
        req_msg->primary_remote_gid = pri_path->dgid;
        cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
        cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
        req_msg->primary_traffic_class = pri_path->traffic_class;
        req_msg->primary_hop_limit = pri_path->hop_limit;
        cm_req_set_primary_sl(req_msg, pri_path->sl);
        cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
        cm_req_set_primary_local_ack_timeout(req_msg,
                cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
                               pri_path->packet_life_time));

        if (alt_path) {
                if (alt_path->hop_limit <= 1) {
                        req_msg->alt_local_lid = alt_path->slid;
                        req_msg->alt_remote_lid = alt_path->dlid;
                } else {
                        req_msg->alt_local_lid = IB_LID_PERMISSIVE;
                        req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
                }
                req_msg->alt_local_gid = alt_path->sgid;
                req_msg->alt_remote_gid = alt_path->dgid;
                cm_req_set_alt_flow_label(req_msg,
                                          alt_path->flow_label);
                cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
                req_msg->alt_traffic_class = alt_path->traffic_class;
                req_msg->alt_hop_limit = alt_path->hop_limit;
                cm_req_set_alt_sl(req_msg, alt_path->sl);
                cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
                cm_req_set_alt_local_ack_timeout(req_msg,
                        cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
                                       alt_path->packet_life_time));
        }

        if (param->private_data && param->private_data_len)
                memcpy(req_msg->private_data, param->private_data,
                       param->private_data_len);
}

static int cm_validate_req_param(struct ib_cm_req_param *param)
{
        /* peer-to-peer not supported */
        if (param->peer_to_peer)
                return -EINVAL;

        if (!param->primary_path)
                return -EINVAL;

        if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
            param->qp_type != IB_QPT_XRC_INI)
                return -EINVAL;

        if (param->private_data &&
            param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
                return -EINVAL;

        if (param->alternate_path &&
            (param->alternate_path->pkey != param->primary_path->pkey ||
             param->alternate_path->mtu != param->primary_path->mtu))
                return -EINVAL;

        return 0;
}

int ib_send_cm_req(struct ib_cm_id *cm_id,
                   struct ib_cm_req_param *param)
{
        struct cm_id_private *cm_id_priv;
        struct cm_req_msg *req_msg;
        unsigned long flags;
        int ret;

        ret = cm_validate_req_param(param);
        if (ret)
                return ret;

        /* Verify that we're not in timewait. */
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id->state != IB_CM_IDLE) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                ret = -EINVAL;
                goto out;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
                                                            id.local_id);
        if (IS_ERR(cm_id_priv->timewait_info)) {
                ret = PTR_ERR(cm_id_priv->timewait_info);
                goto out;
        }

        ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
        if (ret)
                goto error1;
        if (param->alternate_path) {
                ret = cm_init_av_by_path(param->alternate_path,
                                         &cm_id_priv->alt_av);
                if (ret)
                        goto error1;
        }
        cm_id->service_id = param->service_id;
        cm_id->service_mask = ~cpu_to_be64(0);
        cm_id_priv->timeout_ms = cm_convert_to_ms(
                                    param->primary_path->packet_life_time) * 2 +
                                 cm_convert_to_ms(
                                    param->remote_cm_response_timeout);
        cm_id_priv->max_cm_retries = param->max_cm_retries;
        cm_id_priv->initiator_depth = param->initiator_depth;
        cm_id_priv->responder_resources = param->responder_resources;
        cm_id_priv->retry_count = param->retry_count;
        cm_id_priv->path_mtu = param->primary_path->mtu;
        cm_id_priv->pkey = param->primary_path->pkey;
        cm_id_priv->qp_type = param->qp_type;

        ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
        if (ret)
                goto error1;

        req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
        cm_format_req(req_msg, cm_id_priv, param);
        cm_id_priv->tid = req_msg->hdr.tid;
        cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
        cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;

        cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
        cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        ret = ib_post_send_mad(cm_id_priv->msg, NULL);
        if (ret) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                goto error2;
        }
        BUG_ON(cm_id->state != IB_CM_IDLE);
        cm_id->state = IB_CM_REQ_SENT;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return 0;

error2: cm_free_msg(cm_id_priv->msg);
error1: kfree(cm_id_priv->timewait_info);
out:    return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);

static int cm_issue_rej(struct cm_port *port,
                        struct ib_mad_recv_wc *mad_recv_wc,
                        enum ib_cm_rej_reason reason,
                        enum cm_msg_response msg_rejected,
                        void *ari, u8 ari_length)
{
        struct ib_mad_send_buf *msg = NULL;
        struct cm_rej_msg *rej_msg, *rcv_msg;
        int ret;

        ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
        if (ret)
                return ret;

        /* We just need common CM header information.  Cast to any message. */
        rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
        rej_msg = (struct cm_rej_msg *) msg->mad;

        cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
        rej_msg->remote_comm_id = rcv_msg->local_comm_id;
        rej_msg->local_comm_id = rcv_msg->remote_comm_id;
        cm_rej_set_msg_rejected(rej_msg, msg_rejected);
        rej_msg->reason = cpu_to_be16(reason);

        if (ari && ari_length) {
                cm_rej_set_reject_info_len(rej_msg, ari_length);
                memcpy(rej_msg->ari, ari, ari_length);
        }

        ret = ib_post_send_mad(msg, NULL);
        if (ret)
                cm_free_msg(msg);

        return ret;
}

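/*
 * Deterministic tie-break between two endpoints: the side with the
 * numerically larger CA GUID (or, on equal GUIDs, the larger QPN) is
 * treated as the active peer.
 */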
static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
                                    __be32 local_qpn, __be32 remote_qpn)
{
        return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
                ((local_ca_guid == remote_ca_guid) &&
                 (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
}

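/*
 * Build path records as seen from the passive (receiving) side: the
 * REQ's "local" fields become our destination fields and vice versa
 * (dgid = requester's sgid, dlid = requester's slid, and so on).
 */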
static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
                                            struct ib_sa_path_rec *primary_path,
                                            struct ib_sa_path_rec *alt_path)
{
        memset(primary_path, 0, sizeof *primary_path);
        primary_path->dgid = req_msg->primary_local_gid;
        primary_path->sgid = req_msg->primary_remote_gid;
        primary_path->dlid = req_msg->primary_local_lid;
        primary_path->slid = req_msg->primary_remote_lid;
        primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
        primary_path->hop_limit = req_msg->primary_hop_limit;
        primary_path->traffic_class = req_msg->primary_traffic_class;
        primary_path->reversible = 1;
        primary_path->pkey = req_msg->pkey;
        primary_path->sl = cm_req_get_primary_sl(req_msg);
        primary_path->mtu_selector = IB_SA_EQ;
        primary_path->mtu = cm_req_get_path_mtu(req_msg);
        primary_path->rate_selector = IB_SA_EQ;
        primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
        primary_path->packet_life_time_selector = IB_SA_EQ;
        primary_path->packet_life_time =
                cm_req_get_primary_local_ack_timeout(req_msg);
        primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
        primary_path->service_id = req_msg->service_id;

        if (req_msg->alt_local_lid) {
                memset(alt_path, 0, sizeof *alt_path);
                alt_path->dgid = req_msg->alt_local_gid;
                alt_path->sgid = req_msg->alt_remote_gid;
                alt_path->dlid = req_msg->alt_local_lid;
                alt_path->slid = req_msg->alt_remote_lid;
                alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
                alt_path->hop_limit = req_msg->alt_hop_limit;
                alt_path->traffic_class = req_msg->alt_traffic_class;
                alt_path->reversible = 1;
                alt_path->pkey = req_msg->pkey;
                alt_path->sl = cm_req_get_alt_sl(req_msg);
                alt_path->mtu_selector = IB_SA_EQ;
                alt_path->mtu = cm_req_get_path_mtu(req_msg);
                alt_path->rate_selector = IB_SA_EQ;
                alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
                alt_path->packet_life_time_selector = IB_SA_EQ;
                alt_path->packet_life_time =
                        cm_req_get_alt_local_ack_timeout(req_msg);
                alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
                alt_path->service_id = req_msg->service_id;
        }
}

static u16 cm_get_bth_pkey(struct cm_work *work)
{
        struct ib_device *ib_dev = work->port->cm_dev->ib_device;
        u8 port_num = work->port->port_num;
        u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
        u16 pkey;
        int ret;

        ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
        if (ret) {
                dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
                                     port_num, pkey_index, ret);
                return 0;
        }

        return pkey;
}

static void cm_format_req_event(struct cm_work *work,
                                struct cm_id_private *cm_id_priv,
                                struct ib_cm_id *listen_id)
{
        struct cm_req_msg *req_msg;
        struct ib_cm_req_event_param *param;

        req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
        param = &work->cm_event.param.req_rcvd;
        param->listen_id = listen_id;
        param->bth_pkey = cm_get_bth_pkey(work);
        param->port = cm_id_priv->av.port->port_num;
        param->primary_path = &work->path[0];
        if (req_msg->alt_local_lid)
                param->alternate_path = &work->path[1];
        else
                param->alternate_path = NULL;
        param->remote_ca_guid = req_msg->local_ca_guid;
        param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
        param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
        param->qp_type = cm_req_get_qp_type(req_msg);
        param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
        param->responder_resources = cm_req_get_init_depth(req_msg);
        param->initiator_depth = cm_req_get_resp_res(req_msg);
        param->local_cm_response_timeout =
                                        cm_req_get_remote_resp_timeout(req_msg);
        param->flow_control = cm_req_get_flow_ctrl(req_msg);
        param->remote_cm_response_timeout =
                                        cm_req_get_local_resp_timeout(req_msg);
        param->retry_count = cm_req_get_retry_count(req_msg);
        param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
        param->srq = cm_req_get_srq(req_msg);
        work->cm_event.private_data = &req_msg->private_data;
}

1397 static void cm_process_work(struct cm_id_private *cm_id_priv,
1398                             struct cm_work *work)
1399 {
1400         int ret;
1401
1402         /* We will typically only have the current event to report. */
1403         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
1404         cm_free_work(work);
1405
1406         while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
1407                 spin_lock_irq(&cm_id_priv->lock);
1408                 work = cm_dequeue_work(cm_id_priv);
1409                 spin_unlock_irq(&cm_id_priv->lock);
1410                 BUG_ON(!work);
1411                 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1412                                                 &work->cm_event);
1413                 cm_free_work(work);
1414         }
1415         cm_deref_id(cm_id_priv);
1416         if (ret)
1417                 cm_destroy_id(&cm_id_priv->id, ret);
1418 }
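
/*
 * Editor's sketch (not part of the driver): the hand-off protocol used by
 * the handlers below.  work_count starts at -1, so the handler whose
 * atomic_inc_and_test() flips it to zero owns event delivery, and
 * cm_process_work() above then drains whatever concurrent handlers queued
 * on work_list via atomic_add_negative(-1, ...).  The names mirror this
 * file; the function itself is hypothetical.
 */
static void sketch_deliver_or_queue(struct cm_id_private *cm_id_priv,
                                    struct cm_work *work)
{
        int owns_delivery;

        spin_lock_irq(&cm_id_priv->lock);
        /* -1 -> 0: no delivery in flight, this caller becomes the owner. */
        owns_delivery = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!owns_delivery)
                list_add_tail(&work->list, &cm_id_priv->work_list);
        spin_unlock_irq(&cm_id_priv->lock);

        if (owns_delivery)
                cm_process_work(cm_id_priv, work);      /* drains the queue */
        else
                cm_deref_id(cm_id_priv);
}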
1419
1420 static void cm_format_mra(struct cm_mra_msg *mra_msg,
1421                           struct cm_id_private *cm_id_priv,
1422                           enum cm_msg_response msg_mraed, u8 service_timeout,
1423                           const void *private_data, u8 private_data_len)
1424 {
1425         cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
1426         cm_mra_set_msg_mraed(mra_msg, msg_mraed);
1427         mra_msg->local_comm_id = cm_id_priv->id.local_id;
1428         mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
1429         cm_mra_set_service_timeout(mra_msg, service_timeout);
1430
1431         if (private_data && private_data_len)
1432                 memcpy(mra_msg->private_data, private_data, private_data_len);
1433 }
1434
1435 static void cm_format_rej(struct cm_rej_msg *rej_msg,
1436                           struct cm_id_private *cm_id_priv,
1437                           enum ib_cm_rej_reason reason,
1438                           void *ari,
1439                           u8 ari_length,
1440                           const void *private_data,
1441                           u8 private_data_len)
1442 {
1443         cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
1444         rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
1445
1446         switch (cm_id_priv->id.state) {
1447         case IB_CM_REQ_RCVD:
1448                 rej_msg->local_comm_id = 0;
1449                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1450                 break;
1451         case IB_CM_MRA_REQ_SENT:
1452                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1453                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1454                 break;
1455         case IB_CM_REP_RCVD:
1456         case IB_CM_MRA_REP_SENT:
1457                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1458                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
1459                 break;
1460         default:
1461                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1462                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
1463                 break;
1464         }
1465
1466         rej_msg->reason = cpu_to_be16(reason);
1467         if (ari && ari_length) {
1468                 cm_rej_set_reject_info_len(rej_msg, ari_length);
1469                 memcpy(rej_msg->ari, ari, ari_length);
1470         }
1471
1472         if (private_data && private_data_len)
1473                 memcpy(rej_msg->private_data, private_data, private_data_len);
1474 }
1475
1476 static void cm_dup_req_handler(struct cm_work *work,
1477                                struct cm_id_private *cm_id_priv)
1478 {
1479         struct ib_mad_send_buf *msg = NULL;
1480         int ret;
1481
1482         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1483                         counter[CM_REQ_COUNTER]);
1484
1485         /* Quick state check to discard duplicate REQs. */
1486         if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1487                 return;
1488
1489         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1490         if (ret)
1491                 return;
1492
1493         spin_lock_irq(&cm_id_priv->lock);
1494         switch (cm_id_priv->id.state) {
1495         case IB_CM_MRA_REQ_SENT:
1496                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1497                               CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1498                               cm_id_priv->private_data,
1499                               cm_id_priv->private_data_len);
1500                 break;
1501         case IB_CM_TIMEWAIT:
1502                 cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1503                               IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1504                 break;
1505         default:
1506                 goto unlock;
1507         }
1508         spin_unlock_irq(&cm_id_priv->lock);
1509
1510         ret = ib_post_send_mad(msg, NULL);
1511         if (ret)
1512                 goto free;
1513         return;
1514
1515 unlock: spin_unlock_irq(&cm_id_priv->lock);
1516 free:   cm_free_msg(msg);
1517 }
1518
1519 static struct cm_id_private * cm_match_req(struct cm_work *work,
1520                                            struct cm_id_private *cm_id_priv)
1521 {
1522         struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1523         struct cm_timewait_info *timewait_info;
1524         struct cm_req_msg *req_msg;
1525
1526         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1527
1528         /* Check for possible duplicate REQ. */
1529         spin_lock_irq(&cm.lock);
1530         timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1531         if (timewait_info) {
1532                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1533                                            timewait_info->work.remote_id);
1534                 spin_unlock_irq(&cm.lock);
1535                 if (cur_cm_id_priv) {
1536                         cm_dup_req_handler(work, cur_cm_id_priv);
1537                         cm_deref_id(cur_cm_id_priv);
1538                 }
1539                 return NULL;
1540         }
1541
1542         /* Check for stale connections. */
1543         timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1544         if (timewait_info) {
1545                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1546                 spin_unlock_irq(&cm.lock);
1547                 cm_issue_rej(work->port, work->mad_recv_wc,
1548                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1549                              NULL, 0);
1550                 return NULL;
1551         }
1552
1553         /* Find matching listen request. */
1554         listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
1555                                            req_msg->service_id);
1556         if (!listen_cm_id_priv) {
1557                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1558                 spin_unlock_irq(&cm.lock);
1559                 cm_issue_rej(work->port, work->mad_recv_wc,
1560                              IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1561                              NULL, 0);
1562                 goto out;
1563         }
1564         atomic_inc(&listen_cm_id_priv->refcount);
1565         atomic_inc(&cm_id_priv->refcount);
1566         cm_id_priv->id.state = IB_CM_REQ_RCVD;
1567         atomic_inc(&cm_id_priv->work_count);
1568         spin_unlock_irq(&cm.lock);
1569 out:
1570         return listen_cm_id_priv;
1571 }
1572
1573 /*
1574  * Work-around for inter-subnet connections.  A permissive LID (0xFFFF)
1575  * in the REQ means the sender is on another subnet, so we override the
1576  * REQ's LID/SL data with the LIDs observed locally in the work completion.
1577  */
1578 static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1579 {
1580         if (!cm_req_get_primary_subnet_local(req_msg)) {
1581                 if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
1582                         req_msg->primary_local_lid = cpu_to_be16(wc->slid);
1583                         cm_req_set_primary_sl(req_msg, wc->sl);
1584                 }
1585
1586                 if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
1587                         req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1588         }
1589
1590         if (!cm_req_get_alt_subnet_local(req_msg)) {
1591                 if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
1592                         req_msg->alt_local_lid = cpu_to_be16(wc->slid);
1593                         cm_req_set_alt_sl(req_msg, wc->sl);
1594                 }
1595
1596                 if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
1597                         req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1598         }
1599 }
1600
1601 static int cm_req_handler(struct cm_work *work)
1602 {
1603         struct ib_cm_id *cm_id;
1604         struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1605         struct cm_req_msg *req_msg;
1606         int ret;
1607
1608         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1609
1610         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1611         if (IS_ERR(cm_id))
1612                 return PTR_ERR(cm_id);
1613
1614         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1615         cm_id_priv->id.remote_id = req_msg->local_comm_id;
1616         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1617                                 work->mad_recv_wc->recv_buf.grh,
1618                                 &cm_id_priv->av);
1619         cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1620                                                             id.local_id);
1621         if (IS_ERR(cm_id_priv->timewait_info)) {
1622                 ret = PTR_ERR(cm_id_priv->timewait_info);
1623                 goto destroy;
1624         }
1625         cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1626         cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
1627         cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
1628
1629         listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1630         if (!listen_cm_id_priv) {
1631                 ret = -EINVAL;
1632                 kfree(cm_id_priv->timewait_info);
1633                 goto destroy;
1634         }
1635
1636         cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
1637         cm_id_priv->id.context = listen_cm_id_priv->id.context;
1638         cm_id_priv->id.service_id = req_msg->service_id;
1639         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
1640
1641         cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1642         cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1643
1644         memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
1645         work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
1646         ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
1647         if (ret) {
1648                 ib_get_cached_gid(work->port->cm_dev->ib_device,
1649                                   work->port->port_num, 0, &work->path[0].sgid,
1650                                   NULL);
1651                 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1652                                &work->path[0].sgid, sizeof work->path[0].sgid,
1653                                NULL, 0);
1654                 goto rejected;
1655         }
1656         if (req_msg->alt_local_lid) {
1657                 ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
1658                 if (ret) {
1659                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
1660                                        &work->path[0].sgid,
1661                                        sizeof work->path[0].sgid, NULL, 0);
1662                         goto rejected;
1663                 }
1664         }
1665         cm_id_priv->tid = req_msg->hdr.tid;
1666         cm_id_priv->timeout_ms = cm_convert_to_ms(
1667                                         cm_req_get_local_resp_timeout(req_msg));
1668         cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
1669         cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
1670         cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
1671         cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
1672         cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
1673         cm_id_priv->pkey = req_msg->pkey;
1674         cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
1675         cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
1676         cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1677         cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
1678
1679         cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
1680         cm_process_work(cm_id_priv, work);
1681         cm_deref_id(listen_cm_id_priv);
1682         return 0;
1683
1684 rejected:
1685         atomic_dec(&cm_id_priv->refcount);
1686         cm_deref_id(listen_cm_id_priv);
1687 destroy:
1688         ib_destroy_cm_id(cm_id);
1689         return ret;
1690 }
1691
1692 static void cm_format_rep(struct cm_rep_msg *rep_msg,
1693                           struct cm_id_private *cm_id_priv,
1694                           struct ib_cm_rep_param *param)
1695 {
1696         cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
1697         rep_msg->local_comm_id = cm_id_priv->id.local_id;
1698         rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
1699         cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
1700         rep_msg->resp_resources = param->responder_resources;
1701         cm_rep_set_target_ack_delay(rep_msg,
1702                                     cm_id_priv->av.port->cm_dev->ack_delay);
1703         cm_rep_set_failover(rep_msg, param->failover_accepted);
1704         cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
1705         rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1706
1707         if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
1708                 rep_msg->initiator_depth = param->initiator_depth;
1709                 cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1710                 cm_rep_set_srq(rep_msg, param->srq);
1711                 cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
1712         } else {
1713                 cm_rep_set_srq(rep_msg, 1);
1714                 cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
1715         }
1716
1717         if (param->private_data && param->private_data_len)
1718                 memcpy(rep_msg->private_data, param->private_data,
1719                        param->private_data_len);
1720 }
1721
1722 int ib_send_cm_rep(struct ib_cm_id *cm_id,
1723                    struct ib_cm_rep_param *param)
1724 {
1725         struct cm_id_private *cm_id_priv;
1726         struct ib_mad_send_buf *msg;
1727         struct cm_rep_msg *rep_msg;
1728         unsigned long flags;
1729         int ret;
1730
1731         if (param->private_data &&
1732             param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
1733                 return -EINVAL;
1734
1735         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1736         spin_lock_irqsave(&cm_id_priv->lock, flags);
1737         if (cm_id->state != IB_CM_REQ_RCVD &&
1738             cm_id->state != IB_CM_MRA_REQ_SENT) {
1739                 ret = -EINVAL;
1740                 goto out;
1741         }
1742
1743         ret = cm_alloc_msg(cm_id_priv, &msg);
1744         if (ret)
1745                 goto out;
1746
1747         rep_msg = (struct cm_rep_msg *) msg->mad;
1748         cm_format_rep(rep_msg, cm_id_priv, param);
1749         msg->timeout_ms = cm_id_priv->timeout_ms;
1750         msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
1751
1752         ret = ib_post_send_mad(msg, NULL);
1753         if (ret) {
1754                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1755                 cm_free_msg(msg);
1756                 return ret;
1757         }
1758
1759         cm_id->state = IB_CM_REP_SENT;
1760         cm_id_priv->msg = msg;
1761         cm_id_priv->initiator_depth = param->initiator_depth;
1762         cm_id_priv->responder_resources = param->responder_resources;
1763         cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
1764         cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
1765
1766 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1767         return ret;
1768 }
1769 EXPORT_SYMBOL(ib_send_cm_rep);
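
/*
 * Editor's sketch (not part of the driver): how a passive-side cm_handler,
 * installed on the listening id and inherited by the new id in
 * cm_req_handler() above, might answer IB_CM_REQ_RECEIVED with
 * ib_send_cm_rep().  QP creation/transition is elided and every name
 * prefixed sketch_ is hypothetical.
 */
static int sketch_passive_handler(struct ib_cm_id *cm_id,
                                  struct ib_cm_event *event)
{
        struct ib_cm_rep_param rep;

        if (event->event != IB_CM_REQ_RECEIVED)
                return 0;

        memset(&rep, 0, sizeof(rep));
        rep.qp_num = 0x123456;  /* QPN of a QP already moved to RTR */
        rep.starting_psn = 1;
        rep.responder_resources = event->param.req_rcvd.responder_resources;
        rep.initiator_depth = event->param.req_rcvd.initiator_depth;
        rep.rnr_retry_count = 7;        /* infinite RNR retries */

        /* A non-zero return makes cm_process_work() destroy the id. */
        return ib_send_cm_rep(cm_id, &rep);
}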
1770
1771 static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
1772                           struct cm_id_private *cm_id_priv,
1773                           const void *private_data,
1774                           u8 private_data_len)
1775 {
1776         cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
1777         rtu_msg->local_comm_id = cm_id_priv->id.local_id;
1778         rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
1779
1780         if (private_data && private_data_len)
1781                 memcpy(rtu_msg->private_data, private_data, private_data_len);
1782 }
1783
1784 int ib_send_cm_rtu(struct ib_cm_id *cm_id,
1785                    const void *private_data,
1786                    u8 private_data_len)
1787 {
1788         struct cm_id_private *cm_id_priv;
1789         struct ib_mad_send_buf *msg;
1790         unsigned long flags;
1791         void *data;
1792         int ret;
1793
1794         if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
1795                 return -EINVAL;
1796
1797         data = cm_copy_private_data(private_data, private_data_len);
1798         if (IS_ERR(data))
1799                 return PTR_ERR(data);
1800
1801         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1802         spin_lock_irqsave(&cm_id_priv->lock, flags);
1803         if (cm_id->state != IB_CM_REP_RCVD &&
1804             cm_id->state != IB_CM_MRA_REP_SENT) {
1805                 ret = -EINVAL;
1806                 goto error;
1807         }
1808
1809         ret = cm_alloc_msg(cm_id_priv, &msg);
1810         if (ret)
1811                 goto error;
1812
1813         cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1814                       private_data, private_data_len);
1815
1816         ret = ib_post_send_mad(msg, NULL);
1817         if (ret) {
1818                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1819                 cm_free_msg(msg);
1820                 kfree(data);
1821                 return ret;
1822         }
1823
1824         cm_id->state = IB_CM_ESTABLISHED;
1825         cm_set_private_data(cm_id_priv, data, private_data_len);
1826         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1827         return 0;
1828
1829 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1830         kfree(data);
1831         return ret;
1832 }
1833 EXPORT_SYMBOL(ib_send_cm_rtu);
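
/*
 * Editor's sketch (not part of the driver): the active side's counterpart.
 * On IB_CM_REP_RECEIVED the consumer transitions its QP to RTR/RTS and
 * confirms with ib_send_cm_rtu(), which moves the id to IB_CM_ESTABLISHED
 * as implemented above.  Hypothetical fragment.
 */
static int sketch_active_handler(struct ib_cm_id *cm_id,
                                 struct ib_cm_event *event)
{
        if (event->event != IB_CM_REP_RECEIVED)
                return 0;

        /* ... modify the QP using ib_cm_init_qp_attr() first ... */
        return ib_send_cm_rtu(cm_id, NULL, 0);
}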
1834
1835 static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
1836 {
1837         struct cm_rep_msg *rep_msg;
1838         struct ib_cm_rep_event_param *param;
1839
1840         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1841         param = &work->cm_event.param.rep_rcvd;
1842         param->remote_ca_guid = rep_msg->local_ca_guid;
1843         param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
1844         param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
1845         param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
1846         param->responder_resources = rep_msg->initiator_depth;
1847         param->initiator_depth = rep_msg->resp_resources;
1848         param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1849         param->failover_accepted = cm_rep_get_failover(rep_msg);
1850         param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
1851         param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1852         param->srq = cm_rep_get_srq(rep_msg);
1853         work->cm_event.private_data = &rep_msg->private_data;
1854 }
1855
1856 static void cm_dup_rep_handler(struct cm_work *work)
1857 {
1858         struct cm_id_private *cm_id_priv;
1859         struct cm_rep_msg *rep_msg;
1860         struct ib_mad_send_buf *msg = NULL;
1861         int ret;
1862
1863         rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
1864         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
1865                                    rep_msg->local_comm_id);
1866         if (!cm_id_priv)
1867                 return;
1868
1869         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1870                         counter[CM_REP_COUNTER]);
1871         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1872         if (ret)
1873                 goto deref;
1874
1875         spin_lock_irq(&cm_id_priv->lock);
1876         if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
1877                 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1878                               cm_id_priv->private_data,
1879                               cm_id_priv->private_data_len);
1880         else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
1881                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1882                               CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
1883                               cm_id_priv->private_data,
1884                               cm_id_priv->private_data_len);
1885         else
1886                 goto unlock;
1887         spin_unlock_irq(&cm_id_priv->lock);
1888
1889         ret = ib_post_send_mad(msg, NULL);
1890         if (ret)
1891                 goto free;
1892         goto deref;
1893
1894 unlock: spin_unlock_irq(&cm_id_priv->lock);
1895 free:   cm_free_msg(msg);
1896 deref:  cm_deref_id(cm_id_priv);
1897 }
1898
1899 static int cm_rep_handler(struct cm_work *work)
1900 {
1901         struct cm_id_private *cm_id_priv;
1902         struct cm_rep_msg *rep_msg;
1903         int ret;
1904
1905         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1906         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
1907         if (!cm_id_priv) {
1908                 cm_dup_rep_handler(work);
1909                 return -EINVAL;
1910         }
1911
1912         cm_format_rep_event(work, cm_id_priv->qp_type);
1913
1914         spin_lock_irq(&cm_id_priv->lock);
1915         switch (cm_id_priv->id.state) {
1916         case IB_CM_REQ_SENT:
1917         case IB_CM_MRA_REQ_RCVD:
1918                 break;
1919         default:
1920                 spin_unlock_irq(&cm_id_priv->lock);
1921                 ret = -EINVAL;
1922                 goto error;
1923         }
1924
1925         cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
1926         cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
1927         cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
1928
1929         spin_lock(&cm.lock);
1930         /* Check for duplicate REP. */
1931         if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
1932                 spin_unlock(&cm.lock);
1933                 spin_unlock_irq(&cm_id_priv->lock);
1934                 ret = -EINVAL;
1935                 goto error;
1936         }
1937         /* Check for a stale connection. */
1938         if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
1939                 rb_erase(&cm_id_priv->timewait_info->remote_id_node,
1940                          &cm.remote_id_table);
1941                 cm_id_priv->timewait_info->inserted_remote_id = 0;
1942                 spin_unlock(&cm.lock);
1943                 spin_unlock_irq(&cm_id_priv->lock);
1944                 cm_issue_rej(work->port, work->mad_recv_wc,
1945                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
1946                              NULL, 0);
1947                 ret = -EINVAL;
1948                 goto error;
1949         }
1950         spin_unlock(&cm.lock);
1951
1952         cm_id_priv->id.state = IB_CM_REP_RCVD;
1953         cm_id_priv->id.remote_id = rep_msg->local_comm_id;
1954         cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
1955         cm_id_priv->initiator_depth = rep_msg->resp_resources;
1956         cm_id_priv->responder_resources = rep_msg->initiator_depth;
1957         cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
1958         cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1959         cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1960         cm_id_priv->av.timeout =
1961                         cm_ack_timeout(cm_id_priv->target_ack_delay,
1962                                        cm_id_priv->av.timeout - 1);
1963         cm_id_priv->alt_av.timeout =
1964                         cm_ack_timeout(cm_id_priv->target_ack_delay,
1965                                        cm_id_priv->alt_av.timeout - 1);
1966
1967         /* todo: handle peer_to_peer */
1968
1969         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1970         ret = atomic_inc_and_test(&cm_id_priv->work_count);
1971         if (!ret)
1972                 list_add_tail(&work->list, &cm_id_priv->work_list);
1973         spin_unlock_irq(&cm_id_priv->lock);
1974
1975         if (ret)
1976                 cm_process_work(cm_id_priv, work);
1977         else
1978                 cm_deref_id(cm_id_priv);
1979         return 0;
1980
1981 error:
1982         cm_deref_id(cm_id_priv);
1983         return ret;
1984 }
1985
1986 static int cm_establish_handler(struct cm_work *work)
1987 {
1988         struct cm_id_private *cm_id_priv;
1989         int ret;
1990
1991         /* See comment in cm_establish about lookup. */
1992         cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
1993         if (!cm_id_priv)
1994                 return -EINVAL;
1995
1996         spin_lock_irq(&cm_id_priv->lock);
1997         if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
1998                 spin_unlock_irq(&cm_id_priv->lock);
1999                 goto out;
2000         }
2001
2002         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2003         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2004         if (!ret)
2005                 list_add_tail(&work->list, &cm_id_priv->work_list);
2006         spin_unlock_irq(&cm_id_priv->lock);
2007
2008         if (ret)
2009                 cm_process_work(cm_id_priv, work);
2010         else
2011                 cm_deref_id(cm_id_priv);
2012         return 0;
2013 out:
2014         cm_deref_id(cm_id_priv);
2015         return -EINVAL;
2016 }
2017
2018 static int cm_rtu_handler(struct cm_work *work)
2019 {
2020         struct cm_id_private *cm_id_priv;
2021         struct cm_rtu_msg *rtu_msg;
2022         int ret;
2023
2024         rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
2025         cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
2026                                    rtu_msg->local_comm_id);
2027         if (!cm_id_priv)
2028                 return -EINVAL;
2029
2030         work->cm_event.private_data = &rtu_msg->private_data;
2031
2032         spin_lock_irq(&cm_id_priv->lock);
2033         if (cm_id_priv->id.state != IB_CM_REP_SENT &&
2034             cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
2035                 spin_unlock_irq(&cm_id_priv->lock);
2036                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2037                                 counter[CM_RTU_COUNTER]);
2038                 goto out;
2039         }
2040         cm_id_priv->id.state = IB_CM_ESTABLISHED;
2041
2042         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2043         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2044         if (!ret)
2045                 list_add_tail(&work->list, &cm_id_priv->work_list);
2046         spin_unlock_irq(&cm_id_priv->lock);
2047
2048         if (ret)
2049                 cm_process_work(cm_id_priv, work);
2050         else
2051                 cm_deref_id(cm_id_priv);
2052         return 0;
2053 out:
2054         cm_deref_id(cm_id_priv);
2055         return -EINVAL;
2056 }
2057
2058 static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
2059                           struct cm_id_private *cm_id_priv,
2060                           const void *private_data,
2061                           u8 private_data_len)
2062 {
2063         cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
2064                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
2065         dreq_msg->local_comm_id = cm_id_priv->id.local_id;
2066         dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
2067         cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
2068
2069         if (private_data && private_data_len)
2070                 memcpy(dreq_msg->private_data, private_data, private_data_len);
2071 }
2072
2073 int ib_send_cm_dreq(struct ib_cm_id *cm_id,
2074                     const void *private_data,
2075                     u8 private_data_len)
2076 {
2077         struct cm_id_private *cm_id_priv;
2078         struct ib_mad_send_buf *msg;
2079         unsigned long flags;
2080         int ret;
2081
2082         if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
2083                 return -EINVAL;
2084
2085         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2086         spin_lock_irqsave(&cm_id_priv->lock, flags);
2087         if (cm_id->state != IB_CM_ESTABLISHED) {
2088                 ret = -EINVAL;
2089                 goto out;
2090         }
2091
2092         if (cm_id->lap_state == IB_CM_LAP_SENT ||
2093             cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
2094                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2095
2096         ret = cm_alloc_msg(cm_id_priv, &msg);
2097         if (ret) {
2098                 cm_enter_timewait(cm_id_priv);
2099                 goto out;
2100         }
2101
2102         cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
2103                        private_data, private_data_len);
2104         msg->timeout_ms = cm_id_priv->timeout_ms;
2105         msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2106
2107         ret = ib_post_send_mad(msg, NULL);
2108         if (ret) {
2109                 cm_enter_timewait(cm_id_priv);
2110                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2111                 cm_free_msg(msg);
2112                 return ret;
2113         }
2114
2115         cm_id->state = IB_CM_DREQ_SENT;
2116         cm_id_priv->msg = msg;
2117 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2118         return ret;
2119 }
2120 EXPORT_SYMBOL(ib_send_cm_dreq);
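
/*
 * Editor's sketch (not part of the driver): orderly teardown.  Either side
 * of an established connection may send a DREQ; the peer answers with a
 * DREP (or the stateless cm_issue_drep() below when it no longer has a
 * matching id) and both ends pass through timewait.  Hypothetical fragment.
 */
static void sketch_disconnect(struct ib_cm_id *cm_id)
{
        if (ib_send_cm_dreq(cm_id, NULL, 0))
                pr_debug("DREQ not sent: id not in IB_CM_ESTABLISHED\n");
        /* Completion is reported later as an IB_CM_DREP_RECEIVED event. */
}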
2121
2122 static void cm_format_drep(struct cm_drep_msg *drep_msg,
2123                           struct cm_id_private *cm_id_priv,
2124                           const void *private_data,
2125                           u8 private_data_len)
2126 {
2127         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2128         drep_msg->local_comm_id = cm_id_priv->id.local_id;
2129         drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2130
2131         if (private_data && private_data_len)
2132                 memcpy(drep_msg->private_data, private_data, private_data_len);
2133 }
2134
2135 int ib_send_cm_drep(struct ib_cm_id *cm_id,
2136                     const void *private_data,
2137                     u8 private_data_len)
2138 {
2139         struct cm_id_private *cm_id_priv;
2140         struct ib_mad_send_buf *msg;
2141         unsigned long flags;
2142         void *data;
2143         int ret;
2144
2145         if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2146                 return -EINVAL;
2147
2148         data = cm_copy_private_data(private_data, private_data_len);
2149         if (IS_ERR(data))
2150                 return PTR_ERR(data);
2151
2152         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2153         spin_lock_irqsave(&cm_id_priv->lock, flags);
2154         if (cm_id->state != IB_CM_DREQ_RCVD) {
2155                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2156                 kfree(data);
2157                 return -EINVAL;
2158         }
2159
2160         cm_set_private_data(cm_id_priv, data, private_data_len);
2161         cm_enter_timewait(cm_id_priv);
2162
2163         ret = cm_alloc_msg(cm_id_priv, &msg);
2164         if (ret)
2165                 goto out;
2166
2167         cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2168                        private_data, private_data_len);
2169
2170         ret = ib_post_send_mad(msg, NULL);
2171         if (ret) {
2172                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2173                 cm_free_msg(msg);
2174                 return ret;
2175         }
2176
2177 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2178         return ret;
2179 }
2180 EXPORT_SYMBOL(ib_send_cm_drep);
2181
2182 static int cm_issue_drep(struct cm_port *port,
2183                          struct ib_mad_recv_wc *mad_recv_wc)
2184 {
2185         struct ib_mad_send_buf *msg = NULL;
2186         struct cm_dreq_msg *dreq_msg;
2187         struct cm_drep_msg *drep_msg;
2188         int ret;
2189
2190         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2191         if (ret)
2192                 return ret;
2193
2194         dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2195         drep_msg = (struct cm_drep_msg *) msg->mad;
2196
2197         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2198         drep_msg->remote_comm_id = dreq_msg->local_comm_id;
2199         drep_msg->local_comm_id = dreq_msg->remote_comm_id;
2200
2201         ret = ib_post_send_mad(msg, NULL);
2202         if (ret)
2203                 cm_free_msg(msg);
2204
2205         return ret;
2206 }
2207
2208 static int cm_dreq_handler(struct cm_work *work)
2209 {
2210         struct cm_id_private *cm_id_priv;
2211         struct cm_dreq_msg *dreq_msg;
2212         struct ib_mad_send_buf *msg = NULL;
2213         int ret;
2214
2215         dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2216         cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
2217                                    dreq_msg->local_comm_id);
2218         if (!cm_id_priv) {
2219                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2220                                 counter[CM_DREQ_COUNTER]);
2221                 cm_issue_drep(work->port, work->mad_recv_wc);
2222                 return -EINVAL;
2223         }
2224
2225         work->cm_event.private_data = &dreq_msg->private_data;
2226
2227         spin_lock_irq(&cm_id_priv->lock);
2228         if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
2229                 goto unlock;
2230
2231         switch (cm_id_priv->id.state) {
2232         case IB_CM_REP_SENT:
2233         case IB_CM_DREQ_SENT:
2234                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2235                 break;
2236         case IB_CM_ESTABLISHED:
2237                 if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2238                     cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2239                         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2240                 break;
2241         case IB_CM_MRA_REP_RCVD:
2242                 break;
2243         case IB_CM_TIMEWAIT:
2244                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2245                                 counter[CM_DREQ_COUNTER]);
2246                 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2247                         goto unlock;
2248
2249                 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2250                                cm_id_priv->private_data,
2251                                cm_id_priv->private_data_len);
2252                 spin_unlock_irq(&cm_id_priv->lock);
2253
2254                 if (ib_post_send_mad(msg, NULL))
2255                         cm_free_msg(msg);
2256                 goto deref;
2257         case IB_CM_DREQ_RCVD:
2258                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2259                                 counter[CM_DREQ_COUNTER]);
2260                 goto unlock;
2261         default:
2262                 goto unlock;
2263         }
2264         cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2265         cm_id_priv->tid = dreq_msg->hdr.tid;
2266         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2267         if (!ret)
2268                 list_add_tail(&work->list, &cm_id_priv->work_list);
2269         spin_unlock_irq(&cm_id_priv->lock);
2270
2271         if (ret)
2272                 cm_process_work(cm_id_priv, work);
2273         else
2274                 cm_deref_id(cm_id_priv);
2275         return 0;
2276
2277 unlock: spin_unlock_irq(&cm_id_priv->lock);
2278 deref:  cm_deref_id(cm_id_priv);
2279         return -EINVAL;
2280 }
2281
2282 static int cm_drep_handler(struct cm_work *work)
2283 {
2284         struct cm_id_private *cm_id_priv;
2285         struct cm_drep_msg *drep_msg;
2286         int ret;
2287
2288         drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2289         cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2290                                    drep_msg->local_comm_id);
2291         if (!cm_id_priv)
2292                 return -EINVAL;
2293
2294         work->cm_event.private_data = &drep_msg->private_data;
2295
2296         spin_lock_irq(&cm_id_priv->lock);
2297         if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2298             cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2299                 spin_unlock_irq(&cm_id_priv->lock);
2300                 goto out;
2301         }
2302         cm_enter_timewait(cm_id_priv);
2303
2304         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2305         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2306         if (!ret)
2307                 list_add_tail(&work->list, &cm_id_priv->work_list);
2308         spin_unlock_irq(&cm_id_priv->lock);
2309
2310         if (ret)
2311                 cm_process_work(cm_id_priv, work);
2312         else
2313                 cm_deref_id(cm_id_priv);
2314         return 0;
2315 out:
2316         cm_deref_id(cm_id_priv);
2317         return -EINVAL;
2318 }
2319
2320 int ib_send_cm_rej(struct ib_cm_id *cm_id,
2321                    enum ib_cm_rej_reason reason,
2322                    void *ari,
2323                    u8 ari_length,
2324                    const void *private_data,
2325                    u8 private_data_len)
2326 {
2327         struct cm_id_private *cm_id_priv;
2328         struct ib_mad_send_buf *msg;
2329         unsigned long flags;
2330         int ret;
2331
2332         if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2333             (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2334                 return -EINVAL;
2335
2336         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2337
2338         spin_lock_irqsave(&cm_id_priv->lock, flags);
2339         switch (cm_id->state) {
2340         case IB_CM_REQ_SENT:
2341         case IB_CM_MRA_REQ_RCVD:
2342         case IB_CM_REQ_RCVD:
2343         case IB_CM_MRA_REQ_SENT:
2344         case IB_CM_REP_RCVD:
2345         case IB_CM_MRA_REP_SENT:
2346                 ret = cm_alloc_msg(cm_id_priv, &msg);
2347                 if (!ret)
2348                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2349                                       cm_id_priv, reason, ari, ari_length,
2350                                       private_data, private_data_len);
2351
2352                 cm_reset_to_idle(cm_id_priv);
2353                 break;
2354         case IB_CM_REP_SENT:
2355         case IB_CM_MRA_REP_RCVD:
2356                 ret = cm_alloc_msg(cm_id_priv, &msg);
2357                 if (!ret)
2358                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2359                                       cm_id_priv, reason, ari, ari_length,
2360                                       private_data, private_data_len);
2361
2362                 cm_enter_timewait(cm_id_priv);
2363                 break;
2364         default:
2365                 ret = -EINVAL;
2366                 goto out;
2367         }
2368
2369         if (ret)
2370                 goto out;
2371
2372         ret = ib_post_send_mad(msg, NULL);
2373         if (ret)
2374                 cm_free_msg(msg);
2375
2376 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2377         return ret;
2378 }
2379 EXPORT_SYMBOL(ib_send_cm_rej);
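
/*
 * Editor's sketch (not part of the driver): declining a connection from the
 * cm_handler.  The optional ARI payload is bounded by IB_CM_REJ_ARI_LENGTH;
 * here none is supplied.  Hypothetical fragment.
 */
static int sketch_decline_req(struct ib_cm_id *cm_id)
{
        return ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                              NULL, 0, NULL, 0);
}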
2380
2381 static void cm_format_rej_event(struct cm_work *work)
2382 {
2383         struct cm_rej_msg *rej_msg;
2384         struct ib_cm_rej_event_param *param;
2385
2386         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2387         param = &work->cm_event.param.rej_rcvd;
2388         param->ari = rej_msg->ari;
2389         param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2390         param->reason = __be16_to_cpu(rej_msg->reason);
2391         work->cm_event.private_data = &rej_msg->private_data;
2392 }
2393
2394 static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2395 {
2396         struct cm_timewait_info *timewait_info;
2397         struct cm_id_private *cm_id_priv;
2398         __be32 remote_id;
2399
2400         remote_id = rej_msg->local_comm_id;
2401
2402         if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2403                 spin_lock_irq(&cm.lock);
2404                 timewait_info = cm_find_remote_id(*((__be64 *) rej_msg->ari),
2405                                                   remote_id);
2406                 if (!timewait_info) {
2407                         spin_unlock_irq(&cm.lock);
2408                         return NULL;
2409                 }
2410                 cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2411                                       (timewait_info->work.local_id ^
2412                                        cm.random_id_operand));
2413                 if (cm_id_priv) {
2414                         if (cm_id_priv->id.remote_id == remote_id)
2415                                 atomic_inc(&cm_id_priv->refcount);
2416                         else
2417                                 cm_id_priv = NULL;
2418                 }
2419                 spin_unlock_irq(&cm.lock);
2420         } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2421                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2422         else
2423                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2424
2425         return cm_id_priv;
2426 }
2427
2428 static int cm_rej_handler(struct cm_work *work)
2429 {
2430         struct cm_id_private *cm_id_priv;
2431         struct cm_rej_msg *rej_msg;
2432         int ret;
2433
2434         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2435         cm_id_priv = cm_acquire_rejected_id(rej_msg);
2436         if (!cm_id_priv)
2437                 return -EINVAL;
2438
2439         cm_format_rej_event(work);
2440
2441         spin_lock_irq(&cm_id_priv->lock);
2442         switch (cm_id_priv->id.state) {
2443         case IB_CM_REQ_SENT:
2444         case IB_CM_MRA_REQ_RCVD:
2445         case IB_CM_REP_SENT:
2446         case IB_CM_MRA_REP_RCVD:
2447                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2448                 /* fall through */
2449         case IB_CM_REQ_RCVD:
2450         case IB_CM_MRA_REQ_SENT:
2451                 if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2452                         cm_enter_timewait(cm_id_priv);
2453                 else
2454                         cm_reset_to_idle(cm_id_priv);
2455                 break;
2456         case IB_CM_DREQ_SENT:
2457                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2458                 /* fall through */
2459         case IB_CM_REP_RCVD:
2460         case IB_CM_MRA_REP_SENT:
2461                 cm_enter_timewait(cm_id_priv);
2462                 break;
2463         case IB_CM_ESTABLISHED:
2464                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2465                     cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2466                         if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2467                                 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2468                                               cm_id_priv->msg);
2469                         cm_enter_timewait(cm_id_priv);
2470                         break;
2471                 }
2472                 /* fall through */
2473         default:
2474                 spin_unlock_irq(&cm_id_priv->lock);
2475                 ret = -EINVAL;
2476                 goto out;
2477         }
2478
2479         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2480         if (!ret)
2481                 list_add_tail(&work->list, &cm_id_priv->work_list);
2482         spin_unlock_irq(&cm_id_priv->lock);
2483
2484         if (ret)
2485                 cm_process_work(cm_id_priv, work);
2486         else
2487                 cm_deref_id(cm_id_priv);
2488         return 0;
2489 out:
2490         cm_deref_id(cm_id_priv);
2491         return -EINVAL;
2492 }
2493
2494 int ib_send_cm_mra(struct ib_cm_id *cm_id,
2495                    u8 service_timeout,
2496                    const void *private_data,
2497                    u8 private_data_len)
2498 {
2499         struct cm_id_private *cm_id_priv;
2500         struct ib_mad_send_buf *msg;
2501         enum ib_cm_state cm_state;
2502         enum ib_cm_lap_state lap_state;
2503         enum cm_msg_response msg_response;
2504         void *data;
2505         unsigned long flags;
2506         int ret;
2507
2508         if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2509                 return -EINVAL;
2510
2511         data = cm_copy_private_data(private_data, private_data_len);
2512         if (IS_ERR(data))
2513                 return PTR_ERR(data);
2514
2515         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2516
2517         spin_lock_irqsave(&cm_id_priv->lock, flags);
2518         switch (cm_id_priv->id.state) {
2519         case IB_CM_REQ_RCVD:
2520                 cm_state = IB_CM_MRA_REQ_SENT;
2521                 lap_state = cm_id->lap_state;
2522                 msg_response = CM_MSG_RESPONSE_REQ;
2523                 break;
2524         case IB_CM_REP_RCVD:
2525                 cm_state = IB_CM_MRA_REP_SENT;
2526                 lap_state = cm_id->lap_state;
2527                 msg_response = CM_MSG_RESPONSE_REP;
2528                 break;
2529         case IB_CM_ESTABLISHED:
2530                 if (cm_id->lap_state == IB_CM_LAP_RCVD) {
2531                         cm_state = cm_id->state;
2532                         lap_state = IB_CM_MRA_LAP_SENT;
2533                         msg_response = CM_MSG_RESPONSE_OTHER;
2534                         break;
2535                 }
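                /* falls through -- any other lap_state is invalid here */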
2536         default:
2537                 ret = -EINVAL;
2538                 goto error1;
2539         }
2540
2541         if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2542                 ret = cm_alloc_msg(cm_id_priv, &msg);
2543                 if (ret)
2544                         goto error1;
2545
2546                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2547                               msg_response, service_timeout,
2548                               private_data, private_data_len);
2549                 ret = ib_post_send_mad(msg, NULL);
2550                 if (ret)
2551                         goto error2;
2552         }
2553
2554         cm_id->state = cm_state;
2555         cm_id->lap_state = lap_state;
2556         cm_id_priv->service_timeout = service_timeout;
2557         cm_set_private_data(cm_id_priv, data, private_data_len);
2558         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2559         return 0;
2560
2561 error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2562         kfree(data);
2563         return ret;
2564
2565 error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2566         kfree(data);
2567         cm_free_msg(msg);
2568         return ret;
2569 }
2570 EXPORT_SYMBOL(ib_send_cm_mra);
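
/*
 * Editor's sketch (not part of the driver): deferring a REQ with an MRA
 * when accepting will take a while.  The five-bit service timeout encodes
 * 4.096us * 2^t, so 24 asks the peer to wait roughly 69 seconds; OR-ing in
 * IB_CM_MRA_FLAG_DELAY updates local state without emitting an MRA on the
 * wire.  Hypothetical fragment.
 */
static int sketch_defer_req(struct ib_cm_id *cm_id)
{
        /* Moves the id to IB_CM_MRA_REQ_SENT; answer later with REP/REJ. */
        return ib_send_cm_mra(cm_id, 24, NULL, 0);
}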
2571
2572 static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2573 {
2574         switch (cm_mra_get_msg_mraed(mra_msg)) {
2575         case CM_MSG_RESPONSE_REQ:
2576                 return cm_acquire_id(mra_msg->remote_comm_id, 0);
2577         case CM_MSG_RESPONSE_REP:
2578         case CM_MSG_RESPONSE_OTHER:
2579                 return cm_acquire_id(mra_msg->remote_comm_id,
2580                                      mra_msg->local_comm_id);
2581         default:
2582                 return NULL;
2583         }
2584 }
2585
2586 static int cm_mra_handler(struct cm_work *work)
2587 {
2588         struct cm_id_private *cm_id_priv;
2589         struct cm_mra_msg *mra_msg;
2590         int timeout, ret;
2591
2592         mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
2593         cm_id_priv = cm_acquire_mraed_id(mra_msg);
2594         if (!cm_id_priv)
2595                 return -EINVAL;
2596
2597         work->cm_event.private_data = &mra_msg->private_data;
2598         work->cm_event.param.mra_rcvd.service_timeout =
2599                                         cm_mra_get_service_timeout(mra_msg);
2600         timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
2601                   cm_convert_to_ms(cm_id_priv->av.timeout);
2602
2603         spin_lock_irq(&cm_id_priv->lock);
2604         switch (cm_id_priv->id.state) {
2605         case IB_CM_REQ_SENT:
2606                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
2607                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2608                                   cm_id_priv->msg, timeout))
2609                         goto out;
2610                 cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
2611                 break;
2612         case IB_CM_REP_SENT:
2613                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
2614                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2615                                   cm_id_priv->msg, timeout))
2616                         goto out;
2617                 cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
2618                 break;
2619         case IB_CM_ESTABLISHED:
2620                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
2621                     cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
2622                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2623                                   cm_id_priv->msg, timeout)) {
2624                         if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2625                                 atomic_long_inc(&work->port->
2626                                                 counter_group[CM_RECV_DUPLICATES].
2627                                                 counter[CM_MRA_COUNTER]);
2628                         goto out;
2629                 }
2630                 cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
2631                 break;
2632         case IB_CM_MRA_REQ_RCVD:
2633         case IB_CM_MRA_REP_RCVD:
2634                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2635                                 counter[CM_MRA_COUNTER]);
2636                 /* fall through */
2637         default:
2638                 goto out;
2639         }
2640
2641         cm_id_priv->msg->context[1] = (void *) (unsigned long)
2642                                       cm_id_priv->id.state;
2643         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2644         if (!ret)
2645                 list_add_tail(&work->list, &cm_id_priv->work_list);
2646         spin_unlock_irq(&cm_id_priv->lock);
2647
2648         if (ret)
2649                 cm_process_work(cm_id_priv, work);
2650         else
2651                 cm_deref_id(cm_id_priv);
2652         return 0;
2653 out:
2654         spin_unlock_irq(&cm_id_priv->lock);
2655         cm_deref_id(cm_id_priv);
2656         return -EINVAL;
2657 }
2658
2659 static void cm_format_lap(struct cm_lap_msg *lap_msg,
2660                           struct cm_id_private *cm_id_priv,
2661                           struct ib_sa_path_rec *alternate_path,
2662                           const void *private_data,
2663                           u8 private_data_len)
2664 {
2665         cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
2666                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
2667         lap_msg->local_comm_id = cm_id_priv->id.local_id;
2668         lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
2669         cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
2670         /* todo: need remote CM response timeout */
2671         cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
2672         lap_msg->alt_local_lid = alternate_path->slid;
2673         lap_msg->alt_remote_lid = alternate_path->dlid;
2674         lap_msg->alt_local_gid = alternate_path->sgid;
2675         lap_msg->alt_remote_gid = alternate_path->dgid;
2676         cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
2677         cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
2678         lap_msg->alt_hop_limit = alternate_path->hop_limit;
2679         cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
2680         cm_lap_set_sl(lap_msg, alternate_path->sl);
2681         cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
2682         cm_lap_set_local_ack_timeout(lap_msg,
2683                 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
2684                                alternate_path->packet_life_time));
2685
2686         if (private_data && private_data_len)
2687                 memcpy(lap_msg->private_data, private_data, private_data_len);
2688 }
2689
2690 int ib_send_cm_lap(struct ib_cm_id *cm_id,
2691                    struct ib_sa_path_rec *alternate_path,
2692                    const void *private_data,
2693                    u8 private_data_len)
2694 {
2695         struct cm_id_private *cm_id_priv;
2696         struct ib_mad_send_buf *msg;
2697         unsigned long flags;
2698         int ret;
2699
2700         if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
2701                 return -EINVAL;
2702
2703         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2704         spin_lock_irqsave(&cm_id_priv->lock, flags);
2705         if (cm_id->state != IB_CM_ESTABLISHED ||
2706             (cm_id->lap_state != IB_CM_LAP_UNINIT &&
2707              cm_id->lap_state != IB_CM_LAP_IDLE)) {
2708                 ret = -EINVAL;
2709                 goto out;
2710         }
2711
2712         ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
2713         if (ret)
2714                 goto out;
2715         cm_id_priv->alt_av.timeout =
2716                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2717                                        cm_id_priv->alt_av.timeout - 1);
2718
2719         ret = cm_alloc_msg(cm_id_priv, &msg);
2720         if (ret)
2721                 goto out;
2722
2723         cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
2724                       alternate_path, private_data, private_data_len);
2725         msg->timeout_ms = cm_id_priv->timeout_ms;
2726         msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
2727
2728         ret = ib_post_send_mad(msg, NULL);
2729         if (ret) {
2730                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2731                 cm_free_msg(msg);
2732                 return ret;
2733         }
2734
2735         cm_id->lap_state = IB_CM_LAP_SENT;
2736         cm_id_priv->msg = msg;
2737
2738 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2739         return ret;
2740 }
2741 EXPORT_SYMBOL(ib_send_cm_lap);
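
/*
 * Example (illustrative sketch, not part of the original file): a ULP
 * holding an established connection can arm an alternate path obtained
 * from an SA path record query.  "alt_path" is a hypothetical
 * ib_sa_path_rec filled in by such a query:
 *
 *	ret = ib_send_cm_lap(cm_id, &alt_path, NULL, 0);
 *	if (ret)
 *		pr_err("failed to send LAP: %d\n", ret);
 *
 * The peer's answer arrives as an IB_CM_APR_RECEIVED event.
 */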
2742
2743 static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
2744                                     struct ib_sa_path_rec *path,
2745                                     struct cm_lap_msg *lap_msg)
2746 {
2747         memset(path, 0, sizeof *path);
2748         path->dgid = lap_msg->alt_local_gid;
2749         path->sgid = lap_msg->alt_remote_gid;
2750         path->dlid = lap_msg->alt_local_lid;
2751         path->slid = lap_msg->alt_remote_lid;
2752         path->flow_label = cm_lap_get_flow_label(lap_msg);
2753         path->hop_limit = lap_msg->alt_hop_limit;
2754         path->traffic_class = cm_lap_get_traffic_class(lap_msg);
2755         path->reversible = 1;
2756         path->pkey = cm_id_priv->pkey;
2757         path->sl = cm_lap_get_sl(lap_msg);
2758         path->mtu_selector = IB_SA_EQ;
2759         path->mtu = cm_id_priv->path_mtu;
2760         path->rate_selector = IB_SA_EQ;
2761         path->rate = cm_lap_get_packet_rate(lap_msg);
2762         path->packet_life_time_selector = IB_SA_EQ;
2763         path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
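	/* subtract one, clamping at zero */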
2764         path->packet_life_time -= (path->packet_life_time > 0);
2765 }
2766
2767 static int cm_lap_handler(struct cm_work *work)
2768 {
2769         struct cm_id_private *cm_id_priv;
2770         struct cm_lap_msg *lap_msg;
2771         struct ib_cm_lap_event_param *param;
2772         struct ib_mad_send_buf *msg = NULL;
2773         int ret;
2774
2775         /* todo: verify LAP request and send reject APR if invalid. */
2776         lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
2777         cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
2778                                    lap_msg->local_comm_id);
2779         if (!cm_id_priv)
2780                 return -EINVAL;
2781
2782         param = &work->cm_event.param.lap_rcvd;
2783         param->alternate_path = &work->path[0];
2784         cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
2785         work->cm_event.private_data = &lap_msg->private_data;
2786
2787         spin_lock_irq(&cm_id_priv->lock);
2788         if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
2789                 goto unlock;
2790
2791         switch (cm_id_priv->id.lap_state) {
2792         case IB_CM_LAP_UNINIT:
2793         case IB_CM_LAP_IDLE:
2794                 break;
2795         case IB_CM_MRA_LAP_SENT:
2796                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2797                                 counter[CM_LAP_COUNTER]);
2798                 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2799                         goto unlock;
2800
2801                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2802                               CM_MSG_RESPONSE_OTHER,
2803                               cm_id_priv->service_timeout,
2804                               cm_id_priv->private_data,
2805                               cm_id_priv->private_data_len);
2806                 spin_unlock_irq(&cm_id_priv->lock);
2807
2808                 if (ib_post_send_mad(msg, NULL))
2809                         cm_free_msg(msg);
2810                 goto deref;
2811         case IB_CM_LAP_RCVD:
2812                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2813                                 counter[CM_LAP_COUNTER]);
2814                 goto unlock;
2815         default:
2816                 goto unlock;
2817         }
2818
2819         cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
2820         cm_id_priv->tid = lap_msg->hdr.tid;
2821         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
2822                                 work->mad_recv_wc->recv_buf.grh,
2823                                 &cm_id_priv->av);
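	/* note: the return of cm_init_av_by_path() is not checked here */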
2824         cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
2825         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2826         if (!ret)
2827                 list_add_tail(&work->list, &cm_id_priv->work_list);
2828         spin_unlock_irq(&cm_id_priv->lock);
2829
2830         if (ret)
2831                 cm_process_work(cm_id_priv, work);
2832         else
2833                 cm_deref_id(cm_id_priv);
2834         return 0;
2835
2836 unlock: spin_unlock_irq(&cm_id_priv->lock);
2837 deref:  cm_deref_id(cm_id_priv);
2838         return -EINVAL;
2839 }
2840
2841 static void cm_format_apr(struct cm_apr_msg *apr_msg,
2842                           struct cm_id_private *cm_id_priv,
2843                           enum ib_cm_apr_status status,
2844                           void *info,
2845                           u8 info_length,
2846                           const void *private_data,
2847                           u8 private_data_len)
2848 {
2849         cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
2850         apr_msg->local_comm_id = cm_id_priv->id.local_id;
2851         apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
2852         apr_msg->ap_status = (u8) status;
2853
2854         if (info && info_length) {
2855                 apr_msg->info_length = info_length;
2856                 memcpy(apr_msg->info, info, info_length);
2857         }
2858
2859         if (private_data && private_data_len)
2860                 memcpy(apr_msg->private_data, private_data, private_data_len);
2861 }
2862
2863 int ib_send_cm_apr(struct ib_cm_id *cm_id,
2864                    enum ib_cm_apr_status status,
2865                    void *info,
2866                    u8 info_length,
2867                    const void *private_data,
2868                    u8 private_data_len)
2869 {
2870         struct cm_id_private *cm_id_priv;
2871         struct ib_mad_send_buf *msg;
2872         unsigned long flags;
2873         int ret;
2874
2875         if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
2876             (info && info_length > IB_CM_APR_INFO_LENGTH))
2877                 return -EINVAL;
2878
2879         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2880         spin_lock_irqsave(&cm_id_priv->lock, flags);
2881         if (cm_id->state != IB_CM_ESTABLISHED ||
2882             (cm_id->lap_state != IB_CM_LAP_RCVD &&
2883              cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
2884                 ret = -EINVAL;
2885                 goto out;
2886         }
2887
2888         ret = cm_alloc_msg(cm_id_priv, &msg);
2889         if (ret)
2890                 goto out;
2891
2892         cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
2893                       info, info_length, private_data, private_data_len);
2894         ret = ib_post_send_mad(msg, NULL);
2895         if (ret) {
2896                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2897                 cm_free_msg(msg);
2898                 return ret;
2899         }
2900
2901         cm_id->lap_state = IB_CM_LAP_IDLE;
2902 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2903         return ret;
2904 }
2905 EXPORT_SYMBOL(ib_send_cm_apr);
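
/*
 * Example (illustrative, not part of the original file): accepting a
 * received LAP from the cm_handler when an IB_CM_LAP_RECEIVED event is
 * delivered:
 *
 *	case IB_CM_LAP_RECEIVED:
 *		ret = ib_send_cm_apr(cm_id, IB_CM_APR_SUCCESS,
 *				     NULL, 0, NULL, 0);
 *		break;
 *
 * A successful send returns lap_state to IB_CM_LAP_IDLE, as above.
 */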
2906
2907 static int cm_apr_handler(struct cm_work *work)
2908 {
2909         struct cm_id_private *cm_id_priv;
2910         struct cm_apr_msg *apr_msg;
2911         int ret;
2912
2913         apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
2914         cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
2915                                    apr_msg->local_comm_id);
2916         if (!cm_id_priv)
2917                 return -EINVAL; /* Unmatched reply. */
2918
2919         work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
2920         work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
2921         work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
2922         work->cm_event.private_data = &apr_msg->private_data;
2923
2924         spin_lock_irq(&cm_id_priv->lock);
2925         if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
2926             (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
2927              cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
2928                 spin_unlock_irq(&cm_id_priv->lock);
2929                 goto out;
2930         }
2931         cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
2932         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2933         cm_id_priv->msg = NULL;
2934
2935         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2936         if (!ret)
2937                 list_add_tail(&work->list, &cm_id_priv->work_list);
2938         spin_unlock_irq(&cm_id_priv->lock);
2939
2940         if (ret)
2941                 cm_process_work(cm_id_priv, work);
2942         else
2943                 cm_deref_id(cm_id_priv);
2944         return 0;
2945 out:
2946         cm_deref_id(cm_id_priv);
2947         return -EINVAL;
2948 }
2949
2950 static int cm_timewait_handler(struct cm_work *work)
2951 {
2952         struct cm_timewait_info *timewait_info;
2953         struct cm_id_private *cm_id_priv;
2954         int ret;
2955
2956         timewait_info = (struct cm_timewait_info *)work;
2957         spin_lock_irq(&cm.lock);
2958         list_del(&timewait_info->list);
2959         spin_unlock_irq(&cm.lock);
2960
2961         cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
2962                                    timewait_info->work.remote_id);
2963         if (!cm_id_priv)
2964                 return -EINVAL;
2965
2966         spin_lock_irq(&cm_id_priv->lock);
2967         if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
2968             cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
2969                 spin_unlock_irq(&cm_id_priv->lock);
2970                 goto out;
2971         }
2972         cm_id_priv->id.state = IB_CM_IDLE;
2973         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2974         if (!ret)
2975                 list_add_tail(&work->list, &cm_id_priv->work_list);
2976         spin_unlock_irq(&cm_id_priv->lock);
2977
2978         if (ret)
2979                 cm_process_work(cm_id_priv, work);
2980         else
2981                 cm_deref_id(cm_id_priv);
2982         return 0;
2983 out:
2984         cm_deref_id(cm_id_priv);
2985         return -EINVAL;
2986 }
2987
2988 static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
2989                                struct cm_id_private *cm_id_priv,
2990                                struct ib_cm_sidr_req_param *param)
2991 {
2992         cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
2993                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
2994         sidr_req_msg->request_id = cm_id_priv->id.local_id;
2995         sidr_req_msg->pkey = param->path->pkey;
2996         sidr_req_msg->service_id = param->service_id;
2997
2998         if (param->private_data && param->private_data_len)
2999                 memcpy(sidr_req_msg->private_data, param->private_data,
3000                        param->private_data_len);
3001 }
3002
3003 int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
3004                         struct ib_cm_sidr_req_param *param)
3005 {
3006         struct cm_id_private *cm_id_priv;
3007         struct ib_mad_send_buf *msg;
3008         unsigned long flags;
3009         int ret;
3010
3011         if (!param->path || (param->private_data &&
3012              param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
3013                 return -EINVAL;
3014
3015         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3016         ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
3017         if (ret)
3018                 goto out;
3019
3020         cm_id->service_id = param->service_id;
3021         cm_id->service_mask = ~cpu_to_be64(0);
3022         cm_id_priv->timeout_ms = param->timeout_ms;
3023         cm_id_priv->max_cm_retries = param->max_cm_retries;
3024         ret = cm_alloc_msg(cm_id_priv, &msg);
3025         if (ret)
3026                 goto out;
3027
3028         cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
3029                            param);
3030         msg->timeout_ms = cm_id_priv->timeout_ms;
3031         msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
3032
3033         spin_lock_irqsave(&cm_id_priv->lock, flags);
3034         if (cm_id->state == IB_CM_IDLE)
3035                 ret = ib_post_send_mad(msg, NULL);
3036         else
3037                 ret = -EINVAL;
3038
3039         if (ret) {
3040                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3041                 cm_free_msg(msg);
3042                 goto out;
3043         }
3044         cm_id->state = IB_CM_SIDR_REQ_SENT;
3045         cm_id_priv->msg = msg;
3046         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3047 out:
3048         return ret;
3049 }
3050 EXPORT_SYMBOL(ib_send_cm_sidr_req);
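
/*
 * Example (illustrative sketch): resolving a service ID to a remote UD
 * QPN.  "path_rec" and MY_SERVICE_ID are placeholders; the path record
 * would come from a prior SA path query:
 *
 *	struct ib_cm_sidr_req_param param = {
 *		.path		= &path_rec,
 *		.service_id	= cpu_to_be64(MY_SERVICE_ID),
 *		.timeout_ms	= 2000,
 *		.max_cm_retries	= 3,
 *	};
 *
 *	ret = ib_send_cm_sidr_req(cm_id, &param);
 *
 * The reply is delivered as an IB_CM_SIDR_REP_RECEIVED event carrying
 * the remote QPN and Q_Key.
 */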
3051
3052 static void cm_format_sidr_req_event(struct cm_work *work,
3053                                      struct ib_cm_id *listen_id)
3054 {
3055         struct cm_sidr_req_msg *sidr_req_msg;
3056         struct ib_cm_sidr_req_event_param *param;
3057
3058         sidr_req_msg = (struct cm_sidr_req_msg *)
3059                                 work->mad_recv_wc->recv_buf.mad;
3060         param = &work->cm_event.param.sidr_req_rcvd;
3061         param->pkey = be16_to_cpu(sidr_req_msg->pkey);
3062         param->listen_id = listen_id;
3063         param->service_id = sidr_req_msg->service_id;
3064         param->bth_pkey = cm_get_bth_pkey(work);
3065         param->port = work->port->port_num;
3066         work->cm_event.private_data = &sidr_req_msg->private_data;
3067 }
3068
3069 static int cm_sidr_req_handler(struct cm_work *work)
3070 {
3071         struct ib_cm_id *cm_id;
3072         struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
3073         struct cm_sidr_req_msg *sidr_req_msg;
3074         struct ib_wc *wc;
3075
3076         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
3077         if (IS_ERR(cm_id))
3078                 return PTR_ERR(cm_id);
3079         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3080
3081         /* Record SGID/SLID and request ID for lookup. */
3082         sidr_req_msg = (struct cm_sidr_req_msg *)
3083                                 work->mad_recv_wc->recv_buf.mad;
3084         wc = work->mad_recv_wc->wc;
3085         cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
3086         cm_id_priv->av.dgid.global.interface_id = 0;
3087         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3088                                 work->mad_recv_wc->recv_buf.grh,
3089                                 &cm_id_priv->av);
3090         cm_id_priv->id.remote_id = sidr_req_msg->request_id;
3091         cm_id_priv->tid = sidr_req_msg->hdr.tid;
3092         atomic_inc(&cm_id_priv->work_count);
3093
3094         spin_lock_irq(&cm.lock);
3095         cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
3096         if (cur_cm_id_priv) {
3097                 spin_unlock_irq(&cm.lock);
3098                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3099                                 counter[CM_SIDR_REQ_COUNTER]);
3100                 goto out; /* Duplicate message. */
3101         }
3102         cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
3103         cur_cm_id_priv = cm_find_listen(cm_id->device,
3104                                         sidr_req_msg->service_id);
3105         if (!cur_cm_id_priv) {
3106                 spin_unlock_irq(&cm.lock);
3107                 cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
3108                 goto out; /* No match. */
3109         }
3110         atomic_inc(&cur_cm_id_priv->refcount);
3111         atomic_inc(&cm_id_priv->refcount);
3112         spin_unlock_irq(&cm.lock);
3113
3114         cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
3115         cm_id_priv->id.context = cur_cm_id_priv->id.context;
3116         cm_id_priv->id.service_id = sidr_req_msg->service_id;
3117         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
3118
3119         cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
3120         cm_process_work(cm_id_priv, work);
3121         cm_deref_id(cur_cm_id_priv);
3122         return 0;
3123 out:
3124         ib_destroy_cm_id(&cm_id_priv->id);
3125         return -EINVAL;
3126 }
3127
3128 static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3129                                struct cm_id_private *cm_id_priv,
3130                                struct ib_cm_sidr_rep_param *param)
3131 {
3132         cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3133                           cm_id_priv->tid);
3134         sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
3135         sidr_rep_msg->status = param->status;
3136         cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
3137         sidr_rep_msg->service_id = cm_id_priv->id.service_id;
3138         sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
3139
3140         if (param->info && param->info_length)
3141                 memcpy(sidr_rep_msg->info, param->info, param->info_length);
3142
3143         if (param->private_data && param->private_data_len)
3144                 memcpy(sidr_rep_msg->private_data, param->private_data,
3145                        param->private_data_len);
3146 }
3147
3148 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3149                         struct ib_cm_sidr_rep_param *param)
3150 {
3151         struct cm_id_private *cm_id_priv;
3152         struct ib_mad_send_buf *msg;
3153         unsigned long flags;
3154         int ret;
3155
3156         if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3157             (param->private_data &&
3158              param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3159                 return -EINVAL;
3160
3161         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3162         spin_lock_irqsave(&cm_id_priv->lock, flags);
3163         if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3164                 ret = -EINVAL;
3165                 goto error;
3166         }
3167
3168         ret = cm_alloc_msg(cm_id_priv, &msg);
3169         if (ret)
3170                 goto error;
3171
3172         cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3173                            param);
3174         ret = ib_post_send_mad(msg, NULL);
3175         if (ret) {
3176                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3177                 cm_free_msg(msg);
3178                 return ret;
3179         }
3180         cm_id->state = IB_CM_IDLE;
3181         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3182
3183         spin_lock_irqsave(&cm.lock, flags);
3184         if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
3185                 rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
3186                 RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
3187         }
3188         spin_unlock_irqrestore(&cm.lock, flags);
3189         return 0;
3190
3191 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3192         return ret;
3193 }
3194 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
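
/*
 * Example (illustrative, placeholder names): answering a SIDR REQ from
 * the cm_handler on an IB_CM_SIDR_REQ_RECEIVED event, where "qp" is the
 * UD QP backing the requested service:
 *
 *	struct ib_cm_sidr_rep_param rep = {
 *		.status	= IB_SIDR_SUCCESS,
 *		.qp_num	= qp->qp_num,
 *		.qkey	= MY_QKEY,
 *	};
 *
 *	ret = ib_send_cm_sidr_rep(cm_id, &rep);
 */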
3195
3196 static void cm_format_sidr_rep_event(struct cm_work *work)
3197 {
3198         struct cm_sidr_rep_msg *sidr_rep_msg;
3199         struct ib_cm_sidr_rep_event_param *param;
3200
3201         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3202                                 work->mad_recv_wc->recv_buf.mad;
3203         param = &work->cm_event.param.sidr_rep_rcvd;
3204         param->status = sidr_rep_msg->status;
3205         param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
3206         param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
3207         param->info = &sidr_rep_msg->info;
3208         param->info_len = sidr_rep_msg->info_length;
3209         work->cm_event.private_data = &sidr_rep_msg->private_data;
3210 }
3211
3212 static int cm_sidr_rep_handler(struct cm_work *work)
3213 {
3214         struct cm_sidr_rep_msg *sidr_rep_msg;
3215         struct cm_id_private *cm_id_priv;
3216
3217         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3218                                 work->mad_recv_wc->recv_buf.mad;
3219         cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
3220         if (!cm_id_priv)
3221                 return -EINVAL; /* Unmatched reply. */
3222
3223         spin_lock_irq(&cm_id_priv->lock);
3224         if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3225                 spin_unlock_irq(&cm_id_priv->lock);
3226                 goto out;
3227         }
3228         cm_id_priv->id.state = IB_CM_IDLE;
3229         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3230         spin_unlock_irq(&cm_id_priv->lock);
3231
3232         cm_format_sidr_rep_event(work);
3233         cm_process_work(cm_id_priv, work);
3234         return 0;
3235 out:
3236         cm_deref_id(cm_id_priv);
3237         return -EINVAL;
3238 }
3239
3240 static void cm_process_send_error(struct ib_mad_send_buf *msg,
3241                                   enum ib_wc_status wc_status)
3242 {
3243         struct cm_id_private *cm_id_priv;
3244         struct ib_cm_event cm_event;
3245         enum ib_cm_state state;
3246         int ret;
3247
3248         memset(&cm_event, 0, sizeof cm_event);
3249         cm_id_priv = msg->context[0];
3250
3251         /* Discard old sends or ones without a response. */
3252         spin_lock_irq(&cm_id_priv->lock);
3253         state = (enum ib_cm_state) (unsigned long) msg->context[1];
3254         if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3255                 goto discard;
3256
3257         switch (state) {
3258         case IB_CM_REQ_SENT:
3259         case IB_CM_MRA_REQ_RCVD:
3260                 cm_reset_to_idle(cm_id_priv);
3261                 cm_event.event = IB_CM_REQ_ERROR;
3262                 break;
3263         case IB_CM_REP_SENT:
3264         case IB_CM_MRA_REP_RCVD:
3265                 cm_reset_to_idle(cm_id_priv);
3266                 cm_event.event = IB_CM_REP_ERROR;
3267                 break;
3268         case IB_CM_DREQ_SENT:
3269                 cm_enter_timewait(cm_id_priv);
3270                 cm_event.event = IB_CM_DREQ_ERROR;
3271                 break;
3272         case IB_CM_SIDR_REQ_SENT:
3273                 cm_id_priv->id.state = IB_CM_IDLE;
3274                 cm_event.event = IB_CM_SIDR_REQ_ERROR;
3275                 break;
3276         default:
3277                 goto discard;
3278         }
3279         spin_unlock_irq(&cm_id_priv->lock);
3280         cm_event.param.send_status = wc_status;
3281
3282         /* No other events can occur on the cm_id at this point. */
3283         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3284         cm_free_msg(msg);
3285         if (ret)
3286                 ib_destroy_cm_id(&cm_id_priv->id);
3287         return;
3288 discard:
3289         spin_unlock_irq(&cm_id_priv->lock);
3290         cm_free_msg(msg);
3291 }
3292
3293 static void cm_send_handler(struct ib_mad_agent *mad_agent,
3294                             struct ib_mad_send_wc *mad_send_wc)
3295 {
3296         struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3297         struct cm_port *port;
3298         u16 attr_index;
3299
3300         port = mad_agent->context;
3301         attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3302                                   msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3303
3304         /*
3305          * If the send was in response to a received message (context[0] is not
3306          * set to a cm_id), and is not a REJ, then it is a send that was
3307          * manually retried.
3308          */
3309         if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3310                 msg->retries = 1;
3311
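	/*
	 * Accounting: a message posted once and retried N times by the MAD
	 * layer adds N + 1 to the CM_XMIT counter and N to CM_XMIT_RETRIES,
	 * e.g. one send plus two retries counts 3 transmissions, 2 retries.
	 */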
3312         atomic_long_add(1 + msg->retries,
3313                         &port->counter_group[CM_XMIT].counter[attr_index]);
3314         if (msg->retries)
3315                 atomic_long_add(msg->retries,
3316                                 &port->counter_group[CM_XMIT_RETRIES].
3317                                 counter[attr_index]);
3318
3319         switch (mad_send_wc->status) {
3320         case IB_WC_SUCCESS:
3321         case IB_WC_WR_FLUSH_ERR:
3322                 cm_free_msg(msg);
3323                 break;
3324         default:
3325                 if (msg->context[0] && msg->context[1])
3326                         cm_process_send_error(msg, mad_send_wc->status);
3327                 else
3328                         cm_free_msg(msg);
3329                 break;
3330         }
3331 }
3332
3333 static void cm_work_handler(struct work_struct *_work)
3334 {
3335         struct cm_work *work = container_of(_work, struct cm_work, work.work);
3336         int ret;
3337
3338         switch (work->cm_event.event) {
3339         case IB_CM_REQ_RECEIVED:
3340                 ret = cm_req_handler(work);
3341                 break;
3342         case IB_CM_MRA_RECEIVED:
3343                 ret = cm_mra_handler(work);
3344                 break;
3345         case IB_CM_REJ_RECEIVED:
3346                 ret = cm_rej_handler(work);
3347                 break;
3348         case IB_CM_REP_RECEIVED:
3349                 ret = cm_rep_handler(work);
3350                 break;
3351         case IB_CM_RTU_RECEIVED:
3352                 ret = cm_rtu_handler(work);
3353                 break;
3354         case IB_CM_USER_ESTABLISHED:
3355                 ret = cm_establish_handler(work);
3356                 break;
3357         case IB_CM_DREQ_RECEIVED:
3358                 ret = cm_dreq_handler(work);
3359                 break;
3360         case IB_CM_DREP_RECEIVED:
3361                 ret = cm_drep_handler(work);
3362                 break;
3363         case IB_CM_SIDR_REQ_RECEIVED:
3364                 ret = cm_sidr_req_handler(work);
3365                 break;
3366         case IB_CM_SIDR_REP_RECEIVED:
3367                 ret = cm_sidr_rep_handler(work);
3368                 break;
3369         case IB_CM_LAP_RECEIVED:
3370                 ret = cm_lap_handler(work);
3371                 break;
3372         case IB_CM_APR_RECEIVED:
3373                 ret = cm_apr_handler(work);
3374                 break;
3375         case IB_CM_TIMEWAIT_EXIT:
3376                 ret = cm_timewait_handler(work);
3377                 break;
3378         default:
3379                 ret = -EINVAL;
3380                 break;
3381         }
3382         if (ret)
3383                 cm_free_work(work);
3384 }
3385
3386 static int cm_establish(struct ib_cm_id *cm_id)
3387 {
3388         struct cm_id_private *cm_id_priv;
3389         struct cm_work *work;
3390         unsigned long flags;
3391         int ret = 0;
3392         struct cm_device *cm_dev;
3393
3394         cm_dev = ib_get_client_data(cm_id->device, &cm_client);
3395         if (!cm_dev)
3396                 return -ENODEV;
3397
3398         work = kmalloc(sizeof *work, GFP_ATOMIC);
3399         if (!work)
3400                 return -ENOMEM;
3401
3402         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3403         spin_lock_irqsave(&cm_id_priv->lock, flags);
3404         switch (cm_id->state) {
3406         case IB_CM_REP_SENT:
3407         case IB_CM_MRA_REP_RCVD:
3408                 cm_id->state = IB_CM_ESTABLISHED;
3409                 break;
3410         case IB_CM_ESTABLISHED:
3411                 ret = -EISCONN;
3412                 break;
3413         default:
3414                 ret = -EINVAL;
3415                 break;
3416         }
3417         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3418
3419         if (ret) {
3420                 kfree(work);
3421                 goto out;
3422         }
3423
3424         /*
3425          * The CM worker thread may try to destroy the cm_id before it
3426          * can execute this work item.  To prevent potential deadlock,
3427          * we need to find the cm_id once we're in the context of the
3428          * worker thread, rather than holding a reference on it.
3429          */
3430         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3431         work->local_id = cm_id->local_id;
3432         work->remote_id = cm_id->remote_id;
3433         work->mad_recv_wc = NULL;
3434         work->cm_event.event = IB_CM_USER_ESTABLISHED;
3435
3436         /* Check if the device started its remove_one */
3437         spin_lock_irq(&cm.lock);
3438         if (!cm_dev->going_down) {
3439                 queue_delayed_work(cm.wq, &work->work, 0);
3440         } else {
3441                 kfree(work);
3442                 ret = -ENODEV;
3443         }
3444         spin_unlock_irq(&cm.lock);
3445
3446 out:
3447         return ret;
3448 }
3449
3450 static int cm_migrate(struct ib_cm_id *cm_id)
3451 {
3452         struct cm_id_private *cm_id_priv;
3453         unsigned long flags;
3454         int ret = 0;
3455
3456         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3457         spin_lock_irqsave(&cm_id_priv->lock, flags);
3458         if (cm_id->state == IB_CM_ESTABLISHED &&
3459             (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3460              cm_id->lap_state == IB_CM_LAP_IDLE)) {
3461                 cm_id->lap_state = IB_CM_LAP_IDLE;
3462                 cm_id_priv->av = cm_id_priv->alt_av;
3463         } else
3464                 ret = -EINVAL;
3465         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3466
3467         return ret;
3468 }
3469
3470 int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3471 {
3472         int ret;
3473
3474         switch (event) {
3475         case IB_EVENT_COMM_EST:
3476                 ret = cm_establish(cm_id);
3477                 break;
3478         case IB_EVENT_PATH_MIG:
3479                 ret = cm_migrate(cm_id);
3480                 break;
3481         default:
3482                 ret = -EINVAL;
3483         }
3484         return ret;
3485 }
3486 EXPORT_SYMBOL(ib_cm_notify);
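
/*
 * Example (illustrative): a ULP's QP event handler forwarding the events
 * the CM must observe.  "my_qp_event_handler" is a placeholder installed
 * through ib_qp_init_attr.event_handler with the cm_id as context:
 *
 *	static void my_qp_event_handler(struct ib_event *event, void *ctx)
 *	{
 *		struct ib_cm_id *cm_id = ctx;
 *
 *		if (event->event == IB_EVENT_COMM_EST ||
 *		    event->event == IB_EVENT_PATH_MIG)
 *			ib_cm_notify(cm_id, event->event);
 *	}
 */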
3487
3488 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3489                             struct ib_mad_recv_wc *mad_recv_wc)
3490 {
3491         struct cm_port *port = mad_agent->context;
3492         struct cm_work *work;
3493         enum ib_cm_event_type event;
3494         u16 attr_id;
3495         int paths = 0;
3496         int going_down = 0;
3497
3498         switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
3499         case CM_REQ_ATTR_ID:
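		/*
		 * A REQ carries the primary path and, when alt_local_lid
		 * is set, an alternate path; reserve an ib_sa_path_rec
		 * for each.
		 */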
3500                 paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
3501                                                     alt_local_lid != 0);
3502                 event = IB_CM_REQ_RECEIVED;
3503                 break;
3504         case CM_MRA_ATTR_ID:
3505                 event = IB_CM_MRA_RECEIVED;
3506                 break;
3507         case CM_REJ_ATTR_ID:
3508                 event = IB_CM_REJ_RECEIVED;
3509                 break;
3510         case CM_REP_ATTR_ID:
3511                 event = IB_CM_REP_RECEIVED;
3512                 break;
3513         case CM_RTU_ATTR_ID:
3514                 event = IB_CM_RTU_RECEIVED;
3515                 break;
3516         case CM_DREQ_ATTR_ID:
3517                 event = IB_CM_DREQ_RECEIVED;
3518                 break;
3519         case CM_DREP_ATTR_ID:
3520                 event = IB_CM_DREP_RECEIVED;
3521                 break;
3522         case CM_SIDR_REQ_ATTR_ID:
3523                 event = IB_CM_SIDR_REQ_RECEIVED;
3524                 break;
3525         case CM_SIDR_REP_ATTR_ID:
3526                 event = IB_CM_SIDR_REP_RECEIVED;
3527                 break;
3528         case CM_LAP_ATTR_ID:
3529                 paths = 1;
3530                 event = IB_CM_LAP_RECEIVED;
3531                 break;
3532         case CM_APR_ATTR_ID:
3533                 event = IB_CM_APR_RECEIVED;
3534                 break;
3535         default:
3536                 ib_free_recv_mad(mad_recv_wc);
3537                 return;
3538         }
3539
3540         attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
3541         atomic_long_inc(&port->counter_group[CM_RECV].
3542                         counter[attr_id - CM_ATTR_ID_OFFSET]);
3543
3544         work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
3545                        GFP_KERNEL);
3546         if (!work) {
3547                 ib_free_recv_mad(mad_recv_wc);
3548                 return;
3549         }
3550
3551         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3552         work->cm_event.event = event;
3553         work->mad_recv_wc = mad_recv_wc;
3554         work->port = port;
3555
3556         /* Check if the device started its remove_one */
3557         spin_lock_irq(&cm.lock);
3558         if (!port->cm_dev->going_down)
3559                 queue_delayed_work(cm.wq, &work->work, 0);
3560         else
3561                 going_down = 1;
3562         spin_unlock_irq(&cm.lock);
3563
3564         if (going_down) {
3565                 kfree(work);
3566                 ib_free_recv_mad(mad_recv_wc);
3567         }
3568 }
3569
3570 static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3571                                 struct ib_qp_attr *qp_attr,
3572                                 int *qp_attr_mask)
3573 {
3574         unsigned long flags;
3575         int ret;
3576
3577         spin_lock_irqsave(&cm_id_priv->lock, flags);
3578         switch (cm_id_priv->id.state) {
3579         case IB_CM_REQ_SENT:
3580         case IB_CM_MRA_REQ_RCVD:
3581         case IB_CM_REQ_RCVD:
3582         case IB_CM_MRA_REQ_SENT:
3583         case IB_CM_REP_RCVD:
3584         case IB_CM_MRA_REP_SENT:
3585         case IB_CM_REP_SENT:
3586         case IB_CM_MRA_REP_RCVD:
3587         case IB_CM_ESTABLISHED:
3588                 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
3589                                 IB_QP_PKEY_INDEX | IB_QP_PORT;
3590                 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
3591                 if (cm_id_priv->responder_resources)
3592                         qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
3593                                                     IB_ACCESS_REMOTE_ATOMIC;
3594                 qp_attr->pkey_index = cm_id_priv->av.pkey_index;
3595                 qp_attr->port_num = cm_id_priv->av.port->port_num;
3596                 ret = 0;
3597                 break;
3598         default:
3599                 ret = -EINVAL;
3600                 break;
3601         }
3602         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3603         return ret;
3604 }
3605
3606 static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3607                                struct ib_qp_attr *qp_attr,
3608                                int *qp_attr_mask)
3609 {
3610         unsigned long flags;
3611         int ret;
3612
3613         spin_lock_irqsave(&cm_id_priv->lock, flags);
3614         switch (cm_id_priv->id.state) {
3615         case IB_CM_REQ_RCVD:
3616         case IB_CM_MRA_REQ_SENT:
3617         case IB_CM_REP_RCVD:
3618         case IB_CM_MRA_REP_SENT:
3619         case IB_CM_REP_SENT:
3620         case IB_CM_MRA_REP_RCVD:
3621         case IB_CM_ESTABLISHED:
3622                 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3623                                 IB_QP_DEST_QPN | IB_QP_RQ_PSN;
3624                 qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3625                 if (!cm_id_priv->av.valid) {
3626                         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3627                         return -EINVAL;
3628                 }
3629                 if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
3630                         qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
3631                         *qp_attr_mask |= IB_QP_VID;
3632                 }
3633                 if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
3634                         memcpy(qp_attr->smac, cm_id_priv->av.smac,
3635                                sizeof(qp_attr->smac));
3636                         *qp_attr_mask |= IB_QP_SMAC;
3637                 }
3638                 if (cm_id_priv->alt_av.valid) {
3639                         if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
3640                                 qp_attr->alt_vlan_id =
3641                                         cm_id_priv->alt_av.ah_attr.vlan_id;
3642                                 *qp_attr_mask |= IB_QP_ALT_VID;
3643                         }
3644                         if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
3645                                 memcpy(qp_attr->alt_smac,
3646                                        cm_id_priv->alt_av.smac,
3647                                        sizeof(qp_attr->alt_smac));
3648                                 *qp_attr_mask |= IB_QP_ALT_SMAC;
3649                         }
3650                 }
3651                 qp_attr->path_mtu = cm_id_priv->path_mtu;
3652                 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3653                 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
3654                 if (cm_id_priv->qp_type == IB_QPT_RC ||
3655                     cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
3656                         *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
3657                                          IB_QP_MIN_RNR_TIMER;
3658                         qp_attr->max_dest_rd_atomic =
3659                                         cm_id_priv->responder_resources;
3660                         qp_attr->min_rnr_timer = 0;
3661                 }
3662                 if (cm_id_priv->alt_av.ah_attr.dlid) {
3663                         *qp_attr_mask |= IB_QP_ALT_PATH;
3664                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3665                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3666                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3667                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3668                 }
3669                 ret = 0;
3670                 break;
3671         default:
3672                 ret = -EINVAL;
3673                 break;
3674         }
3675         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3676         return ret;
3677 }
3678
3679 static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3680                                struct ib_qp_attr *qp_attr,
3681                                int *qp_attr_mask)
3682 {
3683         unsigned long flags;
3684         int ret;
3685
3686         spin_lock_irqsave(&cm_id_priv->lock, flags);
3687         switch (cm_id_priv->id.state) {
3688         /* Allow transition to RTS before sending REP */
3689         case IB_CM_REQ_RCVD:
3690         case IB_CM_MRA_REQ_SENT:
3691
3692         case IB_CM_REP_RCVD:
3693         case IB_CM_MRA_REP_SENT:
3694         case IB_CM_REP_SENT:
3695         case IB_CM_MRA_REP_RCVD:
3696         case IB_CM_ESTABLISHED:
3697                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
3698                         *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
3699                         qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
3700                         switch (cm_id_priv->qp_type) {
3701                         case IB_QPT_RC:
3702                         case IB_QPT_XRC_INI:
3703                                 *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
3704                                                  IB_QP_MAX_QP_RD_ATOMIC;
3705                                 qp_attr->retry_cnt = cm_id_priv->retry_count;
3706                                 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3707                                 qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
3708                                 /* fall through */
3709                         case IB_QPT_XRC_TGT:
3710                                 *qp_attr_mask |= IB_QP_TIMEOUT;
3711                                 qp_attr->timeout = cm_id_priv->av.timeout;
3712                                 break;
3713                         default:
3714                                 break;
3715                         }
3716                         if (cm_id_priv->alt_av.ah_attr.dlid) {
3717                                 *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
3718                                 qp_attr->path_mig_state = IB_MIG_REARM;
3719                         }
3720                 } else {
3721                         *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
3722                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3723                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3724                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3725                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3726                         qp_attr->path_mig_state = IB_MIG_REARM;
3727                 }
3728                 ret = 0;
3729                 break;
3730         default:
3731                 ret = -EINVAL;
3732                 break;
3733         }
3734         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3735         return ret;
3736 }
3737
3738 int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3739                        struct ib_qp_attr *qp_attr,
3740                        int *qp_attr_mask)
3741 {
3742         struct cm_id_private *cm_id_priv;
3743         int ret;
3744
3745         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3746         switch (qp_attr->qp_state) {
3747         case IB_QPS_INIT:
3748                 ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
3749                 break;
3750         case IB_QPS_RTR:
3751                 ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
3752                 break;
3753         case IB_QPS_RTS:
3754                 ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
3755                 break;
3756         default:
3757                 ret = -EINVAL;
3758                 break;
3759         }
3760         return ret;
3761 }
3762 EXPORT_SYMBOL(ib_cm_init_qp_attr);
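
/*
 * Example (illustrative sketch): the usual pattern for driving a
 * connected QP through INIT/RTR/RTS with CM-derived attributes, where
 * "qp" is the ULP's QP:
 *
 *	struct ib_qp_attr qp_attr;
 *	int qp_attr_mask, ret;
 *
 *	qp_attr.qp_state = IB_QPS_INIT;
 *	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
 *	if (!ret)
 *		ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 *
 * The same two calls are repeated with IB_QPS_RTR and then IB_QPS_RTS
 * as connection establishment progresses.
 */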
3763
3764 static void cm_get_ack_delay(struct cm_device *cm_dev)
3765 {
3766         struct ib_device_attr attr;
3767
3768         if (ib_query_device(cm_dev->ib_device, &attr))
3769                 cm_dev->ack_delay = 0; /* acks will rely on packet life time */
3770         else
3771                 cm_dev->ack_delay = attr.local_ca_ack_delay;
3772 }
3773
3774 static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
3775                                char *buf)
3776 {
3777         struct cm_counter_group *group;
3778         struct cm_counter_attribute *cm_attr;
3779
3780         group = container_of(obj, struct cm_counter_group, obj);
3781         cm_attr = container_of(attr, struct cm_counter_attribute, attr);
3782
3783         return sprintf(buf, "%ld\n",
3784                        atomic_long_read(&group->counter[cm_attr->index]));
3785 }
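
/*
 * Illustrative usage (device and group names are examples, assuming
 * counter_group_names[] entries such as "cm_rx_msgs"): each counter is
 * exposed as a read-only sysfs file under the port's kobject, e.g.
 *
 *	$ cat /sys/class/infiniband_cm/mlx4_0/1/cm_rx_msgs/req
 *	42
 */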
3786
3787 static const struct sysfs_ops cm_counter_ops = {
3788         .show = cm_show_counter
3789 };
3790
3791 static struct kobj_type cm_counter_obj_type = {
3792         .sysfs_ops = &cm_counter_ops,
3793         .default_attrs = cm_counter_default_attrs
3794 };
3795
3796 static void cm_release_port_obj(struct kobject *obj)
3797 {
3798         struct cm_port *cm_port;
3799
3800         cm_port = container_of(obj, struct cm_port, port_obj);
3801         kfree(cm_port);
3802 }
3803
3804 static struct kobj_type cm_port_obj_type = {
3805         .release = cm_release_port_obj
3806 };
3807
3808 static char *cm_devnode(struct device *dev, umode_t *mode)
3809 {
3810         if (mode)
3811                 *mode = 0666;
3812         return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
3813 }
3814
3815 struct class cm_class = {
3816         .owner   = THIS_MODULE,
3817         .name    = "infiniband_cm",
3818         .devnode = cm_devnode,
3819 };
3820 EXPORT_SYMBOL(cm_class);
3821
3822 static int cm_create_port_fs(struct cm_port *port)
3823 {
3824         int i, ret;
3825
3826         ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
3827                                    &port->cm_dev->device->kobj,
3828                                    "%d", port->port_num);
3829         if (ret) {
3830                 kfree(port);
3831                 return ret;
3832         }
3833
3834         for (i = 0; i < CM_COUNTER_GROUPS; i++) {
3835                 ret = kobject_init_and_add(&port->counter_group[i].obj,
3836                                            &cm_counter_obj_type,
3837                                            &port->port_obj,
3838                                            "%s", counter_group_names[i]);
3839                 if (ret)
3840                         goto error;
3841         }
3842
3843         return 0;
3844
3845 error:
3846         while (i--)
3847                 kobject_put(&port->counter_group[i].obj);
3848         kobject_put(&port->port_obj);
3849         return ret;
3850
3851 }
3852
3853 static void cm_remove_port_fs(struct cm_port *port)
3854 {
3855         int i;
3856
3857         for (i = 0; i < CM_COUNTER_GROUPS; i++)
3858                 kobject_put(&port->counter_group[i].obj);
3859
3860         kobject_put(&port->port_obj);
3861 }
3862
3863 static void cm_add_one(struct ib_device *ib_device)
3864 {
3865         struct cm_device *cm_dev;
3866         struct cm_port *port;
3867         struct ib_mad_reg_req reg_req = {
3868                 .mgmt_class = IB_MGMT_CLASS_CM,
3869                 .mgmt_class_version = IB_CM_CLASS_VERSION,
3870         };
3871         struct ib_port_modify port_modify = {
3872                 .set_port_cap_mask = IB_PORT_CM_SUP
3873         };
3874         unsigned long flags;
3875         int ret;
3876         int count = 0;
3877         u8 i;
3878
3879         cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
3880                          ib_device->phys_port_cnt, GFP_KERNEL);
3881         if (!cm_dev)
3882                 return;
3883
3884         cm_dev->ib_device = ib_device;
3885         cm_get_ack_delay(cm_dev);
3886         cm_dev->going_down = 0;
3887         cm_dev->device = device_create(&cm_class, &ib_device->dev,
3888                                        MKDEV(0, 0), NULL,
3889                                        "%s", ib_device->name);
3890         if (IS_ERR(cm_dev->device)) {
3891                 kfree(cm_dev);
3892                 return;
3893         }
3894
3895         set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
3896         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3897                 if (!rdma_cap_ib_cm(ib_device, i))
3898                         continue;
3899
3900                 port = kzalloc(sizeof *port, GFP_KERNEL);
3901                 if (!port)
3902                         goto error1;
3903
3904                 cm_dev->port[i-1] = port;
3905                 port->cm_dev = cm_dev;
3906                 port->port_num = i;
3907
3908                 ret = cm_create_port_fs(port);
3909                 if (ret)
3910                         goto error1;
3911
3912                 port->mad_agent = ib_register_mad_agent(ib_device, i,
3913                                                         IB_QPT_GSI,
3914                                                         &reg_req,
3915                                                         0,
3916                                                         cm_send_handler,
3917                                                         cm_recv_handler,
3918                                                         port,
3919                                                         0);
3920                 if (IS_ERR(port->mad_agent))
3921                         goto error2;
3922
3923                 ret = ib_modify_port(ib_device, i, 0, &port_modify);
3924                 if (ret)
3925                         goto error3;
3926
3927                 count++;
3928         }
3929
3930         if (!count)
3931                 goto free;
3932
3933         ib_set_client_data(ib_device, &cm_client, cm_dev);
3934
3935         write_lock_irqsave(&cm.device_lock, flags);
3936         list_add_tail(&cm_dev->list, &cm.device_list);
3937         write_unlock_irqrestore(&cm.device_lock, flags);
3938         return;
3939
3940 error3:
3941         ib_unregister_mad_agent(port->mad_agent);
3942 error2:
3943         cm_remove_port_fs(port);
3944 error1:
3945         port_modify.set_port_cap_mask = 0;
3946         port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
3947         while (--i) {
3948                 if (!rdma_cap_ib_cm(ib_device, i))
3949                         continue;
3950
3951                 port = cm_dev->port[i-1];
3952                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
3953                 ib_unregister_mad_agent(port->mad_agent);
3954                 cm_remove_port_fs(port);
3955         }
3956 free:
3957         device_unregister(cm_dev->device);
3958         kfree(cm_dev);
3959 }
3960
3961 static void cm_remove_one(struct ib_device *ib_device, void *client_data)
3962 {
3963         struct cm_device *cm_dev = client_data;
3964         struct cm_port *port;
3965         struct ib_port_modify port_modify = {
3966                 .clr_port_cap_mask = IB_PORT_CM_SUP
3967         };
3968         unsigned long flags;
3969         int i;
3970
3971         if (!cm_dev)
3972                 return;
3973
3974         write_lock_irqsave(&cm.device_lock, flags);
3975         list_del(&cm_dev->list);
3976         write_unlock_irqrestore(&cm.device_lock, flags);
3977
3978         spin_lock_irq(&cm.lock);
3979         cm_dev->going_down = 1;
3980         spin_unlock_irq(&cm.lock);
3981
3982         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3983                 if (!rdma_cap_ib_cm(ib_device, i))
3984                         continue;
3985
3986                 port = cm_dev->port[i-1];
3987                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
3988                 /*
3989                  * We flush the queue here after setting going_down, which
3990                  * ensures that no new work will be queued by the recv handler;
3991                  * only then is it safe to unregister the MAD agent.
3992                  */
3993                 flush_workqueue(cm.wq);
3994                 ib_unregister_mad_agent(port->mad_agent);
3995                 cm_remove_port_fs(port);
3996         }
3997         device_unregister(cm_dev->device);
3998         kfree(cm_dev);
3999 }
4000
4001 static int __init ib_cm_init(void)
4002 {
4003         int ret;
4004
4005         memset(&cm, 0, sizeof cm);
4006         INIT_LIST_HEAD(&cm.device_list);
4007         rwlock_init(&cm.device_lock);
4008         spin_lock_init(&cm.lock);
4009         cm.listen_service_table = RB_ROOT;
4010         cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
4011         cm.remote_id_table = RB_ROOT;
4012         cm.remote_qp_table = RB_ROOT;
4013         cm.remote_sidr_table = RB_ROOT;
4014         idr_init(&cm.local_id_table);
4015         get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
4016         INIT_LIST_HEAD(&cm.timewait_list);
4017
4018         ret = class_register(&cm_class);
4019         if (ret) {
4020                 ret = -ENOMEM;
4021                 goto error1;
4022         }
4023
4024         cm.wq = create_workqueue("ib_cm");
4025         if (!cm.wq) {
4026                 ret = -ENOMEM;
4027                 goto error2;
4028         }
4029
4030         ret = ib_register_client(&cm_client);
4031         if (ret)
4032                 goto error3;
4033
4034         return 0;
4035 error3:
4036         destroy_workqueue(cm.wq);
4037 error2:
4038         class_unregister(&cm_class);
4039 error1:
4040         idr_destroy(&cm.local_id_table);
4041         return ret;
4042 }
4043
4044 static void __exit ib_cm_cleanup(void)
4045 {
4046         struct cm_timewait_info *timewait_info, *tmp;
4047
4048         spin_lock_irq(&cm.lock);
4049         list_for_each_entry(timewait_info, &cm.timewait_list, list)
4050                 cancel_delayed_work(&timewait_info->work.work);
4051         spin_unlock_irq(&cm.lock);
4052
4053         ib_unregister_client(&cm_client);
4054         destroy_workqueue(cm.wq);
4055
4056         list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
4057                 list_del(&timewait_info->list);
4058                 kfree(timewait_info);
4059         }
4060
4061         class_unregister(&cm_class);
4062         idr_destroy(&cm.local_id_table);
4063 }
4064
4065 module_init(ib_cm_init);
4066 module_exit(ib_cm_cleanup);
4067