/* drivers/infiniband/core/cm.c */

/*
 * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
#include <linux/etherdevice.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include "cm_msgs.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");

static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);

static struct ib_client cm_client = {
        .name   = "cm",
        .add    = cm_add_one,
        .remove = cm_remove_one
};

static struct ib_cm {
        spinlock_t lock;
        struct list_head device_list;
        rwlock_t device_lock;
        struct rb_root listen_service_table;
        u64 listen_service_id;
        /* struct rb_root peer_service_table; todo: fix peer to peer */
        struct rb_root remote_qp_table;
        struct rb_root remote_id_table;
        struct rb_root remote_sidr_table;
        struct idr local_id_table;
        __be32 random_id_operand;
        struct list_head timewait_list;
        struct workqueue_struct *wq;
} cm;

/* Counter indexes ordered by attribute ID */
enum {
        CM_REQ_COUNTER,
        CM_MRA_COUNTER,
        CM_REJ_COUNTER,
        CM_REP_COUNTER,
        CM_RTU_COUNTER,
        CM_DREQ_COUNTER,
        CM_DREP_COUNTER,
        CM_SIDR_REQ_COUNTER,
        CM_SIDR_REP_COUNTER,
        CM_LAP_COUNTER,
        CM_APR_COUNTER,
        CM_ATTR_COUNT,
        CM_ATTR_ID_OFFSET = 0x0010,
};

enum {
        CM_XMIT,
        CM_XMIT_RETRIES,
        CM_RECV,
        CM_RECV_DUPLICATES,
        CM_COUNTER_GROUPS
};

static char const counter_group_names[CM_COUNTER_GROUPS]
                                     [sizeof("cm_rx_duplicates")] = {
        "cm_tx_msgs", "cm_tx_retries",
        "cm_rx_msgs", "cm_rx_duplicates"
};

struct cm_counter_group {
        struct kobject obj;
        atomic_long_t counter[CM_ATTR_COUNT];
};

struct cm_counter_attribute {
        struct attribute attr;
        int index;
};

#define CM_COUNTER_ATTR(_name, _index) \
struct cm_counter_attribute cm_##_name##_counter_attr = { \
        .attr = { .name = __stringify(_name), .mode = 0444 }, \
        .index = _index \
}

static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);

static struct attribute *cm_counter_default_attrs[] = {
        &cm_req_counter_attr.attr,
        &cm_mra_counter_attr.attr,
        &cm_rej_counter_attr.attr,
        &cm_rep_counter_attr.attr,
        &cm_rtu_counter_attr.attr,
        &cm_dreq_counter_attr.attr,
        &cm_drep_counter_attr.attr,
        &cm_sidr_req_counter_attr.attr,
        &cm_sidr_rep_counter_attr.attr,
        &cm_lap_counter_attr.attr,
        &cm_apr_counter_attr.attr,
        NULL
};

struct cm_port {
        struct cm_device *cm_dev;
        struct ib_mad_agent *mad_agent;
        struct kobject port_obj;
        u8 port_num;
        struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};

struct cm_device {
        struct list_head list;
        struct ib_device *ib_device;
        struct device *device;
        u8 ack_delay;
        int going_down;
        struct cm_port *port[0];
};

struct cm_av {
        struct cm_port *port;
        union ib_gid dgid;
        struct ib_ah_attr ah_attr;
        u16 pkey_index;
        u8 timeout;
        u8  valid;
        u8  smac[ETH_ALEN];
};

struct cm_work {
        struct delayed_work work;
        struct list_head list;
        struct cm_port *port;
        struct ib_mad_recv_wc *mad_recv_wc;     /* Received MADs */
        __be32 local_id;                        /* Established / timewait */
        __be32 remote_id;
        struct ib_cm_event cm_event;
        struct ib_sa_path_rec path[0];
};

struct cm_timewait_info {
        struct cm_work work;                    /* Must be first. */
        struct list_head list;
        struct rb_node remote_qp_node;
        struct rb_node remote_id_node;
        __be64 remote_ca_guid;
        __be32 remote_qpn;
        u8 inserted_remote_qp;
        u8 inserted_remote_id;
};

struct cm_id_private {
        struct ib_cm_id id;

        struct rb_node service_node;
        struct rb_node sidr_id_node;
        spinlock_t lock;        /* Do not acquire inside cm.lock */
        struct completion comp;
        atomic_t refcount;
        /* Number of clients sharing this ib_cm_id. Only valid for listeners.
         * Protected by the cm.lock spinlock. */
        int listen_sharecount;

        struct ib_mad_send_buf *msg;
        struct cm_timewait_info *timewait_info;
        /* todo: use alternate port on send failure */
        struct cm_av av;
        struct cm_av alt_av;

        void *private_data;
        __be64 tid;
        __be32 local_qpn;
        __be32 remote_qpn;
        enum ib_qp_type qp_type;
        __be32 sq_psn;
        __be32 rq_psn;
        int timeout_ms;
        enum ib_mtu path_mtu;
        __be16 pkey;
        u8 private_data_len;
        u8 max_cm_retries;
        u8 peer_to_peer;
        u8 responder_resources;
        u8 initiator_depth;
        u8 retry_count;
        u8 rnr_retry_count;
        u8 service_timeout;
        u8 target_ack_delay;

        struct list_head work_list;
        atomic_t work_count;
};

static void cm_work_handler(struct work_struct *work);

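/*
 * Drop a reference on a cm_id_private.  The final put completes
 * cm_id_priv->comp, which cm_destroy_id() waits on before freeing the
 * structure, so callers must hold their own reference for as long as
 * they touch the cm_id_priv.
 */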
static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
        if (atomic_dec_and_test(&cm_id_priv->refcount))
                complete(&cm_id_priv->comp);
}

static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
                        struct ib_mad_send_buf **msg)
{
        struct ib_mad_agent *mad_agent;
        struct ib_mad_send_buf *m;
        struct ib_ah *ah;

        mad_agent = cm_id_priv->av.port->mad_agent;
        ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
        if (IS_ERR(ah))
                return PTR_ERR(ah);

        m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
                               cm_id_priv->av.pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC,
                               IB_MGMT_BASE_VERSION);
        if (IS_ERR(m)) {
                ib_destroy_ah(ah);
                return PTR_ERR(m);
        }

        /* Timeout set by caller if response is expected. */
        m->ah = ah;
        m->retries = cm_id_priv->max_cm_retries;

        atomic_inc(&cm_id_priv->refcount);
        m->context[0] = cm_id_priv;
        *msg = m;
        return 0;
}

static int cm_alloc_response_msg(struct cm_port *port,
                                 struct ib_mad_recv_wc *mad_recv_wc,
                                 struct ib_mad_send_buf **msg)
{
        struct ib_mad_send_buf *m;
        struct ib_ah *ah;

        ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
                                  mad_recv_wc->recv_buf.grh, port->port_num);
        if (IS_ERR(ah))
                return PTR_ERR(ah);

        m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC,
                               IB_MGMT_BASE_VERSION);
        if (IS_ERR(m)) {
                ib_destroy_ah(ah);
                return PTR_ERR(m);
        }
        m->ah = ah;
        *msg = m;
        return 0;
}

static void cm_free_msg(struct ib_mad_send_buf *msg)
{
        ib_destroy_ah(msg->ah);
        if (msg->context[0])
                cm_deref_id(msg->context[0]);
        ib_free_send_mad(msg);
}

static void * cm_copy_private_data(const void *private_data,
                                   u8 private_data_len)
{
        void *data;

        if (!private_data || !private_data_len)
                return NULL;

        data = kmemdup(private_data, private_data_len, GFP_KERNEL);
        if (!data)
                return ERR_PTR(-ENOMEM);

        return data;
}

static void cm_set_private_data(struct cm_id_private *cm_id_priv,
                                 void *private_data, u8 private_data_len)
{
        if (cm_id_priv->private_data && cm_id_priv->private_data_len)
                kfree(cm_id_priv->private_data);

        cm_id_priv->private_data = private_data;
        cm_id_priv->private_data_len = private_data_len;
}

static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
                                    struct ib_grh *grh, struct cm_av *av)
{
        av->port = port;
        av->pkey_index = wc->pkey_index;
        ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
                           grh, &av->ah_attr);
}

static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
{
        struct cm_device *cm_dev;
        struct cm_port *port = NULL;
        unsigned long flags;
        int ret;
        u8 p;

        read_lock_irqsave(&cm.device_lock, flags);
        list_for_each_entry(cm_dev, &cm.device_list, list) {
                if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
                                        &p, NULL)) {
                        port = cm_dev->port[p-1];
                        break;
                }
        }
        read_unlock_irqrestore(&cm.device_lock, flags);

        if (!port)
                return -EINVAL;

        ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
                                  be16_to_cpu(path->pkey), &av->pkey_index);
        if (ret)
                return ret;

        av->port = port;
        ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
                             &av->ah_attr);
        av->timeout = path->packet_life_time + 1;
        memcpy(av->smac, path->smac, sizeof(av->smac));

        av->valid = 1;
        return 0;
}

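/*
 * Local communication IDs are allocated cyclically from an IDR and xor-ed
 * with a random operand before being handed out, making stale or forged
 * IDs in incoming MADs unlikely to match a live entry.  idr_preload()
 * performs the sleeping allocation up front so that the GFP_NOWAIT
 * allocation below can run safely under cm.lock.
 */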
static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
        unsigned long flags;
        int id;

        idr_preload(GFP_KERNEL);
        spin_lock_irqsave(&cm.lock, flags);

        id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);

        spin_unlock_irqrestore(&cm.lock, flags);
        idr_preload_end();

        cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
        return id < 0 ? id : 0;
}

static void cm_free_id(__be32 local_id)
{
        spin_lock_irq(&cm.lock);
        idr_remove(&cm.local_id_table,
                   (__force int) (local_id ^ cm.random_id_operand));
        spin_unlock_irq(&cm.lock);
}

static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
{
        struct cm_id_private *cm_id_priv;

        cm_id_priv = idr_find(&cm.local_id_table,
                              (__force int) (local_id ^ cm.random_id_operand));
        if (cm_id_priv) {
                if (cm_id_priv->id.remote_id == remote_id)
                        atomic_inc(&cm_id_priv->refcount);
                else
                        cm_id_priv = NULL;
        }

        return cm_id_priv;
}

static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
{
        struct cm_id_private *cm_id_priv;

        spin_lock_irq(&cm.lock);
        cm_id_priv = cm_get_id(local_id, remote_id);
        spin_unlock_irq(&cm.lock);

        return cm_id_priv;
}

/*
 * Trivial helpers to strip endian annotation and compare; the
 * endianness doesn't actually matter since we just need a stable
 * order for the RB tree.
 */
static int be32_lt(__be32 a, __be32 b)
{
        return (__force u32) a < (__force u32) b;
}

static int be32_gt(__be32 a, __be32 b)
{
        return (__force u32) a > (__force u32) b;
}

static int be64_lt(__be64 a, __be64 b)
{
        return (__force u64) a < (__force u64) b;
}

static int be64_gt(__be64 a, __be64 b)
{
        return (__force u64) a > (__force u64) b;
}

static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
{
        struct rb_node **link = &cm.listen_service_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        __be64 service_id = cm_id_priv->id.service_id;
        __be64 service_mask = cm_id_priv->id.service_mask;

        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          service_node);
                if ((cur_cm_id_priv->id.service_mask & service_id) ==
                    (service_mask & cur_cm_id_priv->id.service_id) &&
                    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
                        return cur_cm_id_priv;

                if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
                        link = &(*link)->rb_left;
                else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
                        link = &(*link)->rb_right;
                else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
                        link = &(*link)->rb_left;
                else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
                        link = &(*link)->rb_right;
                else
                        link = &(*link)->rb_right;
        }
        rb_link_node(&cm_id_priv->service_node, parent, link);
        rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
        return NULL;
}

static struct cm_id_private * cm_find_listen(struct ib_device *device,
                                             __be64 service_id)
{
        struct rb_node *node = cm.listen_service_table.rb_node;
        struct cm_id_private *cm_id_priv;

        while (node) {
                cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
                if ((cm_id_priv->id.service_mask & service_id) ==
                     cm_id_priv->id.service_id &&
                    (cm_id_priv->id.device == device))
                        return cm_id_priv;

                if (device < cm_id_priv->id.device)
                        node = node->rb_left;
                else if (device > cm_id_priv->id.device)
                        node = node->rb_right;
                else if (be64_lt(service_id, cm_id_priv->id.service_id))
                        node = node->rb_left;
                else if (be64_gt(service_id, cm_id_priv->id.service_id))
                        node = node->rb_right;
                else
                        node = node->rb_right;
        }
        return NULL;
}

static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
                                                     *timewait_info)
{
        struct rb_node **link = &cm.remote_id_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_timewait_info *cur_timewait_info;
        __be64 remote_ca_guid = timewait_info->remote_ca_guid;
        __be32 remote_id = timewait_info->work.remote_id;

        while (*link) {
                parent = *link;
                cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
                                             remote_id_node);
                if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
                        link = &(*link)->rb_right;
                else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_left;
                else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_right;
                else
                        return cur_timewait_info;
        }
        timewait_info->inserted_remote_id = 1;
        rb_link_node(&timewait_info->remote_id_node, parent, link);
        rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
        return NULL;
}

static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
                                                   __be32 remote_id)
{
        struct rb_node *node = cm.remote_id_table.rb_node;
        struct cm_timewait_info *timewait_info;

        while (node) {
                timewait_info = rb_entry(node, struct cm_timewait_info,
                                         remote_id_node);
                if (be32_lt(remote_id, timewait_info->work.remote_id))
                        node = node->rb_left;
                else if (be32_gt(remote_id, timewait_info->work.remote_id))
                        node = node->rb_right;
                else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
                        node = node->rb_left;
                else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
                        node = node->rb_right;
                else
                        return timewait_info;
        }
        return NULL;
}

static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
                                                      *timewait_info)
{
        struct rb_node **link = &cm.remote_qp_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_timewait_info *cur_timewait_info;
        __be64 remote_ca_guid = timewait_info->remote_ca_guid;
        __be32 remote_qpn = timewait_info->remote_qpn;

        while (*link) {
                parent = *link;
                cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
                                             remote_qp_node);
                if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
                        link = &(*link)->rb_right;
                else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_left;
                else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_right;
                else
                        return cur_timewait_info;
        }
        timewait_info->inserted_remote_qp = 1;
        rb_link_node(&timewait_info->remote_qp_node, parent, link);
        rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
        return NULL;
}

static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
                                                    *cm_id_priv)
{
        struct rb_node **link = &cm.remote_sidr_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        union ib_gid *port_gid = &cm_id_priv->av.dgid;
        __be32 remote_id = cm_id_priv->id.remote_id;

        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          sidr_id_node);
                if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
                        link = &(*link)->rb_right;
                else {
                        int cmp;
                        cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
                                     sizeof *port_gid);
                        if (cmp < 0)
                                link = &(*link)->rb_left;
                        else if (cmp > 0)
                                link = &(*link)->rb_right;
                        else
                                return cur_cm_id_priv;
                }
        }
        rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
        rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
        return NULL;
}

static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
                               enum ib_cm_sidr_status status)
{
        struct ib_cm_sidr_rep_param param;

        memset(&param, 0, sizeof param);
        param.status = status;
        ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
}

struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
                                 ib_cm_handler cm_handler,
                                 void *context)
{
        struct cm_id_private *cm_id_priv;
        int ret;

        cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
        if (!cm_id_priv)
                return ERR_PTR(-ENOMEM);

        cm_id_priv->id.state = IB_CM_IDLE;
        cm_id_priv->id.device = device;
        cm_id_priv->id.cm_handler = cm_handler;
        cm_id_priv->id.context = context;
        cm_id_priv->id.remote_cm_qpn = 1;
        ret = cm_alloc_id(cm_id_priv);
        if (ret)
                goto error;

        spin_lock_init(&cm_id_priv->lock);
        init_completion(&cm_id_priv->comp);
        INIT_LIST_HEAD(&cm_id_priv->work_list);
        atomic_set(&cm_id_priv->work_count, -1);
        atomic_set(&cm_id_priv->refcount, 1);
        return &cm_id_priv->id;

error:
        kfree(cm_id_priv);
        return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_create_cm_id);

static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
        struct cm_work *work;

        if (list_empty(&cm_id_priv->work_list))
                return NULL;

        work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
        list_del(&work->list);
        return work;
}

static void cm_free_work(struct cm_work *work)
{
        if (work->mad_recv_wc)
                ib_free_recv_mad(work->mad_recv_wc);
        kfree(work);
}

static inline int cm_convert_to_ms(int iba_time)
{
        /* approximate conversion to ms from 4.096us x 2^iba_time */
        return 1 << max(iba_time - 8, 0);
}
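
/*
 * Worked example (added for clarity): IBA encodes timeouts as an exponent t
 * meaning 4.096us * 2^t.  Since 4.096us = 2^12 ns and 1 ms ~= 2^20 ns, the
 * value is roughly 2^(t - 8) ms, which is what the shift above computes:
 *
 *      t = 8  -> 4.096us * 2^8  ~= 1.05 ms, returns 1 << 0  = 1 ms
 *      t = 20 -> 4.096us * 2^20 ~= 4.29 s,  returns 1 << 12 = 4096 ms
 *
 * Values of t below 8 all clamp to 1 ms via max(iba_time - 8, 0).
 */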

/*
 * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
 * Because of how ack_timeout is stored, adding one doubles the timeout.
 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
 * increment it (round up) only if the other is within 50%.
 */
static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
{
        int ack_timeout = packet_life_time + 1;

        if (ack_timeout >= ca_ack_delay)
                ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
        else
                ack_timeout = ca_ack_delay +
                              (ack_timeout >= (ca_ack_delay - 1));

        return min(31, ack_timeout);
}
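
/*
 * Worked examples for the rounding above (values are illustrative):
 *
 *      ca_ack_delay = 10, packet_life_time = 14:
 *              life_time + 1 = 15 dominates and 10 is more than one
 *              exponent step below it, so the result stays 15.
 *      ca_ack_delay = 15, packet_life_time = 14:
 *              the two terms are within 50% of each other (15 vs 15),
 *              so the result rounds up to 16, doubling the timeout.
 *      ca_ack_delay = 20, packet_life_time = 14:
 *              ack_delay dominates and 15 < 19, so the result is 20.
 *
 * The result is capped at 31, the largest encodable exponent.
 */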

static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
{
        if (timewait_info->inserted_remote_id) {
                rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
                timewait_info->inserted_remote_id = 0;
        }

        if (timewait_info->inserted_remote_qp) {
                rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
                timewait_info->inserted_remote_qp = 0;
        }
}

static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
{
        struct cm_timewait_info *timewait_info;

        timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
        if (!timewait_info)
                return ERR_PTR(-ENOMEM);

        timewait_info->work.local_id = local_id;
        INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
        timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
        return timewait_info;
}

static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
{
        int wait_time;
        unsigned long flags;
        struct cm_device *cm_dev;

        cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
        if (!cm_dev)
                return;

        spin_lock_irqsave(&cm.lock, flags);
        cm_cleanup_timewait(cm_id_priv->timewait_info);
        list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
        spin_unlock_irqrestore(&cm.lock, flags);

        /*
         * The cm_id could be destroyed by the user before we exit timewait.
         * To protect against this, we search for the cm_id after exiting
         * timewait before notifying the user that we've exited timewait.
         */
        cm_id_priv->id.state = IB_CM_TIMEWAIT;
        wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);

        /* Check if the device started its remove_one */
        spin_lock_irq(&cm.lock);
        if (!cm_dev->going_down)
                queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
                                   msecs_to_jiffies(wait_time));
        spin_unlock_irq(&cm.lock);

        cm_id_priv->timewait_info = NULL;
}

static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
        unsigned long flags;

        cm_id_priv->id.state = IB_CM_IDLE;
        if (cm_id_priv->timewait_info) {
                spin_lock_irqsave(&cm.lock, flags);
                cm_cleanup_timewait(cm_id_priv->timewait_info);
                spin_unlock_irqrestore(&cm.lock, flags);
                kfree(cm_id_priv->timewait_info);
                cm_id_priv->timewait_info = NULL;
        }
}

static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
        struct cm_id_private *cm_id_priv;
        struct cm_work *work;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
retest:
        spin_lock_irq(&cm_id_priv->lock);
        switch (cm_id->state) {
        case IB_CM_LISTEN:
                spin_unlock_irq(&cm_id_priv->lock);

                spin_lock_irq(&cm.lock);
                if (--cm_id_priv->listen_sharecount > 0) {
                        /* The id is still shared. */
                        cm_deref_id(cm_id_priv);
                        spin_unlock_irq(&cm.lock);
                        return;
                }
                rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
                spin_unlock_irq(&cm.lock);
                break;
        case IB_CM_SIDR_REQ_SENT:
                cm_id->state = IB_CM_IDLE;
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_SIDR_REQ_RCVD:
                spin_unlock_irq(&cm_id_priv->lock);
                cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
                break;
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
                               &cm_id_priv->id.device->node_guid,
                               sizeof cm_id_priv->id.device->node_guid,
                               NULL, 0);
                break;
        case IB_CM_REQ_RCVD:
                if (err == -ENOMEM) {
                        /* Do not reject to allow future retries. */
                        cm_reset_to_idle(cm_id_priv);
                        spin_unlock_irq(&cm_id_priv->lock);
                } else {
                        spin_unlock_irq(&cm_id_priv->lock);
                        ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                                       NULL, 0, NULL, 0);
                }
                break;
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                /* Fall through */
        case IB_CM_MRA_REQ_SENT:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                               NULL, 0, NULL, 0);
                break;
        case IB_CM_ESTABLISHED:
                spin_unlock_irq(&cm_id_priv->lock);
                if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
                        break;
                ib_send_cm_dreq(cm_id, NULL, 0);
                goto retest;
        case IB_CM_DREQ_SENT:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                cm_enter_timewait(cm_id_priv);
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_DREQ_RCVD:
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_drep(cm_id, NULL, 0);
                break;
        default:
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        }

        cm_free_id(cm_id->local_id);
        cm_deref_id(cm_id_priv);
        wait_for_completion(&cm_id_priv->comp);
        while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
                cm_free_work(work);
        kfree(cm_id_priv->private_data);
        kfree(cm_id_priv);
}

void ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
        cm_destroy_id(cm_id, 0);
}
EXPORT_SYMBOL(ib_destroy_cm_id);

/**
 * __ib_cm_listen - Initiates listening on the specified service ID for
 *   connection and service ID resolution requests.
 * @cm_id: Connection identifier associated with the listen request.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   in network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 * @service_mask: Mask applied to the service ID, used to listen across a
 *   range of service IDs.  If set to 0, the service ID is matched
 *   exactly.  This parameter is ignored if %service_id is set to
 *   IB_CM_ASSIGN_SERVICE_ID.
 */
static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
                          __be64 service_mask)
{
        struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
        int ret = 0;

        service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
        service_id &= service_mask;
        if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
            (service_id != IB_CM_ASSIGN_SERVICE_ID))
                return -EINVAL;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        if (cm_id->state != IB_CM_IDLE)
                return -EINVAL;

        cm_id->state = IB_CM_LISTEN;
        ++cm_id_priv->listen_sharecount;

        if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
                cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
                cm_id->service_mask = ~cpu_to_be64(0);
        } else {
                cm_id->service_id = service_id;
                cm_id->service_mask = service_mask;
        }
        cur_cm_id_priv = cm_insert_listen(cm_id_priv);

        if (cur_cm_id_priv) {
                cm_id->state = IB_CM_IDLE;
                --cm_id_priv->listen_sharecount;
                ret = -EBUSY;
        }
        return ret;
}

int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm.lock, flags);
        ret = __ib_cm_listen(cm_id, service_id, service_mask);
        spin_unlock_irqrestore(&cm.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_cm_listen);
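
/*
 * A minimal, hypothetical usage sketch (not part of this file): create a
 * cm_id and listen for connection requests on a fixed service ID.  The
 * handler and the service ID value below are illustrative assumptions.
 *
 *      static int my_cm_handler(struct ib_cm_id *cm_id,
 *                               struct ib_cm_event *event)
 *      {
 *              return 0;       // nonzero asks the CM to destroy the id
 *      }
 *
 *      static int my_cm_listen(struct ib_device *device)
 *      {
 *              struct ib_cm_id *cm_id;
 *              int ret;
 *
 *              cm_id = ib_create_cm_id(device, my_cm_handler, NULL);
 *              if (IS_ERR(cm_id))
 *                      return PTR_ERR(cm_id);
 *
 *              // a service_mask of 0 requests an exact match
 *              ret = ib_cm_listen(cm_id, cpu_to_be64(0x100ULL), 0);
 *              if (ret)
 *                      ib_destroy_cm_id(cm_id);
 *              return ret;
 *      }
 */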

/**
 * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on
 *   the given service ID.
 *
 * If there's an existing ID listening on that same device and service ID,
 * return it.
 *
 * @device: Device associated with the cm_id.  All related communication will
 *   be associated with the specified device.
 * @cm_handler: Callback invoked to notify the user of CM events.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   in network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 *
 * Callers should call ib_destroy_cm_id when done with the listener ID.
 */
struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
                                     ib_cm_handler cm_handler,
                                     __be64 service_id)
{
        struct cm_id_private *cm_id_priv;
        struct ib_cm_id *cm_id;
        unsigned long flags;
        int err = 0;

        /* Create an ID in advance, since the creation may sleep */
        cm_id = ib_create_cm_id(device, cm_handler, NULL);
        if (IS_ERR(cm_id))
                return cm_id;

        spin_lock_irqsave(&cm.lock, flags);

        if (service_id == IB_CM_ASSIGN_SERVICE_ID)
                goto new_id;

        /* Find an existing ID */
        cm_id_priv = cm_find_listen(device, service_id);
        if (cm_id_priv) {
                if (cm_id_priv->id.cm_handler != cm_handler ||
                    cm_id_priv->id.context) {
                        /* Sharing an ib_cm_id with different handlers is not
                         * supported */
                        spin_unlock_irqrestore(&cm.lock, flags);
                        ib_destroy_cm_id(cm_id);
                        return ERR_PTR(-EINVAL);
                }
                atomic_inc(&cm_id_priv->refcount);
                ++cm_id_priv->listen_sharecount;
                spin_unlock_irqrestore(&cm.lock, flags);

                ib_destroy_cm_id(cm_id);
                cm_id = &cm_id_priv->id;
                return cm_id;
        }

new_id:
        /* Use newly created ID */
        err = __ib_cm_listen(cm_id, service_id, 0);

        spin_unlock_irqrestore(&cm.lock, flags);

        if (err) {
                ib_destroy_cm_id(cm_id);
                return ERR_PTR(err);
        }
        return cm_id;
}
EXPORT_SYMBOL(ib_cm_insert_listen);

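/*
 * Build the 64-bit MAD transaction ID: the high 32 bits identify the MAD
 * agent (hi_tid) and the low 32 bits carry the local communication ID,
 * with the message sequence packed into the top two bits so concurrent
 * exchanges on the same cm_id get distinct TIDs.
 */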
static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
                          enum cm_msg_sequence msg_seq)
{
        u64 hi_tid, low_tid;

        hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
        low_tid  = (u64) ((__force u32)cm_id_priv->id.local_id |
                          (msg_seq << 30));
        return cpu_to_be64(hi_tid | low_tid);
}

static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
                              __be16 attr_id, __be64 tid)
{
        hdr->base_version  = IB_MGMT_BASE_VERSION;
        hdr->mgmt_class    = IB_MGMT_CLASS_CM;
        hdr->class_version = IB_CM_CLASS_VERSION;
        hdr->method        = IB_MGMT_METHOD_SEND;
        hdr->attr_id       = attr_id;
        hdr->tid           = tid;
}

static void cm_format_req(struct cm_req_msg *req_msg,
                          struct cm_id_private *cm_id_priv,
                          struct ib_cm_req_param *param)
{
        struct ib_sa_path_rec *pri_path = param->primary_path;
        struct ib_sa_path_rec *alt_path = param->alternate_path;

        cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
                          cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));

        req_msg->local_comm_id = cm_id_priv->id.local_id;
        req_msg->service_id = param->service_id;
        req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
        cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
        cm_req_set_init_depth(req_msg, param->initiator_depth);
        cm_req_set_remote_resp_timeout(req_msg,
                                       param->remote_cm_response_timeout);
        cm_req_set_qp_type(req_msg, param->qp_type);
        cm_req_set_flow_ctrl(req_msg, param->flow_control);
        cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
        cm_req_set_local_resp_timeout(req_msg,
                                      param->local_cm_response_timeout);
        req_msg->pkey = param->primary_path->pkey;
        cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
        cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);

        if (param->qp_type != IB_QPT_XRC_INI) {
                cm_req_set_resp_res(req_msg, param->responder_resources);
                cm_req_set_retry_count(req_msg, param->retry_count);
                cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
                cm_req_set_srq(req_msg, param->srq);
        }

        if (pri_path->hop_limit <= 1) {
                req_msg->primary_local_lid = pri_path->slid;
                req_msg->primary_remote_lid = pri_path->dlid;
        } else {
                /* Work-around until there's a way to obtain remote LID info */
                req_msg->primary_local_lid = IB_LID_PERMISSIVE;
                req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
        }
        req_msg->primary_local_gid = pri_path->sgid;
        req_msg->primary_remote_gid = pri_path->dgid;
        cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
        cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
        req_msg->primary_traffic_class = pri_path->traffic_class;
        req_msg->primary_hop_limit = pri_path->hop_limit;
        cm_req_set_primary_sl(req_msg, pri_path->sl);
        cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
        cm_req_set_primary_local_ack_timeout(req_msg,
                cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
                               pri_path->packet_life_time));

        if (alt_path) {
                if (alt_path->hop_limit <= 1) {
                        req_msg->alt_local_lid = alt_path->slid;
                        req_msg->alt_remote_lid = alt_path->dlid;
                } else {
                        req_msg->alt_local_lid = IB_LID_PERMISSIVE;
                        req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
                }
                req_msg->alt_local_gid = alt_path->sgid;
                req_msg->alt_remote_gid = alt_path->dgid;
                cm_req_set_alt_flow_label(req_msg,
                                          alt_path->flow_label);
                cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
                req_msg->alt_traffic_class = alt_path->traffic_class;
                req_msg->alt_hop_limit = alt_path->hop_limit;
                cm_req_set_alt_sl(req_msg, alt_path->sl);
                cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
                cm_req_set_alt_local_ack_timeout(req_msg,
                        cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
                                       alt_path->packet_life_time));
        }

        if (param->private_data && param->private_data_len)
                memcpy(req_msg->private_data, param->private_data,
                       param->private_data_len);
}

static int cm_validate_req_param(struct ib_cm_req_param *param)
{
        /* peer-to-peer not supported */
        if (param->peer_to_peer)
                return -EINVAL;

        if (!param->primary_path)
                return -EINVAL;

        if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
            param->qp_type != IB_QPT_XRC_INI)
                return -EINVAL;

        if (param->private_data &&
            param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
                return -EINVAL;

        if (param->alternate_path &&
            (param->alternate_path->pkey != param->primary_path->pkey ||
             param->alternate_path->mtu != param->primary_path->mtu))
                return -EINVAL;

        return 0;
}

int ib_send_cm_req(struct ib_cm_id *cm_id,
                   struct ib_cm_req_param *param)
{
        struct cm_id_private *cm_id_priv;
        struct cm_req_msg *req_msg;
        unsigned long flags;
        int ret;

        ret = cm_validate_req_param(param);
        if (ret)
                return ret;

        /* Verify that we're not in timewait. */
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id->state != IB_CM_IDLE) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                ret = -EINVAL;
                goto out;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
                                                            id.local_id);
        if (IS_ERR(cm_id_priv->timewait_info)) {
                ret = PTR_ERR(cm_id_priv->timewait_info);
                goto out;
        }

        ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
        if (ret)
                goto error1;
        if (param->alternate_path) {
                ret = cm_init_av_by_path(param->alternate_path,
                                         &cm_id_priv->alt_av);
                if (ret)
                        goto error1;
        }
        cm_id->service_id = param->service_id;
        cm_id->service_mask = ~cpu_to_be64(0);
        cm_id_priv->timeout_ms = cm_convert_to_ms(
                                    param->primary_path->packet_life_time) * 2 +
                                 cm_convert_to_ms(
                                    param->remote_cm_response_timeout);
        cm_id_priv->max_cm_retries = param->max_cm_retries;
        cm_id_priv->initiator_depth = param->initiator_depth;
        cm_id_priv->responder_resources = param->responder_resources;
        cm_id_priv->retry_count = param->retry_count;
        cm_id_priv->path_mtu = param->primary_path->mtu;
        cm_id_priv->pkey = param->primary_path->pkey;
        cm_id_priv->qp_type = param->qp_type;

        ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
        if (ret)
                goto error1;

        req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
        cm_format_req(req_msg, cm_id_priv, param);
        cm_id_priv->tid = req_msg->hdr.tid;
        cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
        cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;

        cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
        cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        ret = ib_post_send_mad(cm_id_priv->msg, NULL);
        if (ret) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                goto error2;
        }
        BUG_ON(cm_id->state != IB_CM_IDLE);
        cm_id->state = IB_CM_REQ_SENT;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return 0;

error2: cm_free_msg(cm_id_priv->msg);
error1: kfree(cm_id_priv->timewait_info);
out:    return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);

static int cm_issue_rej(struct cm_port *port,
                        struct ib_mad_recv_wc *mad_recv_wc,
                        enum ib_cm_rej_reason reason,
                        enum cm_msg_response msg_rejected,
                        void *ari, u8 ari_length)
{
        struct ib_mad_send_buf *msg = NULL;
        struct cm_rej_msg *rej_msg, *rcv_msg;
        int ret;

        ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
        if (ret)
                return ret;

        /* We just need common CM header information.  Cast to any message. */
        rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
        rej_msg = (struct cm_rej_msg *) msg->mad;

        cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
        rej_msg->remote_comm_id = rcv_msg->local_comm_id;
        rej_msg->local_comm_id = rcv_msg->remote_comm_id;
        cm_rej_set_msg_rejected(rej_msg, msg_rejected);
        rej_msg->reason = cpu_to_be16(reason);

        if (ari && ari_length) {
                cm_rej_set_reject_info_len(rej_msg, ari_length);
                memcpy(rej_msg->ari, ari, ari_length);
        }

        ret = ib_post_send_mad(msg, NULL);
        if (ret)
                cm_free_msg(msg);

        return ret;
}

static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
                                    __be32 local_qpn, __be32 remote_qpn)
{
        return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
                ((local_ca_guid == remote_ca_guid) &&
                 (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
}

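/*
 * Build path records from a received REQ.  The req_msg fields were written
 * from the sender's point of view, so the sender's "local" GID/LID become
 * the receiver's destination and vice versa.  The stored local ack timeout
 * is converted back to an approximate packet life time by subtracting one
 * exponent step, the inverse of the rounding done in cm_ack_timeout().
 */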
static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
                                            struct ib_sa_path_rec *primary_path,
                                            struct ib_sa_path_rec *alt_path)
{
        memset(primary_path, 0, sizeof *primary_path);
        primary_path->dgid = req_msg->primary_local_gid;
        primary_path->sgid = req_msg->primary_remote_gid;
        primary_path->dlid = req_msg->primary_local_lid;
        primary_path->slid = req_msg->primary_remote_lid;
        primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
        primary_path->hop_limit = req_msg->primary_hop_limit;
        primary_path->traffic_class = req_msg->primary_traffic_class;
        primary_path->reversible = 1;
        primary_path->pkey = req_msg->pkey;
        primary_path->sl = cm_req_get_primary_sl(req_msg);
        primary_path->mtu_selector = IB_SA_EQ;
        primary_path->mtu = cm_req_get_path_mtu(req_msg);
        primary_path->rate_selector = IB_SA_EQ;
        primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
        primary_path->packet_life_time_selector = IB_SA_EQ;
        primary_path->packet_life_time =
                cm_req_get_primary_local_ack_timeout(req_msg);
        primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
        primary_path->service_id = req_msg->service_id;

        if (req_msg->alt_local_lid) {
                memset(alt_path, 0, sizeof *alt_path);
                alt_path->dgid = req_msg->alt_local_gid;
                alt_path->sgid = req_msg->alt_remote_gid;
                alt_path->dlid = req_msg->alt_local_lid;
                alt_path->slid = req_msg->alt_remote_lid;
                alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
                alt_path->hop_limit = req_msg->alt_hop_limit;
                alt_path->traffic_class = req_msg->alt_traffic_class;
                alt_path->reversible = 1;
                alt_path->pkey = req_msg->pkey;
                alt_path->sl = cm_req_get_alt_sl(req_msg);
                alt_path->mtu_selector = IB_SA_EQ;
                alt_path->mtu = cm_req_get_path_mtu(req_msg);
                alt_path->rate_selector = IB_SA_EQ;
                alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
                alt_path->packet_life_time_selector = IB_SA_EQ;
                alt_path->packet_life_time =
                        cm_req_get_alt_local_ack_timeout(req_msg);
                alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
                alt_path->service_id = req_msg->service_id;
        }
}

static u16 cm_get_bth_pkey(struct cm_work *work)
{
        struct ib_device *ib_dev = work->port->cm_dev->ib_device;
        u8 port_num = work->port->port_num;
        u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
        u16 pkey;
        int ret;

        ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
        if (ret) {
                dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
                                     port_num, pkey_index, ret);
                return 0;
        }

        return pkey;
}

static void cm_format_req_event(struct cm_work *work,
                                struct cm_id_private *cm_id_priv,
                                struct ib_cm_id *listen_id)
{
        struct cm_req_msg *req_msg;
        struct ib_cm_req_event_param *param;

        req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
        param = &work->cm_event.param.req_rcvd;
        param->listen_id = listen_id;
        param->bth_pkey = cm_get_bth_pkey(work);
        param->port = cm_id_priv->av.port->port_num;
        param->primary_path = &work->path[0];
        if (req_msg->alt_local_lid)
                param->alternate_path = &work->path[1];
        else
                param->alternate_path = NULL;
        param->remote_ca_guid = req_msg->local_ca_guid;
        param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
        param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
        param->qp_type = cm_req_get_qp_type(req_msg);
        param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
        param->responder_resources = cm_req_get_init_depth(req_msg);
        param->initiator_depth = cm_req_get_resp_res(req_msg);
        param->local_cm_response_timeout =
                                        cm_req_get_remote_resp_timeout(req_msg);
        param->flow_control = cm_req_get_flow_ctrl(req_msg);
        param->remote_cm_response_timeout =
                                        cm_req_get_local_resp_timeout(req_msg);
        param->retry_count = cm_req_get_retry_count(req_msg);
        param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
        param->srq = cm_req_get_srq(req_msg);
        work->cm_event.private_data = &req_msg->private_data;
}

1389 static void cm_process_work(struct cm_id_private *cm_id_priv,
1390                             struct cm_work *work)
1391 {
1392         int ret;
1393
1394         /* We will typically only have the current event to report. */
1395         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
1396         cm_free_work(work);
1397
1398         while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
1399                 spin_lock_irq(&cm_id_priv->lock);
1400                 work = cm_dequeue_work(cm_id_priv);
1401                 spin_unlock_irq(&cm_id_priv->lock);
1402                 BUG_ON(!work);
1403                 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1404                                                 &work->cm_event);
1405                 cm_free_work(work);
1406         }
1407         cm_deref_id(cm_id_priv);
1408         if (ret)
1409                 cm_destroy_id(&cm_id_priv->id, ret);
1410 }
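/*
 * Editorial note: work_count rests at -1.  Each handler that receives an
 * event does atomic_inc_and_test(); the -1 -> 0 transition elects that
 * handler to call cm_process_work(), and later events are queued instead.
 * The loop above decrements with atomic_add_negative(-1, ...) and keeps
 * draining the queue until the count drops back below zero.  A non-zero
 * return from the consumer's cm_handler destroys the ID.
 */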
1411
1412 static void cm_format_mra(struct cm_mra_msg *mra_msg,
1413                           struct cm_id_private *cm_id_priv,
1414                           enum cm_msg_response msg_mraed, u8 service_timeout,
1415                           const void *private_data, u8 private_data_len)
1416 {
1417         cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
1418         cm_mra_set_msg_mraed(mra_msg, msg_mraed);
1419         mra_msg->local_comm_id = cm_id_priv->id.local_id;
1420         mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
1421         cm_mra_set_service_timeout(mra_msg, service_timeout);
1422
1423         if (private_data && private_data_len)
1424                 memcpy(mra_msg->private_data, private_data, private_data_len);
1425 }
1426
1427 static void cm_format_rej(struct cm_rej_msg *rej_msg,
1428                           struct cm_id_private *cm_id_priv,
1429                           enum ib_cm_rej_reason reason,
1430                           void *ari,
1431                           u8 ari_length,
1432                           const void *private_data,
1433                           u8 private_data_len)
1434 {
1435         cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
1436         rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
1437
1438         switch (cm_id_priv->id.state) {
1439         case IB_CM_REQ_RCVD:
1440                 rej_msg->local_comm_id = 0;
1441                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1442                 break;
1443         case IB_CM_MRA_REQ_SENT:
1444                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1445                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1446                 break;
1447         case IB_CM_REP_RCVD:
1448         case IB_CM_MRA_REP_SENT:
1449                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1450                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
1451                 break;
1452         default:
1453                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1454                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
1455                 break;
1456         }
1457
1458         rej_msg->reason = cpu_to_be16(reason);
1459         if (ari && ari_length) {
1460                 cm_rej_set_reject_info_len(rej_msg, ari_length);
1461                 memcpy(rej_msg->ari, ari, ari_length);
1462         }
1463
1464         if (private_data && private_data_len)
1465                 memcpy(rej_msg->private_data, private_data, private_data_len);
1466 }
1467
1468 static void cm_dup_req_handler(struct cm_work *work,
1469                                struct cm_id_private *cm_id_priv)
1470 {
1471         struct ib_mad_send_buf *msg = NULL;
1472         int ret;
1473
1474         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1475                         counter[CM_REQ_COUNTER]);
1476
1477         /* Quick state check to discard duplicate REQs. */
1478         if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1479                 return;
1480
1481         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1482         if (ret)
1483                 return;
1484
1485         spin_lock_irq(&cm_id_priv->lock);
1486         switch (cm_id_priv->id.state) {
1487         case IB_CM_MRA_REQ_SENT:
1488                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1489                               CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1490                               cm_id_priv->private_data,
1491                               cm_id_priv->private_data_len);
1492                 break;
1493         case IB_CM_TIMEWAIT:
1494                 cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1495                               IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1496                 break;
1497         default:
1498                 goto unlock;
1499         }
1500         spin_unlock_irq(&cm_id_priv->lock);
1501
1502         ret = ib_post_send_mad(msg, NULL);
1503         if (ret)
1504                 goto free;
1505         return;
1506
1507 unlock: spin_unlock_irq(&cm_id_priv->lock);
1508 free:   cm_free_msg(msg);
1509 }
1510
1511 static struct cm_id_private *cm_match_req(struct cm_work *work,
1512                                           struct cm_id_private *cm_id_priv)
1513 {
1514         struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1515         struct cm_timewait_info *timewait_info;
1516         struct cm_req_msg *req_msg;
1517
1518         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1519
1520         /* Check for possible duplicate REQ. */
1521         spin_lock_irq(&cm.lock);
1522         timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1523         if (timewait_info) {
1524                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1525                                            timewait_info->work.remote_id);
1526                 spin_unlock_irq(&cm.lock);
1527                 if (cur_cm_id_priv) {
1528                         cm_dup_req_handler(work, cur_cm_id_priv);
1529                         cm_deref_id(cur_cm_id_priv);
1530                 }
1531                 return NULL;
1532         }
1533
1534         /* Check for stale connections. */
1535         timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1536         if (timewait_info) {
1537                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1538                 spin_unlock_irq(&cm.lock);
1539                 cm_issue_rej(work->port, work->mad_recv_wc,
1540                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1541                              NULL, 0);
1542                 return NULL;
1543         }
1544
1545         /* Find matching listen request. */
1546         listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
1547                                            req_msg->service_id);
1548         if (!listen_cm_id_priv) {
1549                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1550                 spin_unlock_irq(&cm.lock);
1551                 cm_issue_rej(work->port, work->mad_recv_wc,
1552                              IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1553                              NULL, 0);
1554                 goto out;
1555         }
1556         atomic_inc(&listen_cm_id_priv->refcount);
1557         atomic_inc(&cm_id_priv->refcount);
1558         cm_id_priv->id.state = IB_CM_REQ_RCVD;
1559         atomic_inc(&cm_id_priv->work_count);
1560         spin_unlock_irq(&cm.lock);
1561 out:
1562         return listen_cm_id_priv;
1563 }
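/*
 * Editorial note: cm_match_req() makes three checks under cm.lock, in order:
 * (1) inserting the remote comm ID into the timewait tree detects a
 * duplicate REQ, which is answered from existing state by
 * cm_dup_req_handler(); (2) inserting the remote QPN detects a stale
 * connection, rejected with IB_CM_REJ_STALE_CONN; (3) only then is the
 * service ID looked up, rejecting with IB_CM_REJ_INVALID_SERVICE_ID when no
 * listener matches.  On success the new ID is moved to IB_CM_REQ_RCVD and
 * the listener is returned with a reference held for the caller.
 */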
1564
1565 /*
1566  * Work-around for inter-subnet connections.  If the LIDs are permissive,
1567  * we need to override the LID/SL data in the REQ with the LID information
1568  * in the work completion.
1569  */
1570 static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1571 {
1572         if (!cm_req_get_primary_subnet_local(req_msg)) {
1573                 if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
1574                         req_msg->primary_local_lid = cpu_to_be16(wc->slid);
1575                         cm_req_set_primary_sl(req_msg, wc->sl);
1576                 }
1577
1578                 if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
1579                         req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1580         }
1581
1582         if (!cm_req_get_alt_subnet_local(req_msg)) {
1583                 if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
1584                         req_msg->alt_local_lid = cpu_to_be16(wc->slid);
1585                         cm_req_set_alt_sl(req_msg, wc->sl);
1586                 }
1587
1588                 if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
1589                         req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1590         }
1591 }
1592
1593 static int cm_req_handler(struct cm_work *work)
1594 {
1595         struct ib_cm_id *cm_id;
1596         struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1597         struct cm_req_msg *req_msg;
1598         int ret;
1599
1600         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1601
1602         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1603         if (IS_ERR(cm_id))
1604                 return PTR_ERR(cm_id);
1605
1606         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1607         cm_id_priv->id.remote_id = req_msg->local_comm_id;
1608         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1609                                 work->mad_recv_wc->recv_buf.grh,
1610                                 &cm_id_priv->av);
1611         cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1612                                                             id.local_id);
1613         if (IS_ERR(cm_id_priv->timewait_info)) {
1614                 ret = PTR_ERR(cm_id_priv->timewait_info);
1615                 goto destroy;
1616         }
1617         cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1618         cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
1619         cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
1620
1621         listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1622         if (!listen_cm_id_priv) {
1623                 ret = -EINVAL;
1624                 kfree(cm_id_priv->timewait_info);
                cm_id_priv->timewait_info = NULL; /* don't leave a dangling pointer */
1625                 goto destroy;
1626         }
1627
1628         cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
1629         cm_id_priv->id.context = listen_cm_id_priv->id.context;
1630         cm_id_priv->id.service_id = req_msg->service_id;
1631         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
1632
1633         cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1634         cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1635
1636         memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
1637         work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
1638         ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
1639         if (ret) {
1640                 ib_get_cached_gid(work->port->cm_dev->ib_device,
1641                                   work->port->port_num, 0, &work->path[0].sgid);
1642                 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1643                                &work->path[0].sgid, sizeof work->path[0].sgid,
1644                                NULL, 0);
1645                 goto rejected;
1646         }
1647         if (req_msg->alt_local_lid) {
1648                 ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
1649                 if (ret) {
1650                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
1651                                        &work->path[0].sgid,
1652                                        sizeof work->path[0].sgid, NULL, 0);
1653                         goto rejected;
1654                 }
1655         }
1656         cm_id_priv->tid = req_msg->hdr.tid;
1657         cm_id_priv->timeout_ms = cm_convert_to_ms(
1658                                         cm_req_get_local_resp_timeout(req_msg));
1659         cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
1660         cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
1661         cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
1662         cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
1663         cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
1664         cm_id_priv->pkey = req_msg->pkey;
1665         cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
1666         cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
1667         cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1668         cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
1669
1670         cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
1671         cm_process_work(cm_id_priv, work);
1672         cm_deref_id(listen_cm_id_priv);
1673         return 0;
1674
1675 rejected:
1676         atomic_dec(&cm_id_priv->refcount);
1677         cm_deref_id(listen_cm_id_priv);
1678 destroy:
1679         ib_destroy_cm_id(cm_id);
1680         return ret;
1681 }
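/*
 * Illustrative sketch (editorial addition, not part of this file): the shape
 * of a passive-side callback that cm_req_handler() ultimately invokes via
 * cm_process_work().  The "example_" name, QPN/PSN placeholders and resource
 * values are hypothetical; a real consumer creates a QP first and returns
 * non-zero only when it wants the CM ID destroyed.
 */
static int example_passive_cm_handler(struct ib_cm_id *cm_id,
				      struct ib_cm_event *event)
{
	struct ib_cm_rep_param rep = {};

	switch (event->event) {
	case IB_CM_REQ_RECEIVED:
		rep.qp_num = 0x123456;	/* placeholder: QPN of a prepared QP */
		rep.starting_psn = 1;	/* placeholder initial PSN */
		rep.responder_resources =
			event->param.req_rcvd.responder_resources;
		rep.initiator_depth = event->param.req_rcvd.initiator_depth;
		rep.rnr_retry_count = 7;	/* 7 == retry forever */
		return ib_send_cm_rep(cm_id, &rep);
	case IB_CM_RTU_RECEIVED:
		return 0;	/* connection is now established */
	default:
		return 0;
	}
}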
1682
1683 static void cm_format_rep(struct cm_rep_msg *rep_msg,
1684                           struct cm_id_private *cm_id_priv,
1685                           struct ib_cm_rep_param *param)
1686 {
1687         cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
1688         rep_msg->local_comm_id = cm_id_priv->id.local_id;
1689         rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
1690         cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
1691         rep_msg->resp_resources = param->responder_resources;
1692         cm_rep_set_target_ack_delay(rep_msg,
1693                                     cm_id_priv->av.port->cm_dev->ack_delay);
1694         cm_rep_set_failover(rep_msg, param->failover_accepted);
1695         cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
1696         rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1697
1698         if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
1699                 rep_msg->initiator_depth = param->initiator_depth;
1700                 cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1701                 cm_rep_set_srq(rep_msg, param->srq);
1702                 cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
1703         } else {
1704                 cm_rep_set_srq(rep_msg, 1);
1705                 cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
1706         }
1707
1708         if (param->private_data && param->private_data_len)
1709                 memcpy(rep_msg->private_data, param->private_data,
1710                        param->private_data_len);
1711 }
1712
1713 int ib_send_cm_rep(struct ib_cm_id *cm_id,
1714                    struct ib_cm_rep_param *param)
1715 {
1716         struct cm_id_private *cm_id_priv;
1717         struct ib_mad_send_buf *msg;
1718         struct cm_rep_msg *rep_msg;
1719         unsigned long flags;
1720         int ret;
1721
1722         if (param->private_data &&
1723             param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
1724                 return -EINVAL;
1725
1726         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1727         spin_lock_irqsave(&cm_id_priv->lock, flags);
1728         if (cm_id->state != IB_CM_REQ_RCVD &&
1729             cm_id->state != IB_CM_MRA_REQ_SENT) {
1730                 ret = -EINVAL;
1731                 goto out;
1732         }
1733
1734         ret = cm_alloc_msg(cm_id_priv, &msg);
1735         if (ret)
1736                 goto out;
1737
1738         rep_msg = (struct cm_rep_msg *) msg->mad;
1739         cm_format_rep(rep_msg, cm_id_priv, param);
1740         msg->timeout_ms = cm_id_priv->timeout_ms;
1741         msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
1742
1743         ret = ib_post_send_mad(msg, NULL);
1744         if (ret) {
1745                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1746                 cm_free_msg(msg);
1747                 return ret;
1748         }
1749
1750         cm_id->state = IB_CM_REP_SENT;
1751         cm_id_priv->msg = msg;
1752         cm_id_priv->initiator_depth = param->initiator_depth;
1753         cm_id_priv->responder_resources = param->responder_resources;
1754         cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
1755         cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
1756
1757 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1758         return ret;
1759 }
1760 EXPORT_SYMBOL(ib_send_cm_rep);
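/*
 * Editorial note: the REP is posted before the state transition so that a
 * send failure leaves the ID in IB_CM_REQ_RCVD/IB_CM_MRA_REQ_SENT, where the
 * caller can still retry or reject.  local_qpn is masked to 24 bits because
 * QPNs occupy three bytes on the wire.
 */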
1761
1762 static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
1763                           struct cm_id_private *cm_id_priv,
1764                           const void *private_data,
1765                           u8 private_data_len)
1766 {
1767         cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
1768         rtu_msg->local_comm_id = cm_id_priv->id.local_id;
1769         rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
1770
1771         if (private_data && private_data_len)
1772                 memcpy(rtu_msg->private_data, private_data, private_data_len);
1773 }
1774
1775 int ib_send_cm_rtu(struct ib_cm_id *cm_id,
1776                    const void *private_data,
1777                    u8 private_data_len)
1778 {
1779         struct cm_id_private *cm_id_priv;
1780         struct ib_mad_send_buf *msg;
1781         unsigned long flags;
1782         void *data;
1783         int ret;
1784
1785         if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
1786                 return -EINVAL;
1787
1788         data = cm_copy_private_data(private_data, private_data_len);
1789         if (IS_ERR(data))
1790                 return PTR_ERR(data);
1791
1792         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1793         spin_lock_irqsave(&cm_id_priv->lock, flags);
1794         if (cm_id->state != IB_CM_REP_RCVD &&
1795             cm_id->state != IB_CM_MRA_REP_SENT) {
1796                 ret = -EINVAL;
1797                 goto error;
1798         }
1799
1800         ret = cm_alloc_msg(cm_id_priv, &msg);
1801         if (ret)
1802                 goto error;
1803
1804         cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1805                       private_data, private_data_len);
1806
1807         ret = ib_post_send_mad(msg, NULL);
1808         if (ret) {
1809                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1810                 cm_free_msg(msg);
1811                 kfree(data);
1812                 return ret;
1813         }
1814
1815         cm_id->state = IB_CM_ESTABLISHED;
1816         cm_set_private_data(cm_id_priv, data, private_data_len);
1817         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1818         return 0;
1819
1820 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1821         kfree(data);
1822         return ret;
1823 }
1824 EXPORT_SYMBOL(ib_send_cm_rtu);
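/*
 * Illustrative sketch (editorial addition): on IB_CM_REP_RECEIVED the active
 * side typically transitions its QP to RTR and then RTS before confirming
 * with an RTU.  ib_cm_init_qp_attr() fills in the attributes the CM
 * negotiated; the "example_" helper and its error handling are schematic.
 */
static int example_on_rep_received(struct ib_cm_id *cm_id, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret)
		return ret;

	return ib_send_cm_rtu(cm_id, NULL, 0);
}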
1825
1826 static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
1827 {
1828         struct cm_rep_msg *rep_msg;
1829         struct ib_cm_rep_event_param *param;
1830
1831         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1832         param = &work->cm_event.param.rep_rcvd;
1833         param->remote_ca_guid = rep_msg->local_ca_guid;
1834         param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
1835         param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
1836         param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
1837         param->responder_resources = rep_msg->initiator_depth;
1838         param->initiator_depth = rep_msg->resp_resources;
1839         param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1840         param->failover_accepted = cm_rep_get_failover(rep_msg);
1841         param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
1842         param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1843         param->srq = cm_rep_get_srq(rep_msg);
1844         work->cm_event.private_data = &rep_msg->private_data;
1845 }
1846
1847 static void cm_dup_rep_handler(struct cm_work *work)
1848 {
1849         struct cm_id_private *cm_id_priv;
1850         struct cm_rep_msg *rep_msg;
1851         struct ib_mad_send_buf *msg = NULL;
1852         int ret;
1853
1854         rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
1855         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
1856                                    rep_msg->local_comm_id);
1857         if (!cm_id_priv)
1858                 return;
1859
1860         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1861                         counter[CM_REP_COUNTER]);
1862         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1863         if (ret)
1864                 goto deref;
1865
1866         spin_lock_irq(&cm_id_priv->lock);
1867         if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
1868                 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1869                               cm_id_priv->private_data,
1870                               cm_id_priv->private_data_len);
1871         else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
1872                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1873                               CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
1874                               cm_id_priv->private_data,
1875                               cm_id_priv->private_data_len);
1876         else
1877                 goto unlock;
1878         spin_unlock_irq(&cm_id_priv->lock);
1879
1880         ret = ib_post_send_mad(msg, NULL);
1881         if (ret)
1882                 goto free;
1883         goto deref;
1884
1885 unlock: spin_unlock_irq(&cm_id_priv->lock);
1886 free:   cm_free_msg(msg);
1887 deref:  cm_deref_id(cm_id_priv);
1888 }
1889
1890 static int cm_rep_handler(struct cm_work *work)
1891 {
1892         struct cm_id_private *cm_id_priv;
1893         struct cm_rep_msg *rep_msg;
1894         int ret;
1895
1896         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1897         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
1898         if (!cm_id_priv) {
1899                 cm_dup_rep_handler(work);
1900                 return -EINVAL;
1901         }
1902
1903         cm_format_rep_event(work, cm_id_priv->qp_type);
1904
1905         spin_lock_irq(&cm_id_priv->lock);
1906         switch (cm_id_priv->id.state) {
1907         case IB_CM_REQ_SENT:
1908         case IB_CM_MRA_REQ_RCVD:
1909                 break;
1910         default:
1911                 spin_unlock_irq(&cm_id_priv->lock);
1912                 ret = -EINVAL;
1913                 goto error;
1914         }
1915
1916         cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
1917         cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
1918         cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
1919
1920         spin_lock(&cm.lock);
1921         /* Check for duplicate REP. */
1922         if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
1923                 spin_unlock(&cm.lock);
1924                 spin_unlock_irq(&cm_id_priv->lock);
1925                 ret = -EINVAL;
1926                 goto error;
1927         }
1928         /* Check for a stale connection. */
1929         if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
1930                 rb_erase(&cm_id_priv->timewait_info->remote_id_node,
1931                          &cm.remote_id_table);
1932                 cm_id_priv->timewait_info->inserted_remote_id = 0;
1933                 spin_unlock(&cm.lock);
1934                 spin_unlock_irq(&cm_id_priv->lock);
1935                 cm_issue_rej(work->port, work->mad_recv_wc,
1936                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
1937                              NULL, 0);
1938                 ret = -EINVAL;
1939                 goto error;
1940         }
1941         spin_unlock(&cm.lock);
1942
1943         cm_id_priv->id.state = IB_CM_REP_RCVD;
1944         cm_id_priv->id.remote_id = rep_msg->local_comm_id;
1945         cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
1946         cm_id_priv->initiator_depth = rep_msg->resp_resources;
1947         cm_id_priv->responder_resources = rep_msg->initiator_depth;
1948         cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
1949         cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1950         cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1951         cm_id_priv->av.timeout =
1952                         cm_ack_timeout(cm_id_priv->target_ack_delay,
1953                                        cm_id_priv->av.timeout - 1);
1954         cm_id_priv->alt_av.timeout =
1955                         cm_ack_timeout(cm_id_priv->target_ack_delay,
1956                                        cm_id_priv->alt_av.timeout - 1);
1957
1958         /* todo: handle peer_to_peer */
1959
1960         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1961         ret = atomic_inc_and_test(&cm_id_priv->work_count);
1962         if (!ret)
1963                 list_add_tail(&work->list, &cm_id_priv->work_list);
1964         spin_unlock_irq(&cm_id_priv->lock);
1965
1966         if (ret)
1967                 cm_process_work(cm_id_priv, work);
1968         else
1969                 cm_deref_id(cm_id_priv);
1970         return 0;
1971
1972 error:
1973         cm_deref_id(cm_id_priv);
1974         return ret;
1975 }
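/*
 * Editorial note: the duplicate-REP and stale-connection checks above mirror
 * cm_match_req(), with cm.lock nested inside the per-ID lock; the timewait
 * entries inserted here are the ones future REQs and REPs are tested
 * against.
 */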
1976
1977 static int cm_establish_handler(struct cm_work *work)
1978 {
1979         struct cm_id_private *cm_id_priv;
1980         int ret;
1981
1982         /* See comment in cm_establish about lookup. */
1983         cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
1984         if (!cm_id_priv)
1985                 return -EINVAL;
1986
1987         spin_lock_irq(&cm_id_priv->lock);
1988         if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
1989                 spin_unlock_irq(&cm_id_priv->lock);
1990                 goto out;
1991         }
1992
1993         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1994         ret = atomic_inc_and_test(&cm_id_priv->work_count);
1995         if (!ret)
1996                 list_add_tail(&work->list, &cm_id_priv->work_list);
1997         spin_unlock_irq(&cm_id_priv->lock);
1998
1999         if (ret)
2000                 cm_process_work(cm_id_priv, work);
2001         else
2002                 cm_deref_id(cm_id_priv);
2003         return 0;
2004 out:
2005         cm_deref_id(cm_id_priv);
2006         return -EINVAL;
2007 }
2008
2009 static int cm_rtu_handler(struct cm_work *work)
2010 {
2011         struct cm_id_private *cm_id_priv;
2012         struct cm_rtu_msg *rtu_msg;
2013         int ret;
2014
2015         rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
2016         cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
2017                                    rtu_msg->local_comm_id);
2018         if (!cm_id_priv)
2019                 return -EINVAL;
2020
2021         work->cm_event.private_data = &rtu_msg->private_data;
2022
2023         spin_lock_irq(&cm_id_priv->lock);
2024         if (cm_id_priv->id.state != IB_CM_REP_SENT &&
2025             cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
2026                 spin_unlock_irq(&cm_id_priv->lock);
2027                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2028                                 counter[CM_RTU_COUNTER]);
2029                 goto out;
2030         }
2031         cm_id_priv->id.state = IB_CM_ESTABLISHED;
2032
2033         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2034         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2035         if (!ret)
2036                 list_add_tail(&work->list, &cm_id_priv->work_list);
2037         spin_unlock_irq(&cm_id_priv->lock);
2038
2039         if (ret)
2040                 cm_process_work(cm_id_priv, work);
2041         else
2042                 cm_deref_id(cm_id_priv);
2043         return 0;
2044 out:
2045         cm_deref_id(cm_id_priv);
2046         return -EINVAL;
2047 }
2048
2049 static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
2050                           struct cm_id_private *cm_id_priv,
2051                           const void *private_data,
2052                           u8 private_data_len)
2053 {
2054         cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
2055                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
2056         dreq_msg->local_comm_id = cm_id_priv->id.local_id;
2057         dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
2058         cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
2059
2060         if (private_data && private_data_len)
2061                 memcpy(dreq_msg->private_data, private_data, private_data_len);
2062 }
2063
2064 int ib_send_cm_dreq(struct ib_cm_id *cm_id,
2065                     const void *private_data,
2066                     u8 private_data_len)
2067 {
2068         struct cm_id_private *cm_id_priv;
2069         struct ib_mad_send_buf *msg;
2070         unsigned long flags;
2071         int ret;
2072
2073         if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
2074                 return -EINVAL;
2075
2076         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2077         spin_lock_irqsave(&cm_id_priv->lock, flags);
2078         if (cm_id->state != IB_CM_ESTABLISHED) {
2079                 ret = -EINVAL;
2080                 goto out;
2081         }
2082
2083         if (cm_id->lap_state == IB_CM_LAP_SENT ||
2084             cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
2085                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2086
2087         ret = cm_alloc_msg(cm_id_priv, &msg);
2088         if (ret) {
2089                 cm_enter_timewait(cm_id_priv);
2090                 goto out;
2091         }
2092
2093         cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
2094                        private_data, private_data_len);
2095         msg->timeout_ms = cm_id_priv->timeout_ms;
2096         msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2097
2098         ret = ib_post_send_mad(msg, NULL);
2099         if (ret) {
2100                 cm_enter_timewait(cm_id_priv);
2101                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2102                 cm_free_msg(msg);
2103                 return ret;
2104         }
2105
2106         cm_id->state = IB_CM_DREQ_SENT;
2107         cm_id_priv->msg = msg;
2108 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2109         return ret;
2110 }
2111 EXPORT_SYMBOL(ib_send_cm_dreq);
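/*
 * Illustrative sketch (editorial addition): orderly teardown as a consumer
 * might drive it.  After a successful send the callback should expect
 * IB_CM_DREP_RECEIVED (or a DREQ timeout), then IB_CM_TIMEWAIT_EXIT once the
 * timewait period ends.
 */
static int example_disconnect(struct ib_cm_id *cm_id)
{
	return ib_send_cm_dreq(cm_id, NULL, 0);
}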
2112
2113 static void cm_format_drep(struct cm_drep_msg *drep_msg,
2114                           struct cm_id_private *cm_id_priv,
2115                           const void *private_data,
2116                           u8 private_data_len)
2117 {
2118         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2119         drep_msg->local_comm_id = cm_id_priv->id.local_id;
2120         drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2121
2122         if (private_data && private_data_len)
2123                 memcpy(drep_msg->private_data, private_data, private_data_len);
2124 }
2125
2126 int ib_send_cm_drep(struct ib_cm_id *cm_id,
2127                     const void *private_data,
2128                     u8 private_data_len)
2129 {
2130         struct cm_id_private *cm_id_priv;
2131         struct ib_mad_send_buf *msg;
2132         unsigned long flags;
2133         void *data;
2134         int ret;
2135
2136         if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2137                 return -EINVAL;
2138
2139         data = cm_copy_private_data(private_data, private_data_len);
2140         if (IS_ERR(data))
2141                 return PTR_ERR(data);
2142
2143         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2144         spin_lock_irqsave(&cm_id_priv->lock, flags);
2145         if (cm_id->state != IB_CM_DREQ_RCVD) {
2146                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2147                 kfree(data);
2148                 return -EINVAL;
2149         }
2150
2151         cm_set_private_data(cm_id_priv, data, private_data_len);
2152         cm_enter_timewait(cm_id_priv);
2153
2154         ret = cm_alloc_msg(cm_id_priv, &msg);
2155         if (ret)
2156                 goto out;
2157
2158         cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2159                        private_data, private_data_len);
2160
2161         ret = ib_post_send_mad(msg, NULL);
2162         if (ret) {
2163                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2164                 cm_free_msg(msg);
2165                 return ret;
2166         }
2167
2168 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2169         return ret;
2170 }
2171 EXPORT_SYMBOL(ib_send_cm_drep);
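/*
 * Editorial note: ib_send_cm_drep() is only valid in IB_CM_DREQ_RCVD, and
 * the ID enters timewait before the DREP is even posted, so a send failure
 * cannot stall teardown.  DREQs for IDs this node no longer knows are
 * answered directly by cm_issue_drep() below.
 */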
2172
2173 static int cm_issue_drep(struct cm_port *port,
2174                          struct ib_mad_recv_wc *mad_recv_wc)
2175 {
2176         struct ib_mad_send_buf *msg = NULL;
2177         struct cm_dreq_msg *dreq_msg;
2178         struct cm_drep_msg *drep_msg;
2179         int ret;
2180
2181         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2182         if (ret)
2183                 return ret;
2184
2185         dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2186         drep_msg = (struct cm_drep_msg *) msg->mad;
2187
2188         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2189         drep_msg->remote_comm_id = dreq_msg->local_comm_id;
2190         drep_msg->local_comm_id = dreq_msg->remote_comm_id;
2191
2192         ret = ib_post_send_mad(msg, NULL);
2193         if (ret)
2194                 cm_free_msg(msg);
2195
2196         return ret;
2197 }
2198
2199 static int cm_dreq_handler(struct cm_work *work)
2200 {
2201         struct cm_id_private *cm_id_priv;
2202         struct cm_dreq_msg *dreq_msg;
2203         struct ib_mad_send_buf *msg = NULL;
2204         int ret;
2205
2206         dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2207         cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
2208                                    dreq_msg->local_comm_id);
2209         if (!cm_id_priv) {
2210                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2211                                 counter[CM_DREQ_COUNTER]);
2212                 cm_issue_drep(work->port, work->mad_recv_wc);
2213                 return -EINVAL;
2214         }
2215
2216         work->cm_event.private_data = &dreq_msg->private_data;
2217
2218         spin_lock_irq(&cm_id_priv->lock);
2219         if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
2220                 goto unlock;
2221
2222         switch (cm_id_priv->id.state) {
2223         case IB_CM_REP_SENT:
2224         case IB_CM_DREQ_SENT:
2225                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2226                 break;
2227         case IB_CM_ESTABLISHED:
2228                 if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2229                     cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2230                         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2231                 break;
2232         case IB_CM_MRA_REP_RCVD:
2233                 break;
2234         case IB_CM_TIMEWAIT:
2235                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2236                                 counter[CM_DREQ_COUNTER]);
2237                 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2238                         goto unlock;
2239
2240                 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2241                                cm_id_priv->private_data,
2242                                cm_id_priv->private_data_len);
2243                 spin_unlock_irq(&cm_id_priv->lock);
2244
2245                 if (ib_post_send_mad(msg, NULL))
2246                         cm_free_msg(msg);
2247                 goto deref;
2248         case IB_CM_DREQ_RCVD:
2249                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2250                                 counter[CM_DREQ_COUNTER]);
2251                 goto unlock;
2252         default:
2253                 goto unlock;
2254         }
2255         cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2256         cm_id_priv->tid = dreq_msg->hdr.tid;
2257         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2258         if (!ret)
2259                 list_add_tail(&work->list, &cm_id_priv->work_list);
2260         spin_unlock_irq(&cm_id_priv->lock);
2261
2262         if (ret)
2263                 cm_process_work(cm_id_priv, work);
2264         else
2265                 cm_deref_id(cm_id_priv);
2266         return 0;
2267
2268 unlock: spin_unlock_irq(&cm_id_priv->lock);
2269 deref:  cm_deref_id(cm_id_priv);
2270         return -EINVAL;
2271 }
2272
2273 static int cm_drep_handler(struct cm_work *work)
2274 {
2275         struct cm_id_private *cm_id_priv;
2276         struct cm_drep_msg *drep_msg;
2277         int ret;
2278
2279         drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2280         cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2281                                    drep_msg->local_comm_id);
2282         if (!cm_id_priv)
2283                 return -EINVAL;
2284
2285         work->cm_event.private_data = &drep_msg->private_data;
2286
2287         spin_lock_irq(&cm_id_priv->lock);
2288         if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2289             cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2290                 spin_unlock_irq(&cm_id_priv->lock);
2291                 goto out;
2292         }
2293         cm_enter_timewait(cm_id_priv);
2294
2295         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2296         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2297         if (!ret)
2298                 list_add_tail(&work->list, &cm_id_priv->work_list);
2299         spin_unlock_irq(&cm_id_priv->lock);
2300
2301         if (ret)
2302                 cm_process_work(cm_id_priv, work);
2303         else
2304                 cm_deref_id(cm_id_priv);
2305         return 0;
2306 out:
2307         cm_deref_id(cm_id_priv);
2308         return -EINVAL;
2309 }
2310
2311 int ib_send_cm_rej(struct ib_cm_id *cm_id,
2312                    enum ib_cm_rej_reason reason,
2313                    void *ari,
2314                    u8 ari_length,
2315                    const void *private_data,
2316                    u8 private_data_len)
2317 {
2318         struct cm_id_private *cm_id_priv;
2319         struct ib_mad_send_buf *msg;
2320         unsigned long flags;
2321         int ret;
2322
2323         if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2324             (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2325                 return -EINVAL;
2326
2327         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2328
2329         spin_lock_irqsave(&cm_id_priv->lock, flags);
2330         switch (cm_id->state) {
2331         case IB_CM_REQ_SENT:
2332         case IB_CM_MRA_REQ_RCVD:
2333         case IB_CM_REQ_RCVD:
2334         case IB_CM_MRA_REQ_SENT:
2335         case IB_CM_REP_RCVD:
2336         case IB_CM_MRA_REP_SENT:
2337                 ret = cm_alloc_msg(cm_id_priv, &msg);
2338                 if (!ret)
2339                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2340                                       cm_id_priv, reason, ari, ari_length,
2341                                       private_data, private_data_len);
2342
2343                 cm_reset_to_idle(cm_id_priv);
2344                 break;
2345         case IB_CM_REP_SENT:
2346         case IB_CM_MRA_REP_RCVD:
2347                 ret = cm_alloc_msg(cm_id_priv, &msg);
2348                 if (!ret)
2349                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2350                                       cm_id_priv, reason, ari, ari_length,
2351                                       private_data, private_data_len);
2352
2353                 cm_enter_timewait(cm_id_priv);
2354                 break;
2355         default:
2356                 ret = -EINVAL;
2357                 goto out;
2358         }
2359
2360         if (ret)
2361                 goto out;
2362
2363         ret = ib_post_send_mad(msg, NULL);
2364         if (ret)
2365                 cm_free_msg(msg);
2366
2367 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2368         return ret;
2369 }
2370 EXPORT_SYMBOL(ib_send_cm_rej);
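/*
 * Illustrative sketch (editorial addition): declining an incoming REQ from
 * the listen callback.  IB_CM_REJ_CONSUMER_DEFINED lets the consumer carry
 * its own reason in the private data; the string here is hypothetical.
 */
static int example_decline_req(struct ib_cm_id *cm_id)
{
	static const char reason[] = "no resources";

	return ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
			      reason, sizeof(reason));
}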
2371
2372 static void cm_format_rej_event(struct cm_work *work)
2373 {
2374         struct cm_rej_msg *rej_msg;
2375         struct ib_cm_rej_event_param *param;
2376
2377         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2378         param = &work->cm_event.param.rej_rcvd;
2379         param->ari = rej_msg->ari;
2380         param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2381         param->reason = __be16_to_cpu(rej_msg->reason);
2382         work->cm_event.private_data = &rej_msg->private_data;
2383 }
2384
2385 static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2386 {
2387         struct cm_timewait_info *timewait_info;
2388         struct cm_id_private *cm_id_priv;
2389         __be32 remote_id;
2390
2391         remote_id = rej_msg->local_comm_id;
2392
2393         if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2394                 spin_lock_irq(&cm.lock);
2395                 timewait_info = cm_find_remote_id(*((__be64 *) rej_msg->ari),
2396                                                   remote_id);
2397                 if (!timewait_info) {
2398                         spin_unlock_irq(&cm.lock);
2399                         return NULL;
2400                 }
2401                 cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2402                                       (timewait_info->work.local_id ^
2403                                        cm.random_id_operand));
2404                 if (cm_id_priv) {
2405                         if (cm_id_priv->id.remote_id == remote_id)
2406                                 atomic_inc(&cm_id_priv->refcount);
2407                         else
2408                                 cm_id_priv = NULL;
2409                 }
2410                 spin_unlock_irq(&cm.lock);
2411         } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2412                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2413         else
2414                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2415
2416         return cm_id_priv;
2417 }
2418
2419 static int cm_rej_handler(struct cm_work *work)
2420 {
2421         struct cm_id_private *cm_id_priv;
2422         struct cm_rej_msg *rej_msg;
2423         int ret;
2424
2425         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2426         cm_id_priv = cm_acquire_rejected_id(rej_msg);
2427         if (!cm_id_priv)
2428                 return -EINVAL;
2429
2430         cm_format_rej_event(work);
2431
2432         spin_lock_irq(&cm_id_priv->lock);
2433         switch (cm_id_priv->id.state) {
2434         case IB_CM_REQ_SENT:
2435         case IB_CM_MRA_REQ_RCVD:
2436         case IB_CM_REP_SENT:
2437         case IB_CM_MRA_REP_RCVD:
2438                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2439                 /* fall through */
2440         case IB_CM_REQ_RCVD:
2441         case IB_CM_MRA_REQ_SENT:
2442                 if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2443                         cm_enter_timewait(cm_id_priv);
2444                 else
2445                         cm_reset_to_idle(cm_id_priv);
2446                 break;
2447         case IB_CM_DREQ_SENT:
2448                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2449                 /* fall through */
2450         case IB_CM_REP_RCVD:
2451         case IB_CM_MRA_REP_SENT:
2452                 cm_enter_timewait(cm_id_priv);
2453                 break;
2454         case IB_CM_ESTABLISHED:
2455                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2456                     cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2457                         if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2458                                 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2459                                               cm_id_priv->msg);
2460                         cm_enter_timewait(cm_id_priv);
2461                         break;
2462                 }
2463                 /* fall through */
2464         default:
2465                 spin_unlock_irq(&cm_id_priv->lock);
2466                 ret = -EINVAL;
2467                 goto out;
2468         }
2469
2470         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2471         if (!ret)
2472                 list_add_tail(&work->list, &cm_id_priv->work_list);
2473         spin_unlock_irq(&cm_id_priv->lock);
2474
2475         if (ret)
2476                 cm_process_work(cm_id_priv, work);
2477         else
2478                 cm_deref_id(cm_id_priv);
2479         return 0;
2480 out:
2481         cm_deref_id(cm_id_priv);
2482         return -EINVAL;
2483 }
2484
2485 int ib_send_cm_mra(struct ib_cm_id *cm_id,
2486                    u8 service_timeout,
2487                    const void *private_data,
2488                    u8 private_data_len)
2489 {
2490         struct cm_id_private *cm_id_priv;
2491         struct ib_mad_send_buf *msg;
2492         enum ib_cm_state cm_state;
2493         enum ib_cm_lap_state lap_state;
2494         enum cm_msg_response msg_response;
2495         void *data;
2496         unsigned long flags;
2497         int ret;
2498
2499         if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2500                 return -EINVAL;
2501
2502         data = cm_copy_private_data(private_data, private_data_len);
2503         if (IS_ERR(data))
2504                 return PTR_ERR(data);
2505
2506         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2507
2508         spin_lock_irqsave(&cm_id_priv->lock, flags);
2509         switch (cm_id_priv->id.state) {
2510         case IB_CM_REQ_RCVD:
2511                 cm_state = IB_CM_MRA_REQ_SENT;
2512                 lap_state = cm_id->lap_state;
2513                 msg_response = CM_MSG_RESPONSE_REQ;
2514                 break;
2515         case IB_CM_REP_RCVD:
2516                 cm_state = IB_CM_MRA_REP_SENT;
2517                 lap_state = cm_id->lap_state;
2518                 msg_response = CM_MSG_RESPONSE_REP;
2519                 break;
2520         case IB_CM_ESTABLISHED:
2521                 if (cm_id->lap_state == IB_CM_LAP_RCVD) {
2522                         cm_state = cm_id->state;
2523                         lap_state = IB_CM_MRA_LAP_SENT;
2524                         msg_response = CM_MSG_RESPONSE_OTHER;
2525                         break;
2526                 }
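                /* else fall through: MRA in ESTABLISHED requires a pending LAP */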
2527         default:
2528                 ret = -EINVAL;
2529                 goto error1;
2530         }
2531
2532         if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2533                 ret = cm_alloc_msg(cm_id_priv, &msg);
2534                 if (ret)
2535                         goto error1;
2536
2537                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2538                               msg_response, service_timeout,
2539                               private_data, private_data_len);
2540                 ret = ib_post_send_mad(msg, NULL);
2541                 if (ret)
2542                         goto error2;
2543         }
2544
2545         cm_id->state = cm_state;
2546         cm_id->lap_state = lap_state;
2547         cm_id_priv->service_timeout = service_timeout;
2548         cm_set_private_data(cm_id_priv, data, private_data_len);
2549         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2550         return 0;
2551
2552 error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2553         kfree(data);
2554         return ret;
2555
2556 error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2557         kfree(data);
2558         cm_free_msg(msg);
2559         return ret;
2560 }
2561 EXPORT_SYMBOL(ib_send_cm_mra);
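/*
 * Illustrative sketch (editorial addition): a consumer that needs more time
 * to answer a REQ sends an MRA from its callback.  The low five bits use the
 * IBTA 4.096 us * 2^t timeout encoding (20 is an arbitrary ~4.3 s example);
 * OR-ing in IB_CM_MRA_FLAG_DELAY records the timeout but defers the actual
 * MRA until a duplicate REQ arrives (see cm_dup_req_handler()).
 */
static int example_delay_req(struct ib_cm_id *cm_id)
{
	return ib_send_cm_mra(cm_id, 20 | IB_CM_MRA_FLAG_DELAY, NULL, 0);
}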
2562
2563 static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2564 {
2565         switch (cm_mra_get_msg_mraed(mra_msg)) {
2566         case CM_MSG_RESPONSE_REQ:
2567                 return cm_acquire_id(mra_msg->remote_comm_id, 0);
2568         case CM_MSG_RESPONSE_REP:
2569         case CM_MSG_RESPONSE_OTHER:
2570                 return cm_acquire_id(mra_msg->remote_comm_id,
2571                                      mra_msg->local_comm_id);
2572         default:
2573                 return NULL;
2574         }
2575 }
2576
2577 static int cm_mra_handler(struct cm_work *work)
2578 {
2579         struct cm_id_private *cm_id_priv;
2580         struct cm_mra_msg *mra_msg;
2581         int timeout, ret;
2582
2583         mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
2584         cm_id_priv = cm_acquire_mraed_id(mra_msg);
2585         if (!cm_id_priv)
2586                 return -EINVAL;
2587
2588         work->cm_event.private_data = &mra_msg->private_data;
2589         work->cm_event.param.mra_rcvd.service_timeout =
2590                                         cm_mra_get_service_timeout(mra_msg);
2591         timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
2592                   cm_convert_to_ms(cm_id_priv->av.timeout);
2593
2594         spin_lock_irq(&cm_id_priv->lock);
2595         switch (cm_id_priv->id.state) {
2596         case IB_CM_REQ_SENT:
2597                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
2598                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2599                                   cm_id_priv->msg, timeout))
2600                         goto out;
2601                 cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
2602                 break;
2603         case IB_CM_REP_SENT:
2604                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
2605                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2606                                   cm_id_priv->msg, timeout))
2607                         goto out;
2608                 cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
2609                 break;
2610         case IB_CM_ESTABLISHED:
2611                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
2612                     cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
2613                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2614                                   cm_id_priv->msg, timeout)) {
2615                         if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2616                                 atomic_long_inc(&work->port->
2617                                                 counter_group[CM_RECV_DUPLICATES].
2618                                                 counter[CM_MRA_COUNTER]);
2619                         goto out;
2620                 }
2621                 cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
2622                 break;
2623         case IB_CM_MRA_REQ_RCVD:
2624         case IB_CM_MRA_REP_RCVD:
2625                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2626                                 counter[CM_MRA_COUNTER]);
2627                 /* fall through */
2628         default:
2629                 goto out;
2630         }
2631
2632         cm_id_priv->msg->context[1] = (void *) (unsigned long)
2633                                       cm_id_priv->id.state;
2634         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2635         if (!ret)
2636                 list_add_tail(&work->list, &cm_id_priv->work_list);
2637         spin_unlock_irq(&cm_id_priv->lock);
2638
2639         if (ret)
2640                 cm_process_work(cm_id_priv, work);
2641         else
2642                 cm_deref_id(cm_id_priv);
2643         return 0;
2644 out:
2645         spin_unlock_irq(&cm_id_priv->lock);
2646         cm_deref_id(cm_id_priv);
2647         return -EINVAL;
2648 }
2649
2650 static void cm_format_lap(struct cm_lap_msg *lap_msg,
2651                           struct cm_id_private *cm_id_priv,
2652                           struct ib_sa_path_rec *alternate_path,
2653                           const void *private_data,
2654                           u8 private_data_len)
2655 {
2656         cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
2657                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
2658         lap_msg->local_comm_id = cm_id_priv->id.local_id;
2659         lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
2660         cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
2661         /* todo: need remote CM response timeout */
2662         cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
2663         lap_msg->alt_local_lid = alternate_path->slid;
2664         lap_msg->alt_remote_lid = alternate_path->dlid;
2665         lap_msg->alt_local_gid = alternate_path->sgid;
2666         lap_msg->alt_remote_gid = alternate_path->dgid;
2667         cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
2668         cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
2669         lap_msg->alt_hop_limit = alternate_path->hop_limit;
2670         cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
2671         cm_lap_set_sl(lap_msg, alternate_path->sl);
2672         cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
2673         cm_lap_set_local_ack_timeout(lap_msg,
2674                 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
2675                                alternate_path->packet_life_time));
2676
2677         if (private_data && private_data_len)
2678                 memcpy(lap_msg->private_data, private_data, private_data_len);
2679 }
2680
2681 int ib_send_cm_lap(struct ib_cm_id *cm_id,
2682                    struct ib_sa_path_rec *alternate_path,
2683                    const void *private_data,
2684                    u8 private_data_len)
2685 {
2686         struct cm_id_private *cm_id_priv;
2687         struct ib_mad_send_buf *msg;
2688         unsigned long flags;
2689         int ret;
2690
2691         if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
2692                 return -EINVAL;
2693
2694         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2695         spin_lock_irqsave(&cm_id_priv->lock, flags);
2696         if (cm_id->state != IB_CM_ESTABLISHED ||
2697             (cm_id->lap_state != IB_CM_LAP_UNINIT &&
2698              cm_id->lap_state != IB_CM_LAP_IDLE)) {
2699                 ret = -EINVAL;
2700                 goto out;
2701         }
2702
2703         ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
2704         if (ret)
2705                 goto out;
2706         cm_id_priv->alt_av.timeout =
2707                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2708                                        cm_id_priv->alt_av.timeout - 1);
2709
2710         ret = cm_alloc_msg(cm_id_priv, &msg);
2711         if (ret)
2712                 goto out;
2713
2714         cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
2715                       alternate_path, private_data, private_data_len);
2716         msg->timeout_ms = cm_id_priv->timeout_ms;
2717         msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
2718
2719         ret = ib_post_send_mad(msg, NULL);
2720         if (ret) {
2721                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2722                 cm_free_msg(msg);
2723                 return ret;
2724         }
2725
2726         cm_id->lap_state = IB_CM_LAP_SENT;
2727         cm_id_priv->msg = msg;
2728
2729 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2730         return ret;
2731 }
2732 EXPORT_SYMBOL(ib_send_cm_lap);
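/*
 * Usage sketch (illustrative only, not part of this file): on an
 * established connection, a consumer that has resolved an alternate
 * path record, e.g. through an SA query, can propose it to the peer.
 * my_load_alt_path() and the pre-resolved alt_path are assumptions.
 *
 *	static int my_load_alt_path(struct ib_cm_id *cm_id,
 *				    struct ib_sa_path_rec *alt_path)
 *	{
 *		return ib_send_cm_lap(cm_id, alt_path, NULL, 0);
 *	}
 *
 * ib_send_cm_lap() fails with -EINVAL unless the id is IB_CM_ESTABLISHED
 * with lap_state IB_CM_LAP_UNINIT or IB_CM_LAP_IDLE; the peer's answer
 * is delivered later as an IB_CM_APR_RECEIVED event.
 */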
2733
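/*
 * Convert a received LAP into the receiver's view of the proposed
 * alternate path: the message's "local" fields describe the sender, so
 * local/remote LIDs and GIDs are swapped while filling the path record.
 */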
2734 static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
2735                                     struct ib_sa_path_rec *path,
2736                                     struct cm_lap_msg *lap_msg)
2737 {
2738         memset(path, 0, sizeof *path);
2739         path->dgid = lap_msg->alt_local_gid;
2740         path->sgid = lap_msg->alt_remote_gid;
2741         path->dlid = lap_msg->alt_local_lid;
2742         path->slid = lap_msg->alt_remote_lid;
2743         path->flow_label = cm_lap_get_flow_label(lap_msg);
2744         path->hop_limit = lap_msg->alt_hop_limit;
2745         path->traffic_class = cm_lap_get_traffic_class(lap_msg);
2746         path->reversible = 1;
2747         path->pkey = cm_id_priv->pkey;
2748         path->sl = cm_lap_get_sl(lap_msg);
2749         path->mtu_selector = IB_SA_EQ;
2750         path->mtu = cm_id_priv->path_mtu;
2751         path->rate_selector = IB_SA_EQ;
2752         path->rate = cm_lap_get_packet_rate(lap_msg);
2753         path->packet_life_time_selector = IB_SA_EQ;
2754         path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
2755         path->packet_life_time -= (path->packet_life_time > 0);
2756 }
2757
2758 static int cm_lap_handler(struct cm_work *work)
2759 {
2760         struct cm_id_private *cm_id_priv;
2761         struct cm_lap_msg *lap_msg;
2762         struct ib_cm_lap_event_param *param;
2763         struct ib_mad_send_buf *msg = NULL;
2764         int ret;
2765
2766         /* todo: verify LAP request and send reject APR if invalid. */
2767         lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
2768         cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
2769                                    lap_msg->local_comm_id);
2770         if (!cm_id_priv)
2771                 return -EINVAL;
2772
2773         param = &work->cm_event.param.lap_rcvd;
2774         param->alternate_path = &work->path[0];
2775         cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
2776         work->cm_event.private_data = &lap_msg->private_data;
2777
2778         spin_lock_irq(&cm_id_priv->lock);
2779         if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
2780                 goto unlock;
2781
2782         switch (cm_id_priv->id.lap_state) {
2783         case IB_CM_LAP_UNINIT:
2784         case IB_CM_LAP_IDLE:
2785                 break;
2786         case IB_CM_MRA_LAP_SENT:
2787                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2788                                 counter[CM_LAP_COUNTER]);
2789                 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2790                         goto unlock;
2791
2792                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2793                               CM_MSG_RESPONSE_OTHER,
2794                               cm_id_priv->service_timeout,
2795                               cm_id_priv->private_data,
2796                               cm_id_priv->private_data_len);
2797                 spin_unlock_irq(&cm_id_priv->lock);
2798
2799                 if (ib_post_send_mad(msg, NULL))
2800                         cm_free_msg(msg);
2801                 goto deref;
2802         case IB_CM_LAP_RCVD:
2803                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2804                                 counter[CM_LAP_COUNTER]);
2805                 goto unlock;
2806         default:
2807                 goto unlock;
2808         }
2809
2810         cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
2811         cm_id_priv->tid = lap_msg->hdr.tid;
2812         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
2813                                 work->mad_recv_wc->recv_buf.grh,
2814                                 &cm_id_priv->av);
2815         cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
2816         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2817         if (!ret)
2818                 list_add_tail(&work->list, &cm_id_priv->work_list);
2819         spin_unlock_irq(&cm_id_priv->lock);
2820
2821         if (ret)
2822                 cm_process_work(cm_id_priv, work);
2823         else
2824                 cm_deref_id(cm_id_priv);
2825         return 0;
2826
2827 unlock: spin_unlock_irq(&cm_id_priv->lock);
2828 deref:  cm_deref_id(cm_id_priv);
2829         return -EINVAL;
2830 }
2831
2832 static void cm_format_apr(struct cm_apr_msg *apr_msg,
2833                           struct cm_id_private *cm_id_priv,
2834                           enum ib_cm_apr_status status,
2835                           void *info,
2836                           u8 info_length,
2837                           const void *private_data,
2838                           u8 private_data_len)
2839 {
2840         cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
2841         apr_msg->local_comm_id = cm_id_priv->id.local_id;
2842         apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
2843         apr_msg->ap_status = (u8) status;
2844
2845         if (info && info_length) {
2846                 apr_msg->info_length = info_length;
2847                 memcpy(apr_msg->info, info, info_length);
2848         }
2849
2850         if (private_data && private_data_len)
2851                 memcpy(apr_msg->private_data, private_data, private_data_len);
2852 }
2853
2854 int ib_send_cm_apr(struct ib_cm_id *cm_id,
2855                    enum ib_cm_apr_status status,
2856                    void *info,
2857                    u8 info_length,
2858                    const void *private_data,
2859                    u8 private_data_len)
2860 {
2861         struct cm_id_private *cm_id_priv;
2862         struct ib_mad_send_buf *msg;
2863         unsigned long flags;
2864         int ret;
2865
2866         if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
2867             (info && info_length > IB_CM_APR_INFO_LENGTH))
2868                 return -EINVAL;
2869
2870         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2871         spin_lock_irqsave(&cm_id_priv->lock, flags);
2872         if (cm_id->state != IB_CM_ESTABLISHED ||
2873             (cm_id->lap_state != IB_CM_LAP_RCVD &&
2874              cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
2875                 ret = -EINVAL;
2876                 goto out;
2877         }
2878
2879         ret = cm_alloc_msg(cm_id_priv, &msg);
2880         if (ret)
2881                 goto out;
2882
2883         cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
2884                       info, info_length, private_data, private_data_len);
2885         ret = ib_post_send_mad(msg, NULL);
2886         if (ret) {
2887                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2888                 cm_free_msg(msg);
2889                 return ret;
2890         }
2891
2892         cm_id->lap_state = IB_CM_LAP_IDLE;
2893 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2894         return ret;
2895 }
2896 EXPORT_SYMBOL(ib_send_cm_apr);
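/*
 * Usage sketch (illustrative only): the passive side typically answers a
 * LAP from inside its CM callback, where lap_state is IB_CM_LAP_RCVD.
 * my_cm_handler() is a hypothetical consumer callback.
 *
 *	static int my_cm_handler(struct ib_cm_id *cm_id,
 *				 struct ib_cm_event *event)
 *	{
 *		if (event->event == IB_CM_LAP_RECEIVED)
 *			return ib_send_cm_apr(cm_id, IB_CM_APR_SUCCESS,
 *					      NULL, 0, NULL, 0);
 *		return 0;
 *	}
 */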
2897
2898 static int cm_apr_handler(struct cm_work *work)
2899 {
2900         struct cm_id_private *cm_id_priv;
2901         struct cm_apr_msg *apr_msg;
2902         int ret;
2903
2904         apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
2905         cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
2906                                    apr_msg->local_comm_id);
2907         if (!cm_id_priv)
2908                 return -EINVAL; /* Unmatched reply. */
2909
2910         work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
2911         work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
2912         work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
2913         work->cm_event.private_data = &apr_msg->private_data;
2914
2915         spin_lock_irq(&cm_id_priv->lock);
2916         if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
2917             (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
2918              cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
2919                 spin_unlock_irq(&cm_id_priv->lock);
2920                 goto out;
2921         }
2922         cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
2923         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2924         cm_id_priv->msg = NULL;
2925
2926         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2927         if (!ret)
2928                 list_add_tail(&work->list, &cm_id_priv->work_list);
2929         spin_unlock_irq(&cm_id_priv->lock);
2930
2931         if (ret)
2932                 cm_process_work(cm_id_priv, work);
2933         else
2934                 cm_deref_id(cm_id_priv);
2935         return 0;
2936 out:
2937         cm_deref_id(cm_id_priv);
2938         return -EINVAL;
2939 }
2940
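/*
 * Runs when a connection's timewait period expires: move the cm_id from
 * IB_CM_TIMEWAIT to IB_CM_IDLE and deliver IB_CM_TIMEWAIT_EXIT to the
 * consumer.
 */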
2941 static int cm_timewait_handler(struct cm_work *work)
2942 {
2943         struct cm_timewait_info *timewait_info;
2944         struct cm_id_private *cm_id_priv;
2945         int ret;
2946
2947         timewait_info = (struct cm_timewait_info *)work;
2948         spin_lock_irq(&cm.lock);
2949         list_del(&timewait_info->list);
2950         spin_unlock_irq(&cm.lock);
2951
2952         cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
2953                                    timewait_info->work.remote_id);
2954         if (!cm_id_priv)
2955                 return -EINVAL;
2956
2957         spin_lock_irq(&cm_id_priv->lock);
2958         if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
2959             cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
2960                 spin_unlock_irq(&cm_id_priv->lock);
2961                 goto out;
2962         }
2963         cm_id_priv->id.state = IB_CM_IDLE;
2964         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2965         if (!ret)
2966                 list_add_tail(&work->list, &cm_id_priv->work_list);
2967         spin_unlock_irq(&cm_id_priv->lock);
2968
2969         if (ret)
2970                 cm_process_work(cm_id_priv, work);
2971         else
2972                 cm_deref_id(cm_id_priv);
2973         return 0;
2974 out:
2975         cm_deref_id(cm_id_priv);
2976         return -EINVAL;
2977 }
2978
2979 static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
2980                                struct cm_id_private *cm_id_priv,
2981                                struct ib_cm_sidr_req_param *param)
2982 {
2983         cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
2984                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
2985         sidr_req_msg->request_id = cm_id_priv->id.local_id;
2986         sidr_req_msg->pkey = param->path->pkey;
2987         sidr_req_msg->service_id = param->service_id;
2988
2989         if (param->private_data && param->private_data_len)
2990                 memcpy(sidr_req_msg->private_data, param->private_data,
2991                        param->private_data_len);
2992 }
2993
2994 int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
2995                         struct ib_cm_sidr_req_param *param)
2996 {
2997         struct cm_id_private *cm_id_priv;
2998         struct ib_mad_send_buf *msg;
2999         unsigned long flags;
3000         int ret;
3001
3002         if (!param->path || (param->private_data &&
3003              param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
3004                 return -EINVAL;
3005
3006         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3007         ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
3008         if (ret)
3009                 goto out;
3010
3011         cm_id->service_id = param->service_id;
3012         cm_id->service_mask = ~cpu_to_be64(0);
3013         cm_id_priv->timeout_ms = param->timeout_ms;
3014         cm_id_priv->max_cm_retries = param->max_cm_retries;
3015         ret = cm_alloc_msg(cm_id_priv, &msg);
3016         if (ret)
3017                 goto out;
3018
3019         cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
3020                            param);
3021         msg->timeout_ms = cm_id_priv->timeout_ms;
3022         msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
3023
3024         spin_lock_irqsave(&cm_id_priv->lock, flags);
3025         if (cm_id->state == IB_CM_IDLE)
3026                 ret = ib_post_send_mad(msg, NULL);
3027         else
3028                 ret = -EINVAL;
3029
3030         if (ret) {
3031                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3032                 cm_free_msg(msg);
3033                 goto out;
3034         }
3035         cm_id->state = IB_CM_SIDR_REQ_SENT;
3036         cm_id_priv->msg = msg;
3037         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3038 out:
3039         return ret;
3040 }
3041 EXPORT_SYMBOL(ib_send_cm_sidr_req);
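/*
 * Usage sketch (illustrative only): SIDR resolves a service ID to a
 * remote QPN/Q_Key without setting up a connection.  The path record,
 * service ID, and the timeout/retry values below are assumptions.
 *
 *	static int my_sidr_resolve(struct ib_cm_id *cm_id,
 *				   struct ib_sa_path_rec *path,
 *				   __be64 service_id)
 *	{
 *		struct ib_cm_sidr_req_param param = {
 *			.path		= path,
 *			.service_id	= service_id,
 *			.timeout_ms	= 1000,
 *			.max_cm_retries	= 3,
 *		};
 *
 *		return ib_send_cm_sidr_req(cm_id, &param);
 *	}
 *
 * The cm_id must be IB_CM_IDLE; the result arrives as an
 * IB_CM_SIDR_REP_RECEIVED event carrying the remote qpn and qkey.
 */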
3042
3043 static void cm_format_sidr_req_event(struct cm_work *work,
3044                                      struct ib_cm_id *listen_id)
3045 {
3046         struct cm_sidr_req_msg *sidr_req_msg;
3047         struct ib_cm_sidr_req_event_param *param;
3048
3049         sidr_req_msg = (struct cm_sidr_req_msg *)
3050                                 work->mad_recv_wc->recv_buf.mad;
3051         param = &work->cm_event.param.sidr_req_rcvd;
3052         param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
3053         param->listen_id = listen_id;
3054         param->service_id = sidr_req_msg->service_id;
3055         param->bth_pkey = cm_get_bth_pkey(work);
3056         param->port = work->port->port_num;
3057         work->cm_event.private_data = &sidr_req_msg->private_data;
3058 }
3059
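/*
 * Passive side of SIDR: create a fresh cm_id for the request, match the
 * service ID against the listen table, and pass the event to the
 * listener's callback; unmatched requests are rejected with
 * IB_SIDR_UNSUPPORTED.
 */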
3060 static int cm_sidr_req_handler(struct cm_work *work)
3061 {
3062         struct ib_cm_id *cm_id;
3063         struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
3064         struct cm_sidr_req_msg *sidr_req_msg;
3065         struct ib_wc *wc;
3066
3067         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
3068         if (IS_ERR(cm_id))
3069                 return PTR_ERR(cm_id);
3070         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3071
3072         /* Record SGID/SLID and request ID for lookup. */
3073         sidr_req_msg = (struct cm_sidr_req_msg *)
3074                                 work->mad_recv_wc->recv_buf.mad;
3075         wc = work->mad_recv_wc->wc;
3076         cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
3077         cm_id_priv->av.dgid.global.interface_id = 0;
3078         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3079                                 work->mad_recv_wc->recv_buf.grh,
3080                                 &cm_id_priv->av);
3081         cm_id_priv->id.remote_id = sidr_req_msg->request_id;
3082         cm_id_priv->tid = sidr_req_msg->hdr.tid;
3083         atomic_inc(&cm_id_priv->work_count);
3084
3085         spin_lock_irq(&cm.lock);
3086         cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
3087         if (cur_cm_id_priv) {
3088                 spin_unlock_irq(&cm.lock);
3089                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3090                                 counter[CM_SIDR_REQ_COUNTER]);
3091                 goto out; /* Duplicate message. */
3092         }
3093         cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
3094         cur_cm_id_priv = cm_find_listen(cm_id->device,
3095                                         sidr_req_msg->service_id);
3096         if (!cur_cm_id_priv) {
3097                 spin_unlock_irq(&cm.lock);
3098                 cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
3099                 goto out; /* No match. */
3100         }
3101         atomic_inc(&cur_cm_id_priv->refcount);
3102         atomic_inc(&cm_id_priv->refcount);
3103         spin_unlock_irq(&cm.lock);
3104
3105         cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
3106         cm_id_priv->id.context = cur_cm_id_priv->id.context;
3107         cm_id_priv->id.service_id = sidr_req_msg->service_id;
3108         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
3109
3110         cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
3111         cm_process_work(cm_id_priv, work);
3112         cm_deref_id(cur_cm_id_priv);
3113         return 0;
3114 out:
3115         ib_destroy_cm_id(&cm_id_priv->id);
3116         return -EINVAL;
3117 }
3118
3119 static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3120                                struct cm_id_private *cm_id_priv,
3121                                struct ib_cm_sidr_rep_param *param)
3122 {
3123         cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3124                           cm_id_priv->tid);
3125         sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
3126         sidr_rep_msg->status = param->status;
3127         cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
3128         sidr_rep_msg->service_id = cm_id_priv->id.service_id;
3129         sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
3130
3131         if (param->info && param->info_length)
3132                 memcpy(sidr_rep_msg->info, param->info, param->info_length);
3133
3134         if (param->private_data && param->private_data_len)
3135                 memcpy(sidr_rep_msg->private_data, param->private_data,
3136                        param->private_data_len);
3137 }
3138
3139 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3140                         struct ib_cm_sidr_rep_param *param)
3141 {
3142         struct cm_id_private *cm_id_priv;
3143         struct ib_mad_send_buf *msg;
3144         unsigned long flags;
3145         int ret;
3146
3147         if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3148             (param->private_data &&
3149              param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3150                 return -EINVAL;
3151
3152         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3153         spin_lock_irqsave(&cm_id_priv->lock, flags);
3154         if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3155                 ret = -EINVAL;
3156                 goto error;
3157         }
3158
3159         ret = cm_alloc_msg(cm_id_priv, &msg);
3160         if (ret)
3161                 goto error;
3162
3163         cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3164                            param);
3165         ret = ib_post_send_mad(msg, NULL);
3166         if (ret) {
3167                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3168                 cm_free_msg(msg);
3169                 return ret;
3170         }
3171         cm_id->state = IB_CM_IDLE;
3172         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3173
3174         spin_lock_irqsave(&cm.lock, flags);
3175         rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
3176         spin_unlock_irqrestore(&cm.lock, flags);
3177         return 0;
3178
3179 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3180         return ret;
3181 }
3182 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
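/*
 * Usage sketch (illustrative only): a listener answers the SIDR REQ from
 * its callback while the id is IB_CM_SIDR_REQ_RCVD.  my_qp and MY_QKEY
 * are hypothetical.
 *
 *	static int my_listen_handler(struct ib_cm_id *cm_id,
 *				     struct ib_cm_event *event)
 *	{
 *		struct ib_cm_sidr_rep_param rep = {
 *			.status	= IB_SIDR_SUCCESS,
 *			.qp_num	= my_qp->qp_num,
 *			.qkey	= MY_QKEY,
 *		};
 *
 *		if (event->event == IB_CM_SIDR_REQ_RECEIVED)
 *			return ib_send_cm_sidr_rep(cm_id, &rep);
 *		return 0;
 *	}
 */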
3183
3184 static void cm_format_sidr_rep_event(struct cm_work *work)
3185 {
3186         struct cm_sidr_rep_msg *sidr_rep_msg;
3187         struct ib_cm_sidr_rep_event_param *param;
3188
3189         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3190                                 work->mad_recv_wc->recv_buf.mad;
3191         param = &work->cm_event.param.sidr_rep_rcvd;
3192         param->status = sidr_rep_msg->status;
3193         param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
3194         param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
3195         param->info = &sidr_rep_msg->info;
3196         param->info_len = sidr_rep_msg->info_length;
3197         work->cm_event.private_data = &sidr_rep_msg->private_data;
3198 }
3199
3200 static int cm_sidr_rep_handler(struct cm_work *work)
3201 {
3202         struct cm_sidr_rep_msg *sidr_rep_msg;
3203         struct cm_id_private *cm_id_priv;
3204
3205         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3206                                 work->mad_recv_wc->recv_buf.mad;
3207         cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
3208         if (!cm_id_priv)
3209                 return -EINVAL; /* Unmatched reply. */
3210
3211         spin_lock_irq(&cm_id_priv->lock);
3212         if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3213                 spin_unlock_irq(&cm_id_priv->lock);
3214                 goto out;
3215         }
3216         cm_id_priv->id.state = IB_CM_IDLE;
3217         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3218         spin_unlock_irq(&cm_id_priv->lock);
3219
3220         cm_format_sidr_rep_event(work);
3221         cm_process_work(cm_id_priv, work);
3222         return 0;
3223 out:
3224         cm_deref_id(cm_id_priv);
3225         return -EINVAL;
3226 }
3227
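/*
 * Translate a failed or timed-out send into the *_ERROR event matching
 * the state the message was sent from; sends that are stale or need no
 * response are simply discarded.
 */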
3228 static void cm_process_send_error(struct ib_mad_send_buf *msg,
3229                                   enum ib_wc_status wc_status)
3230 {
3231         struct cm_id_private *cm_id_priv;
3232         struct ib_cm_event cm_event;
3233         enum ib_cm_state state;
3234         int ret;
3235
3236         memset(&cm_event, 0, sizeof cm_event);
3237         cm_id_priv = msg->context[0];
3238
3239         /* Discard old sends or ones without a response. */
3240         spin_lock_irq(&cm_id_priv->lock);
3241         state = (enum ib_cm_state) (unsigned long) msg->context[1];
3242         if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3243                 goto discard;
3244
3245         switch (state) {
3246         case IB_CM_REQ_SENT:
3247         case IB_CM_MRA_REQ_RCVD:
3248                 cm_reset_to_idle(cm_id_priv);
3249                 cm_event.event = IB_CM_REQ_ERROR;
3250                 break;
3251         case IB_CM_REP_SENT:
3252         case IB_CM_MRA_REP_RCVD:
3253                 cm_reset_to_idle(cm_id_priv);
3254                 cm_event.event = IB_CM_REP_ERROR;
3255                 break;
3256         case IB_CM_DREQ_SENT:
3257                 cm_enter_timewait(cm_id_priv);
3258                 cm_event.event = IB_CM_DREQ_ERROR;
3259                 break;
3260         case IB_CM_SIDR_REQ_SENT:
3261                 cm_id_priv->id.state = IB_CM_IDLE;
3262                 cm_event.event = IB_CM_SIDR_REQ_ERROR;
3263                 break;
3264         default:
3265                 goto discard;
3266         }
3267         spin_unlock_irq(&cm_id_priv->lock);
3268         cm_event.param.send_status = wc_status;
3269
3270         /* No other events can occur on the cm_id at this point. */
3271         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3272         cm_free_msg(msg);
3273         if (ret)
3274                 ib_destroy_cm_id(&cm_id_priv->id);
3275         return;
3276 discard:
3277         spin_unlock_irq(&cm_id_priv->lock);
3278         cm_free_msg(msg);
3279 }
3280
3281 static void cm_send_handler(struct ib_mad_agent *mad_agent,
3282                             struct ib_mad_send_wc *mad_send_wc)
3283 {
3284         struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3285         struct cm_port *port;
3286         u16 attr_index;
3287
3288         port = mad_agent->context;
3289         attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3290                                   msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3291
3292         /*
3293          * If the send was in response to a received message (context[0] is not
3294          * set to a cm_id), and is not a REJ, then it is a send that was
3295          * manually retried.
3296          */
3297         if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3298                 msg->retries = 1;
3299
3300         atomic_long_add(1 + msg->retries,
3301                         &port->counter_group[CM_XMIT].counter[attr_index]);
3302         if (msg->retries)
3303                 atomic_long_add(msg->retries,
3304                                 &port->counter_group[CM_XMIT_RETRIES].
3305                                 counter[attr_index]);
3306
3307         switch (mad_send_wc->status) {
3308         case IB_WC_SUCCESS:
3309         case IB_WC_WR_FLUSH_ERR:
3310                 cm_free_msg(msg);
3311                 break;
3312         default:
3313                 if (msg->context[0] && msg->context[1])
3314                         cm_process_send_error(msg, mad_send_wc->status);
3315                 else
3316                         cm_free_msg(msg);
3317                 break;
3318         }
3319 }
3320
3321 static void cm_work_handler(struct work_struct *_work)
3322 {
3323         struct cm_work *work = container_of(_work, struct cm_work, work.work);
3324         int ret;
3325
3326         switch (work->cm_event.event) {
3327         case IB_CM_REQ_RECEIVED:
3328                 ret = cm_req_handler(work);
3329                 break;
3330         case IB_CM_MRA_RECEIVED:
3331                 ret = cm_mra_handler(work);
3332                 break;
3333         case IB_CM_REJ_RECEIVED:
3334                 ret = cm_rej_handler(work);
3335                 break;
3336         case IB_CM_REP_RECEIVED:
3337                 ret = cm_rep_handler(work);
3338                 break;
3339         case IB_CM_RTU_RECEIVED:
3340                 ret = cm_rtu_handler(work);
3341                 break;
3342         case IB_CM_USER_ESTABLISHED:
3343                 ret = cm_establish_handler(work);
3344                 break;
3345         case IB_CM_DREQ_RECEIVED:
3346                 ret = cm_dreq_handler(work);
3347                 break;
3348         case IB_CM_DREP_RECEIVED:
3349                 ret = cm_drep_handler(work);
3350                 break;
3351         case IB_CM_SIDR_REQ_RECEIVED:
3352                 ret = cm_sidr_req_handler(work);
3353                 break;
3354         case IB_CM_SIDR_REP_RECEIVED:
3355                 ret = cm_sidr_rep_handler(work);
3356                 break;
3357         case IB_CM_LAP_RECEIVED:
3358                 ret = cm_lap_handler(work);
3359                 break;
3360         case IB_CM_APR_RECEIVED:
3361                 ret = cm_apr_handler(work);
3362                 break;
3363         case IB_CM_TIMEWAIT_EXIT:
3364                 ret = cm_timewait_handler(work);
3365                 break;
3366         default:
3367                 ret = -EINVAL;
3368                 break;
3369         }
3370         if (ret)
3371                 cm_free_work(work);
3372 }
3373
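/*
 * Handle ib_cm_notify(IB_EVENT_COMM_EST): mark the id established when
 * the consumer sees traffic before the RTU arrives, deferring delivery
 * of the IB_CM_USER_ESTABLISHED event to the workqueue.
 */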
3374 static int cm_establish(struct ib_cm_id *cm_id)
3375 {
3376         struct cm_id_private *cm_id_priv;
3377         struct cm_work *work;
3378         unsigned long flags;
3379         int ret = 0;
3380         struct cm_device *cm_dev;
3381
3382         cm_dev = ib_get_client_data(cm_id->device, &cm_client);
3383         if (!cm_dev)
3384                 return -ENODEV;
3385
3386         work = kmalloc(sizeof *work, GFP_ATOMIC);
3387         if (!work)
3388                 return -ENOMEM;
3389
3390         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3391         spin_lock_irqsave(&cm_id_priv->lock, flags);
3392         switch (cm_id->state) {
3394         case IB_CM_REP_SENT:
3395         case IB_CM_MRA_REP_RCVD:
3396                 cm_id->state = IB_CM_ESTABLISHED;
3397                 break;
3398         case IB_CM_ESTABLISHED:
3399                 ret = -EISCONN;
3400                 break;
3401         default:
3402                 ret = -EINVAL;
3403                 break;
3404         }
3405         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3406
3407         if (ret) {
3408                 kfree(work);
3409                 goto out;
3410         }
3411
3412         /*
3413          * The CM worker thread may try to destroy the cm_id before it
3414          * can execute this work item.  To prevent potential deadlock,
3415          * we need to find the cm_id once we're in the context of the
3416          * worker thread, rather than holding a reference on it.
3417          */
3418         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3419         work->local_id = cm_id->local_id;
3420         work->remote_id = cm_id->remote_id;
3421         work->mad_recv_wc = NULL;
3422         work->cm_event.event = IB_CM_USER_ESTABLISHED;
3423
3424         /* Check if the device started its remove_one */
3425         spin_lock_irq(&cm.lock);
3426         if (!cm_dev->going_down) {
3427                 queue_delayed_work(cm.wq, &work->work, 0);
3428         } else {
3429                 kfree(work);
3430                 ret = -ENODEV;
3431         }
3432         spin_unlock_irq(&cm.lock);
3433
3434 out:
3435         return ret;
3436 }
3437
3438 static int cm_migrate(struct ib_cm_id *cm_id)
3439 {
3440         struct cm_id_private *cm_id_priv;
3441         unsigned long flags;
3442         int ret = 0;
3443
3444         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3445         spin_lock_irqsave(&cm_id_priv->lock, flags);
3446         if (cm_id->state == IB_CM_ESTABLISHED &&
3447             (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3448              cm_id->lap_state == IB_CM_LAP_IDLE)) {
3449                 cm_id->lap_state = IB_CM_LAP_IDLE;
3450                 cm_id_priv->av = cm_id_priv->alt_av;
3451         } else
3452                 ret = -EINVAL;
3453         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3454
3455         return ret;
3456 }
3457
3458 int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3459 {
3460         int ret;
3461
3462         switch (event) {
3463         case IB_EVENT_COMM_EST:
3464                 ret = cm_establish(cm_id);
3465                 break;
3466         case IB_EVENT_PATH_MIG:
3467                 ret = cm_migrate(cm_id);
3468                 break;
3469         default:
3470                 ret = -EINVAL;
3471         }
3472         return ret;
3473 }
3474 EXPORT_SYMBOL(ib_cm_notify);
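/*
 * Usage sketch (illustrative only): a consumer typically calls
 * ib_cm_notify() from its QP async event handler so the CM learns about
 * communication establishment or a completed path migration.  The
 * handler name and the cm_id-in-qp_context convention are assumptions.
 *
 *	static void my_qp_event_handler(struct ib_event *event, void *context)
 *	{
 *		struct ib_cm_id *cm_id = context;
 *
 *		if (event->event == IB_EVENT_COMM_EST ||
 *		    event->event == IB_EVENT_PATH_MIG)
 *			ib_cm_notify(cm_id, event->event);
 *	}
 */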
3475
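/*
 * MAD receive callback: map the CM attribute ID to an event type, count
 * the receive, and queue a work item for cm_work_handler unless the
 * device has started going down.
 */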
3476 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3477                             struct ib_mad_recv_wc *mad_recv_wc)
3478 {
3479         struct cm_port *port = mad_agent->context;
3480         struct cm_work *work;
3481         enum ib_cm_event_type event;
3482         u16 attr_id;
3483         int paths = 0;
3484         int going_down = 0;
3485
3486         switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
3487         case CM_REQ_ATTR_ID:
3488                 paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
3489                                                     alt_local_lid != 0);
3490                 event = IB_CM_REQ_RECEIVED;
3491                 break;
3492         case CM_MRA_ATTR_ID:
3493                 event = IB_CM_MRA_RECEIVED;
3494                 break;
3495         case CM_REJ_ATTR_ID:
3496                 event = IB_CM_REJ_RECEIVED;
3497                 break;
3498         case CM_REP_ATTR_ID:
3499                 event = IB_CM_REP_RECEIVED;
3500                 break;
3501         case CM_RTU_ATTR_ID:
3502                 event = IB_CM_RTU_RECEIVED;
3503                 break;
3504         case CM_DREQ_ATTR_ID:
3505                 event = IB_CM_DREQ_RECEIVED;
3506                 break;
3507         case CM_DREP_ATTR_ID:
3508                 event = IB_CM_DREP_RECEIVED;
3509                 break;
3510         case CM_SIDR_REQ_ATTR_ID:
3511                 event = IB_CM_SIDR_REQ_RECEIVED;
3512                 break;
3513         case CM_SIDR_REP_ATTR_ID:
3514                 event = IB_CM_SIDR_REP_RECEIVED;
3515                 break;
3516         case CM_LAP_ATTR_ID:
3517                 paths = 1;
3518                 event = IB_CM_LAP_RECEIVED;
3519                 break;
3520         case CM_APR_ATTR_ID:
3521                 event = IB_CM_APR_RECEIVED;
3522                 break;
3523         default:
3524                 ib_free_recv_mad(mad_recv_wc);
3525                 return;
3526         }
3527
3528         attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
3529         atomic_long_inc(&port->counter_group[CM_RECV].
3530                         counter[attr_id - CM_ATTR_ID_OFFSET]);
3531
3532         work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
3533                        GFP_KERNEL);
3534         if (!work) {
3535                 ib_free_recv_mad(mad_recv_wc);
3536                 return;
3537         }
3538
3539         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3540         work->cm_event.event = event;
3541         work->mad_recv_wc = mad_recv_wc;
3542         work->port = port;
3543
3544         /* Check if the device started its remove_one */
3545         spin_lock_irq(&cm.lock);
3546         if (!port->cm_dev->going_down)
3547                 queue_delayed_work(cm.wq, &work->work, 0);
3548         else
3549                 going_down = 1;
3550         spin_unlock_irq(&cm.lock);
3551
3552         if (going_down) {
3553                 kfree(work);
3554                 ib_free_recv_mad(mad_recv_wc);
3555         }
3556 }
3557
3558 static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3559                                 struct ib_qp_attr *qp_attr,
3560                                 int *qp_attr_mask)
3561 {
3562         unsigned long flags;
3563         int ret;
3564
3565         spin_lock_irqsave(&cm_id_priv->lock, flags);
3566         switch (cm_id_priv->id.state) {
3567         case IB_CM_REQ_SENT:
3568         case IB_CM_MRA_REQ_RCVD:
3569         case IB_CM_REQ_RCVD:
3570         case IB_CM_MRA_REQ_SENT:
3571         case IB_CM_REP_RCVD:
3572         case IB_CM_MRA_REP_SENT:
3573         case IB_CM_REP_SENT:
3574         case IB_CM_MRA_REP_RCVD:
3575         case IB_CM_ESTABLISHED:
3576                 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
3577                                 IB_QP_PKEY_INDEX | IB_QP_PORT;
3578                 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
3579                 if (cm_id_priv->responder_resources)
3580                         qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
3581                                                     IB_ACCESS_REMOTE_ATOMIC;
3582                 qp_attr->pkey_index = cm_id_priv->av.pkey_index;
3583                 qp_attr->port_num = cm_id_priv->av.port->port_num;
3584                 ret = 0;
3585                 break;
3586         default:
3587                 ret = -EINVAL;
3588                 break;
3589         }
3590         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3591         return ret;
3592 }
3593
3594 static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3595                                struct ib_qp_attr *qp_attr,
3596                                int *qp_attr_mask)
3597 {
3598         unsigned long flags;
3599         int ret;
3600
3601         spin_lock_irqsave(&cm_id_priv->lock, flags);
3602         switch (cm_id_priv->id.state) {
3603         case IB_CM_REQ_RCVD:
3604         case IB_CM_MRA_REQ_SENT:
3605         case IB_CM_REP_RCVD:
3606         case IB_CM_MRA_REP_SENT:
3607         case IB_CM_REP_SENT:
3608         case IB_CM_MRA_REP_RCVD:
3609         case IB_CM_ESTABLISHED:
3610                 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3611                                 IB_QP_DEST_QPN | IB_QP_RQ_PSN;
3612                 qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3613                 if (!cm_id_priv->av.valid) {
3614                         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3615                         return -EINVAL;
3616                 }
3617                 if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
3618                         qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
3619                         *qp_attr_mask |= IB_QP_VID;
3620                 }
3621                 if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
3622                         memcpy(qp_attr->smac, cm_id_priv->av.smac,
3623                                sizeof(qp_attr->smac));
3624                         *qp_attr_mask |= IB_QP_SMAC;
3625                 }
3626                 if (cm_id_priv->alt_av.valid) {
3627                         if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
3628                                 qp_attr->alt_vlan_id =
3629                                         cm_id_priv->alt_av.ah_attr.vlan_id;
3630                                 *qp_attr_mask |= IB_QP_ALT_VID;
3631                         }
3632                         if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
3633                                 memcpy(qp_attr->alt_smac,
3634                                        cm_id_priv->alt_av.smac,
3635                                        sizeof(qp_attr->alt_smac));
3636                                 *qp_attr_mask |= IB_QP_ALT_SMAC;
3637                         }
3638                 }
3639                 qp_attr->path_mtu = cm_id_priv->path_mtu;
3640                 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3641                 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
3642                 if (cm_id_priv->qp_type == IB_QPT_RC ||
3643                     cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
3644                         *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
3645                                          IB_QP_MIN_RNR_TIMER;
3646                         qp_attr->max_dest_rd_atomic =
3647                                         cm_id_priv->responder_resources;
3648                         qp_attr->min_rnr_timer = 0;
3649                 }
3650                 if (cm_id_priv->alt_av.ah_attr.dlid) {
3651                         *qp_attr_mask |= IB_QP_ALT_PATH;
3652                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3653                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3654                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3655                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3656                 }
3657                 ret = 0;
3658                 break;
3659         default:
3660                 ret = -EINVAL;
3661                 break;
3662         }
3663         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3664         return ret;
3665 }
3666
3667 static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3668                                struct ib_qp_attr *qp_attr,
3669                                int *qp_attr_mask)
3670 {
3671         unsigned long flags;
3672         int ret;
3673
3674         spin_lock_irqsave(&cm_id_priv->lock, flags);
3675         switch (cm_id_priv->id.state) {
3676         /* Allow transition to RTS before sending REP */
3677         case IB_CM_REQ_RCVD:
3678         case IB_CM_MRA_REQ_SENT:
3679
3680         case IB_CM_REP_RCVD:
3681         case IB_CM_MRA_REP_SENT:
3682         case IB_CM_REP_SENT:
3683         case IB_CM_MRA_REP_RCVD:
3684         case IB_CM_ESTABLISHED:
3685                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
3686                         *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
3687                         qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
3688                         switch (cm_id_priv->qp_type) {
3689                         case IB_QPT_RC:
3690                         case IB_QPT_XRC_INI:
3691                                 *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
3692                                                  IB_QP_MAX_QP_RD_ATOMIC;
3693                                 qp_attr->retry_cnt = cm_id_priv->retry_count;
3694                                 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3695                                 qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
3696                                 /* fall through */
3697                         case IB_QPT_XRC_TGT:
3698                                 *qp_attr_mask |= IB_QP_TIMEOUT;
3699                                 qp_attr->timeout = cm_id_priv->av.timeout;
3700                                 break;
3701                         default:
3702                                 break;
3703                         }
3704                         if (cm_id_priv->alt_av.ah_attr.dlid) {
3705                                 *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
3706                                 qp_attr->path_mig_state = IB_MIG_REARM;
3707                         }
3708                 } else {
3709                         *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
3710                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3711                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3712                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3713                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3714                         qp_attr->path_mig_state = IB_MIG_REARM;
3715                 }
3716                 ret = 0;
3717                 break;
3718         default:
3719                 ret = -EINVAL;
3720                 break;
3721         }
3722         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3723         return ret;
3724 }
3725
3726 int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3727                        struct ib_qp_attr *qp_attr,
3728                        int *qp_attr_mask)
3729 {
3730         struct cm_id_private *cm_id_priv;
3731         int ret;
3732
3733         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3734         switch (qp_attr->qp_state) {
3735         case IB_QPS_INIT:
3736                 ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
3737                 break;
3738         case IB_QPS_RTR:
3739                 ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
3740                 break;
3741         case IB_QPS_RTS:
3742                 ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
3743                 break;
3744         default:
3745                 ret = -EINVAL;
3746                 break;
3747         }
3748         return ret;
3749 }
3750 EXPORT_SYMBOL(ib_cm_init_qp_attr);
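/*
 * Usage sketch (illustrative only): consumers drive their QP through
 * INIT -> RTR -> RTS by asking the CM for the attributes appropriate to
 * the current connection state.  my_move_qp() is hypothetical.
 *
 *	static int my_move_qp(struct ib_cm_id *cm_id, struct ib_qp *qp,
 *			      enum ib_qp_state state)
 *	{
 *		struct ib_qp_attr attr = { .qp_state = state };
 *		int mask, ret;
 *
 *		ret = ib_cm_init_qp_attr(cm_id, &attr, &mask);
 *		if (ret)
 *			return ret;
 *		return ib_modify_qp(qp, &attr, mask);
 *	}
 *
 * Calling this with IB_QPS_INIT, IB_QPS_RTR and IB_QPS_RTS in turn, e.g.
 * while handling IB_CM_REP_RECEIVED, performs the standard transition.
 */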
3751
3752 static void cm_get_ack_delay(struct cm_device *cm_dev)
3753 {
3754         struct ib_device_attr attr;
3755
3756         if (ib_query_device(cm_dev->ib_device, &attr))
3757                 cm_dev->ack_delay = 0; /* acks will rely on packet life time */
3758         else
3759                 cm_dev->ack_delay = attr.local_ca_ack_delay;
3760 }
3761
3762 static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
3763                                char *buf)
3764 {
3765         struct cm_counter_group *group;
3766         struct cm_counter_attribute *cm_attr;
3767
3768         group = container_of(obj, struct cm_counter_group, obj);
3769         cm_attr = container_of(attr, struct cm_counter_attribute, attr);
3770
3771         return sprintf(buf, "%ld\n",
3772                        atomic_long_read(&group->counter[cm_attr->index]));
3773 }
3774
3775 static const struct sysfs_ops cm_counter_ops = {
3776         .show = cm_show_counter
3777 };
3778
3779 static struct kobj_type cm_counter_obj_type = {
3780         .sysfs_ops = &cm_counter_ops,
3781         .default_attrs = cm_counter_default_attrs
3782 };
3783
3784 static void cm_release_port_obj(struct kobject *obj)
3785 {
3786         struct cm_port *cm_port;
3787
3788         cm_port = container_of(obj, struct cm_port, port_obj);
3789         kfree(cm_port);
3790 }
3791
3792 static struct kobj_type cm_port_obj_type = {
3793         .release = cm_release_port_obj
3794 };
3795
3796 static char *cm_devnode(struct device *dev, umode_t *mode)
3797 {
3798         if (mode)
3799                 *mode = 0666;
3800         return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
3801 }
3802
3803 struct class cm_class = {
3804         .owner   = THIS_MODULE,
3805         .name    = "infiniband_cm",
3806         .devnode = cm_devnode,
3807 };
3808 EXPORT_SYMBOL(cm_class);
3809
3810 static int cm_create_port_fs(struct cm_port *port)
3811 {
3812         int i, ret;
3813
3814         ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
3815                                    &port->cm_dev->device->kobj,
3816                                    "%d", port->port_num);
3817         if (ret) {
3818                 kfree(port);
3819                 return ret;
3820         }
3821
3822         for (i = 0; i < CM_COUNTER_GROUPS; i++) {
3823                 ret = kobject_init_and_add(&port->counter_group[i].obj,
3824                                            &cm_counter_obj_type,
3825                                            &port->port_obj,
3826                                            "%s", counter_group_names[i]);
3827                 if (ret)
3828                         goto error;
3829         }
3830
3831         return 0;
3832
3833 error:
3834         while (i--)
3835                 kobject_put(&port->counter_group[i].obj);
3836         kobject_put(&port->port_obj);
3837         return ret;
3839 }
3840
3841 static void cm_remove_port_fs(struct cm_port *port)
3842 {
3843         int i;
3844
3845         for (i = 0; i < CM_COUNTER_GROUPS; i++)
3846                 kobject_put(&port->counter_group[i].obj);
3847
3848         kobject_put(&port->port_obj);
3849 }
3850
3851 static void cm_add_one(struct ib_device *ib_device)
3852 {
3853         struct cm_device *cm_dev;
3854         struct cm_port *port;
3855         struct ib_mad_reg_req reg_req = {
3856                 .mgmt_class = IB_MGMT_CLASS_CM,
3857                 .mgmt_class_version = IB_CM_CLASS_VERSION,
3858         };
3859         struct ib_port_modify port_modify = {
3860                 .set_port_cap_mask = IB_PORT_CM_SUP
3861         };
3862         unsigned long flags;
3863         int ret;
3864         int count = 0;
3865         u8 i;
3866
3867         cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
3868                          ib_device->phys_port_cnt, GFP_KERNEL);
3869         if (!cm_dev)
3870                 return;
3871
3872         cm_dev->ib_device = ib_device;
3873         cm_get_ack_delay(cm_dev);
3874         cm_dev->going_down = 0;
3875         cm_dev->device = device_create(&cm_class, &ib_device->dev,
3876                                        MKDEV(0, 0), NULL,
3877                                        "%s", ib_device->name);
3878         if (IS_ERR(cm_dev->device)) {
3879                 kfree(cm_dev);
3880                 return;
3881         }
3882
3883         set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
3884         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3885                 if (!rdma_cap_ib_cm(ib_device, i))
3886                         continue;
3887
3888                 port = kzalloc(sizeof *port, GFP_KERNEL);
3889                 if (!port)
3890                         goto error1;
3891
3892                 cm_dev->port[i-1] = port;
3893                 port->cm_dev = cm_dev;
3894                 port->port_num = i;
3895
3896                 ret = cm_create_port_fs(port);
3897                 if (ret)
3898                         goto error1;
3899
3900                 port->mad_agent = ib_register_mad_agent(ib_device, i,
3901                                                         IB_QPT_GSI,
3902                                                         &reg_req,
3903                                                         0,
3904                                                         cm_send_handler,
3905                                                         cm_recv_handler,
3906                                                         port,
3907                                                         0);
3908                 if (IS_ERR(port->mad_agent))
3909                         goto error2;
3910
3911                 ret = ib_modify_port(ib_device, i, 0, &port_modify);
3912                 if (ret)
3913                         goto error3;
3914
3915                 count++;
3916         }
3917
3918         if (!count)
3919                 goto free;
3920
3921         ib_set_client_data(ib_device, &cm_client, cm_dev);
3922
3923         write_lock_irqsave(&cm.device_lock, flags);
3924         list_add_tail(&cm_dev->list, &cm.device_list);
3925         write_unlock_irqrestore(&cm.device_lock, flags);
3926         return;
3927
3928 error3:
3929         ib_unregister_mad_agent(port->mad_agent);
3930 error2:
3931         cm_remove_port_fs(port);
3932 error1:
3933         port_modify.set_port_cap_mask = 0;
3934         port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
3935         while (--i) {
3936                 if (!rdma_cap_ib_cm(ib_device, i))
3937                         continue;
3938
3939                 port = cm_dev->port[i-1];
3940                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
3941                 ib_unregister_mad_agent(port->mad_agent);
3942                 cm_remove_port_fs(port);
3943         }
3944 free:
3945         device_unregister(cm_dev->device);
3946         kfree(cm_dev);
3947 }
3948
3949 static void cm_remove_one(struct ib_device *ib_device, void *client_data)
3950 {
3951         struct cm_device *cm_dev = client_data;
3952         struct cm_port *port;
3953         struct ib_port_modify port_modify = {
3954                 .clr_port_cap_mask = IB_PORT_CM_SUP
3955         };
3956         unsigned long flags;
3957         int i;
3958
3959         if (!cm_dev)
3960                 return;
3961
3962         write_lock_irqsave(&cm.device_lock, flags);
3963         list_del(&cm_dev->list);
3964         write_unlock_irqrestore(&cm.device_lock, flags);
3965
3966         spin_lock_irq(&cm.lock);
3967         cm_dev->going_down = 1;
3968         spin_unlock_irq(&cm.lock);
3969
3970         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3971                 if (!rdma_cap_ib_cm(ib_device, i))
3972                         continue;
3973
3974                 port = cm_dev->port[i-1];
3975                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
3976                 /*
3977                  * Flush the workqueue after setting going_down so that no
3978                  * new work can be queued by the recv handler; only then is
3979                  * it safe to unregister the MAD agent.
3980                  */
3981                 flush_workqueue(cm.wq);
3982                 ib_unregister_mad_agent(port->mad_agent);
3983                 cm_remove_port_fs(port);
3984         }
3985         device_unregister(cm_dev->device);
3986         kfree(cm_dev);
3987 }
3988
3989 static int __init ib_cm_init(void)
3990 {
3991         int ret;
3992
3993         memset(&cm, 0, sizeof cm);
3994         INIT_LIST_HEAD(&cm.device_list);
3995         rwlock_init(&cm.device_lock);
3996         spin_lock_init(&cm.lock);
3997         cm.listen_service_table = RB_ROOT;
3998         cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
3999         cm.remote_id_table = RB_ROOT;
4000         cm.remote_qp_table = RB_ROOT;
4001         cm.remote_sidr_table = RB_ROOT;
4002         idr_init(&cm.local_id_table);
4003         get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
4004         INIT_LIST_HEAD(&cm.timewait_list);
4005
4006         ret = class_register(&cm_class);
4007         if (ret) {
4008                 /* propagate the error from class_register */
4009                 goto error1;
4010         }
4011
4012         cm.wq = create_workqueue("ib_cm");
4013         if (!cm.wq) {
4014                 ret = -ENOMEM;
4015                 goto error2;
4016         }
4017
4018         ret = ib_register_client(&cm_client);
4019         if (ret)
4020                 goto error3;
4021
4022         return 0;
4023 error3:
4024         destroy_workqueue(cm.wq);
4025 error2:
4026         class_unregister(&cm_class);
4027 error1:
4028         idr_destroy(&cm.local_id_table);
4029         return ret;
4030 }
4031
4032 static void __exit ib_cm_cleanup(void)
4033 {
4034         struct cm_timewait_info *timewait_info, *tmp;
4035
4036         spin_lock_irq(&cm.lock);
4037         list_for_each_entry(timewait_info, &cm.timewait_list, list)
4038                 cancel_delayed_work(&timewait_info->work.work);
4039         spin_unlock_irq(&cm.lock);
4040
4041         ib_unregister_client(&cm_client);
4042         destroy_workqueue(cm.wq);
4043
4044         list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
4045                 list_del(&timewait_info->list);
4046                 kfree(timewait_info);
4047         }
4048
4049         class_unregister(&cm_class);
4050         idr_destroy(&cm.local_id_table);
4051 }
4052
4053 module_init(ib_cm_init);
4054 module_exit(ib_cm_cleanup);
4055