/*
 * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
#include <linux/etherdevice.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include "cm_msgs.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");

static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device);

static struct ib_client cm_client = {
        .name   = "cm",
        .add    = cm_add_one,
        .remove = cm_remove_one
};

static struct ib_cm {
        spinlock_t lock;
        struct list_head device_list;
        rwlock_t device_lock;
        struct rb_root listen_service_table;
        u64 listen_service_id;
        /* struct rb_root peer_service_table; todo: fix peer to peer */
        struct rb_root remote_qp_table;
        struct rb_root remote_id_table;
        struct rb_root remote_sidr_table;
        struct idr local_id_table;
        __be32 random_id_operand;
        struct list_head timewait_list;
        struct workqueue_struct *wq;
} cm;

/* Counter indexes ordered by attribute ID */
enum {
        CM_REQ_COUNTER,
        CM_MRA_COUNTER,
        CM_REJ_COUNTER,
        CM_REP_COUNTER,
        CM_RTU_COUNTER,
        CM_DREQ_COUNTER,
        CM_DREP_COUNTER,
        CM_SIDR_REQ_COUNTER,
        CM_SIDR_REP_COUNTER,
        CM_LAP_COUNTER,
        CM_APR_COUNTER,
        CM_ATTR_COUNT,
        CM_ATTR_ID_OFFSET = 0x0010,
};

enum {
        CM_XMIT,
        CM_XMIT_RETRIES,
        CM_RECV,
        CM_RECV_DUPLICATES,
        CM_COUNTER_GROUPS
};

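/*
 * The second dimension is sized to hold the longest group name below,
 * "cm_rx_duplicates", including its terminating NUL.
 */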
static char const counter_group_names[CM_COUNTER_GROUPS]
                                     [sizeof("cm_rx_duplicates")] = {
        "cm_tx_msgs", "cm_tx_retries",
        "cm_rx_msgs", "cm_rx_duplicates"
};

struct cm_counter_group {
        struct kobject obj;
        atomic_long_t counter[CM_ATTR_COUNT];
};

struct cm_counter_attribute {
        struct attribute attr;
        int index;
};

#define CM_COUNTER_ATTR(_name, _index) \
struct cm_counter_attribute cm_##_name##_counter_attr = { \
        .attr = { .name = __stringify(_name), .mode = 0444 }, \
        .index = _index \
}
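/*
 * For example, CM_COUNTER_ATTR(req, CM_REQ_COUNTER) defines
 * cm_req_counter_attr, a read-only (0444) attribute named "req" that
 * indexes counter[CM_REQ_COUNTER] in a cm_counter_group.
 */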

static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);

static struct attribute *cm_counter_default_attrs[] = {
        &cm_req_counter_attr.attr,
        &cm_mra_counter_attr.attr,
        &cm_rej_counter_attr.attr,
        &cm_rep_counter_attr.attr,
        &cm_rtu_counter_attr.attr,
        &cm_dreq_counter_attr.attr,
        &cm_drep_counter_attr.attr,
        &cm_sidr_req_counter_attr.attr,
        &cm_sidr_rep_counter_attr.attr,
        &cm_lap_counter_attr.attr,
        &cm_apr_counter_attr.attr,
        NULL
};

struct cm_port {
        struct cm_device *cm_dev;
        struct ib_mad_agent *mad_agent;
        struct kobject port_obj;
        u8 port_num;
        struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};

struct cm_device {
        struct list_head list;
        struct ib_device *ib_device;
        struct device *device;
        u8 ack_delay;
        struct cm_port *port[0];
};

struct cm_av {
        struct cm_port *port;
        union ib_gid dgid;
        struct ib_ah_attr ah_attr;
        u16 pkey_index;
        u8 timeout;
        u8  valid;
        u8  smac[ETH_ALEN];
};

struct cm_work {
        struct delayed_work work;
        struct list_head list;
        struct cm_port *port;
        struct ib_mad_recv_wc *mad_recv_wc;     /* Received MADs */
        __be32 local_id;                        /* Established / timewait */
        __be32 remote_id;
        struct ib_cm_event cm_event;
        struct ib_sa_path_rec path[0];
};

struct cm_timewait_info {
        struct cm_work work;                    /* Must be first. */
        struct list_head list;
        struct rb_node remote_qp_node;
        struct rb_node remote_id_node;
        __be64 remote_ca_guid;
        __be32 remote_qpn;
        u8 inserted_remote_qp;
        u8 inserted_remote_id;
};

struct cm_id_private {
        struct ib_cm_id id;

        struct rb_node service_node;
        struct rb_node sidr_id_node;
        spinlock_t lock;        /* Do not acquire inside cm.lock */
        struct completion comp;
        atomic_t refcount;

        struct ib_mad_send_buf *msg;
        struct cm_timewait_info *timewait_info;
        /* todo: use alternate port on send failure */
        struct cm_av av;
        struct cm_av alt_av;
        struct ib_cm_compare_data *compare_data;

        void *private_data;
        __be64 tid;
        __be32 local_qpn;
        __be32 remote_qpn;
        enum ib_qp_type qp_type;
        __be32 sq_psn;
        __be32 rq_psn;
        int timeout_ms;
        enum ib_mtu path_mtu;
        __be16 pkey;
        u8 private_data_len;
        u8 max_cm_retries;
        u8 peer_to_peer;
        u8 responder_resources;
        u8 initiator_depth;
        u8 retry_count;
        u8 rnr_retry_count;
        u8 service_timeout;
        u8 target_ack_delay;

        struct list_head work_list;
        atomic_t work_count;
};

static void cm_work_handler(struct work_struct *work);

static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
        if (atomic_dec_and_test(&cm_id_priv->refcount))
                complete(&cm_id_priv->comp);
}

static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
                        struct ib_mad_send_buf **msg)
{
        struct ib_mad_agent *mad_agent;
        struct ib_mad_send_buf *m;
        struct ib_ah *ah;

        mad_agent = cm_id_priv->av.port->mad_agent;
        ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
        if (IS_ERR(ah))
                return PTR_ERR(ah);

        m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
                               cm_id_priv->av.pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC,
                               IB_MGMT_BASE_VERSION);
        if (IS_ERR(m)) {
                ib_destroy_ah(ah);
                return PTR_ERR(m);
        }

        /* Timeout set by caller if response is expected. */
        m->ah = ah;
        m->retries = cm_id_priv->max_cm_retries;

        atomic_inc(&cm_id_priv->refcount);
        m->context[0] = cm_id_priv;
        *msg = m;
        return 0;
}

static int cm_alloc_response_msg(struct cm_port *port,
                                 struct ib_mad_recv_wc *mad_recv_wc,
                                 struct ib_mad_send_buf **msg)
{
        struct ib_mad_send_buf *m;
        struct ib_ah *ah;

        ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
                                  mad_recv_wc->recv_buf.grh, port->port_num);
        if (IS_ERR(ah))
                return PTR_ERR(ah);

        m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC,
                               IB_MGMT_BASE_VERSION);
        if (IS_ERR(m)) {
                ib_destroy_ah(ah);
                return PTR_ERR(m);
        }
        m->ah = ah;
        *msg = m;
        return 0;
}

static void cm_free_msg(struct ib_mad_send_buf *msg)
{
        ib_destroy_ah(msg->ah);
        if (msg->context[0])
                cm_deref_id(msg->context[0]);
        ib_free_send_mad(msg);
}

static void * cm_copy_private_data(const void *private_data,
                                   u8 private_data_len)
{
        void *data;

        if (!private_data || !private_data_len)
                return NULL;

        data = kmemdup(private_data, private_data_len, GFP_KERNEL);
        if (!data)
                return ERR_PTR(-ENOMEM);

        return data;
}

static void cm_set_private_data(struct cm_id_private *cm_id_priv,
                                 void *private_data, u8 private_data_len)
{
        if (cm_id_priv->private_data && cm_id_priv->private_data_len)
                kfree(cm_id_priv->private_data);

        cm_id_priv->private_data = private_data;
        cm_id_priv->private_data_len = private_data_len;
}

static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
                                    struct ib_grh *grh, struct cm_av *av)
{
        av->port = port;
        av->pkey_index = wc->pkey_index;
        ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
                           grh, &av->ah_attr);
}

static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
{
        struct cm_device *cm_dev;
        struct cm_port *port = NULL;
        unsigned long flags;
        int ret;
        u8 p;

        read_lock_irqsave(&cm.device_lock, flags);
        list_for_each_entry(cm_dev, &cm.device_list, list) {
                if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
                                        &p, NULL)) {
                        port = cm_dev->port[p-1];
                        break;
                }
        }
        read_unlock_irqrestore(&cm.device_lock, flags);

        if (!port)
                return -EINVAL;

        ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
                                  be16_to_cpu(path->pkey), &av->pkey_index);
        if (ret)
                return ret;

        av->port = port;
        ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
                             &av->ah_attr);
        av->timeout = path->packet_life_time + 1;
        memcpy(av->smac, path->smac, sizeof(av->smac));

        av->valid = 1;
        return 0;
}

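/*
 * Local IDs come from the idr; XORing with the random operand decouples
 * the communication ID seen on the wire from idr allocation order.
 * Lookup and removal undo the XOR before consulting the idr (see
 * cm_free_id() and cm_get_id()).
 */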
static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
        unsigned long flags;
        int id;

        idr_preload(GFP_KERNEL);
        spin_lock_irqsave(&cm.lock, flags);

        id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);

        spin_unlock_irqrestore(&cm.lock, flags);
        idr_preload_end();

        cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
        return id < 0 ? id : 0;
}

static void cm_free_id(__be32 local_id)
{
        spin_lock_irq(&cm.lock);
        idr_remove(&cm.local_id_table,
                   (__force int) (local_id ^ cm.random_id_operand));
        spin_unlock_irq(&cm.lock);
}

static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
{
        struct cm_id_private *cm_id_priv;

        cm_id_priv = idr_find(&cm.local_id_table,
                              (__force int) (local_id ^ cm.random_id_operand));
        if (cm_id_priv) {
                if (cm_id_priv->id.remote_id == remote_id)
                        atomic_inc(&cm_id_priv->refcount);
                else
                        cm_id_priv = NULL;
        }

        return cm_id_priv;
}

static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
{
        struct cm_id_private *cm_id_priv;

        spin_lock_irq(&cm.lock);
        cm_id_priv = cm_get_id(local_id, remote_id);
        spin_unlock_irq(&cm.lock);

        return cm_id_priv;
}

static void cm_mask_copy(u32 *dst, const u32 *src, const u32 *mask)
{
        int i;

        for (i = 0; i < IB_CM_COMPARE_SIZE; i++)
                dst[i] = src[i] & mask[i];
}

static int cm_compare_data(struct ib_cm_compare_data *src_data,
                           struct ib_cm_compare_data *dst_data)
{
        u32 src[IB_CM_COMPARE_SIZE];
        u32 dst[IB_CM_COMPARE_SIZE];

        if (!src_data || !dst_data)
                return 0;

        cm_mask_copy(src, src_data->data, dst_data->mask);
        cm_mask_copy(dst, dst_data->data, src_data->mask);
        return memcmp(src, dst, sizeof(src));
}

static int cm_compare_private_data(u32 *private_data,
                                   struct ib_cm_compare_data *dst_data)
{
        u32 src[IB_CM_COMPARE_SIZE];

        if (!dst_data)
                return 0;

        cm_mask_copy(src, private_data, dst_data->mask);
        return memcmp(src, dst_data->data, sizeof(src));
}

/*
 * Trivial helpers to strip endian annotation and compare; the
 * endianness doesn't actually matter since we just need a stable
 * order for the RB tree.
 */
static int be32_lt(__be32 a, __be32 b)
{
        return (__force u32) a < (__force u32) b;
}

static int be32_gt(__be32 a, __be32 b)
{
        return (__force u32) a > (__force u32) b;
}

static int be64_lt(__be64 a, __be64 b)
{
        return (__force u64) a < (__force u64) b;
}

static int be64_gt(__be64 a, __be64 b)
{
        return (__force u64) a > (__force u64) b;
}

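/*
 * The listen service tree is keyed by device, then service ID, then the
 * private-data compare buffer.  An insert that matches an existing entry
 * under the (masked) service ID comparison returns that entry instead of
 * inserting a duplicate.
 */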
static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
{
        struct rb_node **link = &cm.listen_service_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        __be64 service_id = cm_id_priv->id.service_id;
        __be64 service_mask = cm_id_priv->id.service_mask;
        int data_cmp;

        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          service_node);
                data_cmp = cm_compare_data(cm_id_priv->compare_data,
                                           cur_cm_id_priv->compare_data);
                if ((cur_cm_id_priv->id.service_mask & service_id) ==
                    (service_mask & cur_cm_id_priv->id.service_id) &&
                    (cm_id_priv->id.device == cur_cm_id_priv->id.device) &&
                    !data_cmp)
                        return cur_cm_id_priv;

                if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
                        link = &(*link)->rb_left;
                else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
                        link = &(*link)->rb_right;
                else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
                        link = &(*link)->rb_left;
                else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
                        link = &(*link)->rb_right;
                else if (data_cmp < 0)
                        link = &(*link)->rb_left;
                else
                        link = &(*link)->rb_right;
        }
        rb_link_node(&cm_id_priv->service_node, parent, link);
        rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
        return NULL;
}

static struct cm_id_private * cm_find_listen(struct ib_device *device,
                                             __be64 service_id,
                                             u32 *private_data)
{
        struct rb_node *node = cm.listen_service_table.rb_node;
        struct cm_id_private *cm_id_priv;
        int data_cmp;

        while (node) {
                cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
                data_cmp = cm_compare_private_data(private_data,
                                                   cm_id_priv->compare_data);
                if ((cm_id_priv->id.service_mask & service_id) ==
                     cm_id_priv->id.service_id &&
                    (cm_id_priv->id.device == device) && !data_cmp)
                        return cm_id_priv;

                if (device < cm_id_priv->id.device)
                        node = node->rb_left;
                else if (device > cm_id_priv->id.device)
                        node = node->rb_right;
                else if (be64_lt(service_id, cm_id_priv->id.service_id))
                        node = node->rb_left;
                else if (be64_gt(service_id, cm_id_priv->id.service_id))
                        node = node->rb_right;
                else if (data_cmp < 0)
                        node = node->rb_left;
                else
                        node = node->rb_right;
        }
        return NULL;
}

static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
                                                     *timewait_info)
{
        struct rb_node **link = &cm.remote_id_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_timewait_info *cur_timewait_info;
        __be64 remote_ca_guid = timewait_info->remote_ca_guid;
        __be32 remote_id = timewait_info->work.remote_id;

        while (*link) {
                parent = *link;
                cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
                                             remote_id_node);
                if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
                        link = &(*link)->rb_right;
                else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_left;
                else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_right;
                else
                        return cur_timewait_info;
        }
        timewait_info->inserted_remote_id = 1;
        rb_link_node(&timewait_info->remote_id_node, parent, link);
        rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
        return NULL;
}

static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
                                                   __be32 remote_id)
{
        struct rb_node *node = cm.remote_id_table.rb_node;
        struct cm_timewait_info *timewait_info;

        while (node) {
                timewait_info = rb_entry(node, struct cm_timewait_info,
                                         remote_id_node);
                if (be32_lt(remote_id, timewait_info->work.remote_id))
                        node = node->rb_left;
                else if (be32_gt(remote_id, timewait_info->work.remote_id))
                        node = node->rb_right;
                else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
                        node = node->rb_left;
                else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
                        node = node->rb_right;
                else
                        return timewait_info;
        }
        return NULL;
}

static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
                                                      *timewait_info)
{
        struct rb_node **link = &cm.remote_qp_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_timewait_info *cur_timewait_info;
        __be64 remote_ca_guid = timewait_info->remote_ca_guid;
        __be32 remote_qpn = timewait_info->remote_qpn;

        while (*link) {
                parent = *link;
                cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
                                             remote_qp_node);
                if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
                        link = &(*link)->rb_right;
                else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_left;
                else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_right;
                else
                        return cur_timewait_info;
        }
        timewait_info->inserted_remote_qp = 1;
        rb_link_node(&timewait_info->remote_qp_node, parent, link);
        rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
        return NULL;
}

static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
                                                    *cm_id_priv)
{
        struct rb_node **link = &cm.remote_sidr_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        union ib_gid *port_gid = &cm_id_priv->av.dgid;
        __be32 remote_id = cm_id_priv->id.remote_id;

        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          sidr_id_node);
                if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
                        link = &(*link)->rb_right;
                else {
                        int cmp;
                        cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
                                     sizeof *port_gid);
                        if (cmp < 0)
                                link = &(*link)->rb_left;
                        else if (cmp > 0)
                                link = &(*link)->rb_right;
                        else
                                return cur_cm_id_priv;
                }
        }
        rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
        rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
        return NULL;
}

static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
                               enum ib_cm_sidr_status status)
{
        struct ib_cm_sidr_rep_param param;

        memset(&param, 0, sizeof param);
        param.status = status;
        ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
}

struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
                                 ib_cm_handler cm_handler,
                                 void *context)
{
        struct cm_id_private *cm_id_priv;
        int ret;

        cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
        if (!cm_id_priv)
                return ERR_PTR(-ENOMEM);

        cm_id_priv->id.state = IB_CM_IDLE;
        cm_id_priv->id.device = device;
        cm_id_priv->id.cm_handler = cm_handler;
        cm_id_priv->id.context = context;
        cm_id_priv->id.remote_cm_qpn = 1;
        ret = cm_alloc_id(cm_id_priv);
        if (ret)
                goto error;

        spin_lock_init(&cm_id_priv->lock);
        init_completion(&cm_id_priv->comp);
        INIT_LIST_HEAD(&cm_id_priv->work_list);
        atomic_set(&cm_id_priv->work_count, -1);
        atomic_set(&cm_id_priv->refcount, 1);
        return &cm_id_priv->id;

error:
        kfree(cm_id_priv);
        return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_create_cm_id);

static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
        struct cm_work *work;

        if (list_empty(&cm_id_priv->work_list))
                return NULL;

        work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
        list_del(&work->list);
        return work;
}

static void cm_free_work(struct cm_work *work)
{
        if (work->mad_recv_wc)
                ib_free_recv_mad(work->mad_recv_wc);
        kfree(work);
}

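/*
 * Illustration: iba_time = 20 encodes 4.096us * 2^20 ~= 4295 ms, and the
 * approximation below gives 1 << (20 - 8) = 4096 ms.  Values of
 * iba_time <= 8 all clamp to 1 ms.
 */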
static inline int cm_convert_to_ms(int iba_time)
{
        /* approximate conversion to ms from 4.096us x 2^iba_time */
        return 1 << max(iba_time - 8, 0);
}

/*
 * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
 * Because of how ack_timeout is stored, adding one doubles the timeout.
 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
 * increment it (round up) only if the other is within 50%.
 */
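/*
 * Worked example: ca_ack_delay = 15 and packet_life_time = 14 give
 * ack_timeout = 15; since ca_ack_delay (15) >= ack_timeout - 1 (14), the
 * result rounds up to 16.  The return value is capped at 31, the largest
 * encodable exponent.
 */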
static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
{
        int ack_timeout = packet_life_time + 1;

        if (ack_timeout >= ca_ack_delay)
                ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
        else
                ack_timeout = ca_ack_delay +
                              (ack_timeout >= (ca_ack_delay - 1));

        return min(31, ack_timeout);
}

static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
{
        if (timewait_info->inserted_remote_id) {
                rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
                timewait_info->inserted_remote_id = 0;
        }

        if (timewait_info->inserted_remote_qp) {
                rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
                timewait_info->inserted_remote_qp = 0;
        }
}

static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
{
        struct cm_timewait_info *timewait_info;

        timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
        if (!timewait_info)
                return ERR_PTR(-ENOMEM);

        timewait_info->work.local_id = local_id;
        INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
        timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
        return timewait_info;
}

static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
{
        int wait_time;
        unsigned long flags;

        spin_lock_irqsave(&cm.lock, flags);
        cm_cleanup_timewait(cm_id_priv->timewait_info);
        list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
        spin_unlock_irqrestore(&cm.lock, flags);

        /*
         * The cm_id could be destroyed by the user before we exit timewait.
         * To protect against this, we search for the cm_id after exiting
         * timewait before notifying the user that we've exited timewait.
         */
        cm_id_priv->id.state = IB_CM_TIMEWAIT;
        wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
        queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
                           msecs_to_jiffies(wait_time));
        cm_id_priv->timewait_info = NULL;
}

static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
        unsigned long flags;

        cm_id_priv->id.state = IB_CM_IDLE;
        if (cm_id_priv->timewait_info) {
                spin_lock_irqsave(&cm.lock, flags);
                cm_cleanup_timewait(cm_id_priv->timewait_info);
                spin_unlock_irqrestore(&cm.lock, flags);
                kfree(cm_id_priv->timewait_info);
                cm_id_priv->timewait_info = NULL;
        }
}

static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
        struct cm_id_private *cm_id_priv;
        struct cm_work *work;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
retest:
        spin_lock_irq(&cm_id_priv->lock);
        switch (cm_id->state) {
        case IB_CM_LISTEN:
                cm_id->state = IB_CM_IDLE;
                spin_unlock_irq(&cm_id_priv->lock);
                spin_lock_irq(&cm.lock);
                rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
                spin_unlock_irq(&cm.lock);
                break;
        case IB_CM_SIDR_REQ_SENT:
                cm_id->state = IB_CM_IDLE;
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_SIDR_REQ_RCVD:
                spin_unlock_irq(&cm_id_priv->lock);
                cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
                break;
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
                               &cm_id_priv->id.device->node_guid,
                               sizeof cm_id_priv->id.device->node_guid,
                               NULL, 0);
                break;
        case IB_CM_REQ_RCVD:
                if (err == -ENOMEM) {
                        /* Do not reject to allow future retries. */
                        cm_reset_to_idle(cm_id_priv);
                        spin_unlock_irq(&cm_id_priv->lock);
                } else {
                        spin_unlock_irq(&cm_id_priv->lock);
                        ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                                       NULL, 0, NULL, 0);
                }
                break;
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                /* Fall through */
        case IB_CM_MRA_REQ_SENT:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                               NULL, 0, NULL, 0);
                break;
        case IB_CM_ESTABLISHED:
                spin_unlock_irq(&cm_id_priv->lock);
                if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
                        break;
                ib_send_cm_dreq(cm_id, NULL, 0);
                goto retest;
        case IB_CM_DREQ_SENT:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                cm_enter_timewait(cm_id_priv);
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_DREQ_RCVD:
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_drep(cm_id, NULL, 0);
                break;
        default:
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        }

        cm_free_id(cm_id->local_id);
        cm_deref_id(cm_id_priv);
        wait_for_completion(&cm_id_priv->comp);
        while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
                cm_free_work(work);
        kfree(cm_id_priv->compare_data);
        kfree(cm_id_priv->private_data);
        kfree(cm_id_priv);
}

void ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
        cm_destroy_id(cm_id, 0);
}
EXPORT_SYMBOL(ib_destroy_cm_id);

int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
                 struct ib_cm_compare_data *compare_data)
{
        struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
        unsigned long flags;
        int ret = 0;

        service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
        service_id &= service_mask;
        if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
            (service_id != IB_CM_ASSIGN_SERVICE_ID))
                return -EINVAL;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        if (cm_id->state != IB_CM_IDLE)
                return -EINVAL;

        if (compare_data) {
                cm_id_priv->compare_data = kzalloc(sizeof *compare_data,
                                                   GFP_KERNEL);
                if (!cm_id_priv->compare_data)
                        return -ENOMEM;
                cm_mask_copy(cm_id_priv->compare_data->data,
                             compare_data->data, compare_data->mask);
                memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
                       sizeof(compare_data->mask));
        }

        cm_id->state = IB_CM_LISTEN;

        spin_lock_irqsave(&cm.lock, flags);
        if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
                cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
                cm_id->service_mask = ~cpu_to_be64(0);
        } else {
                cm_id->service_id = service_id;
                cm_id->service_mask = service_mask;
        }
        cur_cm_id_priv = cm_insert_listen(cm_id_priv);
        spin_unlock_irqrestore(&cm.lock, flags);

        if (cur_cm_id_priv) {
                cm_id->state = IB_CM_IDLE;
                kfree(cm_id_priv->compare_data);
                cm_id_priv->compare_data = NULL;
                ret = -EBUSY;
        }
        return ret;
}
EXPORT_SYMBOL(ib_cm_listen);

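/*
 * A CM TID carries the owning MAD agent's hi_tid in the upper 32 bits
 * and the local communication ID in the lower 32 bits, with the message
 * sequence ORed into bits 31:30 so that different transaction types on
 * the same cm_id use distinct TIDs.
 */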
static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
                          enum cm_msg_sequence msg_seq)
{
        u64 hi_tid, low_tid;

        hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
        low_tid  = (u64) ((__force u32)cm_id_priv->id.local_id |
                          (msg_seq << 30));
        return cpu_to_be64(hi_tid | low_tid);
}

static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
                              __be16 attr_id, __be64 tid)
{
        hdr->base_version  = IB_MGMT_BASE_VERSION;
        hdr->mgmt_class    = IB_MGMT_CLASS_CM;
        hdr->class_version = IB_CM_CLASS_VERSION;
        hdr->method        = IB_MGMT_METHOD_SEND;
        hdr->attr_id       = attr_id;
        hdr->tid           = tid;
}

static void cm_format_req(struct cm_req_msg *req_msg,
                          struct cm_id_private *cm_id_priv,
                          struct ib_cm_req_param *param)
{
        struct ib_sa_path_rec *pri_path = param->primary_path;
        struct ib_sa_path_rec *alt_path = param->alternate_path;

        cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
                          cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));

        req_msg->local_comm_id = cm_id_priv->id.local_id;
        req_msg->service_id = param->service_id;
        req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
        cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
        cm_req_set_init_depth(req_msg, param->initiator_depth);
        cm_req_set_remote_resp_timeout(req_msg,
                                       param->remote_cm_response_timeout);
        cm_req_set_qp_type(req_msg, param->qp_type);
        cm_req_set_flow_ctrl(req_msg, param->flow_control);
        cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
        cm_req_set_local_resp_timeout(req_msg,
                                      param->local_cm_response_timeout);
        req_msg->pkey = param->primary_path->pkey;
        cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
        cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);

        if (param->qp_type != IB_QPT_XRC_INI) {
                cm_req_set_resp_res(req_msg, param->responder_resources);
                cm_req_set_retry_count(req_msg, param->retry_count);
                cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
                cm_req_set_srq(req_msg, param->srq);
        }

        if (pri_path->hop_limit <= 1) {
                req_msg->primary_local_lid = pri_path->slid;
                req_msg->primary_remote_lid = pri_path->dlid;
        } else {
                /* Work-around until there's a way to obtain remote LID info */
                req_msg->primary_local_lid = IB_LID_PERMISSIVE;
                req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
        }
        req_msg->primary_local_gid = pri_path->sgid;
        req_msg->primary_remote_gid = pri_path->dgid;
        cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
        cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
        req_msg->primary_traffic_class = pri_path->traffic_class;
        req_msg->primary_hop_limit = pri_path->hop_limit;
        cm_req_set_primary_sl(req_msg, pri_path->sl);
        cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
        cm_req_set_primary_local_ack_timeout(req_msg,
                cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
                               pri_path->packet_life_time));

        if (alt_path) {
                if (alt_path->hop_limit <= 1) {
                        req_msg->alt_local_lid = alt_path->slid;
                        req_msg->alt_remote_lid = alt_path->dlid;
                } else {
                        req_msg->alt_local_lid = IB_LID_PERMISSIVE;
                        req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
                }
                req_msg->alt_local_gid = alt_path->sgid;
                req_msg->alt_remote_gid = alt_path->dgid;
                cm_req_set_alt_flow_label(req_msg,
                                          alt_path->flow_label);
                cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
                req_msg->alt_traffic_class = alt_path->traffic_class;
                req_msg->alt_hop_limit = alt_path->hop_limit;
                cm_req_set_alt_sl(req_msg, alt_path->sl);
                cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
                cm_req_set_alt_local_ack_timeout(req_msg,
                        cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
                                       alt_path->packet_life_time));
        }

        if (param->private_data && param->private_data_len)
                memcpy(req_msg->private_data, param->private_data,
                       param->private_data_len);
}

static int cm_validate_req_param(struct ib_cm_req_param *param)
{
        /* peer-to-peer not supported */
        if (param->peer_to_peer)
                return -EINVAL;

        if (!param->primary_path)
                return -EINVAL;

        if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
            param->qp_type != IB_QPT_XRC_INI)
                return -EINVAL;

        if (param->private_data &&
            param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
                return -EINVAL;

        if (param->alternate_path &&
            (param->alternate_path->pkey != param->primary_path->pkey ||
             param->alternate_path->mtu != param->primary_path->mtu))
                return -EINVAL;

        return 0;
}

int ib_send_cm_req(struct ib_cm_id *cm_id,
                   struct ib_cm_req_param *param)
{
        struct cm_id_private *cm_id_priv;
        struct cm_req_msg *req_msg;
        unsigned long flags;
        int ret;

        ret = cm_validate_req_param(param);
        if (ret)
                return ret;

        /* Verify that we're not in timewait. */
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id->state != IB_CM_IDLE) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                ret = -EINVAL;
                goto out;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
                                                            id.local_id);
        if (IS_ERR(cm_id_priv->timewait_info)) {
                ret = PTR_ERR(cm_id_priv->timewait_info);
                goto out;
        }

        ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
        if (ret)
                goto error1;
        if (param->alternate_path) {
                ret = cm_init_av_by_path(param->alternate_path,
                                         &cm_id_priv->alt_av);
                if (ret)
                        goto error1;
        }
        cm_id->service_id = param->service_id;
        cm_id->service_mask = ~cpu_to_be64(0);
        cm_id_priv->timeout_ms = cm_convert_to_ms(
                                    param->primary_path->packet_life_time) * 2 +
                                 cm_convert_to_ms(
                                    param->remote_cm_response_timeout);
        cm_id_priv->max_cm_retries = param->max_cm_retries;
        cm_id_priv->initiator_depth = param->initiator_depth;
        cm_id_priv->responder_resources = param->responder_resources;
        cm_id_priv->retry_count = param->retry_count;
        cm_id_priv->path_mtu = param->primary_path->mtu;
        cm_id_priv->pkey = param->primary_path->pkey;
        cm_id_priv->qp_type = param->qp_type;

        ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
        if (ret)
                goto error1;

        req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
        cm_format_req(req_msg, cm_id_priv, param);
        cm_id_priv->tid = req_msg->hdr.tid;
        cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
        cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;

        cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
        cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        ret = ib_post_send_mad(cm_id_priv->msg, NULL);
        if (ret) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                goto error2;
        }
        BUG_ON(cm_id->state != IB_CM_IDLE);
        cm_id->state = IB_CM_REQ_SENT;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return 0;

error2: cm_free_msg(cm_id_priv->msg);
error1: kfree(cm_id_priv->timewait_info);
out:    return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);

static int cm_issue_rej(struct cm_port *port,
                        struct ib_mad_recv_wc *mad_recv_wc,
                        enum ib_cm_rej_reason reason,
                        enum cm_msg_response msg_rejected,
                        void *ari, u8 ari_length)
{
        struct ib_mad_send_buf *msg = NULL;
        struct cm_rej_msg *rej_msg, *rcv_msg;
        int ret;

        ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
        if (ret)
                return ret;

        /* We just need common CM header information.  Cast to any message. */
        rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
        rej_msg = (struct cm_rej_msg *) msg->mad;

        cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
        rej_msg->remote_comm_id = rcv_msg->local_comm_id;
        rej_msg->local_comm_id = rcv_msg->remote_comm_id;
        cm_rej_set_msg_rejected(rej_msg, msg_rejected);
        rej_msg->reason = cpu_to_be16(reason);

        if (ari && ari_length) {
                cm_rej_set_reject_info_len(rej_msg, ari_length);
                memcpy(rej_msg->ari, ari, ari_length);
        }

        ret = ib_post_send_mad(msg, NULL);
        if (ret)
                cm_free_msg(msg);

        return ret;
}

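/*
 * If both ends of a peer-to-peer connection send REQs, the side with the
 * numerically larger CA GUID (or, on a GUID tie, the larger QPN) is
 * treated as the active peer.
 */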
1228 static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
1229                                     __be32 local_qpn, __be32 remote_qpn)
1230 {
1231         return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
1232                 ((local_ca_guid == remote_ca_guid) &&
1233                  (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
1234 }
1235
1236 static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
1237                                             struct ib_sa_path_rec *primary_path,
1238                                             struct ib_sa_path_rec *alt_path)
1239 {
1240         memset(primary_path, 0, sizeof *primary_path);
1241         primary_path->dgid = req_msg->primary_local_gid;
1242         primary_path->sgid = req_msg->primary_remote_gid;
1243         primary_path->dlid = req_msg->primary_local_lid;
1244         primary_path->slid = req_msg->primary_remote_lid;
1245         primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
1246         primary_path->hop_limit = req_msg->primary_hop_limit;
1247         primary_path->traffic_class = req_msg->primary_traffic_class;
1248         primary_path->reversible = 1;
1249         primary_path->pkey = req_msg->pkey;
1250         primary_path->sl = cm_req_get_primary_sl(req_msg);
1251         primary_path->mtu_selector = IB_SA_EQ;
1252         primary_path->mtu = cm_req_get_path_mtu(req_msg);
1253         primary_path->rate_selector = IB_SA_EQ;
1254         primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
1255         primary_path->packet_life_time_selector = IB_SA_EQ;
1256         primary_path->packet_life_time =
1257                 cm_req_get_primary_local_ack_timeout(req_msg);
1258         primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
1259
1260         if (req_msg->alt_local_lid) {
1261                 memset(alt_path, 0, sizeof *alt_path);
1262                 alt_path->dgid = req_msg->alt_local_gid;
1263                 alt_path->sgid = req_msg->alt_remote_gid;
1264                 alt_path->dlid = req_msg->alt_local_lid;
1265                 alt_path->slid = req_msg->alt_remote_lid;
1266                 alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
1267                 alt_path->hop_limit = req_msg->alt_hop_limit;
1268                 alt_path->traffic_class = req_msg->alt_traffic_class;
1269                 alt_path->reversible = 1;
1270                 alt_path->pkey = req_msg->pkey;
1271                 alt_path->sl = cm_req_get_alt_sl(req_msg);
1272                 alt_path->mtu_selector = IB_SA_EQ;
1273                 alt_path->mtu = cm_req_get_path_mtu(req_msg);
1274                 alt_path->rate_selector = IB_SA_EQ;
1275                 alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
1276                 alt_path->packet_life_time_selector = IB_SA_EQ;
1277                 alt_path->packet_life_time =
1278                         cm_req_get_alt_local_ack_timeout(req_msg);
1279                 alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
1280         }
1281 }
1282
1283 static void cm_format_req_event(struct cm_work *work,
1284                                 struct cm_id_private *cm_id_priv,
1285                                 struct ib_cm_id *listen_id)
1286 {
1287         struct cm_req_msg *req_msg;
1288         struct ib_cm_req_event_param *param;
1289
1290         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1291         param = &work->cm_event.param.req_rcvd;
1292         param->listen_id = listen_id;
1293         param->port = cm_id_priv->av.port->port_num;
1294         param->primary_path = &work->path[0];
1295         if (req_msg->alt_local_lid)
1296                 param->alternate_path = &work->path[1];
1297         else
1298                 param->alternate_path = NULL;
1299         param->remote_ca_guid = req_msg->local_ca_guid;
1300         param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
1301         param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
1302         param->qp_type = cm_req_get_qp_type(req_msg);
1303         param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
1304         param->responder_resources = cm_req_get_init_depth(req_msg);
1305         param->initiator_depth = cm_req_get_resp_res(req_msg);
1306         param->local_cm_response_timeout =
1307                                         cm_req_get_remote_resp_timeout(req_msg);
1308         param->flow_control = cm_req_get_flow_ctrl(req_msg);
1309         param->remote_cm_response_timeout =
1310                                         cm_req_get_local_resp_timeout(req_msg);
1311         param->retry_count = cm_req_get_retry_count(req_msg);
1312         param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1313         param->srq = cm_req_get_srq(req_msg);
1314         work->cm_event.private_data = &req_msg->private_data;
1315 }
1316
1317 static void cm_process_work(struct cm_id_private *cm_id_priv,
1318                             struct cm_work *work)
1319 {
1320         int ret;
1321
1322         /* We will typically only have the current event to report. */
1323         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
1324         cm_free_work(work);
1325
1326         while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
1327                 spin_lock_irq(&cm_id_priv->lock);
1328                 work = cm_dequeue_work(cm_id_priv);
1329                 spin_unlock_irq(&cm_id_priv->lock);
1330                 BUG_ON(!work);
1331                 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1332                                                 &work->cm_event);
1333                 cm_free_work(work);
1334         }
1335         cm_deref_id(cm_id_priv);
1336         if (ret)
1337                 cm_destroy_id(&cm_id_priv->id, ret);
1338 }
1339
1340 static void cm_format_mra(struct cm_mra_msg *mra_msg,
1341                           struct cm_id_private *cm_id_priv,
1342                           enum cm_msg_response msg_mraed, u8 service_timeout,
1343                           const void *private_data, u8 private_data_len)
1344 {
1345         cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
1346         cm_mra_set_msg_mraed(mra_msg, msg_mraed);
1347         mra_msg->local_comm_id = cm_id_priv->id.local_id;
1348         mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
1349         cm_mra_set_service_timeout(mra_msg, service_timeout);
1350
1351         if (private_data && private_data_len)
1352                 memcpy(mra_msg->private_data, private_data, private_data_len);
1353 }
1354
1355 static void cm_format_rej(struct cm_rej_msg *rej_msg,
1356                           struct cm_id_private *cm_id_priv,
1357                           enum ib_cm_rej_reason reason,
1358                           void *ari,
1359                           u8 ari_length,
1360                           const void *private_data,
1361                           u8 private_data_len)
1362 {
1363         cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
1364         rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
1365
1366         switch(cm_id_priv->id.state) {
1367         case IB_CM_REQ_RCVD:
1368                 rej_msg->local_comm_id = 0;
1369                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1370                 break;
1371         case IB_CM_MRA_REQ_SENT:
1372                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1373                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1374                 break;
1375         case IB_CM_REP_RCVD:
1376         case IB_CM_MRA_REP_SENT:
1377                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1378                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
1379                 break;
1380         default:
1381                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1382                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
1383                 break;
1384         }
1385
1386         rej_msg->reason = cpu_to_be16(reason);
1387         if (ari && ari_length) {
1388                 cm_rej_set_reject_info_len(rej_msg, ari_length);
1389                 memcpy(rej_msg->ari, ari, ari_length);
1390         }
1391
1392         if (private_data && private_data_len)
1393                 memcpy(rej_msg->private_data, private_data, private_data_len);
1394 }
1395
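/*
 * Respond to a duplicate REQ: a REQ that was already MRA'd gets the
 * MRA resent, a connection that has reached timewait gets a
 * stale-connection REJ, and anything else drops the duplicate
 * silently.
 */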
1396 static void cm_dup_req_handler(struct cm_work *work,
1397                                struct cm_id_private *cm_id_priv)
1398 {
1399         struct ib_mad_send_buf *msg = NULL;
1400         int ret;
1401
1402         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1403                         counter[CM_REQ_COUNTER]);
1404
1405         /* Quick state check to discard duplicate REQs. */
1406         if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1407                 return;
1408
1409         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1410         if (ret)
1411                 return;
1412
1413         spin_lock_irq(&cm_id_priv->lock);
1414         switch (cm_id_priv->id.state) {
1415         case IB_CM_MRA_REQ_SENT:
1416                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1417                               CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1418                               cm_id_priv->private_data,
1419                               cm_id_priv->private_data_len);
1420                 break;
1421         case IB_CM_TIMEWAIT:
1422                 cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1423                               IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1424                 break;
1425         default:
1426                 goto unlock;
1427         }
1428         spin_unlock_irq(&cm_id_priv->lock);
1429
1430         ret = ib_post_send_mad(msg, NULL);
1431         if (ret)
1432                 goto free;
1433         return;
1434
1435 unlock: spin_unlock_irq(&cm_id_priv->lock);
1436 free:   cm_free_msg(msg);
1437 }
1438
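/*
 * Match an incoming REQ against outstanding connections and listens.
 * Duplicate REQs are diverted to cm_dup_req_handler(), stale
 * connections and REQs without a listener are rejected, and on success
 * the matching listen cm_id is returned with an extra reference held.
 */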
1439 static struct cm_id_private *cm_match_req(struct cm_work *work,
1440                                           struct cm_id_private *cm_id_priv)
1441 {
1442         struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1443         struct cm_timewait_info *timewait_info;
1444         struct cm_req_msg *req_msg;
1445
1446         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1447
1448         /* Check for possible duplicate REQ. */
1449         spin_lock_irq(&cm.lock);
1450         timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1451         if (timewait_info) {
1452                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1453                                            timewait_info->work.remote_id);
1454                 spin_unlock_irq(&cm.lock);
1455                 if (cur_cm_id_priv) {
1456                         cm_dup_req_handler(work, cur_cm_id_priv);
1457                         cm_deref_id(cur_cm_id_priv);
1458                 }
1459                 return NULL;
1460         }
1461
1462         /* Check for stale connections. */
1463         timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1464         if (timewait_info) {
1465                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1466                 spin_unlock_irq(&cm.lock);
1467                 cm_issue_rej(work->port, work->mad_recv_wc,
1468                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1469                              NULL, 0);
1470                 return NULL;
1471         }
1472
1473         /* Find matching listen request. */
1474         listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
1475                                            req_msg->service_id,
1476                                            req_msg->private_data);
1477         if (!listen_cm_id_priv) {
1478                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1479                 spin_unlock_irq(&cm.lock);
1480                 cm_issue_rej(work->port, work->mad_recv_wc,
1481                              IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1482                              NULL, 0);
1483                 goto out;
1484         }
1485         atomic_inc(&listen_cm_id_priv->refcount);
1486         atomic_inc(&cm_id_priv->refcount);
1487         cm_id_priv->id.state = IB_CM_REQ_RCVD;
1488         atomic_inc(&cm_id_priv->work_count);
1489         spin_unlock_irq(&cm.lock);
1490 out:
1491         return listen_cm_id_priv;
1492 }
1493
1494 /*
1495  * Work-around for inter-subnet connections.  If the LIDs are permissive,
1496  * we need to override the LID/SL data in the REQ with the LID information
1497  * in the work completion.
1498  */
1499 static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1500 {
1501         if (!cm_req_get_primary_subnet_local(req_msg)) {
1502                 if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
1503                         req_msg->primary_local_lid = cpu_to_be16(wc->slid);
1504                         cm_req_set_primary_sl(req_msg, wc->sl);
1505                 }
1506
1507                 if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
1508                         req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1509         }
1510
1511         if (!cm_req_get_alt_subnet_local(req_msg)) {
1512                 if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
1513                         req_msg->alt_local_lid = cpu_to_be16(wc->slid);
1514                         cm_req_set_alt_sl(req_msg, wc->sl);
1515                 }
1516
1517                 if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
1518                         req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1519         }
1520 }
1521
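/*
 * Handle a newly received REQ: create a cm_id for the connection,
 * match the REQ against a listener, resolve the primary and any
 * alternate path, and report the event to the listener's callback.
 */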
1522 static int cm_req_handler(struct cm_work *work)
1523 {
1524         struct ib_cm_id *cm_id;
1525         struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1526         struct cm_req_msg *req_msg;
1527         int ret;
1528
1529         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1530
1531         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1532         if (IS_ERR(cm_id))
1533                 return PTR_ERR(cm_id);
1534
1535         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1536         cm_id_priv->id.remote_id = req_msg->local_comm_id;
1537         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1538                                 work->mad_recv_wc->recv_buf.grh,
1539                                 &cm_id_priv->av);
1540         cm_id_priv->timewait_info =
1541                         cm_create_timewait_info(cm_id_priv->id.local_id);
1542         if (IS_ERR(cm_id_priv->timewait_info)) {
1543                 ret = PTR_ERR(cm_id_priv->timewait_info);
1544                 goto destroy;
1545         }
1546         cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1547         cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
1548         cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
1549
1550         listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1551         if (!listen_cm_id_priv) {
1552                 ret = -EINVAL;
1553                 kfree(cm_id_priv->timewait_info);
1554                 goto destroy;
1555         }
1556
1557         cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
1558         cm_id_priv->id.context = listen_cm_id_priv->id.context;
1559         cm_id_priv->id.service_id = req_msg->service_id;
1560         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
1561
1562         cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1563         cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1564
1565         memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
1566         work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
1567         ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
1568         if (ret) {
1569                 ib_get_cached_gid(work->port->cm_dev->ib_device,
1570                                   work->port->port_num, 0, &work->path[0].sgid);
1571                 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1572                                &work->path[0].sgid, sizeof work->path[0].sgid,
1573                                NULL, 0);
1574                 goto rejected;
1575         }
1576         if (req_msg->alt_local_lid) {
1577                 ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
1578                 if (ret) {
1579                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
1580                                        &work->path[0].sgid,
1581                                        sizeof work->path[0].sgid, NULL, 0);
1582                         goto rejected;
1583                 }
1584         }
1585         cm_id_priv->tid = req_msg->hdr.tid;
1586         cm_id_priv->timeout_ms = cm_convert_to_ms(
1587                                         cm_req_get_local_resp_timeout(req_msg));
1588         cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
1589         cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
1590         cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
1591         cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
1592         cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
1593         cm_id_priv->pkey = req_msg->pkey;
1594         cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
1595         cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
1596         cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1597         cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
1598
1599         cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
1600         cm_process_work(cm_id_priv, work);
1601         cm_deref_id(listen_cm_id_priv);
1602         return 0;
1603
1604 rejected:
1605         atomic_dec(&cm_id_priv->refcount);
1606         cm_deref_id(listen_cm_id_priv);
1607 destroy:
1608         ib_destroy_cm_id(cm_id);
1609         return ret;
1610 }
1611
1612 static void cm_format_rep(struct cm_rep_msg *rep_msg,
1613                           struct cm_id_private *cm_id_priv,
1614                           struct ib_cm_rep_param *param)
1615 {
1616         cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
1617         rep_msg->local_comm_id = cm_id_priv->id.local_id;
1618         rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
1619         cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
1620         rep_msg->resp_resources = param->responder_resources;
1621         cm_rep_set_target_ack_delay(rep_msg,
1622                                     cm_id_priv->av.port->cm_dev->ack_delay);
1623         cm_rep_set_failover(rep_msg, param->failover_accepted);
1624         cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
1625         rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1626
1627         if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
1628                 rep_msg->initiator_depth = param->initiator_depth;
1629                 cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1630                 cm_rep_set_srq(rep_msg, param->srq);
1631                 cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
1632         } else {
1633                 cm_rep_set_srq(rep_msg, 1);
1634                 cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
1635         }
1636
1637         if (param->private_data && param->private_data_len)
1638                 memcpy(rep_msg->private_data, param->private_data,
1639                        param->private_data_len);
1640 }
1641
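/*
 * Send a REP in response to a connection request.  The cm_id must be
 * in the REQ_RCVD or MRA_REQ_SENT state; on success it transitions to
 * REP_SENT.
 */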
1642 int ib_send_cm_rep(struct ib_cm_id *cm_id,
1643                    struct ib_cm_rep_param *param)
1644 {
1645         struct cm_id_private *cm_id_priv;
1646         struct ib_mad_send_buf *msg;
1647         struct cm_rep_msg *rep_msg;
1648         unsigned long flags;
1649         int ret;
1650
1651         if (param->private_data &&
1652             param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
1653                 return -EINVAL;
1654
1655         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1656         spin_lock_irqsave(&cm_id_priv->lock, flags);
1657         if (cm_id->state != IB_CM_REQ_RCVD &&
1658             cm_id->state != IB_CM_MRA_REQ_SENT) {
1659                 ret = -EINVAL;
1660                 goto out;
1661         }
1662
1663         ret = cm_alloc_msg(cm_id_priv, &msg);
1664         if (ret)
1665                 goto out;
1666
1667         rep_msg = (struct cm_rep_msg *) msg->mad;
1668         cm_format_rep(rep_msg, cm_id_priv, param);
1669         msg->timeout_ms = cm_id_priv->timeout_ms;
1670         msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
1671
1672         ret = ib_post_send_mad(msg, NULL);
1673         if (ret) {
1674                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1675                 cm_free_msg(msg);
1676                 return ret;
1677         }
1678
1679         cm_id->state = IB_CM_REP_SENT;
1680         cm_id_priv->msg = msg;
1681         cm_id_priv->initiator_depth = param->initiator_depth;
1682         cm_id_priv->responder_resources = param->responder_resources;
1683         cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
1684         cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
1685
1686 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1687         return ret;
1688 }
1689 EXPORT_SYMBOL(ib_send_cm_rep);
1690
1691 static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
1692                           struct cm_id_private *cm_id_priv,
1693                           const void *private_data,
1694                           u8 private_data_len)
1695 {
1696         cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
1697         rtu_msg->local_comm_id = cm_id_priv->id.local_id;
1698         rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
1699
1700         if (private_data && private_data_len)
1701                 memcpy(rtu_msg->private_data, private_data, private_data_len);
1702 }
1703
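/*
 * Send an RTU to complete connection establishment.  The cm_id must be
 * in the REP_RCVD or MRA_REP_SENT state; on success it transitions to
 * ESTABLISHED.
 */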
1704 int ib_send_cm_rtu(struct ib_cm_id *cm_id,
1705                    const void *private_data,
1706                    u8 private_data_len)
1707 {
1708         struct cm_id_private *cm_id_priv;
1709         struct ib_mad_send_buf *msg;
1710         unsigned long flags;
1711         void *data;
1712         int ret;
1713
1714         if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
1715                 return -EINVAL;
1716
1717         data = cm_copy_private_data(private_data, private_data_len);
1718         if (IS_ERR(data))
1719                 return PTR_ERR(data);
1720
1721         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1722         spin_lock_irqsave(&cm_id_priv->lock, flags);
1723         if (cm_id->state != IB_CM_REP_RCVD &&
1724             cm_id->state != IB_CM_MRA_REP_SENT) {
1725                 ret = -EINVAL;
1726                 goto error;
1727         }
1728
1729         ret = cm_alloc_msg(cm_id_priv, &msg);
1730         if (ret)
1731                 goto error;
1732
1733         cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1734                       private_data, private_data_len);
1735
1736         ret = ib_post_send_mad(msg, NULL);
1737         if (ret) {
1738                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1739                 cm_free_msg(msg);
1740                 kfree(data);
1741                 return ret;
1742         }
1743
1744         cm_id->state = IB_CM_ESTABLISHED;
1745         cm_set_private_data(cm_id_priv, data, private_data_len);
1746         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1747         return 0;
1748
1749 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1750         kfree(data);
1751         return ret;
1752 }
1753 EXPORT_SYMBOL(ib_send_cm_rtu);
1754
1755 static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
1756 {
1757         struct cm_rep_msg *rep_msg;
1758         struct ib_cm_rep_event_param *param;
1759
1760         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1761         param = &work->cm_event.param.rep_rcvd;
1762         param->remote_ca_guid = rep_msg->local_ca_guid;
1763         param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
1764         param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
1765         param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
1766         param->responder_resources = rep_msg->initiator_depth;
1767         param->initiator_depth = rep_msg->resp_resources;
1768         param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1769         param->failover_accepted = cm_rep_get_failover(rep_msg);
1770         param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
1771         param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1772         param->srq = cm_rep_get_srq(rep_msg);
1773         work->cm_event.private_data = &rep_msg->private_data;
1774 }
1775
1776 static void cm_dup_rep_handler(struct cm_work *work)
1777 {
1778         struct cm_id_private *cm_id_priv;
1779         struct cm_rep_msg *rep_msg;
1780         struct ib_mad_send_buf *msg = NULL;
1781         int ret;
1782
1783         rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
1784         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
1785                                    rep_msg->local_comm_id);
1786         if (!cm_id_priv)
1787                 return;
1788
1789         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1790                         counter[CM_REP_COUNTER]);
1791         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1792         if (ret)
1793                 goto deref;
1794
1795         spin_lock_irq(&cm_id_priv->lock);
1796         if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
1797                 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1798                               cm_id_priv->private_data,
1799                               cm_id_priv->private_data_len);
1800         else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
1801                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1802                               CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
1803                               cm_id_priv->private_data,
1804                               cm_id_priv->private_data_len);
1805         else
1806                 goto unlock;
1807         spin_unlock_irq(&cm_id_priv->lock);
1808
1809         ret = ib_post_send_mad(msg, NULL);
1810         if (ret)
1811                 goto free;
1812         goto deref;
1813
1814 unlock: spin_unlock_irq(&cm_id_priv->lock);
1815 free:   cm_free_msg(msg);
1816 deref:  cm_deref_id(cm_id_priv);
1817 }
1818
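/*
 * Handle a received REP: screen out duplicates and stale connections,
 * record the remote QPN and the negotiated connection parameters, and
 * move the cm_id to REP_RCVD before reporting the event.
 */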
1819 static int cm_rep_handler(struct cm_work *work)
1820 {
1821         struct cm_id_private *cm_id_priv;
1822         struct cm_rep_msg *rep_msg;
1823         int ret;
1824
1825         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1826         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
1827         if (!cm_id_priv) {
1828                 cm_dup_rep_handler(work);
1829                 return -EINVAL;
1830         }
1831
1832         cm_format_rep_event(work, cm_id_priv->qp_type);
1833
1834         spin_lock_irq(&cm_id_priv->lock);
1835         switch (cm_id_priv->id.state) {
1836         case IB_CM_REQ_SENT:
1837         case IB_CM_MRA_REQ_RCVD:
1838                 break;
1839         default:
1840                 spin_unlock_irq(&cm_id_priv->lock);
1841                 ret = -EINVAL;
1842                 goto error;
1843         }
1844
1845         cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
1846         cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
1847         cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
1848
1849         spin_lock(&cm.lock);
1850         /* Check for duplicate REP. */
1851         if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
1852                 spin_unlock(&cm.lock);
1853                 spin_unlock_irq(&cm_id_priv->lock);
1854                 ret = -EINVAL;
1855                 goto error;
1856         }
1857         /* Check for a stale connection. */
1858         if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
1859                 rb_erase(&cm_id_priv->timewait_info->remote_id_node,
1860                          &cm.remote_id_table);
1861                 cm_id_priv->timewait_info->inserted_remote_id = 0;
1862                 spin_unlock(&cm.lock);
1863                 spin_unlock_irq(&cm_id_priv->lock);
1864                 cm_issue_rej(work->port, work->mad_recv_wc,
1865                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
1866                              NULL, 0);
1867                 ret = -EINVAL;
1868                 goto error;
1869         }
1870         spin_unlock(&cm.lock);
1871
1872         cm_id_priv->id.state = IB_CM_REP_RCVD;
1873         cm_id_priv->id.remote_id = rep_msg->local_comm_id;
1874         cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
1875         cm_id_priv->initiator_depth = rep_msg->resp_resources;
1876         cm_id_priv->responder_resources = rep_msg->initiator_depth;
1877         cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
1878         cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1879         cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1880         cm_id_priv->av.timeout =
1881                         cm_ack_timeout(cm_id_priv->target_ack_delay,
1882                                        cm_id_priv->av.timeout - 1);
1883         cm_id_priv->alt_av.timeout =
1884                         cm_ack_timeout(cm_id_priv->target_ack_delay,
1885                                        cm_id_priv->alt_av.timeout - 1);
1886
1887         /* todo: handle peer_to_peer */
1888
1889         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1890         ret = atomic_inc_and_test(&cm_id_priv->work_count);
1891         if (!ret)
1892                 list_add_tail(&work->list, &cm_id_priv->work_list);
1893         spin_unlock_irq(&cm_id_priv->lock);
1894
1895         if (ret)
1896                 cm_process_work(cm_id_priv, work);
1897         else
1898                 cm_deref_id(cm_id_priv);
1899         return 0;
1900
1901 error:
1902         cm_deref_id(cm_id_priv);
1903         return ret;
1904 }
1905
1906 static int cm_establish_handler(struct cm_work *work)
1907 {
1908         struct cm_id_private *cm_id_priv;
1909         int ret;
1910
1911         /* See comment in cm_establish about lookup. */
1912         cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
1913         if (!cm_id_priv)
1914                 return -EINVAL;
1915
1916         spin_lock_irq(&cm_id_priv->lock);
1917         if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
1918                 spin_unlock_irq(&cm_id_priv->lock);
1919                 goto out;
1920         }
1921
1922         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1923         ret = atomic_inc_and_test(&cm_id_priv->work_count);
1924         if (!ret)
1925                 list_add_tail(&work->list, &cm_id_priv->work_list);
1926         spin_unlock_irq(&cm_id_priv->lock);
1927
1928         if (ret)
1929                 cm_process_work(cm_id_priv, work);
1930         else
1931                 cm_deref_id(cm_id_priv);
1932         return 0;
1933 out:
1934         cm_deref_id(cm_id_priv);
1935         return -EINVAL;
1936 }
1937
1938 static int cm_rtu_handler(struct cm_work *work)
1939 {
1940         struct cm_id_private *cm_id_priv;
1941         struct cm_rtu_msg *rtu_msg;
1942         int ret;
1943
1944         rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
1945         cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
1946                                    rtu_msg->local_comm_id);
1947         if (!cm_id_priv)
1948                 return -EINVAL;
1949
1950         work->cm_event.private_data = &rtu_msg->private_data;
1951
1952         spin_lock_irq(&cm_id_priv->lock);
1953         if (cm_id_priv->id.state != IB_CM_REP_SENT &&
1954             cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
1955                 spin_unlock_irq(&cm_id_priv->lock);
1956                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1957                                 counter[CM_RTU_COUNTER]);
1958                 goto out;
1959         }
1960         cm_id_priv->id.state = IB_CM_ESTABLISHED;
1961
1962         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1963         ret = atomic_inc_and_test(&cm_id_priv->work_count);
1964         if (!ret)
1965                 list_add_tail(&work->list, &cm_id_priv->work_list);
1966         spin_unlock_irq(&cm_id_priv->lock);
1967
1968         if (ret)
1969                 cm_process_work(cm_id_priv, work);
1970         else
1971                 cm_deref_id(cm_id_priv);
1972         return 0;
1973 out:
1974         cm_deref_id(cm_id_priv);
1975         return -EINVAL;
1976 }
1977
1978 static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
1979                           struct cm_id_private *cm_id_priv,
1980                           const void *private_data,
1981                           u8 private_data_len)
1982 {
1983         cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
1984                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
1985         dreq_msg->local_comm_id = cm_id_priv->id.local_id;
1986         dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
1987         cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
1988
1989         if (private_data && private_data_len)
1990                 memcpy(dreq_msg->private_data, private_data, private_data_len);
1991 }
1992
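/*
 * Send a DREQ to begin disconnecting an established connection.  If
 * the DREQ cannot be allocated or posted the cm_id drops straight into
 * timewait; otherwise it transitions to DREQ_SENT.
 */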
1993 int ib_send_cm_dreq(struct ib_cm_id *cm_id,
1994                     const void *private_data,
1995                     u8 private_data_len)
1996 {
1997         struct cm_id_private *cm_id_priv;
1998         struct ib_mad_send_buf *msg;
1999         unsigned long flags;
2000         int ret;
2001
2002         if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
2003                 return -EINVAL;
2004
2005         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2006         spin_lock_irqsave(&cm_id_priv->lock, flags);
2007         if (cm_id->state != IB_CM_ESTABLISHED) {
2008                 ret = -EINVAL;
2009                 goto out;
2010         }
2011
2012         if (cm_id->lap_state == IB_CM_LAP_SENT ||
2013             cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
2014                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2015
2016         ret = cm_alloc_msg(cm_id_priv, &msg);
2017         if (ret) {
2018                 cm_enter_timewait(cm_id_priv);
2019                 goto out;
2020         }
2021
2022         cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
2023                        private_data, private_data_len);
2024         msg->timeout_ms = cm_id_priv->timeout_ms;
2025         msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2026
2027         ret = ib_post_send_mad(msg, NULL);
2028         if (ret) {
2029                 cm_enter_timewait(cm_id_priv);
2030                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2031                 cm_free_msg(msg);
2032                 return ret;
2033         }
2034
2035         cm_id->state = IB_CM_DREQ_SENT;
2036         cm_id_priv->msg = msg;
2037 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2038         return ret;
2039 }
2040 EXPORT_SYMBOL(ib_send_cm_dreq);
2041
2042 static void cm_format_drep(struct cm_drep_msg *drep_msg,
2043                           struct cm_id_private *cm_id_priv,
2044                           const void *private_data,
2045                           u8 private_data_len)
2046 {
2047         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2048         drep_msg->local_comm_id = cm_id_priv->id.local_id;
2049         drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2050
2051         if (private_data && private_data_len)
2052                 memcpy(drep_msg->private_data, private_data, private_data_len);
2053 }
2054
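/*
 * Send a DREP in reply to a received DREQ.  The cm_id must be in the
 * DREQ_RCVD state and is moved into timewait before the reply is
 * posted.
 */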
2055 int ib_send_cm_drep(struct ib_cm_id *cm_id,
2056                     const void *private_data,
2057                     u8 private_data_len)
2058 {
2059         struct cm_id_private *cm_id_priv;
2060         struct ib_mad_send_buf *msg;
2061         unsigned long flags;
2062         void *data;
2063         int ret;
2064
2065         if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2066                 return -EINVAL;
2067
2068         data = cm_copy_private_data(private_data, private_data_len);
2069         if (IS_ERR(data))
2070                 return PTR_ERR(data);
2071
2072         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2073         spin_lock_irqsave(&cm_id_priv->lock, flags);
2074         if (cm_id->state != IB_CM_DREQ_RCVD) {
2075                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2076                 kfree(data);
2077                 return -EINVAL;
2078         }
2079
2080         cm_set_private_data(cm_id_priv, data, private_data_len);
2081         cm_enter_timewait(cm_id_priv);
2082
2083         ret = cm_alloc_msg(cm_id_priv, &msg);
2084         if (ret)
2085                 goto out;
2086
2087         cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2088                        private_data, private_data_len);
2089
2090         ret = ib_post_send_mad(msg, NULL);
2091         if (ret) {
2092                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2093                 cm_free_msg(msg);
2094                 return ret;
2095         }
2096
2097 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2098         return ret;
2099 }
2100 EXPORT_SYMBOL(ib_send_cm_drep);
2101
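/*
 * Reply to a DREQ that matched no cm_id so that the remote peer can
 * still finish tearing down its side of the connection.
 */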
2102 static int cm_issue_drep(struct cm_port *port,
2103                          struct ib_mad_recv_wc *mad_recv_wc)
2104 {
2105         struct ib_mad_send_buf *msg = NULL;
2106         struct cm_dreq_msg *dreq_msg;
2107         struct cm_drep_msg *drep_msg;
2108         int ret;
2109
2110         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2111         if (ret)
2112                 return ret;
2113
2114         dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2115         drep_msg = (struct cm_drep_msg *) msg->mad;
2116
2117         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2118         drep_msg->remote_comm_id = dreq_msg->local_comm_id;
2119         drep_msg->local_comm_id = dreq_msg->remote_comm_id;
2120
2121         ret = ib_post_send_mad(msg, NULL);
2122         if (ret)
2123                 cm_free_msg(msg);
2124
2125         return ret;
2126 }
2127
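/*
 * Handle a received DREQ.  An unmatched DREQ is answered with a DREP
 * anyway (see cm_issue_drep()), duplicates seen in timewait get the
 * DREP resent, and a valid DREQ moves the cm_id to DREQ_RCVD before
 * the event is reported.
 */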
2128 static int cm_dreq_handler(struct cm_work *work)
2129 {
2130         struct cm_id_private *cm_id_priv;
2131         struct cm_dreq_msg *dreq_msg;
2132         struct ib_mad_send_buf *msg = NULL;
2133         int ret;
2134
2135         dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2136         cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
2137                                    dreq_msg->local_comm_id);
2138         if (!cm_id_priv) {
2139                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2140                                 counter[CM_DREQ_COUNTER]);
2141                 cm_issue_drep(work->port, work->mad_recv_wc);
2142                 return -EINVAL;
2143         }
2144
2145         work->cm_event.private_data = &dreq_msg->private_data;
2146
2147         spin_lock_irq(&cm_id_priv->lock);
2148         if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
2149                 goto unlock;
2150
2151         switch (cm_id_priv->id.state) {
2152         case IB_CM_REP_SENT:
2153         case IB_CM_DREQ_SENT:
2154                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2155                 break;
2156         case IB_CM_ESTABLISHED:
2157                 if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2158                     cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2159                         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2160                 break;
2161         case IB_CM_MRA_REP_RCVD:
2162                 break;
2163         case IB_CM_TIMEWAIT:
2164                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2165                                 counter[CM_DREQ_COUNTER]);
2166                 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2167                         goto unlock;
2168
2169                 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2170                                cm_id_priv->private_data,
2171                                cm_id_priv->private_data_len);
2172                 spin_unlock_irq(&cm_id_priv->lock);
2173
2174                 if (ib_post_send_mad(msg, NULL))
2175                         cm_free_msg(msg);
2176                 goto deref;
2177         case IB_CM_DREQ_RCVD:
2178                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2179                                 counter[CM_DREQ_COUNTER]);
2180                 goto unlock;
2181         default:
2182                 goto unlock;
2183         }
2184         cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2185         cm_id_priv->tid = dreq_msg->hdr.tid;
2186         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2187         if (!ret)
2188                 list_add_tail(&work->list, &cm_id_priv->work_list);
2189         spin_unlock_irq(&cm_id_priv->lock);
2190
2191         if (ret)
2192                 cm_process_work(cm_id_priv, work);
2193         else
2194                 cm_deref_id(cm_id_priv);
2195         return 0;
2196
2197 unlock: spin_unlock_irq(&cm_id_priv->lock);
2198 deref:  cm_deref_id(cm_id_priv);
2199         return -EINVAL;
2200 }
2201
2202 static int cm_drep_handler(struct cm_work *work)
2203 {
2204         struct cm_id_private *cm_id_priv;
2205         struct cm_drep_msg *drep_msg;
2206         int ret;
2207
2208         drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2209         cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2210                                    drep_msg->local_comm_id);
2211         if (!cm_id_priv)
2212                 return -EINVAL;
2213
2214         work->cm_event.private_data = &drep_msg->private_data;
2215
2216         spin_lock_irq(&cm_id_priv->lock);
2217         if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2218             cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2219                 spin_unlock_irq(&cm_id_priv->lock);
2220                 goto out;
2221         }
2222         cm_enter_timewait(cm_id_priv);
2223
2224         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2225         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2226         if (!ret)
2227                 list_add_tail(&work->list, &cm_id_priv->work_list);
2228         spin_unlock_irq(&cm_id_priv->lock);
2229
2230         if (ret)
2231                 cm_process_work(cm_id_priv, work);
2232         else
2233                 cm_deref_id(cm_id_priv);
2234         return 0;
2235 out:
2236         cm_deref_id(cm_id_priv);
2237         return -EINVAL;
2238 }
2239
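/*
 * Reject a connection request or reply.  A rejection issued before our
 * REP has gone out resets the cm_id to idle, while rejecting after
 * sending a REP moves it into timewait.
 */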
2240 int ib_send_cm_rej(struct ib_cm_id *cm_id,
2241                    enum ib_cm_rej_reason reason,
2242                    void *ari,
2243                    u8 ari_length,
2244                    const void *private_data,
2245                    u8 private_data_len)
2246 {
2247         struct cm_id_private *cm_id_priv;
2248         struct ib_mad_send_buf *msg;
2249         unsigned long flags;
2250         int ret;
2251
2252         if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2253             (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2254                 return -EINVAL;
2255
2256         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2257
2258         spin_lock_irqsave(&cm_id_priv->lock, flags);
2259         switch (cm_id->state) {
2260         case IB_CM_REQ_SENT:
2261         case IB_CM_MRA_REQ_RCVD:
2262         case IB_CM_REQ_RCVD:
2263         case IB_CM_MRA_REQ_SENT:
2264         case IB_CM_REP_RCVD:
2265         case IB_CM_MRA_REP_SENT:
2266                 ret = cm_alloc_msg(cm_id_priv, &msg);
2267                 if (!ret)
2268                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2269                                       cm_id_priv, reason, ari, ari_length,
2270                                       private_data, private_data_len);
2271
2272                 cm_reset_to_idle(cm_id_priv);
2273                 break;
2274         case IB_CM_REP_SENT:
2275         case IB_CM_MRA_REP_RCVD:
2276                 ret = cm_alloc_msg(cm_id_priv, &msg);
2277                 if (!ret)
2278                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2279                                       cm_id_priv, reason, ari, ari_length,
2280                                       private_data, private_data_len);
2281
2282                 cm_enter_timewait(cm_id_priv);
2283                 break;
2284         default:
2285                 ret = -EINVAL;
2286                 goto out;
2287         }
2288
2289         if (ret)
2290                 goto out;
2291
2292         ret = ib_post_send_mad(msg, NULL);
2293         if (ret)
2294                 cm_free_msg(msg);
2295
2296 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2297         return ret;
2298 }
2299 EXPORT_SYMBOL(ib_send_cm_rej);
2300
2301 static void cm_format_rej_event(struct cm_work *work)
2302 {
2303         struct cm_rej_msg *rej_msg;
2304         struct ib_cm_rej_event_param *param;
2305
2306         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2307         param = &work->cm_event.param.rej_rcvd;
2308         param->ari = rej_msg->ari;
2309         param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2310         param->reason = __be16_to_cpu(rej_msg->reason);
2311         work->cm_event.private_data = &rej_msg->private_data;
2312 }
2313
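/*
 * Locate the cm_id targeted by a REJ.  A timeout rejection carries the
 * CA GUID in its ARI and is matched through the timewait list; other
 * rejections are looked up directly by comm ID.
 */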
2314 static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2315 {
2316         struct cm_timewait_info *timewait_info;
2317         struct cm_id_private *cm_id_priv;
2318         __be32 remote_id;
2319
2320         remote_id = rej_msg->local_comm_id;
2321
2322         if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2323                 spin_lock_irq(&cm.lock);
2324                 timewait_info = cm_find_remote_id(*((__be64 *) rej_msg->ari),
2325                                                   remote_id);
2326                 if (!timewait_info) {
2327                         spin_unlock_irq(&cm.lock);
2328                         return NULL;
2329                 }
2330                 cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2331                                       (timewait_info->work.local_id ^
2332                                        cm.random_id_operand));
2333                 if (cm_id_priv) {
2334                         if (cm_id_priv->id.remote_id == remote_id)
2335                                 atomic_inc(&cm_id_priv->refcount);
2336                         else
2337                                 cm_id_priv = NULL;
2338                 }
2339                 spin_unlock_irq(&cm.lock);
2340         } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2341                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2342         else
2343                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2344
2345         return cm_id_priv;
2346 }
2347
2348 static int cm_rej_handler(struct cm_work *work)
2349 {
2350         struct cm_id_private *cm_id_priv;
2351         struct cm_rej_msg *rej_msg;
2352         int ret;
2353
2354         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2355         cm_id_priv = cm_acquire_rejected_id(rej_msg);
2356         if (!cm_id_priv)
2357                 return -EINVAL;
2358
2359         cm_format_rej_event(work);
2360
2361         spin_lock_irq(&cm_id_priv->lock);
2362         switch (cm_id_priv->id.state) {
2363         case IB_CM_REQ_SENT:
2364         case IB_CM_MRA_REQ_RCVD:
2365         case IB_CM_REP_SENT:
2366         case IB_CM_MRA_REP_RCVD:
2367                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2368                 /* fall through */
2369         case IB_CM_REQ_RCVD:
2370         case IB_CM_MRA_REQ_SENT:
2371                 if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2372                         cm_enter_timewait(cm_id_priv);
2373                 else
2374                         cm_reset_to_idle(cm_id_priv);
2375                 break;
2376         case IB_CM_DREQ_SENT:
2377                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2378                 /* fall through */
2379         case IB_CM_REP_RCVD:
2380         case IB_CM_MRA_REP_SENT:
2381                 cm_enter_timewait(cm_id_priv);
2382                 break;
2383         case IB_CM_ESTABLISHED:
2384                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2385                     cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2386                         if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2387                                 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2388                                               cm_id_priv->msg);
2389                         cm_enter_timewait(cm_id_priv);
2390                         break;
2391                 }
2392                 /* fall through */
2393         default:
2394                 spin_unlock_irq(&cm_id_priv->lock);
2395                 ret = -EINVAL;
2396                 goto out;
2397         }
2398
2399         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2400         if (!ret)
2401                 list_add_tail(&work->list, &cm_id_priv->work_list);
2402         spin_unlock_irq(&cm_id_priv->lock);
2403
2404         if (ret)
2405                 cm_process_work(cm_id_priv, work);
2406         else
2407                 cm_deref_id(cm_id_priv);
2408         return 0;
2409 out:
2410         cm_deref_id(cm_id_priv);
2411         return -EINVAL;
2412 }
2413
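/*
 * Send an MRA to hold off the peer's retries while a received REQ,
 * REP, or LAP is being processed.  If IB_CM_MRA_FLAG_DELAY is set in
 * service_timeout, the state is recorded but the MRA itself is only
 * sent once a duplicate message arrives.
 */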
2414 int ib_send_cm_mra(struct ib_cm_id *cm_id,
2415                    u8 service_timeout,
2416                    const void *private_data,
2417                    u8 private_data_len)
2418 {
2419         struct cm_id_private *cm_id_priv;
2420         struct ib_mad_send_buf *msg;
2421         enum ib_cm_state cm_state;
2422         enum ib_cm_lap_state lap_state;
2423         enum cm_msg_response msg_response;
2424         void *data;
2425         unsigned long flags;
2426         int ret;
2427
2428         if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2429                 return -EINVAL;
2430
2431         data = cm_copy_private_data(private_data, private_data_len);
2432         if (IS_ERR(data))
2433                 return PTR_ERR(data);
2434
2435         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2436
2437         spin_lock_irqsave(&cm_id_priv->lock, flags);
2438         switch (cm_id_priv->id.state) {
2439         case IB_CM_REQ_RCVD:
2440                 cm_state = IB_CM_MRA_REQ_SENT;
2441                 lap_state = cm_id->lap_state;
2442                 msg_response = CM_MSG_RESPONSE_REQ;
2443                 break;
2444         case IB_CM_REP_RCVD:
2445                 cm_state = IB_CM_MRA_REP_SENT;
2446                 lap_state = cm_id->lap_state;
2447                 msg_response = CM_MSG_RESPONSE_REP;
2448                 break;
2449         case IB_CM_ESTABLISHED:
2450                 if (cm_id->lap_state == IB_CM_LAP_RCVD) {
2451                         cm_state = cm_id->state;
2452                         lap_state = IB_CM_MRA_LAP_SENT;
2453                         msg_response = CM_MSG_RESPONSE_OTHER;
2454                         break;
2455                 }
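                /* fall through */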
2456         default:
2457                 ret = -EINVAL;
2458                 goto error1;
2459         }
2460
2461         if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2462                 ret = cm_alloc_msg(cm_id_priv, &msg);
2463                 if (ret)
2464                         goto error1;
2465
2466                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2467                               msg_response, service_timeout,
2468                               private_data, private_data_len);
2469                 ret = ib_post_send_mad(msg, NULL);
2470                 if (ret)
2471                         goto error2;
2472         }
2473
2474         cm_id->state = cm_state;
2475         cm_id->lap_state = lap_state;
2476         cm_id_priv->service_timeout = service_timeout;
2477         cm_set_private_data(cm_id_priv, data, private_data_len);
2478         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2479         return 0;
2480
2481 error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2482         kfree(data);
2483         return ret;
2484
2485 error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2486         kfree(data);
2487         cm_free_msg(msg);
2488         return ret;
2489 }
2490 EXPORT_SYMBOL(ib_send_cm_mra);
2491
2492 static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2493 {
2494         switch (cm_mra_get_msg_mraed(mra_msg)) {
2495         case CM_MSG_RESPONSE_REQ:
2496                 return cm_acquire_id(mra_msg->remote_comm_id, 0);
2497         case CM_MSG_RESPONSE_REP:
2498         case CM_MSG_RESPONSE_OTHER:
2499                 return cm_acquire_id(mra_msg->remote_comm_id,
2500                                      mra_msg->local_comm_id);
2501         default:
2502                 return NULL;
2503         }
2504 }
2505
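/*
 * Handle a received MRA: extend the timeout on the outstanding REQ,
 * REP, or LAP that it acknowledges and record the new state.  MRAs
 * that do not correspond to an outstanding message are dropped.
 */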
2506 static int cm_mra_handler(struct cm_work *work)
2507 {
2508         struct cm_id_private *cm_id_priv;
2509         struct cm_mra_msg *mra_msg;
2510         int timeout, ret;
2511
2512         mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
2513         cm_id_priv = cm_acquire_mraed_id(mra_msg);
2514         if (!cm_id_priv)
2515                 return -EINVAL;
2516
2517         work->cm_event.private_data = &mra_msg->private_data;
2518         work->cm_event.param.mra_rcvd.service_timeout =
2519                                         cm_mra_get_service_timeout(mra_msg);
2520         timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
2521                   cm_convert_to_ms(cm_id_priv->av.timeout);
2522
2523         spin_lock_irq(&cm_id_priv->lock);
2524         switch (cm_id_priv->id.state) {
2525         case IB_CM_REQ_SENT:
2526                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
2527                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2528                                   cm_id_priv->msg, timeout))
2529                         goto out;
2530                 cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
2531                 break;
2532         case IB_CM_REP_SENT:
2533                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
2534                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2535                                   cm_id_priv->msg, timeout))
2536                         goto out;
2537                 cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
2538                 break;
2539         case IB_CM_ESTABLISHED:
2540                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
2541                     cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
2542                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2543                                   cm_id_priv->msg, timeout)) {
2544                         if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2545                                 atomic_long_inc(&work->port->
2546                                                 counter_group[CM_RECV_DUPLICATES].
2547                                                 counter[CM_MRA_COUNTER]);
2548                         goto out;
2549                 }
2550                 cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
2551                 break;
2552         case IB_CM_MRA_REQ_RCVD:
2553         case IB_CM_MRA_REP_RCVD:
2554                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2555                                 counter[CM_MRA_COUNTER]);
2556                 /* fall through */
2557         default:
2558                 goto out;
2559         }
2560
2561         cm_id_priv->msg->context[1] = (void *) (unsigned long)
2562                                       cm_id_priv->id.state;
2563         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2564         if (!ret)
2565                 list_add_tail(&work->list, &cm_id_priv->work_list);
2566         spin_unlock_irq(&cm_id_priv->lock);
2567
2568         if (ret)
2569                 cm_process_work(cm_id_priv, work);
2570         else
2571                 cm_deref_id(cm_id_priv);
2572         return 0;
2573 out:
2574         spin_unlock_irq(&cm_id_priv->lock);
2575         cm_deref_id(cm_id_priv);
2576         return -EINVAL;
2577 }
2578
2579 static void cm_format_lap(struct cm_lap_msg *lap_msg,
2580                           struct cm_id_private *cm_id_priv,
2581                           struct ib_sa_path_rec *alternate_path,
2582                           const void *private_data,
2583                           u8 private_data_len)
2584 {
2585         cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
2586                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
2587         lap_msg->local_comm_id = cm_id_priv->id.local_id;
2588         lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
2589         cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
2590         /* todo: need remote CM response timeout */
2591         cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
2592         lap_msg->alt_local_lid = alternate_path->slid;
2593         lap_msg->alt_remote_lid = alternate_path->dlid;
2594         lap_msg->alt_local_gid = alternate_path->sgid;
2595         lap_msg->alt_remote_gid = alternate_path->dgid;
2596         cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
2597         cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
2598         lap_msg->alt_hop_limit = alternate_path->hop_limit;
2599         cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
2600         cm_lap_set_sl(lap_msg, alternate_path->sl);
2601         cm_lap_set_subnet_local(lap_msg, 1); /* local subnet paths only */
2602         cm_lap_set_local_ack_timeout(lap_msg,
2603                 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
2604                                alternate_path->packet_life_time));
2605
2606         if (private_data && private_data_len)
2607                 memcpy(lap_msg->private_data, private_data, private_data_len);
2608 }
2609
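/*
 * Send a LAP to load the given alternate path.  Only valid on an
 * established connection whose lap_state is uninitialized or idle; on
 * success the lap_state moves to LAP_SENT.
 */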
2610 int ib_send_cm_lap(struct ib_cm_id *cm_id,
2611                    struct ib_sa_path_rec *alternate_path,
2612                    const void *private_data,
2613                    u8 private_data_len)
2614 {
2615         struct cm_id_private *cm_id_priv;
2616         struct ib_mad_send_buf *msg;
2617         unsigned long flags;
2618         int ret;
2619
2620         if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
2621                 return -EINVAL;
2622
2623         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2624         spin_lock_irqsave(&cm_id_priv->lock, flags);
2625         if (cm_id->state != IB_CM_ESTABLISHED ||
2626             (cm_id->lap_state != IB_CM_LAP_UNINIT &&
2627              cm_id->lap_state != IB_CM_LAP_IDLE)) {
2628                 ret = -EINVAL;
2629                 goto out;
2630         }
2631
2632         ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
2633         if (ret)
2634                 goto out;
2635         cm_id_priv->alt_av.timeout =
2636                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2637                                        cm_id_priv->alt_av.timeout - 1);
2638
2639         ret = cm_alloc_msg(cm_id_priv, &msg);
2640         if (ret)
2641                 goto out;
2642
2643         cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
2644                       alternate_path, private_data, private_data_len);
2645         msg->timeout_ms = cm_id_priv->timeout_ms;
2646         msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
2647
2648         ret = ib_post_send_mad(msg, NULL);
2649         if (ret) {
2650                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2651                 cm_free_msg(msg);
2652                 return ret;
2653         }
2654
2655         cm_id->lap_state = IB_CM_LAP_SENT;
2656         cm_id_priv->msg = msg;
2657
2658 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2659         return ret;
2660 }
2661 EXPORT_SYMBOL(ib_send_cm_lap);
2662
2663 static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
2664                                     struct ib_sa_path_rec *path,
2665                                     struct cm_lap_msg *lap_msg)
2666 {
2667         memset(path, 0, sizeof *path);
2668         path->dgid = lap_msg->alt_local_gid;
2669         path->sgid = lap_msg->alt_remote_gid;
2670         path->dlid = lap_msg->alt_local_lid;
2671         path->slid = lap_msg->alt_remote_lid;
2672         path->flow_label = cm_lap_get_flow_label(lap_msg);
2673         path->hop_limit = lap_msg->alt_hop_limit;
2674         path->traffic_class = cm_lap_get_traffic_class(lap_msg);
2675         path->reversible = 1;
2676         path->pkey = cm_id_priv->pkey;
2677         path->sl = cm_lap_get_sl(lap_msg);
2678         path->mtu_selector = IB_SA_EQ;
2679         path->mtu = cm_id_priv->path_mtu;
2680         path->rate_selector = IB_SA_EQ;
2681         path->rate = cm_lap_get_packet_rate(lap_msg);
2682         path->packet_life_time_selector = IB_SA_EQ;
2683         path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
2684         path->packet_life_time -= (path->packet_life_time > 0);
2685 }
2686
2687 static int cm_lap_handler(struct cm_work *work)
2688 {
2689         struct cm_id_private *cm_id_priv;
2690         struct cm_lap_msg *lap_msg;
2691         struct ib_cm_lap_event_param *param;
2692         struct ib_mad_send_buf *msg = NULL;
2693         int ret;
2694
2695         /* todo: verify LAP request and send reject APR if invalid. */
2696         lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
2697         cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
2698                                    lap_msg->local_comm_id);
2699         if (!cm_id_priv)
2700                 return -EINVAL;
2701
2702         param = &work->cm_event.param.lap_rcvd;
2703         param->alternate_path = &work->path[0];
2704         cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
2705         work->cm_event.private_data = &lap_msg->private_data;
2706
2707         spin_lock_irq(&cm_id_priv->lock);
2708         if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
2709                 goto unlock;
2710
2711         switch (cm_id_priv->id.lap_state) {
2712         case IB_CM_LAP_UNINIT:
2713         case IB_CM_LAP_IDLE:
2714                 break;
2715         case IB_CM_MRA_LAP_SENT:
2716                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2717                                 counter[CM_LAP_COUNTER]);
2718                 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2719                         goto unlock;
2720
2721                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2722                               CM_MSG_RESPONSE_OTHER,
2723                               cm_id_priv->service_timeout,
2724                               cm_id_priv->private_data,
2725                               cm_id_priv->private_data_len);
2726                 spin_unlock_irq(&cm_id_priv->lock);
2727
2728                 if (ib_post_send_mad(msg, NULL))
2729                         cm_free_msg(msg);
2730                 goto deref;
2731         case IB_CM_LAP_RCVD:
2732                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2733                                 counter[CM_LAP_COUNTER]);
2734                 goto unlock;
2735         default:
2736                 goto unlock;
2737         }
2738
2739         cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
2740         cm_id_priv->tid = lap_msg->hdr.tid;
2741         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
2742                                 work->mad_recv_wc->recv_buf.grh,
2743                                 &cm_id_priv->av);
2744         cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
2745         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2746         if (!ret)
2747                 list_add_tail(&work->list, &cm_id_priv->work_list);
2748         spin_unlock_irq(&cm_id_priv->lock);
2749
2750         if (ret)
2751                 cm_process_work(cm_id_priv, work);
2752         else
2753                 cm_deref_id(cm_id_priv);
2754         return 0;
2755
2756 unlock: spin_unlock_irq(&cm_id_priv->lock);
2757 deref:  cm_deref_id(cm_id_priv);
2758         return -EINVAL;
2759 }
2760
2761 static void cm_format_apr(struct cm_apr_msg *apr_msg,
2762                           struct cm_id_private *cm_id_priv,
2763                           enum ib_cm_apr_status status,
2764                           void *info,
2765                           u8 info_length,
2766                           const void *private_data,
2767                           u8 private_data_len)
2768 {
2769         cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
2770         apr_msg->local_comm_id = cm_id_priv->id.local_id;
2771         apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
2772         apr_msg->ap_status = (u8) status;
2773
2774         if (info && info_length) {
2775                 apr_msg->info_length = info_length;
2776                 memcpy(apr_msg->info, info, info_length);
2777         }
2778
2779         if (private_data && private_data_len)
2780                 memcpy(apr_msg->private_data, private_data, private_data_len);
2781 }
2782
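/*
 * Send an APR in response to a received LAP.  Valid only while the
 * connection is established and a LAP is outstanding (LAP_RCVD or
 * MRA_LAP_SENT); on success lap_state returns to IB_CM_LAP_IDLE.
 */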
2783 int ib_send_cm_apr(struct ib_cm_id *cm_id,
2784                    enum ib_cm_apr_status status,
2785                    void *info,
2786                    u8 info_length,
2787                    const void *private_data,
2788                    u8 private_data_len)
2789 {
2790         struct cm_id_private *cm_id_priv;
2791         struct ib_mad_send_buf *msg;
2792         unsigned long flags;
2793         int ret;
2794
2795         if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
2796             (info && info_length > IB_CM_APR_INFO_LENGTH))
2797                 return -EINVAL;
2798
2799         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2800         spin_lock_irqsave(&cm_id_priv->lock, flags);
2801         if (cm_id->state != IB_CM_ESTABLISHED ||
2802             (cm_id->lap_state != IB_CM_LAP_RCVD &&
2803              cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
2804                 ret = -EINVAL;
2805                 goto out;
2806         }
2807
2808         ret = cm_alloc_msg(cm_id_priv, &msg);
2809         if (ret)
2810                 goto out;
2811
2812         cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
2813                       info, info_length, private_data, private_data_len);
2814         ret = ib_post_send_mad(msg, NULL);
2815         if (ret) {
2816                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2817                 cm_free_msg(msg);
2818                 return ret;
2819         }
2820
2821         cm_id->lap_state = IB_CM_LAP_IDLE;
2822 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2823         return ret;
2824 }
2825 EXPORT_SYMBOL(ib_send_cm_apr);
2826
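/*
 * Handle a received APR: this completes our outstanding LAP, so cancel
 * the LAP retransmission, clear the active message, and report the
 * result to the consumer.
 */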
2827 static int cm_apr_handler(struct cm_work *work)
2828 {
2829         struct cm_id_private *cm_id_priv;
2830         struct cm_apr_msg *apr_msg;
2831         int ret;
2832
2833         apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
2834         cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
2835                                    apr_msg->local_comm_id);
2836         if (!cm_id_priv)
2837                 return -EINVAL; /* Unmatched reply. */
2838
2839         work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
2840         work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
2841         work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
2842         work->cm_event.private_data = &apr_msg->private_data;
2843
2844         spin_lock_irq(&cm_id_priv->lock);
2845         if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
2846             (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
2847              cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
2848                 spin_unlock_irq(&cm_id_priv->lock);
2849                 goto out;
2850         }
2851         cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
2852         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2853         cm_id_priv->msg = NULL;
2854
2855         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2856         if (!ret)
2857                 list_add_tail(&work->list, &cm_id_priv->work_list);
2858         spin_unlock_irq(&cm_id_priv->lock);
2859
2860         if (ret)
2861                 cm_process_work(cm_id_priv, work);
2862         else
2863                 cm_deref_id(cm_id_priv);
2864         return 0;
2865 out:
2866         cm_deref_id(cm_id_priv);
2867         return -EINVAL;
2868 }
2869
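/*
 * The timewait work item fires when the timewait period for a connection
 * expires.  If the id is still in TIMEWAIT for the same remote QPN, move
 * it to IDLE and deliver IB_CM_TIMEWAIT_EXIT.
 */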
2870 static int cm_timewait_handler(struct cm_work *work)
2871 {
2872         struct cm_timewait_info *timewait_info;
2873         struct cm_id_private *cm_id_priv;
2874         int ret;
2875
2876         timewait_info = (struct cm_timewait_info *)work;
2877         spin_lock_irq(&cm.lock);
2878         list_del(&timewait_info->list);
2879         spin_unlock_irq(&cm.lock);
2880
2881         cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
2882                                    timewait_info->work.remote_id);
2883         if (!cm_id_priv)
2884                 return -EINVAL;
2885
2886         spin_lock_irq(&cm_id_priv->lock);
2887         if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
2888             cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
2889                 spin_unlock_irq(&cm_id_priv->lock);
2890                 goto out;
2891         }
2892         cm_id_priv->id.state = IB_CM_IDLE;
2893         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2894         if (!ret)
2895                 list_add_tail(&work->list, &cm_id_priv->work_list);
2896         spin_unlock_irq(&cm_id_priv->lock);
2897
2898         if (ret)
2899                 cm_process_work(cm_id_priv, work);
2900         else
2901                 cm_deref_id(cm_id_priv);
2902         return 0;
2903 out:
2904         cm_deref_id(cm_id_priv);
2905         return -EINVAL;
2906 }
2907
2908 static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
2909                                struct cm_id_private *cm_id_priv,
2910                                struct ib_cm_sidr_req_param *param)
2911 {
2912         cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
2913                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
2914         sidr_req_msg->request_id = cm_id_priv->id.local_id;
2915         sidr_req_msg->pkey = param->path->pkey;
2916         sidr_req_msg->service_id = param->service_id;
2917
2918         if (param->private_data && param->private_data_len)
2919                 memcpy(sidr_req_msg->private_data, param->private_data,
2920                        param->private_data_len);
2921 }
2922
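/*
 * Send a SIDR REQ: resolve a service ID to a QPN/Q_Key on the remote
 * node without establishing a connection.  The id must be IDLE; on
 * success it moves to IB_CM_SIDR_REQ_SENT until a reply or timeout.
 */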
2923 int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
2924                         struct ib_cm_sidr_req_param *param)
2925 {
2926         struct cm_id_private *cm_id_priv;
2927         struct ib_mad_send_buf *msg;
2928         unsigned long flags;
2929         int ret;
2930
2931         if (!param->path || (param->private_data &&
2932              param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
2933                 return -EINVAL;
2934
2935         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2936         ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
2937         if (ret)
2938                 goto out;
2939
2940         cm_id->service_id = param->service_id;
2941         cm_id->service_mask = ~cpu_to_be64(0);
2942         cm_id_priv->timeout_ms = param->timeout_ms;
2943         cm_id_priv->max_cm_retries = param->max_cm_retries;
2944         ret = cm_alloc_msg(cm_id_priv, &msg);
2945         if (ret)
2946                 goto out;
2947
2948         cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
2949                            param);
2950         msg->timeout_ms = cm_id_priv->timeout_ms;
2951         msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
2952
2953         spin_lock_irqsave(&cm_id_priv->lock, flags);
2954         if (cm_id->state == IB_CM_IDLE)
2955                 ret = ib_post_send_mad(msg, NULL);
2956         else
2957                 ret = -EINVAL;
2958
2959         if (ret) {
2960                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2961                 cm_free_msg(msg);
2962                 goto out;
2963         }
2964         cm_id->state = IB_CM_SIDR_REQ_SENT;
2965         cm_id_priv->msg = msg;
2966         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2967 out:
2968         return ret;
2969 }
2970 EXPORT_SYMBOL(ib_send_cm_sidr_req);
2971
2972 static void cm_format_sidr_req_event(struct cm_work *work,
2973                                      struct ib_cm_id *listen_id)
2974 {
2975         struct cm_sidr_req_msg *sidr_req_msg;
2976         struct ib_cm_sidr_req_event_param *param;
2977
2978         sidr_req_msg = (struct cm_sidr_req_msg *)
2979                                 work->mad_recv_wc->recv_buf.mad;
2980         param = &work->cm_event.param.sidr_req_rcvd;
2981         param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
2982         param->listen_id = listen_id;
2983         param->port = work->port->port_num;
2984         work->cm_event.private_data = &sidr_req_msg->private_data;
2985 }
2986
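/*
 * Handle a received SIDR REQ: create a new cm_id for the query, drop
 * duplicates already present in the remote SIDR table, and match the
 * request against a listener.  Unmatched requests are rejected with
 * IB_SIDR_UNSUPPORTED.
 */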
2987 static int cm_sidr_req_handler(struct cm_work *work)
2988 {
2989         struct ib_cm_id *cm_id;
2990         struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
2991         struct cm_sidr_req_msg *sidr_req_msg;
2992         struct ib_wc *wc;
2993
2994         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
2995         if (IS_ERR(cm_id))
2996                 return PTR_ERR(cm_id);
2997         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2998
2999         /* Record SGID/SLID and request ID for lookup. */
3000         sidr_req_msg = (struct cm_sidr_req_msg *)
3001                                 work->mad_recv_wc->recv_buf.mad;
3002         wc = work->mad_recv_wc->wc;
3003         cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
3004         cm_id_priv->av.dgid.global.interface_id = 0;
3005         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3006                                 work->mad_recv_wc->recv_buf.grh,
3007                                 &cm_id_priv->av);
3008         cm_id_priv->id.remote_id = sidr_req_msg->request_id;
3009         cm_id_priv->tid = sidr_req_msg->hdr.tid;
3010         atomic_inc(&cm_id_priv->work_count);
3011
3012         spin_lock_irq(&cm.lock);
3013         cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
3014         if (cur_cm_id_priv) {
3015                 spin_unlock_irq(&cm.lock);
3016                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3017                                 counter[CM_SIDR_REQ_COUNTER]);
3018                 goto out; /* Duplicate message. */
3019         }
3020         cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
3021         cur_cm_id_priv = cm_find_listen(cm_id->device,
3022                                         sidr_req_msg->service_id,
3023                                         sidr_req_msg->private_data);
3024         if (!cur_cm_id_priv) {
3025                 spin_unlock_irq(&cm.lock);
3026                 cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
3027                 goto out; /* No match. */
3028         }
3029         atomic_inc(&cur_cm_id_priv->refcount);
3030         atomic_inc(&cm_id_priv->refcount);
3031         spin_unlock_irq(&cm.lock);
3032
3033         cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
3034         cm_id_priv->id.context = cur_cm_id_priv->id.context;
3035         cm_id_priv->id.service_id = sidr_req_msg->service_id;
3036         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
3037
3038         cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
3039         cm_process_work(cm_id_priv, work);
3040         cm_deref_id(cur_cm_id_priv);
3041         return 0;
3042 out:
3043         ib_destroy_cm_id(&cm_id_priv->id);
3044         return -EINVAL;
3045 }
3046
3047 static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3048                                struct cm_id_private *cm_id_priv,
3049                                struct ib_cm_sidr_rep_param *param)
3050 {
3051         cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3052                           cm_id_priv->tid);
3053         sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
3054         sidr_rep_msg->status = param->status;
3055         cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
3056         sidr_rep_msg->service_id = cm_id_priv->id.service_id;
3057         sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
3058
3059         if (param->info && param->info_length)
3060                 memcpy(sidr_rep_msg->info, param->info, param->info_length);
3061
3062         if (param->private_data && param->private_data_len)
3063                 memcpy(sidr_rep_msg->private_data, param->private_data,
3064                        param->private_data_len);
3065 }
3066
3067 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3068                         struct ib_cm_sidr_rep_param *param)
3069 {
3070         struct cm_id_private *cm_id_priv;
3071         struct ib_mad_send_buf *msg;
3072         unsigned long flags;
3073         int ret;
3074
3075         if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3076             (param->private_data &&
3077              param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3078                 return -EINVAL;
3079
3080         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3081         spin_lock_irqsave(&cm_id_priv->lock, flags);
3082         if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3083                 ret = -EINVAL;
3084                 goto error;
3085         }
3086
3087         ret = cm_alloc_msg(cm_id_priv, &msg);
3088         if (ret)
3089                 goto error;
3090
3091         cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3092                            param);
3093         ret = ib_post_send_mad(msg, NULL);
3094         if (ret) {
3095                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3096                 cm_free_msg(msg);
3097                 return ret;
3098         }
3099         cm_id->state = IB_CM_IDLE;
3100         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3101
        spin_lock_irqsave(&cm.lock, flags);
        /*
         * The id may already have been removed from the table by a
         * concurrent reject/destroy, so guard against a duplicate erase.
         */
        if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
                rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
                RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
        }
        spin_unlock_irqrestore(&cm.lock, flags);
3105         return 0;
3106
3107 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3108         return ret;
3109 }
3110 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
3111
3112 static void cm_format_sidr_rep_event(struct cm_work *work)
3113 {
3114         struct cm_sidr_rep_msg *sidr_rep_msg;
3115         struct ib_cm_sidr_rep_event_param *param;
3116
3117         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3118                                 work->mad_recv_wc->recv_buf.mad;
3119         param = &work->cm_event.param.sidr_rep_rcvd;
3120         param->status = sidr_rep_msg->status;
3121         param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
3122         param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
3123         param->info = &sidr_rep_msg->info;
3124         param->info_len = sidr_rep_msg->info_length;
3125         work->cm_event.private_data = &sidr_rep_msg->private_data;
3126 }
3127
3128 static int cm_sidr_rep_handler(struct cm_work *work)
3129 {
3130         struct cm_sidr_rep_msg *sidr_rep_msg;
3131         struct cm_id_private *cm_id_priv;
3132
3133         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3134                                 work->mad_recv_wc->recv_buf.mad;
3135         cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
3136         if (!cm_id_priv)
3137                 return -EINVAL; /* Unmatched reply. */
3138
3139         spin_lock_irq(&cm_id_priv->lock);
3140         if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3141                 spin_unlock_irq(&cm_id_priv->lock);
3142                 goto out;
3143         }
3144         cm_id_priv->id.state = IB_CM_IDLE;
3145         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3146         spin_unlock_irq(&cm_id_priv->lock);
3147
3148         cm_format_sidr_rep_event(work);
3149         cm_process_work(cm_id_priv, work);
3150         return 0;
3151 out:
3152         cm_deref_id(cm_id_priv);
3153         return -EINVAL;
3154 }
3155
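/*
 * A send completed in error.  If the failed MAD is still the active
 * message for its cm_id and the id has not changed state, translate the
 * failure into the matching *_ERROR event and rewind the connection
 * state; otherwise the send is stale and is simply freed.
 */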
3156 static void cm_process_send_error(struct ib_mad_send_buf *msg,
3157                                   enum ib_wc_status wc_status)
3158 {
3159         struct cm_id_private *cm_id_priv;
3160         struct ib_cm_event cm_event;
3161         enum ib_cm_state state;
3162         int ret;
3163
3164         memset(&cm_event, 0, sizeof cm_event);
3165         cm_id_priv = msg->context[0];
3166
3167         /* Discard old sends or ones without a response. */
3168         spin_lock_irq(&cm_id_priv->lock);
3169         state = (enum ib_cm_state) (unsigned long) msg->context[1];
3170         if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3171                 goto discard;
3172
3173         switch (state) {
3174         case IB_CM_REQ_SENT:
3175         case IB_CM_MRA_REQ_RCVD:
3176                 cm_reset_to_idle(cm_id_priv);
3177                 cm_event.event = IB_CM_REQ_ERROR;
3178                 break;
3179         case IB_CM_REP_SENT:
3180         case IB_CM_MRA_REP_RCVD:
3181                 cm_reset_to_idle(cm_id_priv);
3182                 cm_event.event = IB_CM_REP_ERROR;
3183                 break;
3184         case IB_CM_DREQ_SENT:
3185                 cm_enter_timewait(cm_id_priv);
3186                 cm_event.event = IB_CM_DREQ_ERROR;
3187                 break;
3188         case IB_CM_SIDR_REQ_SENT:
3189                 cm_id_priv->id.state = IB_CM_IDLE;
3190                 cm_event.event = IB_CM_SIDR_REQ_ERROR;
3191                 break;
3192         default:
3193                 goto discard;
3194         }
3195         spin_unlock_irq(&cm_id_priv->lock);
3196         cm_event.param.send_status = wc_status;
3197
3198         /* No other events can occur on the cm_id at this point. */
3199         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3200         cm_free_msg(msg);
3201         if (ret)
3202                 ib_destroy_cm_id(&cm_id_priv->id);
3203         return;
3204 discard:
3205         spin_unlock_irq(&cm_id_priv->lock);
3206         cm_free_msg(msg);
3207 }
3208
3209 static void cm_send_handler(struct ib_mad_agent *mad_agent,
3210                             struct ib_mad_send_wc *mad_send_wc)
3211 {
3212         struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3213         struct cm_port *port;
3214         u16 attr_index;
3215
3216         port = mad_agent->context;
3217         attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3218                                   msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3219
3220         /*
3221          * If the send was in response to a received message (context[0] is not
3222          * set to a cm_id), and is not a REJ, then it is a send that was
3223          * manually retried.
3224          */
3225         if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3226                 msg->retries = 1;
3227
3228         atomic_long_add(1 + msg->retries,
3229                         &port->counter_group[CM_XMIT].counter[attr_index]);
3230         if (msg->retries)
3231                 atomic_long_add(msg->retries,
3232                                 &port->counter_group[CM_XMIT_RETRIES].
3233                                 counter[attr_index]);
3234
3235         switch (mad_send_wc->status) {
3236         case IB_WC_SUCCESS:
3237         case IB_WC_WR_FLUSH_ERR:
3238                 cm_free_msg(msg);
3239                 break;
3240         default:
3241                 if (msg->context[0] && msg->context[1])
3242                         cm_process_send_error(msg, mad_send_wc->status);
3243                 else
3244                         cm_free_msg(msg);
3245                 break;
3246         }
3247 }
3248
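/* Dispatch a queued work item to the handler for its event type. */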
3249 static void cm_work_handler(struct work_struct *_work)
3250 {
3251         struct cm_work *work = container_of(_work, struct cm_work, work.work);
3252         int ret;
3253
3254         switch (work->cm_event.event) {
3255         case IB_CM_REQ_RECEIVED:
3256                 ret = cm_req_handler(work);
3257                 break;
3258         case IB_CM_MRA_RECEIVED:
3259                 ret = cm_mra_handler(work);
3260                 break;
3261         case IB_CM_REJ_RECEIVED:
3262                 ret = cm_rej_handler(work);
3263                 break;
3264         case IB_CM_REP_RECEIVED:
3265                 ret = cm_rep_handler(work);
3266                 break;
3267         case IB_CM_RTU_RECEIVED:
3268                 ret = cm_rtu_handler(work);
3269                 break;
3270         case IB_CM_USER_ESTABLISHED:
3271                 ret = cm_establish_handler(work);
3272                 break;
3273         case IB_CM_DREQ_RECEIVED:
3274                 ret = cm_dreq_handler(work);
3275                 break;
3276         case IB_CM_DREP_RECEIVED:
3277                 ret = cm_drep_handler(work);
3278                 break;
3279         case IB_CM_SIDR_REQ_RECEIVED:
3280                 ret = cm_sidr_req_handler(work);
3281                 break;
3282         case IB_CM_SIDR_REP_RECEIVED:
3283                 ret = cm_sidr_rep_handler(work);
3284                 break;
3285         case IB_CM_LAP_RECEIVED:
3286                 ret = cm_lap_handler(work);
3287                 break;
3288         case IB_CM_APR_RECEIVED:
3289                 ret = cm_apr_handler(work);
3290                 break;
3291         case IB_CM_TIMEWAIT_EXIT:
3292                 ret = cm_timewait_handler(work);
3293                 break;
3294         default:
3295                 ret = -EINVAL;
3296                 break;
3297         }
3298         if (ret)
3299                 cm_free_work(work);
3300 }
3301
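/*
 * A message arrived on the receive queue before the RTU was processed
 * (IB_EVENT_COMM_EST).  Mark the connection established and queue a
 * work item so the transition runs in the CM workqueue context.
 */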
3302 static int cm_establish(struct ib_cm_id *cm_id)
3303 {
3304         struct cm_id_private *cm_id_priv;
3305         struct cm_work *work;
3306         unsigned long flags;
3307         int ret = 0;
3308
3309         work = kmalloc(sizeof *work, GFP_ATOMIC);
3310         if (!work)
3311                 return -ENOMEM;
3312
3313         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3314         spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id->state) {
3317         case IB_CM_REP_SENT:
3318         case IB_CM_MRA_REP_RCVD:
3319                 cm_id->state = IB_CM_ESTABLISHED;
3320                 break;
3321         case IB_CM_ESTABLISHED:
3322                 ret = -EISCONN;
3323                 break;
3324         default:
3325                 ret = -EINVAL;
3326                 break;
3327         }
3328         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3329
3330         if (ret) {
3331                 kfree(work);
3332                 goto out;
3333         }
3334
3335         /*
3336          * The CM worker thread may try to destroy the cm_id before it
3337          * can execute this work item.  To prevent potential deadlock,
3338          * we need to find the cm_id once we're in the context of the
3339          * worker thread, rather than holding a reference on it.
3340          */
3341         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3342         work->local_id = cm_id->local_id;
3343         work->remote_id = cm_id->remote_id;
3344         work->mad_recv_wc = NULL;
3345         work->cm_event.event = IB_CM_USER_ESTABLISHED;
3346         queue_delayed_work(cm.wq, &work->work, 0);
3347 out:
3348         return ret;
3349 }
3350
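/*
 * Called via ib_cm_notify() for IB_EVENT_PATH_MIG: the hardware has
 * migrated to the alternate path, so make it the primary address vector.
 */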
3351 static int cm_migrate(struct ib_cm_id *cm_id)
3352 {
3353         struct cm_id_private *cm_id_priv;
3354         unsigned long flags;
3355         int ret = 0;
3356
3357         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3358         spin_lock_irqsave(&cm_id_priv->lock, flags);
3359         if (cm_id->state == IB_CM_ESTABLISHED &&
3360             (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3361              cm_id->lap_state == IB_CM_LAP_IDLE)) {
3362                 cm_id->lap_state = IB_CM_LAP_IDLE;
3363                 cm_id_priv->av = cm_id_priv->alt_av;
3364         } else
3365                 ret = -EINVAL;
3366         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3367
3368         return ret;
3369 }
3370
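/*
 * Consumers typically call this from their QP event handler, e.g. (a
 * sketch, assuming the cm_id was stored as the QP context):
 *
 *	static void qp_event_handler(struct ib_event *event, void *context)
 *	{
 *		struct ib_cm_id *cm_id = context;
 *
 *		if (event->event == IB_EVENT_COMM_EST ||
 *		    event->event == IB_EVENT_PATH_MIG)
 *			ib_cm_notify(cm_id, event->event);
 *	}
 */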
3371 int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3372 {
3373         int ret;
3374
3375         switch (event) {
3376         case IB_EVENT_COMM_EST:
3377                 ret = cm_establish(cm_id);
3378                 break;
3379         case IB_EVENT_PATH_MIG:
3380                 ret = cm_migrate(cm_id);
3381                 break;
3382         default:
3383                 ret = -EINVAL;
3384         }
3385         return ret;
3386 }
3387 EXPORT_SYMBOL(ib_cm_notify);
3388
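/*
 * MAD receive handler: map the attribute ID to a CM event, bump the
 * receive counter, and queue a work item sized to hold any path records
 * the message carries (one for a LAP, up to two for a REQ).
 */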
3389 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3390                             struct ib_mad_recv_wc *mad_recv_wc)
3391 {
3392         struct cm_port *port = mad_agent->context;
3393         struct cm_work *work;
3394         enum ib_cm_event_type event;
3395         u16 attr_id;
3396         int paths = 0;
3397
3398         switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
3399         case CM_REQ_ATTR_ID:
3400                 paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
3401                                                     alt_local_lid != 0);
3402                 event = IB_CM_REQ_RECEIVED;
3403                 break;
3404         case CM_MRA_ATTR_ID:
3405                 event = IB_CM_MRA_RECEIVED;
3406                 break;
3407         case CM_REJ_ATTR_ID:
3408                 event = IB_CM_REJ_RECEIVED;
3409                 break;
3410         case CM_REP_ATTR_ID:
3411                 event = IB_CM_REP_RECEIVED;
3412                 break;
3413         case CM_RTU_ATTR_ID:
3414                 event = IB_CM_RTU_RECEIVED;
3415                 break;
3416         case CM_DREQ_ATTR_ID:
3417                 event = IB_CM_DREQ_RECEIVED;
3418                 break;
3419         case CM_DREP_ATTR_ID:
3420                 event = IB_CM_DREP_RECEIVED;
3421                 break;
3422         case CM_SIDR_REQ_ATTR_ID:
3423                 event = IB_CM_SIDR_REQ_RECEIVED;
3424                 break;
3425         case CM_SIDR_REP_ATTR_ID:
3426                 event = IB_CM_SIDR_REP_RECEIVED;
3427                 break;
3428         case CM_LAP_ATTR_ID:
3429                 paths = 1;
3430                 event = IB_CM_LAP_RECEIVED;
3431                 break;
3432         case CM_APR_ATTR_ID:
3433                 event = IB_CM_APR_RECEIVED;
3434                 break;
3435         default:
3436                 ib_free_recv_mad(mad_recv_wc);
3437                 return;
3438         }
3439
3440         attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
3441         atomic_long_inc(&port->counter_group[CM_RECV].
3442                         counter[attr_id - CM_ATTR_ID_OFFSET]);
3443
3444         work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
3445                        GFP_KERNEL);
3446         if (!work) {
3447                 ib_free_recv_mad(mad_recv_wc);
3448                 return;
3449         }
3450
3451         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3452         work->cm_event.event = event;
3453         work->mad_recv_wc = mad_recv_wc;
3454         work->port = port;
3455         queue_delayed_work(cm.wq, &work->work, 0);
3456 }
3457
3458 static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3459                                 struct ib_qp_attr *qp_attr,
3460                                 int *qp_attr_mask)
3461 {
3462         unsigned long flags;
3463         int ret;
3464
3465         spin_lock_irqsave(&cm_id_priv->lock, flags);
3466         switch (cm_id_priv->id.state) {
3467         case IB_CM_REQ_SENT:
3468         case IB_CM_MRA_REQ_RCVD:
3469         case IB_CM_REQ_RCVD:
3470         case IB_CM_MRA_REQ_SENT:
3471         case IB_CM_REP_RCVD:
3472         case IB_CM_MRA_REP_SENT:
3473         case IB_CM_REP_SENT:
3474         case IB_CM_MRA_REP_RCVD:
3475         case IB_CM_ESTABLISHED:
3476                 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
3477                                 IB_QP_PKEY_INDEX | IB_QP_PORT;
3478                 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
3479                 if (cm_id_priv->responder_resources)
3480                         qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
3481                                                     IB_ACCESS_REMOTE_ATOMIC;
3482                 qp_attr->pkey_index = cm_id_priv->av.pkey_index;
3483                 qp_attr->port_num = cm_id_priv->av.port->port_num;
3484                 ret = 0;
3485                 break;
3486         default:
3487                 ret = -EINVAL;
3488                 break;
3489         }
3490         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3491         return ret;
3492 }
3493
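/*
 * RTR programs the primary (and any alternate) address vector, path MTU,
 * destination QPN and RQ PSN.  For RoCE, the source MAC and VLAN that
 * were resolved into the address vector are copied into the QP
 * attributes as well.
 */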
3494 static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3495                                struct ib_qp_attr *qp_attr,
3496                                int *qp_attr_mask)
3497 {
3498         unsigned long flags;
3499         int ret;
3500
3501         spin_lock_irqsave(&cm_id_priv->lock, flags);
3502         switch (cm_id_priv->id.state) {
3503         case IB_CM_REQ_RCVD:
3504         case IB_CM_MRA_REQ_SENT:
3505         case IB_CM_REP_RCVD:
3506         case IB_CM_MRA_REP_SENT:
3507         case IB_CM_REP_SENT:
3508         case IB_CM_MRA_REP_RCVD:
3509         case IB_CM_ESTABLISHED:
3510                 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3511                                 IB_QP_DEST_QPN | IB_QP_RQ_PSN;
3512                 qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3513                 if (!cm_id_priv->av.valid) {
3514                         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3515                         return -EINVAL;
3516                 }
3517                 if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
3518                         qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
3519                         *qp_attr_mask |= IB_QP_VID;
3520                 }
3521                 if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
3522                         memcpy(qp_attr->smac, cm_id_priv->av.smac,
3523                                sizeof(qp_attr->smac));
3524                         *qp_attr_mask |= IB_QP_SMAC;
3525                 }
3526                 if (cm_id_priv->alt_av.valid) {
3527                         if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
3528                                 qp_attr->alt_vlan_id =
3529                                         cm_id_priv->alt_av.ah_attr.vlan_id;
3530                                 *qp_attr_mask |= IB_QP_ALT_VID;
3531                         }
3532                         if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
3533                                 memcpy(qp_attr->alt_smac,
3534                                        cm_id_priv->alt_av.smac,
3535                                        sizeof(qp_attr->alt_smac));
3536                                 *qp_attr_mask |= IB_QP_ALT_SMAC;
3537                         }
3538                 }
3539                 qp_attr->path_mtu = cm_id_priv->path_mtu;
3540                 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3541                 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
3542                 if (cm_id_priv->qp_type == IB_QPT_RC ||
3543                     cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
3544                         *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
3545                                          IB_QP_MIN_RNR_TIMER;
3546                         qp_attr->max_dest_rd_atomic =
3547                                         cm_id_priv->responder_resources;
3548                         qp_attr->min_rnr_timer = 0;
3549                 }
3550                 if (cm_id_priv->alt_av.ah_attr.dlid) {
3551                         *qp_attr_mask |= IB_QP_ALT_PATH;
3552                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3553                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3554                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3555                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3556                 }
3557                 ret = 0;
3558                 break;
3559         default:
3560                 ret = -EINVAL;
3561                 break;
3562         }
3563         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3564         return ret;
3565 }
3566
3567 static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3568                                struct ib_qp_attr *qp_attr,
3569                                int *qp_attr_mask)
3570 {
3571         unsigned long flags;
3572         int ret;
3573
3574         spin_lock_irqsave(&cm_id_priv->lock, flags);
3575         switch (cm_id_priv->id.state) {
3576         /* Allow transition to RTS before sending REP */
3577         case IB_CM_REQ_RCVD:
3578         case IB_CM_MRA_REQ_SENT:
3579
3580         case IB_CM_REP_RCVD:
3581         case IB_CM_MRA_REP_SENT:
3582         case IB_CM_REP_SENT:
3583         case IB_CM_MRA_REP_RCVD:
3584         case IB_CM_ESTABLISHED:
3585                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
3586                         *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
3587                         qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
3588                         switch (cm_id_priv->qp_type) {
3589                         case IB_QPT_RC:
3590                         case IB_QPT_XRC_INI:
3591                                 *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
3592                                                  IB_QP_MAX_QP_RD_ATOMIC;
3593                                 qp_attr->retry_cnt = cm_id_priv->retry_count;
3594                                 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3595                                 qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
3596                                 /* fall through */
3597                         case IB_QPT_XRC_TGT:
3598                                 *qp_attr_mask |= IB_QP_TIMEOUT;
3599                                 qp_attr->timeout = cm_id_priv->av.timeout;
3600                                 break;
3601                         default:
3602                                 break;
3603                         }
3604                         if (cm_id_priv->alt_av.ah_attr.dlid) {
3605                                 *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
3606                                 qp_attr->path_mig_state = IB_MIG_REARM;
3607                         }
3608                 } else {
3609                         *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
3610                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3611                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3612                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3613                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3614                         qp_attr->path_mig_state = IB_MIG_REARM;
3615                 }
3616                 ret = 0;
3617                 break;
3618         default:
3619                 ret = -EINVAL;
3620                 break;
3621         }
3622         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3623         return ret;
3624 }
3625
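/*
 * Fill in the attributes for the caller's next QP state transition based
 * on the current connection state.  A consumer typically walks the QP
 * through INIT -> RTR -> RTS, e.g. (a sketch, assuming a QP and an
 * attribute struct owned by the caller):
 *
 *	qp_attr.qp_state = IB_QPS_RTR;
 *	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
 *	if (!ret)
 *		ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 */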
3626 int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3627                        struct ib_qp_attr *qp_attr,
3628                        int *qp_attr_mask)
3629 {
3630         struct cm_id_private *cm_id_priv;
3631         int ret;
3632
3633         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3634         switch (qp_attr->qp_state) {
3635         case IB_QPS_INIT:
3636                 ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
3637                 break;
3638         case IB_QPS_RTR:
3639                 ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
3640                 break;
3641         case IB_QPS_RTS:
3642                 ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
3643                 break;
3644         default:
3645                 ret = -EINVAL;
3646                 break;
3647         }
3648         return ret;
3649 }
3650 EXPORT_SYMBOL(ib_cm_init_qp_attr);
3651
3652 static void cm_get_ack_delay(struct cm_device *cm_dev)
3653 {
3654         struct ib_device_attr attr;
3655
3656         if (ib_query_device(cm_dev->ib_device, &attr))
3657                 cm_dev->ack_delay = 0; /* acks will rely on packet life time */
3658         else
3659                 cm_dev->ack_delay = attr.local_ca_ack_delay;
3660 }
3661
3662 static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
3663                                char *buf)
3664 {
3665         struct cm_counter_group *group;
3666         struct cm_counter_attribute *cm_attr;
3667
3668         group = container_of(obj, struct cm_counter_group, obj);
3669         cm_attr = container_of(attr, struct cm_counter_attribute, attr);
3670
3671         return sprintf(buf, "%ld\n",
3672                        atomic_long_read(&group->counter[cm_attr->index]));
3673 }
3674
3675 static const struct sysfs_ops cm_counter_ops = {
3676         .show = cm_show_counter
3677 };
3678
3679 static struct kobj_type cm_counter_obj_type = {
3680         .sysfs_ops = &cm_counter_ops,
3681         .default_attrs = cm_counter_default_attrs
3682 };
3683
3684 static void cm_release_port_obj(struct kobject *obj)
3685 {
3686         struct cm_port *cm_port;
3687
3688         cm_port = container_of(obj, struct cm_port, port_obj);
3689         kfree(cm_port);
3690 }
3691
3692 static struct kobj_type cm_port_obj_type = {
3693         .release = cm_release_port_obj
3694 };
3695
3696 static char *cm_devnode(struct device *dev, umode_t *mode)
3697 {
3698         if (mode)
3699                 *mode = 0666;
3700         return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
3701 }
3702
3703 struct class cm_class = {
3704         .owner   = THIS_MODULE,
3705         .name    = "infiniband_cm",
3706         .devnode = cm_devnode,
3707 };
3708 EXPORT_SYMBOL(cm_class);
3709
3710 static int cm_create_port_fs(struct cm_port *port)
3711 {
3712         int i, ret;
3713
3714         ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
3715                                    &port->cm_dev->device->kobj,
3716                                    "%d", port->port_num);
3717         if (ret) {
3718                 kfree(port);
3719                 return ret;
3720         }
3721
3722         for (i = 0; i < CM_COUNTER_GROUPS; i++) {
3723                 ret = kobject_init_and_add(&port->counter_group[i].obj,
3724                                            &cm_counter_obj_type,
3725                                            &port->port_obj,
3726                                            "%s", counter_group_names[i]);
3727                 if (ret)
3728                         goto error;
3729         }
3730
3731         return 0;
3732
3733 error:
3734         while (i--)
3735                 kobject_put(&port->counter_group[i].obj);
3736         kobject_put(&port->port_obj);
3737         return ret;
3739 }
3740
3741 static void cm_remove_port_fs(struct cm_port *port)
3742 {
3743         int i;
3744
3745         for (i = 0; i < CM_COUNTER_GROUPS; i++)
3746                 kobject_put(&port->counter_group[i].obj);
3747
3748         kobject_put(&port->port_obj);
3749 }
3750
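/*
 * Register CM support for a new device: create per-port sysfs counter
 * groups, register a GSI MAD agent on each CM-capable port, and set
 * IB_PORT_CM_SUP.  On failure all previously initialized ports are
 * unwound; if no port supports the CM the device entry is discarded.
 */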
3751 static void cm_add_one(struct ib_device *ib_device)
3752 {
3753         struct cm_device *cm_dev;
3754         struct cm_port *port;
3755         struct ib_mad_reg_req reg_req = {
3756                 .mgmt_class = IB_MGMT_CLASS_CM,
3757                 .mgmt_class_version = IB_CM_CLASS_VERSION,
3758         };
3759         struct ib_port_modify port_modify = {
3760                 .set_port_cap_mask = IB_PORT_CM_SUP
3761         };
3762         unsigned long flags;
3763         int ret;
3764         int count = 0;
3765         u8 i;
3766
3767         cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
3768                          ib_device->phys_port_cnt, GFP_KERNEL);
3769         if (!cm_dev)
3770                 return;
3771
3772         cm_dev->ib_device = ib_device;
3773         cm_get_ack_delay(cm_dev);
3774
3775         cm_dev->device = device_create(&cm_class, &ib_device->dev,
3776                                        MKDEV(0, 0), NULL,
3777                                        "%s", ib_device->name);
3778         if (IS_ERR(cm_dev->device)) {
3779                 kfree(cm_dev);
3780                 return;
3781         }
3782
3783         set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
3784         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3785                 if (!rdma_cap_ib_cm(ib_device, i))
3786                         continue;
3787
3788                 port = kzalloc(sizeof *port, GFP_KERNEL);
3789                 if (!port)
3790                         goto error1;
3791
3792                 cm_dev->port[i-1] = port;
3793                 port->cm_dev = cm_dev;
3794                 port->port_num = i;
3795
3796                 ret = cm_create_port_fs(port);
3797                 if (ret)
3798                         goto error1;
3799
3800                 port->mad_agent = ib_register_mad_agent(ib_device, i,
3801                                                         IB_QPT_GSI,
3802                                                         &reg_req,
3803                                                         0,
3804                                                         cm_send_handler,
3805                                                         cm_recv_handler,
3806                                                         port,
3807                                                         0);
3808                 if (IS_ERR(port->mad_agent))
3809                         goto error2;
3810
3811                 ret = ib_modify_port(ib_device, i, 0, &port_modify);
3812                 if (ret)
3813                         goto error3;
3814
3815                 count++;
3816         }
3817
3818         if (!count)
3819                 goto free;
3820
3821         ib_set_client_data(ib_device, &cm_client, cm_dev);
3822
3823         write_lock_irqsave(&cm.device_lock, flags);
3824         list_add_tail(&cm_dev->list, &cm.device_list);
3825         write_unlock_irqrestore(&cm.device_lock, flags);
3826         return;
3827
3828 error3:
3829         ib_unregister_mad_agent(port->mad_agent);
3830 error2:
3831         cm_remove_port_fs(port);
3832 error1:
3833         port_modify.set_port_cap_mask = 0;
3834         port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
3835         while (--i) {
3836                 if (!rdma_cap_ib_cm(ib_device, i))
3837                         continue;
3838
3839                 port = cm_dev->port[i-1];
3840                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
3841                 ib_unregister_mad_agent(port->mad_agent);
3842                 cm_remove_port_fs(port);
3843         }
3844 free:
3845         device_unregister(cm_dev->device);
3846         kfree(cm_dev);
3847 }
3848
3849 static void cm_remove_one(struct ib_device *ib_device)
3850 {
3851         struct cm_device *cm_dev;
3852         struct cm_port *port;
3853         struct ib_port_modify port_modify = {
3854                 .clr_port_cap_mask = IB_PORT_CM_SUP
3855         };
3856         unsigned long flags;
3857         int i;
3858
3859         cm_dev = ib_get_client_data(ib_device, &cm_client);
3860         if (!cm_dev)
3861                 return;
3862
3863         write_lock_irqsave(&cm.device_lock, flags);
3864         list_del(&cm_dev->list);
3865         write_unlock_irqrestore(&cm.device_lock, flags);
3866
3867         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3868                 if (!rdma_cap_ib_cm(ib_device, i))
3869                         continue;
3870
3871                 port = cm_dev->port[i-1];
3872                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
3873                 ib_unregister_mad_agent(port->mad_agent);
3874                 flush_workqueue(cm.wq);
3875                 cm_remove_port_fs(port);
3876         }
3877         device_unregister(cm_dev->device);
3878         kfree(cm_dev);
3879 }
3880
3881 static int __init ib_cm_init(void)
3882 {
3883         int ret;
3884
3885         memset(&cm, 0, sizeof cm);
3886         INIT_LIST_HEAD(&cm.device_list);
3887         rwlock_init(&cm.device_lock);
3888         spin_lock_init(&cm.lock);
3889         cm.listen_service_table = RB_ROOT;
3890         cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
3891         cm.remote_id_table = RB_ROOT;
3892         cm.remote_qp_table = RB_ROOT;
3893         cm.remote_sidr_table = RB_ROOT;
3894         idr_init(&cm.local_id_table);
3895         get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
3896         INIT_LIST_HEAD(&cm.timewait_list);
3897
        ret = class_register(&cm_class);
        if (ret)
                goto error1;
3903
3904         cm.wq = create_workqueue("ib_cm");
3905         if (!cm.wq) {
3906                 ret = -ENOMEM;
3907                 goto error2;
3908         }
3909
3910         ret = ib_register_client(&cm_client);
3911         if (ret)
3912                 goto error3;
3913
3914         return 0;
3915 error3:
3916         destroy_workqueue(cm.wq);
3917 error2:
3918         class_unregister(&cm_class);
3919 error1:
3920         idr_destroy(&cm.local_id_table);
3921         return ret;
3922 }
3923
3924 static void __exit ib_cm_cleanup(void)
3925 {
3926         struct cm_timewait_info *timewait_info, *tmp;
3927
3928         spin_lock_irq(&cm.lock);
3929         list_for_each_entry(timewait_info, &cm.timewait_list, list)
3930                 cancel_delayed_work(&timewait_info->work.work);
3931         spin_unlock_irq(&cm.lock);
3932
3933         ib_unregister_client(&cm_client);
3934         destroy_workqueue(cm.wq);
3935
3936         list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
3937                 list_del(&timewait_info->list);
3938                 kfree(timewait_info);
3939         }
3940
3941         class_unregister(&cm_class);
3942         idr_destroy(&cm.local_id_table);
3943 }
3944
3945 module_init(ib_cm_init);
3946 module_exit(ib_cm_cleanup);
3947