2 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 #include <linux/module.h>
33 #include <linux/list.h>
34 #include <linux/workqueue.h>
35 #include <linux/skbuff.h>
36 #include <linux/timer.h>
37 #include <linux/notifier.h>
38 #include <linux/inetdevice.h>
40 #include <linux/tcp.h>
42 #include <net/neighbour.h>
43 #include <net/netevent.h>
44 #include <net/route.h>
/*
 * Printable endpoint state names. The debug prints in this file index the
 * table with the numeric state value (states[epc->state]). The initializer
 * entries are not visible in this extract.
 */
48 static char *states[] = {
/*
 * Module parameters. Every parameter below is registered with mode 0644 and
 * its MODULE_PARM_DESC text states the default.
 */
/* Passed to V_RX_DACK_MODE() when building the RX_DATA_ACK credit word. */
64 static int dack_mode = 1;
65 module_param(dack_mode, int, 0644);
66 MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
/* Non-static; reported as ord/ird in the connect request upcall for MPA v1. */
68 int c4iw_max_read_depth = 8;
69 module_param(c4iw_max_read_depth, int, 0644);
70 MODULE_PARM_DESC(c4iw_max_read_depth, "Per-connection max ORD/IRD (default=8)");
/* When non-zero, send_connect() sets TSTAMPS_EN(1) in opt2. */
72 static int enable_tcp_timestamps;
73 module_param(enable_tcp_timestamps, int, 0644);
74 MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)");
/* NOTE(review): the use of this flag is not visible in this extract. */
76 static int enable_tcp_sack;
77 module_param(enable_tcp_sack, int, 0644);
78 MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)");
/* WND_SCALE_EN(1) is set in opt2 only if this is set and wscale is non-zero. */
80 static int enable_tcp_window_scaling = 1;
81 module_param(enable_tcp_window_scaling, int, 0644);
82 MODULE_PARM_DESC(enable_tcp_window_scaling,
83 "Enable tcp window scaling (default=1)");
/* NOTE(review): the definition of c4iw_debug is not visible in this extract. */
86 module_param(c4iw_debug, int, 0644);
87 MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)");
/* NOTE(review): the definition of peer2peer is not visible in this extract. */
90 module_param(peer2peer, int, 0644);
91 MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
/*
 * Selects which RTR bit (MPA_V2_RDMA_READ_RTR or MPA_V2_RDMA_WRITE_RTR) is
 * advertised in the MPA v2 connection parameters built in this file.
 */
93 static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
94 module_param(p2p_type, int, 0644);
95 MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: "
96 "1=RDMA_READ 0=RDMA_WRITE (default 1)");
/* Multiplied by HZ in start_ep_timer() to compute the timer expiry. */
98 static int ep_timeout_secs = 60;
99 module_param(ep_timeout_secs, int, 0644);
100 MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
101 "in seconds (default=60)");
/*
 * MPA revision used by default when initiating a connection. It is also the
 * highest revision accepted from a peer: the MPA header validation in this
 * file rejects any message whose revision is greater than mpa_rev.
 * The description names RFC 5044, the MPA specification.
 */
103 static int mpa_rev = 1;
104 module_param(mpa_rev, int, 0644);
105 MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
106 "1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft"
107 " compliant (default=1)");
/* When set, MPA_MARKERS is advertised in outgoing MPA request/reply flags. */
109 static int markers_enabled;
110 module_param(markers_enabled, int, 0644);
111 MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
/*
 * When set, MPA_CRC is advertised in outgoing MPA requests, and CRC is
 * enabled on the connection regardless of the flag received from the peer.
 */
113 static int crc_enabled = 1;
114 module_param(crc_enabled, int, 0644);
115 MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
/* Used by send_connect() for compute_wscale() and RCV_BUFSIZ(rcv_win>>10). */
117 static int rcv_win = 256 * 1024;
118 module_param(rcv_win, int, 0644);
119 MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
/* Reported to firmware as FW_FLOWC_MNEM_SNDBUF in send_flowc(). */
121 static int snd_win = 128 * 1024;
122 module_param(snd_win, int, 0644);
123 MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)");
/* NOTE(review): the users of workq and rxq are not visible in this extract. */
125 static struct workqueue_struct *workq;
127 static struct sk_buff_head rxq;
/* Forward declarations for helpers that are used before their definition. */
129 static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
130 static void ep_timeout(unsigned long arg);
131 static void connect_reply_upcall(struct c4iw_ep *ep, int status);
/*
 * NOTE(review): presumably the list of timed-out endpoints and its lock;
 * their users are not visible in this extract.
 */
133 static LIST_HEAD(timeout_list);
134 static spinlock_t timeout_lock;
/*
 * Arm the endpoint timer so that ep_timeout() runs, with the endpoint as its
 * argument, ep_timeout_secs seconds from now. A timer that is already
 * pending is deleted first and then re-armed.
 *
 * NOTE(review): source line 142 is not visible in this extract, so it cannot
 * be confirmed here whether the c4iw_get_ep() reference is taken on every
 * call or only when the timer was not already pending.
 */
136 static void start_ep_timer(struct c4iw_ep *ep)
138 PDBG("%s ep %p\n", __func__, ep);
139 if (timer_pending(&ep->timer)) {
140 PDBG("%s stopped / restarted timer ep %p\n", __func__, ep);
141 del_timer_sync(&ep->timer);
143 c4iw_get_ep(&ep->com);
/* Expiry is expressed in jiffies; HZ converts seconds to jiffies. */
144 ep->timer.expires = jiffies + ep_timeout_secs * HZ;
145 ep->timer.data = (unsigned long)ep;
146 ep->timer.function = ep_timeout;
147 add_timer(&ep->timer);
/*
 * Cancel the endpoint timer with del_timer_sync() and drop an endpoint
 * reference with c4iw_put_ep(). A WARN is emitted if the timer is not
 * pending when this is called.
 *
 * NOTE(review): the lines following the WARN (source 156-157) are not
 * visible; presumably the function returns early in that case - confirm.
 */
150 static void stop_ep_timer(struct c4iw_ep *ep)
152 PDBG("%s ep %p\n", __func__, ep);
153 if (!timer_pending(&ep->timer)) {
154 WARN(1, "%s timer stopped when its not running! "
155 "ep %p state %u\n", __func__, ep, ep->com.state);
158 del_timer_sync(&ep->timer);
159 c4iw_put_ep(&ep->com);
/*
 * Send an skb through the L2T entry l2e using cxgb4_l2t_send() on the first
 * entry of rdev->lldi.ports.
 *
 * c4iw_fatal_error(rdev) is checked first; that branch logs that the device
 * is in an error state and the skb is being dropped.
 * NOTE(review): the other statements of that branch are not visible in this
 * extract.
 *
 * On the send path, a negative result from cxgb4_l2t_send() is returned
 * unchanged and any non-negative result is reported as 0.
 */
162 static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
163 struct l2t_entry *l2e)
167 if (c4iw_fatal_error(rdev)) {
169 PDBG("%s - device in error state - dropping\n", __func__);
172 error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
175 return error < 0 ? error : 0;
/*
 * Send an skb on the offload path using cxgb4_ofld_send() on the first entry
 * of rdev->lldi.ports. Non-static, so it is callable from other files.
 *
 * c4iw_fatal_error(rdev) is checked first; that branch logs that the device
 * is in an error state and the skb is being dropped.
 * NOTE(review): the other statements of that branch are not visible in this
 * extract.
 *
 * On the send path, a negative result from cxgb4_ofld_send() is returned
 * unchanged and any non-negative result is reported as 0.
 */
178 int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
182 if (c4iw_fatal_error(rdev)) {
184 PDBG("%s - device in error state - dropping\n", __func__);
187 error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
190 return error < 0 ? error : 0;
/*
 * Build a CPL_TID_RELEASE message for hwtid and send it with
 * c4iw_ofld_send() on the CPL_PRIORITY_SETUP tx queue.
 *
 * The skb argument is offered to get_skb() for reuse; get_skb() may return
 * a different, newly allocated skb.
 * NOTE(review): source lines 198-199 are not visible in this extract;
 * presumably they handle a failed allocation - confirm.
 */
193 static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
195 struct cpl_tid_release *req;
197 skb = get_skb(skb, sizeof *req, GFP_KERNEL);
200 req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
201 INIT_TP_WR(req, hwtid);
202 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
203 set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
204 c4iw_ofld_send(rdev, skb);
/*
 * Derive ep->emss from the TCP options word opt: the MSS index extracted by
 * GET_TCPOPT_MSS() selects an entry of the device MTU table, from which 40
 * is subtracted.
 * NOTE(review): presumably 40 is the IP plus TCP header size - confirm.
 * NOTE(review): the statement controlled by the GET_TCPOPT_TSTAMP() test and
 * the tail of the debug print are not visible in this extract.
 */
208 static void set_emss(struct c4iw_ep *ep, u16 opt)
210 ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - 40;
212 if (GET_TCPOPT_TSTAMP(opt))
216 PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, GET_TCPOPT_MSS(opt),
/*
 * Return the endpoint state, taking epc->mutex around the access.
 * NOTE(review): the read itself and the return statement are not visible in
 * this extract.
 */
220 static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
222 enum c4iw_ep_state state;
224 mutex_lock(&epc->mutex);
226 mutex_unlock(&epc->mutex);
/*
 * Lock-free variant of state_set(); state_set() calls it while holding
 * epc->mutex. NOTE(review): the body is not visible in this extract.
 */
230 static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
/*
 * Change the endpoint state to new while holding epc->mutex. The old and new
 * state names are logged before the change is applied by __state_set().
 */
235 static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
237 mutex_lock(&epc->mutex);
238 PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
239 __state_set(epc, new);
240 mutex_unlock(&epc->mutex);
/*
 * Allocate a zeroed endpoint of size bytes with kzalloc(size, gfp) and
 * initialise the common part at its start: kref, mutex and wr_wait.
 *
 * NOTE(review): source line 249 is not visible; presumably the
 * initialisation is guarded by a NULL check on the allocation - confirm.
 * The return statement is not visible either.
 */
244 static void *alloc_ep(int size, gfp_t gfp)
246 struct c4iw_ep_common *epc;
248 epc = kzalloc(size, gfp);
250 kref_init(&epc->kref);
251 mutex_init(&epc->mutex);
252 c4iw_init_wr_wait(&epc->wr_wait);
254 PDBG("%s alloc ep %p\n", __func__, epc);
/*
 * Tear down the endpoint that embeds kref (as com.kref).
 * NOTE(review): presumably this is the kref release function invoked when
 * the last reference is dropped - confirm against c4iw_put_ep().
 *
 * If the RELEASE_RESOURCES flag is set, the hardware TID is removed from the
 * TID table and the route (dst) and L2T entry references are released.
 * NOTE(review): the freeing of the endpoint memory is not visible in this
 * extract.
 */
258 void _c4iw_free_ep(struct kref *kref)
262 ep = container_of(kref, struct c4iw_ep, com.kref);
263 PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
264 if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
265 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
266 dst_release(ep->dst);
267 cxgb4_l2t_release(ep->l2t);
/*
 * Mark the endpoint so that _c4iw_free_ep() releases its TID, dst and L2T
 * entry, then drop one endpoint reference.
 */
272 static void release_ep_resources(struct c4iw_ep *ep)
274 set_bit(RELEASE_RESOURCES, &ep->com.flags);
275 c4iw_put_ep(&ep->com);
/*
 * Translate a CPL_ERR_* status code into a negative errno value.
 * NOTE(review): only the CPL_ERR_ARP_MISS mapping (-EHOSTUNREACH) is visible
 * in this extract; the return values of the other cases and the default
 * case are not shown.
 */
278 static int status2errno(int status)
283 case CPL_ERR_CONN_RESET:
285 case CPL_ERR_ARP_MISS:
286 return -EHOSTUNREACH;
287 case CPL_ERR_CONN_TIMEDOUT:
289 case CPL_ERR_TCAM_FULL:
291 case CPL_ERR_CONN_EXIST:
299 * Try and reuse skbs already allocated...
/*
 * Return an skb able to hold len bytes. The caller-supplied skb is a
 * candidate for reuse only if it is non-NULL, linear and not cloned; a
 * reused skb has its transport header reset. Otherwise a new skb is
 * obtained with alloc_skb(len, gfp), which may return NULL.
 * NOTE(review): the remaining statements of the reuse branch, the else
 * keyword and the return are not visible in this extract.
 */
301 static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
303 if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
306 skb_reset_transport_header(skb);
308 skb = alloc_skb(len, gfp);
/*
 * Look up an IPv4 output route in init_net for a TCP flow between
 * local_ip:local_port and peer_ip:peer_port using ip_route_output_ports().
 * NOTE(review): the trailing arguments of the call, the error handling and
 * the return statement are not visible in this extract; dev is not used in
 * the visible lines.
 */
313 static struct rtable *find_route(struct c4iw_dev *dev, __be32 local_ip,
314 __be32 peer_ip, __be16 local_port,
315 __be16 peer_port, u8 tos)
320 rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
321 peer_port, local_port, IPPROTO_TCP,
/*
 * ARP failure handler installed with t4_set_arp_err_handler() for messages
 * that need no recovery. Logs the handle.
 * NOTE(review): the rest of the body is not visible; presumably the skb is
 * freed - confirm.
 */
328 static void arp_failure_discard(void *handle, struct sk_buff *skb)
330 PDBG("%s c4iw_dev %p\n", __func__, handle);
335 * Handle an ARP failure for an active open.
/*
 * ARP failure handler installed by send_connect() for the active open
 * request. Logs an error.
 * NOTE(review): the rest of the body is not visible in this extract;
 * presumably the skb is freed - confirm.
 */
337 static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
339 printk(KERN_ERR MOD "ARP failure during connect\n");
344 * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
/*
 * ARP failure handler installed by send_abort(). handle is the c4iw_rdev
 * passed to t4_set_arp_err_handler(). The pending abort request is rewritten
 * to CPL_ABORT_NO_RST and resent with c4iw_ofld_send(), which does not go
 * through an L2T entry.
 */
347 static void abort_arp_failure(void *handle, struct sk_buff *skb)
349 struct c4iw_rdev *rdev = handle;
350 struct cpl_abort_req *req = cplhdr(skb);
352 PDBG("%s rdev %p\n", __func__, rdev);
353 req->cmd = CPL_ABORT_NO_RST;
354 c4iw_ofld_send(rdev, skb);
357 static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
359 unsigned int flowclen = 80;
360 struct fw_flowc_wr *flowc;
363 skb = get_skb(skb, flowclen, GFP_KERNEL);
364 flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen);
366 flowc->op_to_nparams = cpu_to_be32(FW_WR_OP(FW_FLOWC_WR) |
367 FW_FLOWC_WR_NPARAMS(8));
368 flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(flowclen,
369 16)) | FW_WR_FLOWID(ep->hwtid));
371 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
372 flowc->mnemval[0].val = cpu_to_be32(PCI_FUNC(ep->com.dev->rdev.lldi.pdev->devfn) << 8);
373 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
374 flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan);
375 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
376 flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan);
377 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
378 flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid);
379 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
380 flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq);
381 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
382 flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
383 flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
384 flowc->mnemval[6].val = cpu_to_be32(snd_win);
385 flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
386 flowc->mnemval[7].val = cpu_to_be32(ep->emss);
387 /* Pad WR to 16 byte boundary */
388 flowc->mnemval[8].mnemonic = 0;
389 flowc->mnemval[8].val = 0;
390 for (i = 0; i < 9; i++) {
391 flowc->mnemval[i].r4[0] = 0;
392 flowc->mnemval[i].r4[1] = 0;
393 flowc->mnemval[i].r4[2] = 0;
396 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
397 c4iw_ofld_send(&ep->com.dev->rdev, skb);
/*
 * Send a CPL_CLOSE_CON_REQ for ep->hwtid.
 *
 * A new skb of the request size rounded up to 16 bytes is allocated with the
 * caller-supplied gfp flags, zeroed, queued on the data priority tx queue
 * and sent through the L2T entry of the endpoint. ARP failures are handled
 * by arp_failure_discard().
 *
 * Returns the result of c4iw_l2t_send().
 * NOTE(review): the allocation-failure branch logs an error; its return
 * value is not visible in this extract.
 */
400 static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
402 struct cpl_close_con_req *req;
404 int wrlen = roundup(sizeof *req, 16);
406 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
407 skb = get_skb(NULL, wrlen, gfp);
409 printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
412 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
413 t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
414 req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
415 memset(req, 0, wrlen);
416 INIT_TP_WR(req, ep->hwtid);
417 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
419 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
/*
 * Send a CPL_ABORT_REQ with command CPL_ABORT_SEND_RST for ep->hwtid.
 *
 * skb is offered to get_skb() for reuse. The request is zeroed, queued on
 * the data priority tx queue and sent through the L2T entry of the
 * endpoint. On ARP failure, abort_arp_failure() is called with the rdev as
 * its handle.
 *
 * Returns the result of c4iw_l2t_send().
 * NOTE(review): the allocation-failure branch logs an error; its return
 * value is not visible in this extract.
 */
422 static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
424 struct cpl_abort_req *req;
425 int wrlen = roundup(sizeof *req, 16);
427 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
428 skb = get_skb(skb, wrlen, gfp);
430 printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
434 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
435 t4_set_arp_err_handler(skb, &ep->com.dev->rdev, abort_arp_failure);
436 req = (struct cpl_abort_req *) skb_put(skb, wrlen);
437 memset(req, 0, wrlen);
438 INIT_TP_WR(req, ep->hwtid);
439 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
440 req->cmd = CPL_ABORT_SEND_RST;
441 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
/*
 * Send a CPL_ACT_OPEN_REQ to start an active TCP open for the endpoint.
 *
 * The request carries the local and remote IPv4 address and port from
 * ep->com, plus two option words. opt0 includes keep-alive, the L2T index,
 * tx channel, source MAC selector, ULP mode TCPDDP and the receive buffer
 * size. opt2 includes the RSS queue and, depending on module parameters,
 * the timestamp and window-scale enables.
 * NOTE(review): several terms of opt0 and opt2 are on lines that are not
 * visible in this extract.
 *
 * The message is queued on the setup priority tx queue ep->ctrlq_idx and
 * sent through the L2T entry of the endpoint; ARP failures go to
 * act_open_req_arp_failure().
 *
 * Returns the result of c4iw_l2t_send().
 * NOTE(review): the allocation-failure branch logs an error; its return
 * value is not visible in this extract.
 */
444 static int send_connect(struct c4iw_ep *ep)
446 struct cpl_act_open_req *req;
450 unsigned int mtu_idx;
452 int wrlen = roundup(sizeof *req, 16);
454 PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid);
456 skb = get_skb(NULL, wrlen, GFP_KERNEL);
458 printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
462 set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
/* Pick the MTU table index for ep->mtu; the index is stored in mtu_idx. */
464 cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
465 wscale = compute_wscale(rcv_win);
466 opt0 = KEEP_ALIVE(1) |
470 L2T_IDX(ep->l2t->idx) |
471 TX_CHAN(ep->tx_chan) |
472 SMAC_SEL(ep->smac_idx) |
474 ULP_MODE(ULP_MODE_TCPDDP) |
/* The receive window is passed to RCV_BUFSIZ() shifted right by 10 bits. */
475 RCV_BUFSIZ(rcv_win>>10);
476 opt2 = RX_CHANNEL(0) |
477 RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
478 if (enable_tcp_timestamps)
479 opt2 |= TSTAMPS_EN(1);
482 if (wscale && enable_tcp_window_scaling)
483 opt2 |= WND_SCALE_EN(1);
484 t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure);
486 req = (struct cpl_act_open_req *) skb_put(skb, wrlen);
/* The TID field of the opcode combines the RSS queue id and the atid. */
488 OPCODE_TID(req) = cpu_to_be32(
489 MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ((ep->rss_qid<<14)|ep->atid)));
490 req->local_port = ep->com.local_addr.sin_port;
491 req->peer_port = ep->com.remote_addr.sin_port;
492 req->local_ip = ep->com.local_addr.sin_addr.s_addr;
493 req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
494 req->opt0 = cpu_to_be64(opt0);
496 req->opt2 = cpu_to_be32(opt2);
497 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
/*
 * Build and send the MPA start request as immediate data of a
 * FW_OFLD_TX_DATA_WR, then move the endpoint to MPA_REQ_SENT and mark it as
 * the initiator.
 *
 * mpa_rev_to_use (declared on a signature line that is not visible in this
 * extract) selects the MPA revision. For revision 2 a struct
 * mpa_v2_conn_params carrying ird/ord and the peer-to-peer/RTR bits is
 * placed in front of the private data and counted in private_data_size.
 * The private data itself is copied from ep->mpa_pkt, starting after the
 * MPA header, for ep->plen bytes.
 *
 * skb must not be cloned (BUG_ON); it is offered to get_skb() for reuse.
 * NOTE(review): the connect_reply_upcall(ep, -ENOMEM) call presumably sits
 * in the allocation-failure branch, whose condition is not visible here.
 * NOTE(review): the return value of c4iw_l2t_send() is not checked in the
 * visible lines.
 */
500 static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
504 struct fw_ofld_tx_data_wr *req;
505 struct mpa_message *mpa;
506 struct mpa_v2_conn_params mpa_v2_params;
508 PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
510 BUG_ON(skb_cloned(skb));
/* Total MPA message length: header, private data, optional v2 params. */
512 mpalen = sizeof(*mpa) + ep->plen;
513 if (mpa_rev_to_use == 2)
514 mpalen += sizeof(struct mpa_v2_conn_params);
/* The work request length is rounded up to a multiple of 16 bytes. */
515 wrlen = roundup(mpalen + sizeof *req, 16);
516 skb = get_skb(skb, wrlen, GFP_KERNEL);
518 connect_reply_upcall(ep, -ENOMEM);
521 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
523 req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
524 memset(req, 0, wrlen);
525 req->op_to_immdlen = cpu_to_be32(
526 FW_WR_OP(FW_OFLD_TX_DATA_WR) |
528 FW_WR_IMMDLEN(mpalen));
529 req->flowid_len16 = cpu_to_be32(
530 FW_WR_FLOWID(ep->hwtid) |
531 FW_WR_LEN16(wrlen >> 4));
532 req->plen = cpu_to_be32(mpalen);
533 req->tunnel_to_proxy = cpu_to_be32(
534 FW_OFLD_TX_DATA_WR_FLUSH(1) |
535 FW_OFLD_TX_DATA_WR_SHOVE(1));
/* The MPA message starts immediately after the work request header. */
537 mpa = (struct mpa_message *)(req + 1);
538 memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
539 mpa->flags = (crc_enabled ? MPA_CRC : 0) |
540 (markers_enabled ? MPA_MARKERS : 0) |
541 (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
542 mpa->private_data_size = htons(ep->plen);
543 mpa->revision = mpa_rev_to_use;
/* Record that revision 1 has been attempted and clear the retry request. */
544 if (mpa_rev_to_use == 1) {
545 ep->tried_with_mpa_v1 = 1;
546 ep->retry_with_mpa_v1 = 0;
549 if (mpa_rev_to_use == 2) {
550 mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
551 sizeof (struct mpa_v2_conn_params));
552 mpa_v2_params.ird = htons((u16)ep->ird);
553 mpa_v2_params.ord = htons((u16)ep->ord);
556 mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
557 if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
559 htons(MPA_V2_RDMA_WRITE_RTR);
560 else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
562 htons(MPA_V2_RDMA_READ_RTR);
564 memcpy(mpa->private_data, &mpa_v2_params,
565 sizeof(struct mpa_v2_conn_params));
568 memcpy(mpa->private_data +
569 sizeof(struct mpa_v2_conn_params),
570 ep->mpa_pkt + sizeof(*mpa), ep->plen);
573 memcpy(mpa->private_data,
574 ep->mpa_pkt + sizeof(*mpa), ep->plen);
577 * Reference the mpa skb. This ensures the data area
578 * will remain in memory until the hw acks the tx.
579 * Function fw4_ack() will deref it.
582 t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
585 c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
587 state_set(&ep->com, MPA_REQ_SENT);
588 ep->mpa_attr.initiator = 1;
/*
 * Build and send an MPA reply with the MPA_REJECT flag set, carried as
 * immediate data of a FW_OFLD_TX_DATA_WR.
 *
 * pdata/plen is the private data returned to the peer. When the connection
 * negotiated MPA revision 2 with enhanced RDMA connection setup, a struct
 * mpa_v2_conn_params (ird/ord plus peer-to-peer/RTR bits) is placed in
 * front of the private data and counted in private_data_size.
 *
 * The revision field carries the negotiated revision
 * (ep->mpa_attr.version), matching send_mpa_reply() and the version test
 * used below. Using the local mpa_rev module parameter instead could
 * advertise revision 2 to a revision 1 peer.
 *
 * Returns the result of c4iw_l2t_send().
 * NOTE(review): the allocation-failure branch logs an error; its return
 * value is not visible in this extract.
 */
592 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
595 struct fw_ofld_tx_data_wr *req;
596 struct mpa_message *mpa;
598 struct mpa_v2_conn_params mpa_v2_params;
600 PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
/* Total MPA message length: header, private data, optional v2 params. */
602 mpalen = sizeof(*mpa) + plen;
603 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
604 mpalen += sizeof(struct mpa_v2_conn_params);
605 wrlen = roundup(mpalen + sizeof *req, 16);
607 skb = get_skb(NULL, wrlen, GFP_KERNEL);
609 printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
612 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
614 req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
615 memset(req, 0, wrlen);
616 req->op_to_immdlen = cpu_to_be32(
617 FW_WR_OP(FW_OFLD_TX_DATA_WR) |
619 FW_WR_IMMDLEN(mpalen));
620 req->flowid_len16 = cpu_to_be32(
621 FW_WR_FLOWID(ep->hwtid) |
622 FW_WR_LEN16(wrlen >> 4));
623 req->plen = cpu_to_be32(mpalen);
624 req->tunnel_to_proxy = cpu_to_be32(
625 FW_OFLD_TX_DATA_WR_FLUSH(1) |
626 FW_OFLD_TX_DATA_WR_SHOVE(1));
/* The MPA message starts immediately after the work request header. */
628 mpa = (struct mpa_message *)(req + 1);
629 memset(mpa, 0, sizeof(*mpa));
630 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
631 mpa->flags = MPA_REJECT;
/* Reply with the revision negotiated for this endpoint. */
632 mpa->revision = ep->mpa_attr.version;
633 mpa->private_data_size = htons(plen);
635 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
636 mpa->flags |= MPA_ENHANCED_RDMA_CONN;
637 mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
638 sizeof (struct mpa_v2_conn_params));
639 mpa_v2_params.ird = htons(((u16)ep->ird) |
640 (peer2peer ? MPA_V2_PEER2PEER_MODEL :
642 mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
644 FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
645 MPA_V2_RDMA_WRITE_RTR : p2p_type ==
646 FW_RI_INIT_P2PTYPE_READ_REQ ?
647 MPA_V2_RDMA_READ_RTR : 0) : 0));
648 memcpy(mpa->private_data, &mpa_v2_params,
649 sizeof(struct mpa_v2_conn_params));
652 memcpy(mpa->private_data +
653 sizeof(struct mpa_v2_conn_params), pdata, plen);
656 memcpy(mpa->private_data, pdata, plen);
659 * Reference the mpa skb again. This ensures the data area
660 * will remain in memory until the hw acks the tx.
661 * Function fw4_ack() will deref it.
/* This repeats the tx queue selection already made above. */
664 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
665 t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
668 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
/*
 * Build and send the MPA start reply that accepts a connection, carried as
 * immediate data of a FW_OFLD_TX_DATA_WR, and move the endpoint to
 * MPA_REP_SENT.
 *
 * pdata/plen is the private data returned to the peer. The revision field
 * is the negotiated revision (ep->mpa_attr.version). When that revision is
 * 2 with enhanced RDMA connection setup, a struct mpa_v2_conn_params
 * (ird/ord plus peer-to-peer/RTR bits) is placed in front of the private
 * data and counted in private_data_size.
 *
 * Note that the state is changed before the send is attempted.
 *
 * Returns the result of c4iw_l2t_send().
 * NOTE(review): the allocation-failure branch logs an error; its return
 * value is not visible in this extract.
 */
671 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
674 struct fw_ofld_tx_data_wr *req;
675 struct mpa_message *mpa;
677 struct mpa_v2_conn_params mpa_v2_params;
679 PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
/* Total MPA message length: header, private data, optional v2 params. */
681 mpalen = sizeof(*mpa) + plen;
682 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
683 mpalen += sizeof(struct mpa_v2_conn_params);
684 wrlen = roundup(mpalen + sizeof *req, 16);
686 skb = get_skb(NULL, wrlen, GFP_KERNEL);
688 printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
691 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
693 req = (struct fw_ofld_tx_data_wr *) skb_put(skb, wrlen);
694 memset(req, 0, wrlen);
695 req->op_to_immdlen = cpu_to_be32(
696 FW_WR_OP(FW_OFLD_TX_DATA_WR) |
698 FW_WR_IMMDLEN(mpalen));
699 req->flowid_len16 = cpu_to_be32(
700 FW_WR_FLOWID(ep->hwtid) |
701 FW_WR_LEN16(wrlen >> 4));
702 req->plen = cpu_to_be32(mpalen);
703 req->tunnel_to_proxy = cpu_to_be32(
704 FW_OFLD_TX_DATA_WR_FLUSH(1) |
705 FW_OFLD_TX_DATA_WR_SHOVE(1));
/* The MPA message starts immediately after the work request header. */
707 mpa = (struct mpa_message *)(req + 1);
708 memset(mpa, 0, sizeof(*mpa));
709 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
/* CRC follows the negotiated attribute; markers follow the module param. */
710 mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
711 (markers_enabled ? MPA_MARKERS : 0);
712 mpa->revision = ep->mpa_attr.version;
713 mpa->private_data_size = htons(plen);
715 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
716 mpa->flags |= MPA_ENHANCED_RDMA_CONN;
717 mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
718 sizeof (struct mpa_v2_conn_params));
719 mpa_v2_params.ird = htons((u16)ep->ird);
720 mpa_v2_params.ord = htons((u16)ep->ord);
/* Peer-to-peer bits are only advertised if the request negotiated them. */
721 if (peer2peer && (ep->mpa_attr.p2p_type !=
722 FW_RI_INIT_P2PTYPE_DISABLED)) {
723 mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
725 if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
727 htons(MPA_V2_RDMA_WRITE_RTR);
728 else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
730 htons(MPA_V2_RDMA_READ_RTR);
733 memcpy(mpa->private_data, &mpa_v2_params,
734 sizeof(struct mpa_v2_conn_params));
737 memcpy(mpa->private_data +
738 sizeof(struct mpa_v2_conn_params), pdata, plen);
741 memcpy(mpa->private_data, pdata, plen);
744 * Reference the mpa skb. This ensures the data area
745 * will remain in memory until the hw acks the tx.
746 * Function fw4_ack() will deref it.
749 t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
751 state_set(&ep->com, MPA_REP_SENT);
752 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
/*
 * Handle a CPL_ACT_ESTABLISH message: the active open identified by the
 * atid in the message has completed and the hardware has assigned a tid.
 *
 * The endpoint is looked up by atid, the new tid is inserted into the TID
 * table, the initial send/receive sequence numbers and the effective MSS
 * are recorded, and the atid is freed. MPA negotiation then starts: a FLOWC
 * work request is sent on a fresh skb, followed by the MPA request, which
 * reuses the received skb. Revision 1 is forced when ep->retry_with_mpa_v1
 * is set.
 * NOTE(review): presumably the mpa_rev variant is the else branch; the else
 * keyword, the store of tid into the endpoint and the return statement are
 * not visible in this extract.
 */
755 static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
758 struct cpl_act_establish *req = cplhdr(skb);
759 unsigned int tid = GET_TID(req);
760 unsigned int atid = GET_TID_TID(ntohl(req->tos_atid));
761 struct tid_info *t = dev->rdev.lldi.tids;
763 ep = lookup_atid(t, atid);
765 PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
766 be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
768 dst_confirm(ep->dst);
770 /* setup the hwtid for this connection */
772 cxgb4_insert_tid(t, ep, tid);
774 ep->snd_seq = be32_to_cpu(req->snd_isn);
775 ep->rcv_seq = be32_to_cpu(req->rcv_isn);
777 set_emss(ep, ntohs(req->tcp_opt));
779 /* dealloc the atid */
780 cxgb4_free_atid(t, atid);
782 /* start MPA negotiation */
783 send_flowc(ep, NULL);
784 if (ep->retry_with_mpa_v1)
785 send_mpa_req(ep, skb, 1);
787 send_mpa_req(ep, skb, mpa_rev);
/*
 * Deliver IW_CM_EVENT_CLOSE (status 0) to the cm_id event handler, then
 * drop the cm_id reference and clear ep->com.cm_id.
 * NOTE(review): source line 799 is not visible; presumably the delivery is
 * guarded by a check that ep->com.cm_id is set - confirm.
 */
792 static void close_complete_upcall(struct c4iw_ep *ep)
794 struct iw_cm_event event;
796 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
797 memset(&event, 0, sizeof(event));
798 event.event = IW_CM_EVENT_CLOSE;
800 PDBG("close complete delivered ep %p cm_id %p tid %u\n",
801 ep, ep->com.cm_id, ep->hwtid);
802 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
803 ep->com.cm_id->rem_ref(ep->com.cm_id);
804 ep->com.cm_id = NULL;
/*
 * Abort the connection: deliver the close-complete upcall, move the
 * endpoint to ABORTING and send an abort request, offering skb for reuse.
 * Returns the result of send_abort().
 */
809 static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
811 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
812 close_complete_upcall(ep);
813 state_set(&ep->com, ABORTING);
814 return send_abort(ep, skb, gfp);
/*
 * Deliver IW_CM_EVENT_DISCONNECT to the cm_id event handler. The cm_id
 * reference is kept.
 * NOTE(review): source line 824 is not visible; presumably the delivery is
 * guarded by a check that ep->com.cm_id is set - confirm.
 */
817 static void peer_close_upcall(struct c4iw_ep *ep)
819 struct iw_cm_event event;
821 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
822 memset(&event, 0, sizeof(event));
823 event.event = IW_CM_EVENT_DISCONNECT;
825 PDBG("peer close delivered ep %p cm_id %p tid %u\n",
826 ep, ep->com.cm_id, ep->hwtid);
827 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
/*
 * Deliver IW_CM_EVENT_CLOSE with status -ECONNRESET to the cm_id event
 * handler, then drop the cm_id reference and clear ep->com.cm_id.
 * NOTE(review): source line 839 is not visible; presumably the delivery is
 * guarded by a check that ep->com.cm_id is set - confirm.
 */
831 static void peer_abort_upcall(struct c4iw_ep *ep)
833 struct iw_cm_event event;
835 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
836 memset(&event, 0, sizeof(event));
837 event.event = IW_CM_EVENT_CLOSE;
838 event.status = -ECONNRESET;
840 PDBG("abort delivered ep %p cm_id %p tid %u\n", ep,
841 ep->com.cm_id, ep->hwtid);
842 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
843 ep->com.cm_id->rem_ref(ep->com.cm_id);
844 ep->com.cm_id = NULL;
/*
 * Deliver IW_CM_EVENT_CONNECT_REPLY with the given status and the local and
 * remote addresses of the endpoint.
 *
 * For status 0 or -ECONNREFUSED the private data received from the peer is
 * attached. If MPA v1 was not tried, the data is taken to be MPA v2: the
 * leading struct mpa_v2_conn_params is skipped and its size is subtracted
 * from ep->plen. Otherwise the private data starts right after the MPA
 * header.
 *
 * NOTE(review): the conditions around the event delivery and around the
 * final rem_ref/clear of cm_id are on lines not visible in this extract.
 */
849 static void connect_reply_upcall(struct c4iw_ep *ep, int status)
851 struct iw_cm_event event;
853 PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status);
854 memset(&event, 0, sizeof(event));
855 event.event = IW_CM_EVENT_CONNECT_REPLY;
856 event.status = status;
857 event.local_addr = ep->com.local_addr;
858 event.remote_addr = ep->com.remote_addr;
860 if ((status == 0) || (status == -ECONNREFUSED)) {
861 if (!ep->tried_with_mpa_v1) {
862 /* this means MPA_v2 is used */
863 event.private_data_len = ep->plen -
864 sizeof(struct mpa_v2_conn_params);
865 event.private_data = ep->mpa_pkt +
866 sizeof(struct mpa_message) +
867 sizeof(struct mpa_v2_conn_params);
869 /* this means MPA_v1 is used */
870 event.private_data_len = ep->plen;
871 event.private_data = ep->mpa_pkt +
872 sizeof(struct mpa_message);
876 PDBG("%s ep %p tid %u status %d\n", __func__, ep,
878 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
881 ep->com.cm_id->rem_ref(ep->com.cm_id);
882 ep->com.cm_id = NULL;
/*
 * Deliver IW_CM_EVENT_CONNECT_REQUEST for a passively opened endpoint to
 * the cm_id of its parent (listening) endpoint. The new endpoint is passed
 * as provider_data.
 *
 * If MPA v1 was not tried, the private data is taken to be MPA v2 and the
 * leading struct mpa_v2_conn_params is skipped. For MPA v1 the event
 * reports c4iw_max_read_depth as both ord and ird.
 * NOTE(review): source lines 899-900 are not visible; presumably they set
 * event.ord and event.ird for the v2 case - confirm.
 *
 * The handler is only called if the parent is not in the DEAD state; a
 * reference on ep is taken first. The reference on the parent endpoint is
 * then dropped and the parent pointer cleared.
 */
887 static void connect_request_upcall(struct c4iw_ep *ep)
889 struct iw_cm_event event;
891 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
892 memset(&event, 0, sizeof(event));
893 event.event = IW_CM_EVENT_CONNECT_REQUEST;
894 event.local_addr = ep->com.local_addr;
895 event.remote_addr = ep->com.remote_addr;
896 event.provider_data = ep;
897 if (!ep->tried_with_mpa_v1) {
898 /* this means MPA_v2 is used */
901 event.private_data_len = ep->plen -
902 sizeof(struct mpa_v2_conn_params);
903 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
904 sizeof(struct mpa_v2_conn_params);
906 /* this means MPA_v1 is used. Send max supported */
907 event.ord = c4iw_max_read_depth;
908 event.ird = c4iw_max_read_depth;
909 event.private_data_len = ep->plen;
910 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
912 if (state_read(&ep->parent_ep->com) != DEAD) {
913 c4iw_get_ep(&ep->com);
914 ep->parent_ep->com.cm_id->event_handler(
915 ep->parent_ep->com.cm_id,
918 c4iw_put_ep(&ep->parent_ep->com);
919 ep->parent_ep = NULL;
/*
 * Deliver IW_CM_EVENT_ESTABLISHED to the cm_id event handler.
 * NOTE(review): source lines 929-931 are not visible; they may set further
 * event fields and guard the delivery on ep->com.cm_id - confirm.
 */
922 static void established_upcall(struct c4iw_ep *ep)
924 struct iw_cm_event event;
926 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
927 memset(&event, 0, sizeof(event));
928 event.event = IW_CM_EVENT_ESTABLISHED;
932 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
933 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
/*
 * Return receive credits to the hardware with a CPL_RX_DATA_ACK for
 * ep->hwtid. The credit word combines the credit count with RX_FORCE_ACK(1)
 * and the delayed-ack mode from the dack_mode module parameter. The message
 * is queued on the ack priority tx queue ep->ctrlq_idx and sent with
 * c4iw_ofld_send().
 *
 * NOTE(review): one term of the credit word, the allocation-failure return
 * and the final return value are on lines not visible in this extract.
 */
937 static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
939 struct cpl_rx_data_ack *req;
941 int wrlen = roundup(sizeof *req, 16);
943 PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
944 skb = get_skb(NULL, wrlen, GFP_KERNEL);
946 printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
950 req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
951 memset(req, 0, wrlen);
952 INIT_TP_WR(req, ep->hwtid);
953 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
955 req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK(1) |
957 V_RX_DACK_MODE(dack_mode));
958 set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx);
959 c4iw_ofld_send(&ep->com.dev->rdev, skb);
/*
 * Process received stream data while waiting for the MPA start reply on an
 * active connection (endpoint state MPA_REQ_SENT).
 *
 * Visible steps, in order:
 *  - Ignore the data unless the state is still MPA_REQ_SENT.
 *  - Append the skb payload to ep->mpa_pkt, failing if it would overflow
 *    that buffer.
 *  - Once a full MPA header is present, validate it: the revision must not
 *    exceed mpa_rev, the key must equal MPA_KEY_REP, the private data
 *    length must not exceed MPA_MAX_PRIVATE_DATA and must not be smaller
 *    than what has been received.
 *  - Wait for more data until header plus private data are complete.
 *  - A reply with MPA_REJECT set yields -ECONNREFUSED.
 *  - Otherwise move to FPDU_MODE and record the negotiated MPA attributes.
 *    CRC is enabled if either side asks for it. For revision 2 with
 *    enhanced connection setup, the ird/ord and peer-to-peer/RTR bits of
 *    the responder are parsed from the start of the private data. For
 *    revision 1 the local p2p_type is used.
 *  - If revision 2 peer-to-peer is in use and the RTR type of the responder
 *    differs from the local p2p_type, p2p_type is set to DISABLED so no RTR
 *    is generated.
 *  - Move the QP to RTS with c4iw_modify_qp(), binding it to this endpoint.
 *  - On an RTR mismatch or insufficient IRD, move the QP to TERMINATE with
 *    ecode MPA_NOMATCH_RTR or MPA_INSUFF_IRD respectively.
 *  - The error path sets the state to ABORTING and sends an abort; the
 *    result is reported through connect_reply_upcall(ep, err).
 *
 * NOTE(review): the error assignments, goto/return statements, labels and
 * the conditions guarding the TERM paths are on lines not visible in this
 * extract, so the exact control flow between these steps cannot be
 * confirmed here.
 */
963 static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
965 struct mpa_message *mpa;
966 struct mpa_v2_conn_params *mpa_v2_params;
968 u16 resp_ird, resp_ord;
969 u8 rtr_mismatch = 0, insuff_ird = 0;
970 struct c4iw_qp_attributes attrs;
971 enum c4iw_qp_attr_mask mask;
974 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
977 * Stop mpa timer. If it expired, then the state has
978 * changed and we bail since ep_timeout already aborted
982 if (state_read(&ep->com) != MPA_REQ_SENT)
986 * If we get more than the supported amount of private data
987 * then we must fail this connection.
989 if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
995 * copy the new data into our accumulation buffer.
997 skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
999 ep->mpa_pkt_len += skb->len;
1002 * if we don't even have the mpa message, then bail.
1004 if (ep->mpa_pkt_len < sizeof(*mpa))
1006 mpa = (struct mpa_message *) ep->mpa_pkt;
1008 /* Validate MPA header. */
1009 if (mpa->revision > mpa_rev) {
1010 printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
1011 " Received = %d\n", __func__, mpa_rev, mpa->revision);
1015 if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1020 plen = ntohs(mpa->private_data_size);
1023 * Fail if there's too much private data.
1025 if (plen > MPA_MAX_PRIVATE_DATA) {
1031 * If plen does not account for pkt size
1033 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1038 ep->plen = (u8) plen;
1041 * If we don't have all the pdata yet, then bail.
1042 * We'll continue process when more data arrives.
1044 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1047 if (mpa->flags & MPA_REJECT) {
1048 err = -ECONNREFUSED;
1053 * If we get here we have accumulated the entire mpa
1054 * start reply message including private data. And
1055 * the MPA header is valid.
1057 state_set(&ep->com, FPDU_MODE);
1058 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1059 ep->mpa_attr.recv_marker_enabled = markers_enabled;
1060 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1061 ep->mpa_attr.version = mpa->revision;
1062 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1064 if (mpa->revision == 2) {
1065 ep->mpa_attr.enhanced_rdma_conn =
1066 mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1067 if (ep->mpa_attr.enhanced_rdma_conn) {
1068 mpa_v2_params = (struct mpa_v2_conn_params *)
1069 (ep->mpa_pkt + sizeof(*mpa));
1070 resp_ird = ntohs(mpa_v2_params->ird) &
1071 MPA_V2_IRD_ORD_MASK;
1072 resp_ord = ntohs(mpa_v2_params->ord) &
1073 MPA_V2_IRD_ORD_MASK;
1076 * This is a double-check. Ideally, below checks are
1077 * not required since ird/ord stuff has been taken
1078 * care of in c4iw_accept_cr
1080 if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
1087 if (ntohs(mpa_v2_params->ird) &
1088 MPA_V2_PEER2PEER_MODEL) {
1089 if (ntohs(mpa_v2_params->ord) &
1090 MPA_V2_RDMA_WRITE_RTR)
1091 ep->mpa_attr.p2p_type =
1092 FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1093 else if (ntohs(mpa_v2_params->ord) &
1094 MPA_V2_RDMA_READ_RTR)
1095 ep->mpa_attr.p2p_type =
1096 FW_RI_INIT_P2PTYPE_READ_REQ;
1099 } else if (mpa->revision == 1)
1101 ep->mpa_attr.p2p_type = p2p_type;
1103 PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
1104 "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = "
1105 "%d\n", __func__, ep->mpa_attr.crc_enabled,
1106 ep->mpa_attr.recv_marker_enabled,
1107 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1108 ep->mpa_attr.p2p_type, p2p_type);
1111 * If responder's RTR does not match with that of initiator, assign
1112 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
1113 * generated when moving QP to RTS state.
1114 * A TERM message will be sent after QP has moved to RTS state
1116 if ((ep->mpa_attr.version == 2) && peer2peer &&
1117 (ep->mpa_attr.p2p_type != p2p_type)) {
1118 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1122 attrs.mpa_attr = ep->mpa_attr;
1123 attrs.max_ird = ep->ird;
1124 attrs.max_ord = ep->ord;
1125 attrs.llp_stream_handle = ep;
1126 attrs.next_state = C4IW_QP_STATE_RTS;
1128 mask = C4IW_QP_ATTR_NEXT_STATE |
1129 C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
1130 C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
1132 /* bind QP and TID with INIT_WR */
1133 err = c4iw_modify_qp(ep->com.qp->rhp,
1134 ep->com.qp, mask, &attrs, 1);
1139 * If responder's RTR requirement did not match with what initiator
1140 * supports, generate TERM message
1143 printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
1144 attrs.layer_etype = LAYER_MPA | DDP_LLP;
1145 attrs.ecode = MPA_NOMATCH_RTR;
1146 attrs.next_state = C4IW_QP_STATE_TERMINATE;
1147 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1148 C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
1154 * Generate TERM if initiator IRD is not sufficient for responder
1155 * provided ORD. Currently, we do the same behaviour even when
1156 * responder provided IRD is also not sufficient as regards to
1160 printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
1162 attrs.layer_etype = LAYER_MPA | DDP_LLP;
1163 attrs.ecode = MPA_INSUFF_IRD;
1164 attrs.next_state = C4IW_QP_STATE_TERMINATE;
1165 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1166 C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
1172 state_set(&ep->com, ABORTING);
1173 send_abort(ep, skb, GFP_KERNEL);
1175 connect_reply_upcall(ep, err);
/*
 * process_mpa_request - accumulate and validate an inbound MPA start request.
 *
 * The endpoint state is first compared against MPA_REQ_WAIT. The skb payload
 * is then appended to ep->mpa_pkt (the connection is aborted if it would not
 * fit). Once at least a full struct mpa_message is buffered, the header is
 * validated: the revision must not exceed mpa_rev, the key must equal
 * MPA_KEY_REQ, and the private data length must not exceed
 * MPA_MAX_PRIVATE_DATA nor be smaller than what was actually received.
 * Each validation failure calls abort_connection().
 *
 * When header and private data are complete, the negotiated attributes are
 * stored in ep->mpa_attr (and ep->ird / ep->ord for MPA v2 enhanced
 * connections), the endpoint moves to MPA_REQ_RCVD and
 * connect_request_upcall() is invoked.
 *
 * @ep:  endpoint the data arrived on
 * @skb: buffer holding the newly received stream data
 */
1179 static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1181 struct mpa_message *mpa;
1182 struct mpa_v2_conn_params *mpa_v2_params;
1185 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1187 if (state_read(&ep->com) != MPA_REQ_WAIT)
1191 * If we get more than the supported amount of private data
1192 * then we must fail this connection.
1194 if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
1196 abort_connection(ep, skb, GFP_KERNEL);
1200 PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
1203 * Copy the new data into our accumulation buffer.
1205 skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1207 ep->mpa_pkt_len += skb->len;
1210 * If we don't even have the mpa message, then bail.
1211 * We'll continue process when more data arrives.
1213 if (ep->mpa_pkt_len < sizeof(*mpa))
1216 PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
1218 mpa = (struct mpa_message *) ep->mpa_pkt;
1221 * Validate MPA Header.
1223 if (mpa->revision > mpa_rev) {
1224 printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
1225 " Received = %d\n", __func__, mpa_rev, mpa->revision);
1226 abort_connection(ep, skb, GFP_KERNEL);
1230 if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
1231 abort_connection(ep, skb, GFP_KERNEL);
1235 plen = ntohs(mpa->private_data_size);
1238 * Fail if there's too much private data.
1240 if (plen > MPA_MAX_PRIVATE_DATA) {
1241 abort_connection(ep, skb, GFP_KERNEL);
1246 * If plen does not account for pkt size
1248 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1249 abort_connection(ep, skb, GFP_KERNEL);
1252 ep->plen = (u8) plen;
1255 * If we don't have all the pdata yet, then bail.
1257 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1261 * If we get here we have accumulated the entire mpa
1262 * start reply message including private data.
1264 ep->mpa_attr.initiator = 0;
/*
 * The bitwise OR binds tighter than the conditional operator, so CRC is
 * enabled when either the peer set MPA_CRC or the local crc_enabled
 * setting is non-zero.
 */
1265 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1266 ep->mpa_attr.recv_marker_enabled = markers_enabled;
1267 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1268 ep->mpa_attr.version = mpa->revision;
1269 if (mpa->revision == 1)
1270 ep->tried_with_mpa_v1 = 1;
1271 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1273 if (mpa->revision == 2) {
1274 ep->mpa_attr.enhanced_rdma_conn =
1275 mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1276 if (ep->mpa_attr.enhanced_rdma_conn) {
/*
 * The v2 connection parameters are read from the accumulation buffer
 * directly after the MPA header. Their ird and ord fields also carry
 * control flag bits, so the depth values are obtained by masking with
 * MPA_V2_IRD_ORD_MASK.
 */
1277 mpa_v2_params = (struct mpa_v2_conn_params *)
1278 (ep->mpa_pkt + sizeof(*mpa));
1279 ep->ird = ntohs(mpa_v2_params->ird) &
1280 MPA_V2_IRD_ORD_MASK;
1281 ep->ord = ntohs(mpa_v2_params->ord) &
1282 MPA_V2_IRD_ORD_MASK;
1283 if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
1285 if (ntohs(mpa_v2_params->ord) &
1286 MPA_V2_RDMA_WRITE_RTR)
1287 ep->mpa_attr.p2p_type =
1288 FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1289 else if (ntohs(mpa_v2_params->ord) &
1290 MPA_V2_RDMA_READ_RTR)
1291 ep->mpa_attr.p2p_type =
1292 FW_RI_INIT_P2PTYPE_READ_REQ;
1295 } else if (mpa->revision == 1)
1297 ep->mpa_attr.p2p_type = p2p_type;
1299 PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
1300 "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
1301 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1302 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1303 ep->mpa_attr.p2p_type);
1305 state_set(&ep->com, MPA_REQ_RCVD);
1308 connect_request_upcall(ep);
/*
 * rx_data - work-queue handler for CPL_RX_DATA (streaming-mode payload).
 *
 * Looks up the endpoint by TID, strips the CPL header from the skb and trims
 * it to the payload length announced in the header, advances ep->rcv_seq,
 * returns RX credits through update_rx_credits(), and then dispatches on the
 * endpoint state to process_mpa_reply() or process_mpa_request(). Data that
 * arrives in an unexpected state is only logged.
 *
 * NOTE(review): the result of lookup_tid() is dereferenced immediately, with
 * no NULL check in between.
 */
1312 static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1315 struct cpl_rx_data *hdr = cplhdr(skb);
1316 unsigned int dlen = ntohs(hdr->len);
1317 unsigned int tid = GET_TID(hdr);
1318 struct tid_info *t = dev->rdev.lldi.tids;
1320 ep = lookup_tid(t, tid);
1321 PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
1322 skb_pull(skb, sizeof(*hdr));
1323 skb_trim(skb, dlen);
1325 ep->rcv_seq += dlen;
/*
 * Sanity check: the running receive sequence must equal the sequence number
 * carried in the header plus this payload length.
 */
1326 BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
1328 /* update RX credits */
1329 update_rx_credits(ep, dlen);
1331 switch (state_read(&ep->com)) {
1333 process_mpa_reply(ep, skb);
1336 process_mpa_request(ep, skb);
1341 printk(KERN_ERR MOD "%s Unexpected streaming data."
1342 " ep %p state %d tid %u\n",
1343 __func__, ep, state_read(&ep->com), ep->hwtid);
1346 * The ep will timeout and inform the ULP of the failure.
/*
 * abort_rpl - work-queue handler for CPL_ABORT_RPL_RSS and CPL_ABORT_RPL.
 *
 * Looks up the endpoint by TID; a warning is printed for a reply that refers
 * to an endpoint that no longer exists. With ep->com.mutex held the endpoint
 * state is examined: one branch marks the endpoint DEAD, another only logs
 * the state. release_ep_resources() is called after the mutex is dropped
 * (presumably only when the endpoint was marked DEAD -- confirm).
 */
1354 static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1357 struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
1359 unsigned int tid = GET_TID(rpl);
1360 struct tid_info *t = dev->rdev.lldi.tids;
1362 ep = lookup_tid(t, tid);
1364 printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
1367 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1368 mutex_lock(&ep->com.mutex);
1369 switch (ep->com.state) {
1371 __state_set(&ep->com, DEAD);
1375 printk(KERN_ERR "%s ep %p state %d\n",
1376 __func__, ep, ep->com.state);
1379 mutex_unlock(&ep->com.mutex);
1382 release_ep_resources(ep);
/*
 * act_open_has_tid - non-zero for every status other than
 * CPL_ERR_TCAM_FULL, CPL_ERR_CONN_EXIST and CPL_ERR_ARP_MISS.
 * act_open_rpl() uses it to decide whether a TID must be removed after a
 * failed active open.
 */
1387 * Return whether a failed active open has allocated a TID
1389 static inline int act_open_has_tid(int status)
1391 return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
1392 status != CPL_ERR_ARP_MISS;
/*
 * act_open_rpl - work-queue handler for CPL_ACT_OPEN_RPL.
 *
 * Extracts the active-open TID (atid) and the status from rpl->atid_status
 * and looks up the endpoint by atid. A CPL_ERR_RTX_NEG_ADVICE status only
 * produces a warning. Selected failure statuses are logged together with the
 * connection addresses. The failure is then reported upward through
 * connect_reply_upcall(), the endpoint is marked DEAD, and its resources are
 * released: the hardware TID (only when the status is non-zero and
 * act_open_has_tid() says one was allocated), the atid, the route entry, the
 * L2T entry and one endpoint reference.
 */
1395 static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1398 struct cpl_act_open_rpl *rpl = cplhdr(skb);
1399 unsigned int atid = GET_TID_TID(GET_AOPEN_ATID(
1400 ntohl(rpl->atid_status)));
1401 struct tid_info *t = dev->rdev.lldi.tids;
1402 int status = GET_AOPEN_STATUS(ntohl(rpl->atid_status));
1404 ep = lookup_atid(t, atid);
1406 PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
1407 status, status2errno(status));
1409 if (status == CPL_ERR_RTX_NEG_ADVICE) {
1410 printk(KERN_WARNING MOD "Connection problems for atid %u\n",
1416 * Log interesting failures.
1419 case CPL_ERR_CONN_RESET:
1420 case CPL_ERR_CONN_TIMEDOUT:
1423 printk(KERN_INFO MOD "Active open failure - "
1424 "atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
1425 atid, status, status2errno(status),
1426 &ep->com.local_addr.sin_addr.s_addr,
1427 ntohs(ep->com.local_addr.sin_port),
1428 &ep->com.remote_addr.sin_addr.s_addr,
1429 ntohs(ep->com.remote_addr.sin_port));
1433 connect_reply_upcall(ep, status2errno(status));
1434 state_set(&ep->com, DEAD);
1436 if (status && act_open_has_tid(status))
1437 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl));
1439 cxgb4_free_atid(t, atid);
1440 dst_release(ep->dst);
1441 cxgb4_l2t_release(ep->l2t);
1442 c4iw_put_ep(&ep->com);
/*
 * pass_open_rpl - work-queue handler for CPL_PASS_OPEN_RPL.
 *
 * Looks up the listening endpoint by server TID (stid), logs an error when
 * the lookup fails, and otherwise wakes the thread waiting on
 * ep->com.wr_wait, passing it the reply status converted by status2errno().
 * c4iw_create_listen() is the waiter for this reply.
 */
1447 static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1449 struct cpl_pass_open_rpl *rpl = cplhdr(skb);
1450 struct tid_info *t = dev->rdev.lldi.tids;
1451 unsigned int stid = GET_TID(rpl);
1452 struct c4iw_listen_ep *ep = lookup_stid(t, stid);
1455 printk(KERN_ERR MOD "stid %d lookup failure!\n", stid);
1458 PDBG("%s ep %p status %d error %d\n", __func__, ep,
1459 rpl->status, status2errno(rpl->status));
1460 c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
/*
 * listen_stop - send a CPL_CLOSE_LISTSRV_REQ for a listening endpoint.
 *
 * Allocates a fresh skb, fills in the request (the reply is directed to the
 * first RX queue, rxq_ids[0]) and hands it to c4iw_ofld_send().
 *
 * Returns the result of c4iw_ofld_send(); an skb allocation failure is
 * logged (the value returned in that case is not visible here).
 */
1465 static int listen_stop(struct c4iw_listen_ep *ep)
1467 struct sk_buff *skb;
1468 struct cpl_close_listsvr_req *req;
1470 PDBG("%s ep %p\n", __func__, ep);
1471 skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1473 printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
1476 req = (struct cpl_close_listsvr_req *) skb_put(skb, sizeof(*req));
1478 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
1480 req->reply_ctrl = cpu_to_be16(
1481 QUEUENO(ep->com.dev->rdev.lldi.rxq_ids[0]));
1482 set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
1483 return c4iw_ofld_send(&ep->com.dev->rdev, skb);
/*
 * close_listsrv_rpl - work-queue handler for CPL_CLOSE_LISTSRV_RPL.
 *
 * Looks up the listening endpoint by stid and wakes the thread waiting on
 * ep->com.wr_wait with the reply status converted by status2errno().
 * c4iw_destroy_listen() waits on this after calling listen_stop().
 */
1486 static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1488 struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
1489 struct tid_info *t = dev->rdev.lldi.tids;
1490 unsigned int stid = GET_TID(rpl);
1491 struct c4iw_listen_ep *ep = lookup_stid(t, stid);
1493 PDBG("%s ep %p\n", __func__, ep);
1494 c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
/*
 * accept_cr - answer a passive open with a CPL_PASS_ACCEPT_RPL.
 *
 * The skb that carried the request is reused for the reply: it must not be
 * cloned, is trimmed to the size of the reply and is sent at the end through
 * c4iw_l2t_send(). opt0 and opt2 are assembled from endpoint and module
 * settings. TCP timestamps and SACK are only enabled when the matching
 * module parameter is set and the peer offered the option in req->tcpopt;
 * window scaling is enabled when a non-zero scale was computed and
 * enable_tcp_window_scaling is set.
 *
 * @ep:      child endpoint created for this connection
 * @peer_ip: peer address (not referenced by the visible lines)
 * @skb:     request skb, reused for the reply
 * @req:     the CPL_PASS_ACCEPT_REQ being answered
 */
1498 static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb,
1499 struct cpl_pass_accept_req *req)
1501 struct cpl_pass_accept_rpl *rpl;
1502 unsigned int mtu_idx;
1507 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1508 BUG_ON(skb_cloned(skb));
1509 skb_trim(skb, sizeof(*rpl));
1511 cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
1512 wscale = compute_wscale(rcv_win);
1513 opt0 = KEEP_ALIVE(1) |
1517 L2T_IDX(ep->l2t->idx) |
1518 TX_CHAN(ep->tx_chan) |
1519 SMAC_SEL(ep->smac_idx) |
1521 ULP_MODE(ULP_MODE_TCPDDP) |
1522 RCV_BUFSIZ(rcv_win>>10);
1523 opt2 = RX_CHANNEL(0) |
1524 RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
1526 if (enable_tcp_timestamps && req->tcpopt.tstamp)
1527 opt2 |= TSTAMPS_EN(1);
1528 if (enable_tcp_sack && req->tcpopt.sack)
1530 if (wscale && enable_tcp_window_scaling)
1531 opt2 |= WND_SCALE_EN(1);
1534 INIT_TP_WR(rpl, ep->hwtid);
1535 OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
1537 rpl->opt0 = cpu_to_be64(opt0);
1538 rpl->opt2 = cpu_to_be32(opt2);
1539 set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
1540 c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
/*
 * reject_cr - refuse a passive open by releasing its hardware TID.
 *
 * The request skb (which must not be cloned) is trimmed to the size of a
 * struct cpl_tid_release and passed to release_tid() together with hwtid.
 * peer_ip is only used for the debug trace.
 */
1545 static void reject_cr(struct c4iw_dev *dev, u32 hwtid, __be32 peer_ip,
1546 struct sk_buff *skb)
1548 PDBG("%s c4iw_dev %p tid %u peer_ip %x\n", __func__, dev, hwtid,
1550 BUG_ON(skb_cloned(skb));
1551 skb_trim(skb, sizeof(struct cpl_tid_release));
1553 release_tid(&dev->rdev, hwtid, skb);
/*
 * get_4tuple - extract addresses and ports from a CPL_PASS_ACCEPT_REQ.
 *
 * The packet headers follow the request structure in memory. The Ethernet
 * and IP header lengths are decoded from req->hdr_len and used to locate the
 * IP and TCP headers. The packet source becomes the peer address and port,
 * the packet destination becomes the local address and port. Values are
 * copied as found in the headers, without byte-order conversion.
 */
1557 static void get_4tuple(struct cpl_pass_accept_req *req,
1558 __be32 *local_ip, __be32 *peer_ip,
1559 __be16 *local_port, __be16 *peer_port)
1561 int eth_len = G_ETH_HDR_LEN(be32_to_cpu(req->hdr_len));
1562 int ip_len = G_IP_HDR_LEN(be32_to_cpu(req->hdr_len));
1563 struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
1564 struct tcphdr *tcp = (struct tcphdr *)
1565 ((u8 *)(req + 1) + eth_len + ip_len);
1567 PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
1568 ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
1571 *peer_ip = ip->saddr;
1572 *local_ip = ip->daddr;
1573 *peer_port = tcp->source;
1574 *local_port = tcp->dest;
/*
 * import_ep - derive the per-connection hardware parameters of an endpoint
 * from its route.
 *
 * Looks up the neighbour entry for peer_ip on dst and fills in ep->l2t,
 * ep->mtu, ep->tx_chan, ep->smac_idx, ep->txq_idx, ep->ctrlq_idx and
 * ep->rss_qid. When the neighbour device is a loopback device, the values
 * are taken from the device that ip_dev_find() returns for peer_ip in
 * init_net; otherwise they come from the neighbour device and the MTU from
 * dst_mtu(). The TX and RX queue indices are spread per port using
 * ntxq / nchan and nrxq / nchan as the step.
 *
 * The last two visible assignments reset the MPA v1 retry bookkeeping
 * (presumably only when clear_mpa_v1 is true -- confirm).
 */
1579 static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst,
1580 struct c4iw_dev *cdev, bool clear_mpa_v1)
1582 struct neighbour *n;
1585 n = dst_neigh_lookup(dst, &peer_ip);
1591 if (n->dev->flags & IFF_LOOPBACK) {
1592 struct net_device *pdev;
1594 pdev = ip_dev_find(&init_net, peer_ip);
1599 ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
1603 ep->mtu = pdev->mtu;
1604 ep->tx_chan = cxgb4_port_chan(pdev);
1605 ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
1606 step = cdev->rdev.lldi.ntxq /
1607 cdev->rdev.lldi.nchan;
1608 ep->txq_idx = cxgb4_port_idx(pdev) * step;
1609 step = cdev->rdev.lldi.nrxq /
1610 cdev->rdev.lldi.nchan;
1611 ep->ctrlq_idx = cxgb4_port_idx(pdev);
1612 ep->rss_qid = cdev->rdev.lldi.rxq_ids[
1613 cxgb4_port_idx(pdev) * step];
1616 ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
1620 ep->mtu = dst_mtu(dst);
1621 ep->tx_chan = cxgb4_port_chan(n->dev);
1622 ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1;
1623 step = cdev->rdev.lldi.ntxq /
1624 cdev->rdev.lldi.nchan;
1625 ep->txq_idx = cxgb4_port_idx(n->dev) * step;
1626 ep->ctrlq_idx = cxgb4_port_idx(n->dev);
1627 step = cdev->rdev.lldi.nrxq /
1628 cdev->rdev.lldi.nchan;
1629 ep->rss_qid = cdev->rdev.lldi.rxq_ids[
1630 cxgb4_port_idx(n->dev) * step];
1633 ep->retry_with_mpa_v1 = 0;
1634 ep->tried_with_mpa_v1 = 0;
/*
 * pass_accept_req - work-queue handler for CPL_PASS_ACCEPT_REQ.
 *
 * Looks up the listening (parent) endpoint by stid and extracts the
 * connection 4-tuple from the request. The parent must be in LISTEN state, a
 * route to the peer must exist, a child endpoint must be allocated and
 * import_ep() must succeed; each failure is logged. On success the child is
 * initialised (state CONNECTING, addresses, TOS, route, hardware TID, a
 * reference on the parent, timer), inserted into the TID table and the
 * request is answered through accept_cr(). The trailing reject_cr() call
 * refuses the connection (presumably the target of the failure paths --
 * confirm).
 */
1646 static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
1648 struct c4iw_ep *child_ep, *parent_ep;
1649 struct cpl_pass_accept_req *req = cplhdr(skb);
1650 unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid));
1651 struct tid_info *t = dev->rdev.lldi.tids;
1652 unsigned int hwtid = GET_TID(req);
1653 struct dst_entry *dst;
1655 __be32 local_ip, peer_ip;
1656 __be16 local_port, peer_port;
1659 parent_ep = lookup_stid(t, stid);
1660 PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
1662 get_4tuple(req, &local_ip, &peer_ip, &local_port, &peer_port);
1664 if (state_read(&parent_ep->com) != LISTEN) {
1665 printk(KERN_ERR "%s - listening ep not in LISTEN\n",
1670 /* Find output route */
1671 rt = find_route(dev, local_ip, peer_ip, local_port, peer_port,
1672 GET_POPEN_TOS(ntohl(req->tos_stid)));
1674 printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
1680 child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
1682 printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
1688 err = import_ep(child_ep, peer_ip, dst, dev, false);
1690 printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
1697 state_set(&child_ep->com, CONNECTING);
1698 child_ep->com.dev = dev;
1699 child_ep->com.cm_id = NULL;
1700 child_ep->com.local_addr.sin_family = PF_INET;
1701 child_ep->com.local_addr.sin_port = local_port;
1702 child_ep->com.local_addr.sin_addr.s_addr = local_ip;
1703 child_ep->com.remote_addr.sin_family = PF_INET;
1704 child_ep->com.remote_addr.sin_port = peer_port;
1705 child_ep->com.remote_addr.sin_addr.s_addr = peer_ip;
/* The child keeps its own reference on the listening endpoint. */
1706 c4iw_get_ep(&parent_ep->com);
1707 child_ep->parent_ep = parent_ep;
1708 child_ep->tos = GET_POPEN_TOS(ntohl(req->tos_stid));
1709 child_ep->dst = dst;
1710 child_ep->hwtid = hwtid;
1712 PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__,
1713 child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
1715 init_timer(&child_ep->timer);
1716 cxgb4_insert_tid(t, child_ep, hwtid);
1717 accept_cr(child_ep, peer_ip, skb, req);
1720 reject_cr(dev, hwtid, peer_ip, skb);
/*
 * pass_establish - work-queue handler for CPL_PASS_ESTABLISH.
 *
 * Looks up the endpoint by TID, records the initial send and receive
 * sequence numbers from the message, computes the effective MSS from the
 * reported TCP options, confirms the route, moves the endpoint to
 * MPA_REQ_WAIT and sends the flow-control work request via send_flowc().
 */
1725 static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1728 struct cpl_pass_establish *req = cplhdr(skb);
1729 struct tid_info *t = dev->rdev.lldi.tids;
1730 unsigned int tid = GET_TID(req);
1732 ep = lookup_tid(t, tid);
1733 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1734 ep->snd_seq = be32_to_cpu(req->snd_isn);
1735 ep->rcv_seq = be32_to_cpu(req->rcv_isn);
1737 set_emss(ep, ntohs(req->tcp_opt));
1739 dst_confirm(ep->dst);
1740 state_set(&ep->com, MPA_REQ_WAIT);
1742 send_flowc(ep, skb);
/*
 * peer_close - work-queue handler for CPL_PEER_CLOSE.
 *
 * Looks up the endpoint by TID, confirms the route and, with ep->com.mutex
 * held, advances the connection state machine according to the current
 * state. The visible branches do the following:
 *   - move to CLOSING;
 *   - move to CLOSING and report -ECONNRESET via connect_reply_upcall();
 *   - move to CLOSING and wake any waiter on ep->com.wr_wait with
 *     -ECONNRESET (two such branches);
 *   - move to CLOSING, put the QP into C4IW_QP_STATE_CLOSING and, unless
 *     that returned -ECONNRESET, call peer_close_upcall();
 *   - move to MORIBUND;
 *   - put the QP (if a cm_id and QP are bound) into C4IW_QP_STATE_IDLE,
 *     call close_complete_upcall() and mark the endpoint DEAD.
 * After the mutex is released, c4iw_ep_disconnect() and
 * release_ep_resources() are called (the flags guarding them are not
 * visible here).
 */
1747 static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
1749 struct cpl_peer_close *hdr = cplhdr(skb);
1751 struct c4iw_qp_attributes attrs;
1754 struct tid_info *t = dev->rdev.lldi.tids;
1755 unsigned int tid = GET_TID(hdr);
1758 ep = lookup_tid(t, tid);
1759 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1760 dst_confirm(ep->dst);
1762 mutex_lock(&ep->com.mutex);
1763 switch (ep->com.state) {
1765 __state_set(&ep->com, CLOSING);
1768 __state_set(&ep->com, CLOSING);
1769 connect_reply_upcall(ep, -ECONNRESET);
1774 * We're gonna mark this puppy DEAD, but keep
1775 * the reference on it until the ULP accepts or
1776 * rejects the CR. Also wake up anyone waiting
1777 * in rdma connection migration (see c4iw_accept_cr()).
1779 __state_set(&ep->com, CLOSING);
1780 PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
1781 c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
1784 __state_set(&ep->com, CLOSING);
1785 PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
1786 c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
1790 __state_set(&ep->com, CLOSING);
1791 attrs.next_state = C4IW_QP_STATE_CLOSING;
1792 ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1793 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1794 if (ret != -ECONNRESET) {
1795 peer_close_upcall(ep);
1803 __state_set(&ep->com, MORIBUND);
1808 if (ep->com.cm_id && ep->com.qp) {
1809 attrs.next_state = C4IW_QP_STATE_IDLE;
1810 c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1811 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1813 close_complete_upcall(ep);
1814 __state_set(&ep->com, DEAD);
1824 mutex_unlock(&ep->com.mutex);
1826 c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
1828 release_ep_resources(ep);
/*
 * is_neg_adv_abort - non-zero when status is CPL_ERR_RTX_NEG_ADVICE or
 * CPL_ERR_PERSIST_NEG_ADVICE, zero for every other status. Both abort
 * handlers (peer_abort and peer_abort_intr) test this before acting on an
 * abort request.
 */
1833 * Returns whether an ABORT_REQ_RSS message is a negative advice.
1835 static int is_neg_adv_abort(unsigned int status)
1837 return status == CPL_ERR_RTX_NEG_ADVICE ||
1838 status == CPL_ERR_PERSIST_NEG_ADVICE;
/*
 * c4iw_reconnect - issue a new active open for an existing endpoint.
 *
 * Re-initialises the endpoint timer, allocates a new active TID, finds a
 * route using the addresses stored in ep->com.cm_id, re-derives the hardware
 * parameters through import_ep() (passing false for clear_mpa_v1), moves the
 * endpoint to CONNECTING and sends the connect request.
 *
 * The trailing lines are the cleanup sequence: they release the L2T entry,
 * the route and the atid, then report -ECONNRESET through
 * connect_reply_upcall() and drop an endpoint reference.
 */
1841 static int c4iw_reconnect(struct c4iw_ep *ep)
1846 PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
1847 init_timer(&ep->timer);
1850 * Allocate an active TID to initiate a TCP connection.
1852 ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
1853 if (ep->atid == -1) {
1854 printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
1860 rt = find_route(ep->com.dev,
1861 ep->com.cm_id->local_addr.sin_addr.s_addr,
1862 ep->com.cm_id->remote_addr.sin_addr.s_addr,
1863 ep->com.cm_id->local_addr.sin_port,
1864 ep->com.cm_id->remote_addr.sin_port, 0);
1866 printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
1867 err = -EHOSTUNREACH;
1872 err = import_ep(ep, ep->com.cm_id->remote_addr.sin_addr.s_addr,
1873 ep->dst, ep->com.dev, false);
1875 printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
1879 PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
1880 __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
1883 state_set(&ep->com, CONNECTING);
1886 /* send connect request to rnic */
1887 err = send_connect(ep);
1891 cxgb4_l2t_release(ep->l2t);
1893 dst_release(ep->dst);
1895 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
1898 * remember to send notification to upper layer.
1899 * We are in here so the upper layer is not aware that this is
1900 * re-connect attempt and so, upper layer is still waiting for
1901 * response of 1st connect request.
1903 connect_reply_upcall(ep, -ECONNRESET);
1904 c4iw_put_ep(&ep->com);
/*
 * peer_abort - work-queue handler for CPL_ABORT_REQ_RSS.
 *
 * Looks up the endpoint by TID. Negative-advice aborts (see
 * is_neg_adv_abort()) are only traced. Otherwise any thread waiting on
 * ep->com.wr_wait is woken with -ECONNRESET (skipped when the state is
 * MPA_REQ_SENT), and with ep->com.mutex held the state machine is advanced:
 *   - while the MPA request is outstanding, the failure is either reported
 *     through connect_reply_upcall() or ep->retry_with_mpa_v1 is set so the
 *     connection can be retried with MPA v1;
 *   - with a cm_id and QP bound, the QP is moved to C4IW_QP_STATE_ERROR and
 *     peer_abort_upcall() is called;
 *   - an abort for an endpoint that is already DEAD is only traced.
 * Unless the endpoint was already ABORTING it is marked DEAD. A
 * CPL_ABORT_RPL with CPL_ABORT_NO_RST is then built in rpl_skb and sent.
 * Finally the endpoint resources are released, or, when a retry with MPA v1
 * was requested, the TID, route and L2T entry are released for the retry.
 */
1909 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
1911 struct cpl_abort_req_rss *req = cplhdr(skb);
1913 struct cpl_abort_rpl *rpl;
1914 struct sk_buff *rpl_skb;
1915 struct c4iw_qp_attributes attrs;
1918 struct tid_info *t = dev->rdev.lldi.tids;
1919 unsigned int tid = GET_TID(req);
1921 ep = lookup_tid(t, tid);
1922 if (is_neg_adv_abort(req->status)) {
1923 PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
1927 PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
1931 * Wake up any threads in rdma_init() or rdma_fini().
1932 * However, this is not needed if com state is just
1935 if (ep->com.state != MPA_REQ_SENT)
1936 c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
1938 mutex_lock(&ep->com.mutex);
1939 switch (ep->com.state) {
1947 if (mpa_rev == 2 && ep->tried_with_mpa_v1)
1948 connect_reply_upcall(ep, -ECONNRESET);
1951 * we just don't send notification upwards because we
1952 * want to retry with mpa_v1 without upper layers even
1955 * do some housekeeping so as to re-initiate the
1958 PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
1960 ep->retry_with_mpa_v1 = 1;
1972 if (ep->com.cm_id && ep->com.qp) {
1973 attrs.next_state = C4IW_QP_STATE_ERROR;
1974 ret = c4iw_modify_qp(ep->com.qp->rhp,
1975 ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
1979 "%s - qp <- error failed!\n",
1982 peer_abort_upcall(ep);
1987 PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
1988 mutex_unlock(&ep->com.mutex);
1994 dst_confirm(ep->dst);
1995 if (ep->com.state != ABORTING) {
1996 __state_set(&ep->com, DEAD);
1997 /* we don't release if we want to retry with mpa_v1 */
1998 if (!ep->retry_with_mpa_v1)
2001 mutex_unlock(&ep->com.mutex);
2003 rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
2005 printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
/*
 * The reply is built in and sent from rpl_skb, so the TX queue and priority
 * must be set on rpl_skb. Setting them on the incoming skb only works when
 * get_skb() handed back that same buffer.
 */
2010 set_wr_txq(rpl_skb, CPL_PRIORITY_DATA, ep->txq_idx);
2011 rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
2012 INIT_TP_WR(rpl, ep->hwtid);
2013 OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
2014 rpl->cmd = CPL_ABORT_NO_RST;
2015 c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
2018 release_ep_resources(ep);
2020 /* retry with mpa-v1 */
2021 if (ep && ep->retry_with_mpa_v1) {
2022 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
2023 dst_release(ep->dst);
2024 cxgb4_l2t_release(ep->l2t);
/*
 * close_con_rpl - work-queue handler for CPL_CLOSE_CON_RPL.
 *
 * Looks up the endpoint by TID and, with ep->com.mutex held, advances the
 * state machine. One visible branch moves the endpoint to MORIBUND. The
 * other moves the QP to C4IW_QP_STATE_IDLE (when both a cm_id and a QP are
 * bound), calls close_complete_upcall() and marks the endpoint DEAD.
 * release_ep_resources() is called after the mutex is dropped (presumably
 * only on the DEAD path -- confirm).
 */
2031 static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2034 struct c4iw_qp_attributes attrs;
2035 struct cpl_close_con_rpl *rpl = cplhdr(skb);
2037 struct tid_info *t = dev->rdev.lldi.tids;
2038 unsigned int tid = GET_TID(rpl);
2040 ep = lookup_tid(t, tid);
2042 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2045 /* The cm_id may be null if we failed to connect */
2046 mutex_lock(&ep->com.mutex);
2047 switch (ep->com.state) {
2049 __state_set(&ep->com, MORIBUND);
2053 if ((ep->com.cm_id) && (ep->com.qp)) {
2054 attrs.next_state = C4IW_QP_STATE_IDLE;
2055 c4iw_modify_qp(ep->com.qp->rhp,
2057 C4IW_QP_ATTR_NEXT_STATE,
2060 close_complete_upcall(ep);
2061 __state_set(&ep->com, DEAD);
2071 mutex_unlock(&ep->com.mutex);
2073 release_ep_resources(ep);
/*
 * terminate - work-queue handler for CPL_RDMA_TERMINATE.
 *
 * Looks up the endpoint by TID. When both the endpoint and its QP exist, a
 * warning naming the TID and the send queue id is printed and the QP is
 * moved to C4IW_QP_STATE_TERMINATE; otherwise only a warning is printed.
 */
2077 static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
2079 struct cpl_rdma_terminate *rpl = cplhdr(skb);
2080 struct tid_info *t = dev->rdev.lldi.tids;
2081 unsigned int tid = GET_TID(rpl);
2083 struct c4iw_qp_attributes attrs;
2085 ep = lookup_tid(t, tid);
2088 if (ep && ep->com.qp) {
2089 printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
2090 ep->com.qp->wq.sq.qid);
2091 attrs.next_state = C4IW_QP_STATE_TERMINATE;
2092 c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2093 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2095 printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
/*
 * fw4_ack - work-queue handler for CPL_FW4_ACK.
 *
 * Looks up the endpoint by TID and reads the credit count from the message.
 * An ack carrying zero credits is only traced. Otherwise the route is
 * confirmed and the skb holding the MPA message (ep->mpa_skb) is freed.
 */
2101 * Upcall from the adapter indicating data has been transmitted.
2102 * For us its just the single MPA request or reply. We can now free
2103 * the skb holding the mpa message.
2105 static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
2108 struct cpl_fw4_ack *hdr = cplhdr(skb);
2109 u8 credits = hdr->credits;
2110 unsigned int tid = GET_TID(hdr);
2111 struct tid_info *t = dev->rdev.lldi.tids;
2114 ep = lookup_tid(t, tid);
2115 PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
2117 PDBG("%s 0 credit ack ep %p tid %u state %u\n",
2118 __func__, ep, ep->hwtid, state_read(&ep->com));
2122 dst_confirm(ep->dst);
2124 PDBG("%s last streaming msg ack ep %p tid %u state %u "
2125 "initiator %u freeing skb\n", __func__, ep, ep->hwtid,
2126 state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
2127 kfree_skb(ep->mpa_skb);
/*
 * c4iw_reject_cr - reject a pending connection request on behalf of the ULP.
 *
 * @cm_id:     connection identifier whose endpoint is being rejected
 * @pdata:     private data to carry in the MPA reject message
 * @pdata_len: length of pdata in bytes
 *
 * An endpoint that is already DEAD only has its reference dropped. Otherwise
 * the endpoint must be in MPA_REQ_RCVD (enforced with BUG_ON). The
 * connection is then either aborted or rejected with send_mpa_reject()
 * followed by a graceful c4iw_ep_disconnect(); the condition choosing
 * between the two is not visible here. An endpoint reference is dropped at
 * the end.
 *
 * NOTE(review): the result of send_mpa_reject() is overwritten by the result
 * of c4iw_ep_disconnect() on the following line, so a failed reject is not
 * reported.
 */
2133 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
2136 struct c4iw_ep *ep = to_ep(cm_id);
2137 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2139 if (state_read(&ep->com) == DEAD) {
2140 c4iw_put_ep(&ep->com);
2143 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
2145 abort_connection(ep, NULL, GFP_KERNEL);
2147 err = send_mpa_reject(ep, pdata, pdata_len);
2148 err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
2150 c4iw_put_ep(&ep->com);
/*
 * c4iw_accept_cr - accept a pending connection request on behalf of the ULP.
 *
 * @cm_id:      connection identifier being accepted
 * @conn_param: QP number, ord/ird and private data supplied by the ULP
 *
 * Visible steps, in order:
 *   - an endpoint that is already DEAD is handled separately up front;
 *   - the endpoint must be in MPA_REQ_RCVD (BUG_ON otherwise);
 *   - the connection is aborted when the requested ord or ird exceeds
 *     c4iw_max_read_depth;
 *   - for MPA v2 enhanced connections the requested ord and ird are checked
 *     against the ird and ord stored on the endpoint; a mismatch leads to
 *     send_mpa_reject() and/or abort_connection();
 *   - ird and ord are stored on the endpoint;
 *   - a reference is taken on cm_id, which is then bound to the endpoint;
 *   - the QP is moved to RTS through c4iw_modify_qp(), passing the MPA
 *     attributes, the ird/ord limits and the endpoint as stream handle;
 *   - the MPA reply is sent with the ULP private data;
 *   - the endpoint enters FPDU_MODE, established_upcall() is called and an
 *     endpoint reference is dropped.
 * The trailing lines clear the cm_id binding, release the cm_id reference
 * and drop the endpoint reference; they look like the error unwind path
 * (the labels are not visible here).
 */
2154 int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2157 struct c4iw_qp_attributes attrs;
2158 enum c4iw_qp_attr_mask mask;
2159 struct c4iw_ep *ep = to_ep(cm_id);
2160 struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
2161 struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
2163 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2164 if (state_read(&ep->com) == DEAD) {
2169 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
2172 if ((conn_param->ord > c4iw_max_read_depth) ||
2173 (conn_param->ird > c4iw_max_read_depth)) {
2174 abort_connection(ep, NULL, GFP_KERNEL);
2179 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
2180 if (conn_param->ord > ep->ird) {
2181 ep->ird = conn_param->ird;
2182 ep->ord = conn_param->ord;
2183 send_mpa_reject(ep, conn_param->private_data,
2184 conn_param->private_data_len);
2185 abort_connection(ep, NULL, GFP_KERNEL);
2189 if (conn_param->ird > ep->ord) {
2191 conn_param->ird = 1;
2193 abort_connection(ep, NULL, GFP_KERNEL);
2200 ep->ird = conn_param->ird;
2201 ep->ord = conn_param->ord;
2203 if (ep->mpa_attr.version != 2)
2204 if (peer2peer && ep->ird == 0)
2207 PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
2209 cm_id->add_ref(cm_id);
2210 ep->com.cm_id = cm_id;
2213 /* bind QP to EP and move to RTS */
2214 attrs.mpa_attr = ep->mpa_attr;
2215 attrs.max_ird = ep->ird;
2216 attrs.max_ord = ep->ord;
2217 attrs.llp_stream_handle = ep;
2218 attrs.next_state = C4IW_QP_STATE_RTS;
2220 /* bind QP and TID with INIT_WR */
2221 mask = C4IW_QP_ATTR_NEXT_STATE |
2222 C4IW_QP_ATTR_LLP_STREAM_HANDLE |
2223 C4IW_QP_ATTR_MPA_ATTR |
2224 C4IW_QP_ATTR_MAX_IRD |
2225 C4IW_QP_ATTR_MAX_ORD;
2227 err = c4iw_modify_qp(ep->com.qp->rhp,
2228 ep->com.qp, mask, &attrs, 1);
2231 err = send_mpa_reply(ep, conn_param->private_data,
2232 conn_param->private_data_len);
2236 state_set(&ep->com, FPDU_MODE);
2237 established_upcall(ep);
2238 c4iw_put_ep(&ep->com);
2241 ep->com.cm_id = NULL;
2243 cm_id->rem_ref(cm_id);
2245 c4iw_put_ep(&ep->com);
/*
 * c4iw_connect - start an active connection on behalf of the ULP.
 *
 * @cm_id:      connection identifier carrying the local and remote address
 * @conn_param: QP number, ord/ird and private data for the MPA request
 *
 * Visible steps, in order: check ord and ird against c4iw_max_read_depth;
 * allocate the endpoint; initialise its timer; copy the private data into
 * ep->mpa_pkt directly behind the space for the MPA header; record ird and
 * ord; take a reference on cm_id and bind cm_id and the QP (looked up by
 * qpn, BUG_ON if missing) to the endpoint; allocate an active TID; find a
 * route; derive the hardware parameters with import_ep() (passing true for
 * clear_mpa_v1); move to CONNECTING; copy the addresses; send the connect
 * request.
 *
 * The trailing lines form the unwind sequence: L2T entry, route, atid,
 * cm_id reference and endpoint reference are released in that order.
 */
2249 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2251 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2256 if ((conn_param->ord > c4iw_max_read_depth) ||
2257 (conn_param->ird > c4iw_max_read_depth)) {
2261 ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
2263 printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
2267 init_timer(&ep->timer);
2268 ep->plen = conn_param->private_data_len;
2270 memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
2271 conn_param->private_data, ep->plen);
2272 ep->ird = conn_param->ird;
2273 ep->ord = conn_param->ord;
2275 if (peer2peer && ep->ord == 0)
2278 cm_id->add_ref(cm_id);
2280 ep->com.cm_id = cm_id;
2281 ep->com.qp = get_qhp(dev, conn_param->qpn);
2282 BUG_ON(!ep->com.qp);
2283 PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
2287 * Allocate an active TID to initiate a TCP connection.
2289 ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
2290 if (ep->atid == -1) {
2291 printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
2296 PDBG("%s saddr 0x%x sport 0x%x raddr 0x%x rport 0x%x\n", __func__,
2297 ntohl(cm_id->local_addr.sin_addr.s_addr),
2298 ntohs(cm_id->local_addr.sin_port),
2299 ntohl(cm_id->remote_addr.sin_addr.s_addr),
2300 ntohs(cm_id->remote_addr.sin_port));
2303 rt = find_route(dev,
2304 cm_id->local_addr.sin_addr.s_addr,
2305 cm_id->remote_addr.sin_addr.s_addr,
2306 cm_id->local_addr.sin_port,
2307 cm_id->remote_addr.sin_port, 0);
2309 printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
2310 err = -EHOSTUNREACH;
2315 err = import_ep(ep, cm_id->remote_addr.sin_addr.s_addr,
2316 ep->dst, ep->com.dev, true);
2318 printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
2322 PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
2323 __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
2326 state_set(&ep->com, CONNECTING);
2328 ep->com.local_addr = cm_id->local_addr;
2329 ep->com.remote_addr = cm_id->remote_addr;
2331 /* send connect request to rnic */
2332 err = send_connect(ep);
2336 cxgb4_l2t_release(ep->l2t);
2338 dst_release(ep->dst);
2340 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
2342 cm_id->rem_ref(cm_id);
2343 c4iw_put_ep(&ep->com);
/*
 * c4iw_create_listen - start listening on the address held in cm_id.
 *
 * @cm_id:   connection identifier carrying the local address
 * @backlog: backlog value stored on the listening endpoint
 *
 * Allocates a listening endpoint, takes a reference on cm_id and binds it,
 * allocates a server TID (PF_INET), moves the endpoint to LISTEN, asks the
 * lower-level driver to create the server on the first port with replies
 * directed to the first RX queue, and then waits for the reply delivered by
 * pass_open_rpl(). The endpoint is stored in cm_id->provider_data.
 *
 * The trailing lines are the unwind sequence: free the stid, release the
 * cm_id reference and drop the endpoint reference.
 */
2348 int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
2351 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2352 struct c4iw_listen_ep *ep;
2357 ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
2359 printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
2363 PDBG("%s ep %p\n", __func__, ep);
2364 cm_id->add_ref(cm_id);
2365 ep->com.cm_id = cm_id;
2367 ep->backlog = backlog;
2368 ep->com.local_addr = cm_id->local_addr;
2371 * Allocate a server TID.
2373 ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep);
2374 if (ep->stid == -1) {
2375 printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
2380 state_set(&ep->com, LISTEN);
2381 c4iw_init_wr_wait(&ep->com.wr_wait);
2382 err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], ep->stid,
2383 ep->com.local_addr.sin_addr.s_addr,
2384 ep->com.local_addr.sin_port,
2385 ep->com.dev->rdev.lldi.rxq_ids[0]);
2389 /* wait for pass_open_rpl */
2390 err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0,
2393 cm_id->provider_data = ep;
2397 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET);
2399 cm_id->rem_ref(cm_id);
2400 c4iw_put_ep(&ep->com);
/*
 * c4iw_destroy_listen - stop listening and tear down the listening endpoint.
 *
 * Marks the endpoint DEAD, re-arms its wait object, sends the close request
 * through listen_stop() and waits for the reply delivered by
 * close_listsrv_rpl(). The stid is then freed, the cm_id reference released
 * and the endpoint reference dropped.
 */
2406 int c4iw_destroy_listen(struct iw_cm_id *cm_id)
2409 struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
2411 PDBG("%s ep %p\n", __func__, ep);
2414 state_set(&ep->com, DEAD);
2415 c4iw_init_wr_wait(&ep->com.wr_wait);
2416 err = listen_stop(ep);
2419 err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0,
2421 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET);
2423 cm_id->rem_ref(cm_id);
2424 c4iw_put_ep(&ep->com);
/*
 * c4iw_ep_disconnect - begin closing a connection.
 *
 * @ep:     endpoint to disconnect
 * @abrupt: non-zero requests an abortive close (the branches that test it
 *          are not visible here, so this is inferred from the state names
 *          and the two send helpers -- confirm)
 * @gfp:    allocation flags passed on to send_abort() / send_halfclose()
 *
 * Runs with ep->com.mutex held. When the device reports a fatal error,
 * close_complete_upcall() is called and the endpoint is marked DEAD.
 * Otherwise the state machine is advanced: the visible transitions go to
 * ABORTING or CLOSING in one group of states and to ABORTING or MORIBUND in
 * another, with the CLOSE_SENT flag recording that a close was issued;
 * a disconnect in any other state is ignored with a trace message. The
 * closing action is then carried out with send_abort() (preceded by
 * close_complete_upcall()) or send_halfclose(). release_ep_resources() is
 * called after the mutex is dropped (guard not visible here).
 */
2428 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
2433 struct c4iw_rdev *rdev;
2435 mutex_lock(&ep->com.mutex);
2437 PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
2438 states[ep->com.state], abrupt);
2440 rdev = &ep->com.dev->rdev;
2441 if (c4iw_fatal_error(rdev)) {
2443 close_complete_upcall(ep);
2444 ep->com.state = DEAD;
2446 switch (ep->com.state) {
2454 ep->com.state = ABORTING;
2456 ep->com.state = CLOSING;
2459 set_bit(CLOSE_SENT, &ep->com.flags);
2462 if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
2466 ep->com.state = ABORTING;
2468 ep->com.state = MORIBUND;
2474 PDBG("%s ignoring disconnect ep %p state %u\n",
2475 __func__, ep, ep->com.state);
2484 close_complete_upcall(ep);
2485 ret = send_abort(ep, NULL, gfp);
2487 ret = send_halfclose(ep, gfp);
2491 mutex_unlock(&ep->com.mutex);
2493 release_ep_resources(ep);
/*
 * async_event - work-queue handler for CPL_FW6_MSG.
 *
 * Treats the start of the message data as a struct t4_cqe and passes it to
 * c4iw_ev_dispatch().
 */
2497 static int async_event(struct c4iw_dev *dev, struct sk_buff *skb)
2499 struct cpl_fw6_msg *rpl = cplhdr(skb);
2500 c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
/*
 * work_handlers - CPL opcode to handler table used by process_work().
 *
 * process_work() indexes this table with the opcode of each queued message
 * and treats a missing entry as a bug, so every opcode that can be queued
 * needs an entry here.
 */
2505 * These are the real handlers that are called from a
2508 static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = {
2509 [CPL_ACT_ESTABLISH] = act_establish,
2510 [CPL_ACT_OPEN_RPL] = act_open_rpl,
2511 [CPL_RX_DATA] = rx_data,
2512 [CPL_ABORT_RPL_RSS] = abort_rpl,
2513 [CPL_ABORT_RPL] = abort_rpl,
2514 [CPL_PASS_OPEN_RPL] = pass_open_rpl,
2515 [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
2516 [CPL_PASS_ACCEPT_REQ] = pass_accept_req,
2517 [CPL_PASS_ESTABLISH] = pass_establish,
2518 [CPL_PEER_CLOSE] = peer_close,
2519 [CPL_ABORT_REQ_RSS] = peer_abort,
2520 [CPL_CLOSE_CON_RPL] = close_con_rpl,
2521 [CPL_RDMA_TERMINATE] = terminate,
2522 [CPL_FW4_ACK] = fw4_ack,
2523 [CPL_FW6_MSG] = async_event
/*
 * process_timeout - handle an expired endpoint timer in work-queue context.
 *
 * With ep->com.mutex held the endpoint state is examined. The visible
 * branches are:
 *   - move to ABORTING and report -ETIMEDOUT via connect_reply_upcall();
 *   - move to ABORTING;
 *   - move the QP (if a cm_id and QP are bound) to C4IW_QP_STATE_ERROR and
 *     then move to ABORTING;
 *   - emit a WARN for an unexpected state.
 * After the mutex is dropped the connection is aborted (the guard, if any,
 * is not visible here) and an endpoint reference is dropped.
 */
2526 static void process_timeout(struct c4iw_ep *ep)
2528 struct c4iw_qp_attributes attrs;
2531 mutex_lock(&ep->com.mutex);
2532 PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
2534 switch (ep->com.state) {
2536 __state_set(&ep->com, ABORTING);
2537 connect_reply_upcall(ep, -ETIMEDOUT);
2540 __state_set(&ep->com, ABORTING);
2544 if (ep->com.cm_id && ep->com.qp) {
2545 attrs.next_state = C4IW_QP_STATE_ERROR;
2546 c4iw_modify_qp(ep->com.qp->rhp,
2547 ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
2550 __state_set(&ep->com, ABORTING);
2553 WARN(1, "%s unexpected state ep %p tid %u state %u\n",
2554 __func__, ep, ep->hwtid, ep->com.state);
2557 mutex_unlock(&ep->com.mutex);
2559 abort_connection(ep, NULL, GFP_KERNEL);
2560 c4iw_put_ep(&ep->com);
/*
 * process_timedout_eps - run process_timeout() for every endpoint queued on
 * timeout_list.
 *
 * The list is examined under timeout_lock (taken with interrupts disabled).
 * For each entry the lock is dropped while process_timeout() runs and taken
 * again before the list is examined for the next entry.
 */
2563 static void process_timedout_eps(void)
2567 spin_lock_irq(&timeout_lock);
2568 while (!list_empty(&timeout_list)) {
2569 struct list_head *tmp;
2571 tmp = timeout_list.next;
2573 spin_unlock_irq(&timeout_lock);
2574 ep = list_entry(tmp, struct c4iw_ep, entry);
2575 process_timeout(ep);
2576 spin_lock_irq(&timeout_lock);
2578 spin_unlock_irq(&timeout_lock);
/*
 * process_work - work function behind skb_work.
 *
 * Drains the rxq queue. For each skb the owning device pointer is read back
 * from skb->cb (at the offset where sched() stored it), the CPL opcode is
 * taken from the message and the matching entry of work_handlers is called;
 * a missing handler is treated as a bug. process_timedout_eps() is also
 * called so that expired endpoint timers are handled in the same context.
 *
 * The DECLARE_WORK line at the end binds skb_work to this function.
 */
2581 static void process_work(struct work_struct *work)
2583 struct sk_buff *skb = NULL;
2584 struct c4iw_dev *dev;
2585 struct cpl_act_establish *rpl;
2586 unsigned int opcode;
2589 while ((skb = skb_dequeue(&rxq))) {
2591 dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
2592 opcode = rpl->ot.opcode;
2594 BUG_ON(!work_handlers[opcode]);
2595 ret = work_handlers[opcode](dev, skb);
2599 process_timedout_eps();
2602 static DECLARE_WORK(skb_work, process_work);
/*
 * ep_timeout - queue an endpoint for timeout processing.
 *
 * @arg: the endpoint pointer, passed as an unsigned long
 *
 * Appends the endpoint to timeout_list under timeout_lock and schedules
 * skb_work, so the actual handling happens later in process_timedout_eps().
 * The signature suggests this is the callback of the endpoint timer
 * (the registration is not visible here -- confirm).
 */
2604 static void ep_timeout(unsigned long arg)
2606 struct c4iw_ep *ep = (struct c4iw_ep *)arg;
2608 spin_lock(&timeout_lock);
2609 list_add_tail(&ep->entry, &timeout_list);
2610 spin_unlock(&timeout_lock);
2611 queue_work(workq, &skb_work);
/*
 * sched - defer a CPL message to the work queue.
 *
 * Stores the device pointer inside skb->cb at offset sizeof(void *), which
 * is where process_work() reads it back, appends the skb to rxq and
 * schedules skb_work. Most entries of c4iw_handlers point here.
 */
2615 * All the CM events are handled on a work queue to have a safe context.
2617 static int sched(struct c4iw_dev *dev, struct sk_buff *skb)
2621 * Save dev in the skb->cb area.
2623 *((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev;
2626 * Queue the skb and schedule the worker thread.
2628 skb_queue_tail(&rxq, skb);
2629 queue_work(workq, &skb_work);
/*
 * set_tcb_rpl - direct handler for CPL_SET_TCB_RPL.
 *
 * Logs an error naming the status and TID when the reply status is anything
 * other than CPL_ERR_NONE. No other action is visible.
 */
2633 static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2635 struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
2637 if (rpl->status != CPL_ERR_NONE) {
2638 printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
2639 "for tid %u\n", rpl->status, GET_TID(rpl));
/*
 * fw6_msg - direct handler for CPL_FW6_MSG.
 *
 * Dispatches on rpl->type. In the visible reply branch the completion
 * status is taken from bits 8..15 of the first data word (after conversion
 * from big endian), the second data word is interpreted as a pointer to the
 * c4iw_wr_wait object of the waiting thread, and that waiter is woken with
 * the negated status (or 0 on success). Unexpected message types are
 * logged.
 *
 * NOTE(review): data[0] is byte-swapped before use while data[1] is cast to
 * a pointer as is; presumably the sender stored the pointer without
 * conversion -- confirm against the code that posts the work request.
 */
2645 static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
2647 struct cpl_fw6_msg *rpl = cplhdr(skb);
2648 struct c4iw_wr_wait *wr_waitp;
2651 PDBG("%s type %u\n", __func__, rpl->type);
2653 switch (rpl->type) {
2655 ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
2656 wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
2657 PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
2659 c4iw_wake_up(wr_waitp, ret ? -ret : 0);
2666 printk(KERN_ERR MOD "%s unexpected fw6 msg type %u\n", __func__,
/*
 * peer_abort_intr - direct handler for CPL_ABORT_REQ_RSS.
 *
 * Looks up the endpoint by TID and warns when none exists. Negative-advice
 * aborts (see is_neg_adv_abort()) are only traced. For a real abort any
 * thread waiting on ep->com.wr_wait is woken with -ECONNRESET right away,
 * before the message is processed further.
 */
2674 static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
2676 struct cpl_abort_req_rss *req = cplhdr(skb);
2678 struct tid_info *t = dev->rdev.lldi.tids;
2679 unsigned int tid = GET_TID(req);
2681 ep = lookup_tid(t, tid);
2683 printk(KERN_WARNING MOD
2684 "Abort on non-existent endpoint, tid %d\n", tid);
2688 if (is_neg_adv_abort(req->status)) {
2689 PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
2694 PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
2698 * Wake up any threads in rdma_init() or rdma_fini().
2700 c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
/*
 * c4iw_handlers - first-level CPL opcode to handler table (non-static, so
 * visible outside this file).
 *
 * Most opcodes are routed to sched(), which defers them to the work queue
 * where work_handlers takes over. CPL_ABORT_REQ_RSS goes through
 * peer_abort_intr() first, while CPL_SET_TCB_RPL and CPL_FW6_MSG are
 * handled directly by set_tcb_rpl() and fw6_msg().
 */
2706 * Most upcalls from the T4 Core go to sched() to
2707 * schedule the processing on a work queue.
2709 c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
2710 [CPL_ACT_ESTABLISH] = sched,
2711 [CPL_ACT_OPEN_RPL] = sched,
2712 [CPL_RX_DATA] = sched,
2713 [CPL_ABORT_RPL_RSS] = sched,
2714 [CPL_ABORT_RPL] = sched,
2715 [CPL_PASS_OPEN_RPL] = sched,
2716 [CPL_CLOSE_LISTSRV_RPL] = sched,
2717 [CPL_PASS_ACCEPT_REQ] = sched,
2718 [CPL_PASS_ESTABLISH] = sched,
2719 [CPL_PEER_CLOSE] = sched,
2720 [CPL_CLOSE_CON_RPL] = sched,
2721 [CPL_ABORT_REQ_RSS] = peer_abort_intr,
2722 [CPL_RDMA_TERMINATE] = sched,
2723 [CPL_FW4_ACK] = sched,
2724 [CPL_SET_TCB_RPL] = set_tcb_rpl,
2725 [CPL_FW6_MSG] = fw6_msg
/*
 * c4iw_cm_init - set up the connection manager.
 *
 * Initialises timeout_lock and the rxq skb queue and creates the
 * single-threaded work queue named iw_cxgb4 on which all deferred CM work
 * runs. The handling of a failed work queue creation and the return value
 * are not visible here.
 */
2728 int __init c4iw_cm_init(void)
2730 spin_lock_init(&timeout_lock);
2731 skb_queue_head_init(&rxq);
2733 workq = create_singlethread_workqueue("iw_cxgb4");
/*
 * c4iw_cm_term - tear down the connection manager.
 *
 * Warns if endpoints are still queued on timeout_list, then flushes and
 * destroys the work queue created by c4iw_cm_init().
 */
2740 void __exit c4iw_cm_term(void)
2742 WARN_ON(!list_empty(&timeout_list));
2743 flush_workqueue(workq);
2744 destroy_workqueue(workq);