RDMA/cxgb4: Use vmalloc() for debugfs QP dump
drivers/infiniband/hw/cxgb4/device.c
/*
 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/debugfs.h>

#include <rdma/ib_verbs.h>

#include "iw_cxgb4.h"

#define DRV_VERSION "0.1"

MODULE_AUTHOR("Steve Wise");
MODULE_DESCRIPTION("Chelsio T4 RDMA Driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

struct uld_ctx {
        struct list_head entry;
        struct cxgb4_lld_info lldi;
        struct c4iw_dev *dev;
};

static LIST_HEAD(uld_ctx_list);
static DEFINE_MUTEX(dev_mutex);

static struct dentry *c4iw_debugfs_root;

struct c4iw_debugfs_data {
        struct c4iw_dev *devp;
        char *buf;
        int bufsize;
        int pos;
};

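/*
 * Helpers shared by the "qps" and "stags" debugfs files: each open()
 * walks the relevant idr under the device lock to size a dump buffer,
 * fills it one line per object, and debugfs_read() then serves it via
 * simple_read_from_buffer().
 */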
static int count_idrs(int id, void *p, void *data)
{
        int *countp = data;

        *countp = *countp + 1;
        return 0;
}

static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count,
                            loff_t *ppos)
{
        struct c4iw_debugfs_data *d = file->private_data;

        return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos);
}

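/*
 * Format one line per QP into the dump buffer.  The idr holds each QP
 * under its SQ qid (and possibly other ids), so entries whose id is not
 * the SQ qid are skipped to keep each QP to a single line.  pos is only
 * advanced when the whole line fit (cc < space); a full buffer stops
 * the walk by returning non-zero.
 */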
static int dump_qp(int id, void *p, void *data)
{
        struct c4iw_qp *qp = p;
        struct c4iw_debugfs_data *qpd = data;
        int space;
        int cc;

        if (id != qp->wq.sq.qid)
                return 0;

        space = qpd->bufsize - qpd->pos - 1;
        if (space == 0)
                return 1;

        if (qp->ep)
                cc = snprintf(qpd->buf + qpd->pos, space,
                             "qp sq id %u rq id %u state %u onchip %u "
                             "ep tid %u state %u %pI4:%u->%pI4:%u\n",
                             qp->wq.sq.qid, qp->wq.rq.qid, (int)qp->attr.state,
                             qp->wq.sq.flags & T4_SQ_ONCHIP,
                             qp->ep->hwtid, (int)qp->ep->com.state,
                             &qp->ep->com.local_addr.sin_addr.s_addr,
                             ntohs(qp->ep->com.local_addr.sin_port),
                             &qp->ep->com.remote_addr.sin_addr.s_addr,
                             ntohs(qp->ep->com.remote_addr.sin_port));
        else
                cc = snprintf(qpd->buf + qpd->pos, space,
                             "qp sq id %u rq id %u state %u onchip %u\n",
                              qp->wq.sq.qid, qp->wq.rq.qid,
                              (int)qp->attr.state,
                              qp->wq.sq.flags & T4_SQ_ONCHIP);
        if (cc < space)
                qpd->pos += cc;
        return 0;
}

static int qp_release(struct inode *inode, struct file *file)
{
        struct c4iw_debugfs_data *qpd = file->private_data;
        if (!qpd) {
                printk(KERN_INFO "%s null qpd?\n", __func__);
                return 0;
        }
        vfree(qpd->buf);
        kfree(qpd);
        return 0;
}

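/*
 * Count the QPs under the device lock and size the dump buffer at 128
 * bytes per entry.  The buffer is vmalloc()'d (and vfree()'d in
 * qp_release()) since a large number of QPs can make this allocation
 * too big for kmalloc().
 */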
static int qp_open(struct inode *inode, struct file *file)
{
        struct c4iw_debugfs_data *qpd;
        int ret = 0;
        int count = 1;

        qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
        if (!qpd) {
                ret = -ENOMEM;
                goto out;
        }
        qpd->devp = inode->i_private;
        qpd->pos = 0;

        spin_lock_irq(&qpd->devp->lock);
        idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
        spin_unlock_irq(&qpd->devp->lock);

        qpd->bufsize = count * 128;
        qpd->buf = vmalloc(qpd->bufsize);
        if (!qpd->buf) {
                ret = -ENOMEM;
                goto err1;
        }

        spin_lock_irq(&qpd->devp->lock);
        idr_for_each(&qpd->devp->qpidr, dump_qp, qpd);
        spin_unlock_irq(&qpd->devp->lock);

        qpd->buf[qpd->pos++] = 0;
        file->private_data = qpd;
        goto out;
err1:
        kfree(qpd);
out:
        return ret;
}

static const struct file_operations qp_debugfs_fops = {
        .owner   = THIS_MODULE,
        .open    = qp_open,
        .release = qp_release,
        .read    = debugfs_read,
        .llseek  = default_llseek,
};

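/* Emit one STAG per line; the idr index shifted left by 8 is the stag. */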
static int dump_stag(int id, void *p, void *data)
{
        struct c4iw_debugfs_data *stagd = data;
        int space;
        int cc;

        space = stagd->bufsize - stagd->pos - 1;
        if (space == 0)
                return 1;

        cc = snprintf(stagd->buf + stagd->pos, space, "0x%x\n", id<<8);
        if (cc < space)
                stagd->pos += cc;
        return 0;
}

static int stag_release(struct inode *inode, struct file *file)
{
        struct c4iw_debugfs_data *stagd = file->private_data;
        if (!stagd) {
                printk(KERN_INFO "%s null stagd?\n", __func__);
                return 0;
        }
        kfree(stagd->buf);
        kfree(stagd);
        return 0;
}

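/*
 * Same pattern as qp_open(), but each entry is at most a fixed-width
 * "0x12345678\n" line, so a kmalloc()'d buffer suffices here.
 */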
static int stag_open(struct inode *inode, struct file *file)
{
        struct c4iw_debugfs_data *stagd;
        int ret = 0;
        int count = 1;

        stagd = kmalloc(sizeof *stagd, GFP_KERNEL);
        if (!stagd) {
                ret = -ENOMEM;
                goto out;
        }
        stagd->devp = inode->i_private;
        stagd->pos = 0;

        spin_lock_irq(&stagd->devp->lock);
        idr_for_each(&stagd->devp->mmidr, count_idrs, &count);
        spin_unlock_irq(&stagd->devp->lock);

        stagd->bufsize = count * sizeof("0x12345678\n");
        stagd->buf = kmalloc(stagd->bufsize, GFP_KERNEL);
        if (!stagd->buf) {
                ret = -ENOMEM;
                goto err1;
        }

        spin_lock_irq(&stagd->devp->lock);
        idr_for_each(&stagd->devp->mmidr, dump_stag, stagd);
        spin_unlock_irq(&stagd->devp->lock);

        stagd->buf[stagd->pos++] = 0;
        file->private_data = stagd;
        goto out;
err1:
        kfree(stagd);
out:
        return ret;
}

static const struct file_operations stag_debugfs_fops = {
        .owner   = THIS_MODULE,
        .open    = stag_open,
        .release = stag_release,
        .read    = debugfs_read,
        .llseek  = default_llseek,
};

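/*
 * The "stats" debugfs file reports resource usage (total/current/max)
 * for PDs, qids and adapter memory, plus the doorbell flow-control
 * counters and the current doorbell state.
 */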
static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"};

static int stats_show(struct seq_file *seq, void *v)
{
        struct c4iw_dev *dev = seq->private;

        seq_printf(seq, " Object: %10s %10s %10s\n", "Total", "Current", "Max");
        seq_printf(seq, "     PDID: %10llu %10llu %10llu\n",
                        dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur,
                        dev->rdev.stats.pd.max);
        seq_printf(seq, "      QID: %10llu %10llu %10llu\n",
                        dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
                        dev->rdev.stats.qid.max);
        seq_printf(seq, "   TPTMEM: %10llu %10llu %10llu\n",
                        dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
                        dev->rdev.stats.stag.max);
        seq_printf(seq, "   PBLMEM: %10llu %10llu %10llu\n",
                        dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur,
                        dev->rdev.stats.pbl.max);
        seq_printf(seq, "   RQTMEM: %10llu %10llu %10llu\n",
                        dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur,
                        dev->rdev.stats.rqt.max);
        seq_printf(seq, "  OCQPMEM: %10llu %10llu %10llu\n",
                        dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
                        dev->rdev.stats.ocqp.max);
        seq_printf(seq, "  DB FULL: %10llu\n", dev->rdev.stats.db_full);
        seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
        seq_printf(seq, "  DB DROP: %10llu\n", dev->rdev.stats.db_drop);
        seq_printf(seq, " DB State: %s Transitions %llu\n",
                   db_state_str[dev->db_state],
                   dev->rdev.stats.db_state_transitions);
        return 0;
}

static int stats_open(struct inode *inode, struct file *file)
{
        return single_open(file, stats_show, inode->i_private);
}

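/* Writing to the "stats" file resets the max values and doorbell counters. */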
static ssize_t stats_clear(struct file *file, const char __user *buf,
                size_t count, loff_t *pos)
{
        struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;

        mutex_lock(&dev->rdev.stats.lock);
        dev->rdev.stats.pd.max = 0;
        dev->rdev.stats.qid.max = 0;
        dev->rdev.stats.stag.max = 0;
        dev->rdev.stats.pbl.max = 0;
        dev->rdev.stats.rqt.max = 0;
        dev->rdev.stats.ocqp.max = 0;
        dev->rdev.stats.db_full = 0;
        dev->rdev.stats.db_empty = 0;
        dev->rdev.stats.db_drop = 0;
        dev->rdev.stats.db_state_transitions = 0;
        mutex_unlock(&dev->rdev.stats.lock);
        return count;
}

static const struct file_operations stats_debugfs_fops = {
        .owner   = THIS_MODULE,
        .open    = stats_open,
        .release = single_release,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .write   = stats_clear,
};

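/*
 * Create the per-device debugfs files ("qps", "stags", "stats") under
 * the directory created in c4iw_alloc().  debugfs failures are not
 * treated as fatal; the caller ignores the return value.
 */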
static int setup_debugfs(struct c4iw_dev *devp)
{
        struct dentry *de;

        if (!devp->debugfs_root)
                return -1;

        de = debugfs_create_file("qps", S_IWUSR, devp->debugfs_root,
                                 (void *)devp, &qp_debugfs_fops);
        if (de && de->d_inode)
                de->d_inode->i_size = 4096;

        de = debugfs_create_file("stags", S_IWUSR, devp->debugfs_root,
                                 (void *)devp, &stag_debugfs_fops);
        if (de && de->d_inode)
                de->d_inode->i_size = 4096;

        de = debugfs_create_file("stats", S_IWUSR, devp->debugfs_root,
                        (void *)devp, &stats_debugfs_fops);
        if (de && de->d_inode)
                de->d_inode->i_size = 4096;

        return 0;
}

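/*
 * Release qids cached in a user context.  Only qids allocated directly
 * from the qid fifo (those aligned to the doorbell density, i.e.
 * !(qid & qpmask)) are returned to the fifo and accounted in the
 * stats; the remaining list entries are simply freed.
 */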
void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
                               struct c4iw_dev_ucontext *uctx)
{
        struct list_head *pos, *nxt;
        struct c4iw_qid_list *entry;

        mutex_lock(&uctx->lock);
        list_for_each_safe(pos, nxt, &uctx->qpids) {
                entry = list_entry(pos, struct c4iw_qid_list, entry);
                list_del_init(&entry->entry);
                if (!(entry->qid & rdev->qpmask)) {
                        c4iw_put_resource(&rdev->resource.qid_fifo, entry->qid,
                                        &rdev->resource.qid_fifo_lock);
                        mutex_lock(&rdev->stats.lock);
                        rdev->stats.qid.cur -= rdev->qpmask + 1;
                        mutex_unlock(&rdev->stats.lock);
                }
                kfree(entry);
        }

        list_for_each_safe(pos, nxt, &uctx->qpids) {
                entry = list_entry(pos, struct c4iw_qid_list, entry);
                list_del_init(&entry->entry);
                kfree(entry);
        }
        mutex_unlock(&uctx->lock);
}

void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
                            struct c4iw_dev_ucontext *uctx)
{
        INIT_LIST_HEAD(&uctx->qpids);
        INIT_LIST_HEAD(&uctx->cqids);
        mutex_init(&uctx->lock);
}

/* Caller takes care of locking if needed */
static int c4iw_rdev_open(struct c4iw_rdev *rdev)
{
        int err;

        c4iw_init_dev_ucontext(rdev, &rdev->uctx);

        /*
         * qpshift is the number of bits to shift the qpid left in order
         * to get the correct address of the doorbell for that qp.
         */
        rdev->qpshift = PAGE_SHIFT - ilog2(rdev->lldi.udb_density);
        rdev->qpmask = rdev->lldi.udb_density - 1;
        rdev->cqshift = PAGE_SHIFT - ilog2(rdev->lldi.ucq_density);
        rdev->cqmask = rdev->lldi.ucq_density - 1;
        PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d "
             "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x "
             "qp qid start %u size %u cq qid start %u size %u\n",
             __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
             rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
             rdev->lldi.vr->pbl.start,
             rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
             rdev->lldi.vr->rq.size,
             rdev->lldi.vr->qp.start,
             rdev->lldi.vr->qp.size,
             rdev->lldi.vr->cq.start,
             rdev->lldi.vr->cq.size);
        PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu "
             "qpmask 0x%x cqshift %lu cqmask 0x%x\n",
             (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
             (void *)pci_resource_start(rdev->lldi.pdev, 2),
             rdev->lldi.db_reg,
             rdev->lldi.gts_reg,
             rdev->qpshift, rdev->qpmask,
             rdev->cqshift, rdev->cqmask);

        if (c4iw_num_stags(rdev) == 0) {
                err = -EINVAL;
                goto err1;
        }

        rdev->stats.pd.total = T4_MAX_NUM_PD;
        rdev->stats.stag.total = rdev->lldi.vr->stag.size;
        rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
        rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
        rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
        rdev->stats.qid.total = rdev->lldi.vr->qp.size;

        err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing resources\n", err);
                goto err1;
        }
        err = c4iw_pblpool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
                goto err2;
        }
        err = c4iw_rqtpool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
                goto err3;
        }
        err = c4iw_ocqp_pool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
                goto err4;
        }
        return 0;
err4:
        c4iw_rqtpool_destroy(rdev);
err3:
        c4iw_pblpool_destroy(rdev);
err2:
        c4iw_destroy_resource(&rdev->resource);
err1:
        return err;
}

static void c4iw_rdev_close(struct c4iw_rdev *rdev)
{
        c4iw_pblpool_destroy(rdev);
        c4iw_rqtpool_destroy(rdev);
        c4iw_destroy_resource(&rdev->resource);
}

static void c4iw_dealloc(struct uld_ctx *ctx)
{
        c4iw_rdev_close(&ctx->dev->rdev);
        idr_destroy(&ctx->dev->cqidr);
        idr_destroy(&ctx->dev->qpidr);
        idr_destroy(&ctx->dev->mmidr);
        iounmap(ctx->dev->rdev.oc_mw_kva);
        ib_dealloc_device(&ctx->dev->ibdev);
        ctx->dev = NULL;
}

static void c4iw_remove(struct uld_ctx *ctx)
{
        PDBG("%s c4iw_dev %p\n", __func__,  ctx->dev);
        c4iw_unregister_device(ctx->dev);
        c4iw_dealloc(ctx);
}

static int rdma_supported(const struct cxgb4_lld_info *infop)
{
        return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&
               infop->vr->rq.size > 0 && infop->vr->qp.size > 0 &&
               infop->vr->cq.size > 0 && infop->vr->ocq.size > 0;
}

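/*
 * Allocate and initialize the iw_cxgb4 device for a cxgb4 adapter:
 * check that RDMA resources were provisioned, map the on-chip queue
 * (OCQP) memory window at the end of BAR2, open the rdev, and set up
 * the idrs, locks and debugfs directory.
 */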
static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
{
        struct c4iw_dev *devp;
        int ret;

        if (!rdma_supported(infop)) {
                printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
                       pci_name(infop->pdev));
                return ERR_PTR(-ENOSYS);
        }
        devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
        if (!devp) {
                printk(KERN_ERR MOD "Cannot allocate ib device\n");
                return ERR_PTR(-ENOMEM);
        }
        devp->rdev.lldi = *infop;

        devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) +
                (pci_resource_len(devp->rdev.lldi.pdev, 2) -
                 roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size));
        devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
                                               devp->rdev.lldi.vr->ocq.size);

        PDBG(KERN_INFO MOD "ocq memory: "
               "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
               devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
               devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);

        ret = c4iw_rdev_open(&devp->rdev);
        if (ret) {
                printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
                ib_dealloc_device(&devp->ibdev);
                return ERR_PTR(ret);
        }

        idr_init(&devp->cqidr);
        idr_init(&devp->qpidr);
        idr_init(&devp->mmidr);
        spin_lock_init(&devp->lock);
        mutex_init(&devp->rdev.stats.lock);
        mutex_init(&devp->db_mutex);

        if (c4iw_debugfs_root) {
                devp->debugfs_root = debugfs_create_dir(
                                        pci_name(devp->rdev.lldi.pdev),
                                        c4iw_debugfs_root);
                setup_debugfs(devp);
        }
        return devp;
}

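/*
 * ULD "add" callback, invoked by cxgb4 for each adapter.  Only the ULD
 * context is created here; the RDMA device itself is brought up later
 * by the CXGB4_STATE_UP state-change callback.
 */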
static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
{
        struct uld_ctx *ctx;
        static int vers_printed;
        int i;

        if (!vers_printed++)
                printk(KERN_INFO MOD "Chelsio T4 RDMA Driver - version %s\n",
                       DRV_VERSION);

        ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
        if (!ctx) {
                ctx = ERR_PTR(-ENOMEM);
                goto out;
        }
        ctx->lldi = *infop;

        PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
             __func__, pci_name(ctx->lldi.pdev),
             ctx->lldi.nchan, ctx->lldi.nrxq,
             ctx->lldi.ntxq, ctx->lldi.nports);

        mutex_lock(&dev_mutex);
        list_add_tail(&ctx->entry, &uld_ctx_list);
        mutex_unlock(&dev_mutex);

        for (i = 0; i < ctx->lldi.nrxq; i++)
                PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
out:
        return ctx;
}

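/*
 * Receive path from the LLD.  Three cases: a response without a packet
 * gather list (copy the CPL out of the descriptor), an asynchronous
 * notification (hand the qid to the event handler), or a normal gather
 * list (build an skb).  The CPL opcode then selects the handler from
 * the c4iw_handlers[] table.
 */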
static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
                        const struct pkt_gl *gl)
{
        struct uld_ctx *ctx = handle;
        struct c4iw_dev *dev = ctx->dev;
        struct sk_buff *skb;
        const struct cpl_act_establish *rpl;
        unsigned int opcode;

        if (gl == NULL) {
                /* omit RSS and rsp_ctrl at end of descriptor */
                unsigned int len = 64 - sizeof(struct rsp_ctrl) - 8;

                skb = alloc_skb(256, GFP_ATOMIC);
                if (!skb)
                        goto nomem;
                __skb_put(skb, len);
                skb_copy_to_linear_data(skb, &rsp[1], len);
        } else if (gl == CXGB4_MSG_AN) {
                const struct rsp_ctrl *rc = (void *)rsp;

                u32 qid = be32_to_cpu(rc->pldbuflen_qid);
                c4iw_ev_handler(dev, qid);
                return 0;
        } else {
                skb = cxgb4_pktgl_to_skb(gl, 128, 128);
                if (unlikely(!skb))
                        goto nomem;
        }

        rpl = cplhdr(skb);
        opcode = rpl->ot.opcode;

        if (c4iw_handlers[opcode])
                c4iw_handlers[opcode](dev, skb);
        else
                printk(KERN_INFO "%s no handler opcode 0x%x...\n", __func__,
                       opcode);

        return 0;
nomem:
        return -1;
}

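/*
 * ULD state-change callback.  The RDMA device is allocated and
 * registered on the first transition to UP, and torn down on DOWN,
 * DETACH, or a fatal error (which is also reported to the IB core as
 * IB_EVENT_DEVICE_FATAL).
 */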
static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
{
        struct uld_ctx *ctx = handle;

        PDBG("%s new_state %u\n", __func__, new_state);
        switch (new_state) {
        case CXGB4_STATE_UP:
                printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
                if (!ctx->dev) {
                        int ret;

                        ctx->dev = c4iw_alloc(&ctx->lldi);
                        if (IS_ERR(ctx->dev)) {
                                printk(KERN_ERR MOD
                                       "%s: initialization failed: %ld\n",
                                       pci_name(ctx->lldi.pdev),
                                       PTR_ERR(ctx->dev));
                                ctx->dev = NULL;
                                break;
                        }
                        ret = c4iw_register_device(ctx->dev);
                        if (ret) {
                                printk(KERN_ERR MOD
                                       "%s: RDMA registration failed: %d\n",
                                       pci_name(ctx->lldi.pdev), ret);
                                c4iw_dealloc(ctx);
                        }
                }
                break;
        case CXGB4_STATE_DOWN:
                printk(KERN_INFO MOD "%s: Down\n",
                       pci_name(ctx->lldi.pdev));
                if (ctx->dev)
                        c4iw_remove(ctx);
                break;
        case CXGB4_STATE_START_RECOVERY:
                printk(KERN_INFO MOD "%s: Fatal Error\n",
                       pci_name(ctx->lldi.pdev));
                if (ctx->dev) {
                        struct ib_event event;

                        ctx->dev->rdev.flags |= T4_FATAL_ERROR;
                        memset(&event, 0, sizeof event);
                        event.event  = IB_EVENT_DEVICE_FATAL;
                        event.device = &ctx->dev->ibdev;
                        ib_dispatch_event(&event);
                        c4iw_remove(ctx);
                }
                break;
        case CXGB4_STATE_DETACH:
                printk(KERN_INFO MOD "%s: Detach\n",
                       pci_name(ctx->lldi.pdev));
                if (ctx->dev)
                        c4iw_remove(ctx);
                break;
        }
        return 0;
}

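/*
 * Doorbell flow control.  When the LLD reports that the hardware
 * doorbell FIFO is filling up, doorbells are disabled on every QP and
 * the device moves to FLOW_CONTROL; they are re-enabled once the FIFO
 * empties, provided the QP count is at or below db_fc_threshold.
 */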
static int disable_qp_db(int id, void *p, void *data)
{
        struct c4iw_qp *qp = p;

        t4_disable_wq_db(&qp->wq);
        return 0;
}

static void stop_queues(struct uld_ctx *ctx)
{
        spin_lock_irq(&ctx->dev->lock);
        if (ctx->dev->db_state == NORMAL) {
                ctx->dev->rdev.stats.db_state_transitions++;
                ctx->dev->db_state = FLOW_CONTROL;
                idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
        }
        spin_unlock_irq(&ctx->dev->lock);
}

static int enable_qp_db(int id, void *p, void *data)
{
        struct c4iw_qp *qp = p;

        t4_enable_wq_db(&qp->wq);
        return 0;
}

static void resume_queues(struct uld_ctx *ctx)
{
        spin_lock_irq(&ctx->dev->lock);
        if (ctx->dev->qpcnt <= db_fc_threshold &&
            ctx->dev->db_state == FLOW_CONTROL) {
                ctx->dev->db_state = NORMAL;
                ctx->dev->rdev.stats.db_state_transitions++;
                idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
        }
        spin_unlock_irq(&ctx->dev->lock);
}

struct qp_list {
        unsigned idx;
        struct c4iw_qp **qps;
};

static int add_and_ref_qp(int id, void *p, void *data)
{
        struct qp_list *qp_listp = data;
        struct c4iw_qp *qp = p;

        c4iw_qp_add_ref(&qp->ibqp);
        qp_listp->qps[qp_listp->idx++] = qp;
        return 0;
}

static int count_qps(int id, void *p, void *data)
{
        unsigned *countp = data;
        (*countp)++;
        return 0;
}

static void deref_qps(struct qp_list qp_list)
{
        int idx;

        for (idx = 0; idx < qp_list.idx; idx++)
                c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp);
}

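/*
 * Re-sync the hardware SQ and RQ producer indices with the host's copy
 * for every QP that may have lost doorbells, waiting for the doorbell
 * FIFO to drain between updates.
 */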
static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
{
        int idx;
        int ret;

        for (idx = 0; idx < qp_list->idx; idx++) {
                struct c4iw_qp *qp = qp_list->qps[idx];

                ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
                                          qp->wq.sq.qid,
                                          t4_sq_host_wq_pidx(&qp->wq),
                                          t4_sq_wq_size(&qp->wq));
                if (ret) {
                        printk(KERN_ERR MOD "%s: Fatal error - "
                               "DB overflow recovery failed - "
                               "error syncing SQ qid %u\n",
                               pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
                        return;
                }

                ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
                                          qp->wq.rq.qid,
                                          t4_rq_host_wq_pidx(&qp->wq),
                                          t4_rq_wq_size(&qp->wq));

                if (ret) {
                        printk(KERN_ERR MOD "%s: Fatal error - "
                               "DB overflow recovery failed - "
                               "error syncing RQ qid %u\n",
                               pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
                        return;
                }

                /* Wait for the dbfifo to drain */
                while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        schedule_timeout(usecs_to_jiffies(10));
                }
        }
}

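/*
 * Full doorbell-drop recovery: lock out kernel doorbell ringers, put
 * all QPs into RECOVERY mode, let the doorbell FIFO drain, flush the
 * SGE EQ context cache, then replay the lost producer-index updates
 * via recover_lost_dbs() before resuming normal (or flow-controlled)
 * operation.
 */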
static void recover_queues(struct uld_ctx *ctx)
{
        int count = 0;
        struct qp_list qp_list;
        int ret;

        /* lock out kernel db ringers */
        mutex_lock(&ctx->dev->db_mutex);

        /* put all queues into recovery mode */
        spin_lock_irq(&ctx->dev->lock);
        ctx->dev->db_state = RECOVERY;
        ctx->dev->rdev.stats.db_state_transitions++;
        idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
        spin_unlock_irq(&ctx->dev->lock);

        /* slow everybody down */
        set_current_state(TASK_UNINTERRUPTIBLE);
        schedule_timeout(usecs_to_jiffies(1000));

        /* Wait for the dbfifo to completely drain. */
        while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
                set_current_state(TASK_UNINTERRUPTIBLE);
                schedule_timeout(usecs_to_jiffies(10));
        }

        /* flush the SGE contexts */
        ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
        if (ret) {
                printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
                       pci_name(ctx->lldi.pdev));
                goto out;
        }

        /* Count active queues so we can build a list of queues to recover */
        spin_lock_irq(&ctx->dev->lock);
        idr_for_each(&ctx->dev->qpidr, count_qps, &count);

        qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
        if (!qp_list.qps) {
                printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
                       pci_name(ctx->lldi.pdev));
                spin_unlock_irq(&ctx->dev->lock);
                goto out;
        }
        qp_list.idx = 0;

        /* add and ref each qp so it doesn't get freed */
        idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);

        spin_unlock_irq(&ctx->dev->lock);

        /* now traverse the list in a safe context to recover the db state */
        recover_lost_dbs(ctx, &qp_list);

        /* we're almost done!  deref the qps and clean up */
        deref_qps(qp_list);
        kfree(qp_list.qps);

        /* Wait for the dbfifo to completely drain again */
        while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
                set_current_state(TASK_UNINTERRUPTIBLE);
                schedule_timeout(usecs_to_jiffies(10));
        }

        /* resume the queues */
        spin_lock_irq(&ctx->dev->lock);
        if (ctx->dev->qpcnt > db_fc_threshold)
                ctx->dev->db_state = FLOW_CONTROL;
        else {
                ctx->dev->db_state = NORMAL;
                idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
        }
        ctx->dev->rdev.stats.db_state_transitions++;
        spin_unlock_irq(&ctx->dev->lock);

out:
        /* start up kernel db ringers again */
        mutex_unlock(&ctx->dev->db_mutex);
}

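/*
 * ULD control callback: dispatch doorbell FULL/EMPTY/DROP notifications
 * to the flow-control and recovery handlers above, and count each event
 * for the debugfs "stats" file.
 */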
static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
{
        struct uld_ctx *ctx = handle;

        switch (control) {
        case CXGB4_CONTROL_DB_FULL:
                stop_queues(ctx);
                mutex_lock(&ctx->dev->rdev.stats.lock);
                ctx->dev->rdev.stats.db_full++;
                mutex_unlock(&ctx->dev->rdev.stats.lock);
                break;
        case CXGB4_CONTROL_DB_EMPTY:
                resume_queues(ctx);
                mutex_lock(&ctx->dev->rdev.stats.lock);
                ctx->dev->rdev.stats.db_empty++;
                mutex_unlock(&ctx->dev->rdev.stats.lock);
                break;
        case CXGB4_CONTROL_DB_DROP:
                recover_queues(ctx);
                mutex_lock(&ctx->dev->rdev.stats.lock);
                ctx->dev->rdev.stats.db_drop++;
                mutex_unlock(&ctx->dev->rdev.stats.lock);
                break;
        default:
                printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
                       pci_name(ctx->lldi.pdev), control);
                break;
        }
        return 0;
}

static struct cxgb4_uld_info c4iw_uld_info = {
        .name = DRV_NAME,
        .add = c4iw_uld_add,
        .rx_handler = c4iw_uld_rx_handler,
        .state_change = c4iw_uld_state_change,
        .control = c4iw_uld_control,
};

static int __init c4iw_init_module(void)
{
        int err;

        err = c4iw_cm_init();
        if (err)
                return err;

        c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
        if (!c4iw_debugfs_root)
                printk(KERN_WARNING MOD
                       "could not create debugfs entry, continuing\n");

        cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);

        return 0;
}

static void __exit c4iw_exit_module(void)
{
        struct uld_ctx *ctx, *tmp;

        mutex_lock(&dev_mutex);
        list_for_each_entry_safe(ctx, tmp, &uld_ctx_list, entry) {
                if (ctx->dev)
                        c4iw_remove(ctx);
                kfree(ctx);
        }
        mutex_unlock(&dev_mutex);
        cxgb4_unregister_uld(CXGB4_ULD_RDMA);
        c4iw_cm_term();
        debugfs_remove_recursive(c4iw_debugfs_root);
}

module_init(c4iw_init_module);
module_exit(c4iw_exit_module);