2 * Copyright (c) 2013 Intel Corporation. All rights reserved.
3 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 #include <linux/spinlock.h>
36 #include <linux/pci.h>
38 #include <linux/delay.h>
39 #include <linux/netdevice.h>
40 #include <linux/vmalloc.h>
41 #include <linux/module.h>
42 #include <linux/prefetch.h>
47 * The size has to be longer than this string, so we can append
48 * board/chip information to it in the init code.
50 const char ib_qib_version[] = QIB_DRIVER_VERSION "\n";
52 DEFINE_SPINLOCK(qib_devs_lock);
53 LIST_HEAD(qib_dev_list);
54 DEFINE_MUTEX(qib_mutex); /* general driver use */
57 module_param_named(ibmtu, qib_ibmtu, uint, S_IRUGO);
58 MODULE_PARM_DESC(ibmtu, "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096");
60 unsigned qib_compat_ddr_negotiate = 1;
61 module_param_named(compat_ddr_negotiate, qib_compat_ddr_negotiate, uint,
63 MODULE_PARM_DESC(compat_ddr_negotiate,
64 "Attempt pre-IBTA 1.2 DDR speed negotiation");
66 MODULE_LICENSE("Dual BSD/GPL");
67 MODULE_AUTHOR("Intel <ibsupport@intel.com>");
68 MODULE_DESCRIPTION("Intel IB driver");
69 MODULE_VERSION(QIB_DRIVER_VERSION);
72 * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our
73 * PIO send buffers. This is well beyond anything currently
74 * defined in the InfiniBand spec.
76 #define QIB_PIO_MAXIBHDR 128
79 * QIB_MAX_PKT_RECV is the max # of packets processed per receive interrupt.
81 #define QIB_MAX_PKT_RECV 64
83 struct qlogic_ib_stats qib_stats;
85 const char *qib_get_unit_name(int unit)
87 static char iname[16];
89 snprintf(iname, sizeof(iname), "infinipath%u", unit);
94 * Return count of units with at least one port ACTIVE.
96 int qib_count_active_units(void)
98 struct qib_devdata *dd;
99 struct qib_pportdata *ppd;
101 int pidx, nunits_active = 0;
103 spin_lock_irqsave(&qib_devs_lock, flags);
104 list_for_each_entry(dd, &qib_dev_list, list) {
105 if (!(dd->flags & QIB_PRESENT) || !dd->kregbase)
107 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
108 ppd = dd->pport + pidx;
109 if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
110 QIBL_LINKARMED | QIBL_LINKACTIVE))) {
116 spin_unlock_irqrestore(&qib_devs_lock, flags);
117 return nunits_active;
121 * Return count of all units, optionally return in arguments
122 * the number of usable (present) units, and the number of
125 int qib_count_units(int *npresentp, int *nupp)
127 int nunits = 0, npresent = 0, nup = 0;
128 struct qib_devdata *dd;
131 struct qib_pportdata *ppd;
133 spin_lock_irqsave(&qib_devs_lock, flags);
135 list_for_each_entry(dd, &qib_dev_list, list) {
137 if ((dd->flags & QIB_PRESENT) && dd->kregbase)
139 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
140 ppd = dd->pport + pidx;
141 if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
142 QIBL_LINKARMED | QIBL_LINKACTIVE)))
147 spin_unlock_irqrestore(&qib_devs_lock, flags);
150 *npresentp = npresent;
158 * qib_wait_linkstate - wait for an IB link state change to occur
159 * @ppd: the perport data of the qlogic_ib device
160 * @state: the state to wait for
161 * @msecs: the number of milliseconds to wait
163 * Wait up to msecs milliseconds for an IB link state change to occur. For
164 * now, take the easy polling route. Currently used only by
165 * qib_set_linkstate. Returns 0 if state reached, otherwise
166 * -ETIMEDOUT. @state can have multiple states set, for any of several
169 int qib_wait_linkstate(struct qib_pportdata *ppd, u32 state, int msecs)
174 spin_lock_irqsave(&ppd->lflags_lock, flags);
175 if (ppd->state_wanted) {
176 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
180 ppd->state_wanted = state;
181 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
182 wait_event_interruptible_timeout(ppd->state_wait,
183 (ppd->lflags & state),
184 msecs_to_jiffies(msecs));
185 spin_lock_irqsave(&ppd->lflags_lock, flags);
186 ppd->state_wanted = 0;
187 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
189 if (!(ppd->lflags & state))
197 int qib_set_linkstate(struct qib_pportdata *ppd, u8 newstate)
201 struct qib_devdata *dd = ppd->dd;
205 case QIB_IB_LINKDOWN_ONLY:
206 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
207 IB_LINKCMD_DOWN | IB_LINKINITCMD_NOP);
212 case QIB_IB_LINKDOWN:
213 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
214 IB_LINKCMD_DOWN | IB_LINKINITCMD_POLL);
219 case QIB_IB_LINKDOWN_SLEEP:
220 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
221 IB_LINKCMD_DOWN | IB_LINKINITCMD_SLEEP);
226 case QIB_IB_LINKDOWN_DISABLE:
227 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
228 IB_LINKCMD_DOWN | IB_LINKINITCMD_DISABLE);
234 if (ppd->lflags & QIBL_LINKARMED) {
238 if (!(ppd->lflags & (QIBL_LINKINIT | QIBL_LINKACTIVE))) {
243 * Since the port can be ACTIVE when we ask for ARMED,
244 * clear QIBL_LINKV so we can wait for a transition.
245 * If the link isn't ARMED, then something else happened
246 * and there is no point waiting for ARMED.
248 spin_lock_irqsave(&ppd->lflags_lock, flags);
249 ppd->lflags &= ~QIBL_LINKV;
250 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
251 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
252 IB_LINKCMD_ARMED | IB_LINKINITCMD_NOP);
256 case QIB_IB_LINKACTIVE:
257 if (ppd->lflags & QIBL_LINKACTIVE) {
261 if (!(ppd->lflags & QIBL_LINKARMED)) {
265 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
266 IB_LINKCMD_ACTIVE | IB_LINKINITCMD_NOP);
267 lstate = QIBL_LINKACTIVE;
274 ret = qib_wait_linkstate(ppd, lstate, 10);
281 * Get address of eager buffer from its index (allocated in chunks, not
284 static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail)
286 const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift;
287 const u32 idx = etail & ((u32)rcd->rcvegrbufs_perchunk - 1);
289 return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift);
293 * Returns 1 if error was a CRC, else 0.
294 * Needed for some chips' synthesized error counters.
296 static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
297 u32 ctxt, u32 eflags, u32 l, u32 etail,
298 __le32 *rhf_addr, struct qib_message_header *rhdr)
302 if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR))
304 else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
305 /* For TIDERR and RC QPs preemptively schedule a NAK */
306 struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
307 struct qib_other_headers *ohdr = NULL;
308 struct qib_ibport *ibp = &ppd->ibport_data;
309 struct qib_qp *qp = NULL;
310 u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
311 u16 lid = be16_to_cpu(hdr->lrh[1]);
312 int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
318 /* Sanity check packet */
322 if (lid < QIB_MULTICAST_LID_BASE) {
323 lid &= ~((1 << ppd->lmc) - 1);
324 if (unlikely(lid != ppd->lid))
329 if (lnh == QIB_LRH_BTH)
331 else if (lnh == QIB_LRH_GRH) {
334 ohdr = &hdr->u.l.oth;
335 if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
337 vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
338 if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
343 /* Get opcode and PSN from packet */
344 opcode = be32_to_cpu(ohdr->bth[0]);
346 psn = be32_to_cpu(ohdr->bth[2]);
348 /* Get the destination QP number. */
349 qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
350 if (qp_num != QIB_MULTICAST_QPN) {
353 qp = qib_lookup_qpn(ibp, qp_num);
358 * Handle only RC QPs - for other QP types drop error
361 spin_lock(&qp->r_lock);
363 /* Check for valid receive state. */
364 if (!(ib_qib_state_ops[qp->state] &
365 QIB_PROCESS_RECV_OK)) {
370 switch (qp->ibqp.qp_type) {
377 be32_to_cpu(ohdr->bth[0]));
381 /* Only deal with RDMA Writes for now */
383 IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
384 diff = qib_cmp24(psn, qp->r_psn);
385 if (!qp->r_nak_state && diff >= 0) {
389 /* Use the expected PSN. */
390 qp->r_ack_psn = qp->r_psn;
392 * Wait to send the sequence
393 * NAK until all packets
394 * in the receive queue have
396 * Otherwise, we end up
397 * propagating congestion.
399 if (list_empty(&qp->rspwait)) {
408 } /* Out of sequence NAK */
409 } /* QP Request NAKs */
416 /* For now don't handle any other QP types */
421 spin_unlock(&qp->r_lock);
423 * Notify qib_destroy_qp() if it is waiting
426 if (atomic_dec_and_test(&qp->refcount))
429 } /* Valid packet with TIDErr */
436 * qib_kreceive - receive a packet
437 * @rcd: the qlogic_ib context
438 * @llic: gets count of good packets needed to clear lli,
439 * (used with chips that need to track crcs for lli)
441 * called from interrupt handler for errors or receive interrupt
442 * Returns number of CRC error packets, needed by some chips for
443 * local link integrity tracking. crcs are adjusted down by following
444 * good packets, if any, and count of good packets is also tracked.
446 u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
448 struct qib_devdata *dd = rcd->dd;
449 struct qib_pportdata *ppd = rcd->ppd;
452 const u32 rsize = dd->rcvhdrentsize; /* words */
453 const u32 maxcnt = dd->rcvhdrcnt * rsize; /* words */
454 u32 etail = -1, l, hdrqtail;
455 struct qib_message_header *hdr;
456 u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0;
459 struct qib_qp *qp, *nqp;
462 rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
463 if (dd->flags & QIB_NODMA_RTAIL) {
464 u32 seq = qib_hdrget_seq(rhf_addr);
466 if (seq != rcd->seq_cnt)
470 hdrqtail = qib_get_rcvhdrtail(rcd);
473 smp_rmb(); /* prevent speculative reads of dma'ed hdrq */
476 for (last = 0, i = 1; !last; i += !last) {
477 hdr = dd->f_get_msgheader(dd, rhf_addr);
478 eflags = qib_hdrget_err_flags(rhf_addr);
479 etype = qib_hdrget_rcv_type(rhf_addr);
481 tlen = qib_hdrget_length_in_bytes(rhf_addr);
483 if ((dd->flags & QIB_NODMA_RTAIL) ?
484 qib_hdrget_use_egr_buf(rhf_addr) :
485 (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
486 etail = qib_hdrget_index(rhf_addr);
488 if (tlen > sizeof(*hdr) ||
489 etype >= RCVHQ_RCV_TYPE_NON_KD) {
490 ebuf = qib_get_egrbuf(rcd, etail);
491 prefetch_range(ebuf, tlen - sizeof(*hdr));
495 u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2;
497 if (lrh_len != tlen) {
498 qib_stats.sps_lenerrs++;
502 if (etype == RCVHQ_RCV_TYPE_NON_KD && !eflags &&
504 tlen > (dd->rcvhdrentsize - 2 + 1 -
505 qib_hdrget_offset(rhf_addr)) << 2) {
510 * Both tiderr and qibhdrerr are set for all plain IB
511 * packets; only qibhdrerr should be set.
513 if (unlikely(eflags))
514 crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l,
515 etail, rhf_addr, hdr);
516 else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
517 qib_ib_rcv(rcd, hdr, ebuf, tlen);
520 else if (llic && *llic)
527 if (i == QIB_MAX_PKT_RECV)
530 rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
531 if (dd->flags & QIB_NODMA_RTAIL) {
532 u32 seq = qib_hdrget_seq(rhf_addr);
534 if (++rcd->seq_cnt > 13)
536 if (seq != rcd->seq_cnt)
538 } else if (l == hdrqtail)
541 * Update head regs etc., every 16 packets, if not last pkt,
542 * to help prevent rcvhdrq overflows, when many packets
543 * are processed and queue is nearly full.
544 * Don't request an interrupt for intermediate updates.
547 if (!last && !(i & 0xf)) {
548 dd->f_update_usrhead(rcd, lval, updegr, etail, i);
553 * Notify qib_destroy_qp() if it is waiting
554 * for lookaside_qp to finish.
556 if (rcd->lookaside_qp) {
557 if (atomic_dec_and_test(&rcd->lookaside_qp->refcount))
558 wake_up(&rcd->lookaside_qp->wait);
559 rcd->lookaside_qp = NULL;
565 * Iterate over all QPs waiting to respond.
566 * The list won't change since the IRQ is only run on one CPU.
568 list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
569 list_del_init(&qp->rspwait);
570 if (qp->r_flags & QIB_R_RSP_NAK) {
571 qp->r_flags &= ~QIB_R_RSP_NAK;
574 if (qp->r_flags & QIB_R_RSP_SEND) {
577 qp->r_flags &= ~QIB_R_RSP_SEND;
578 spin_lock_irqsave(&qp->s_lock, flags);
579 if (ib_qib_state_ops[qp->state] &
580 QIB_PROCESS_OR_FLUSH_SEND)
581 qib_schedule_send(qp);
582 spin_unlock_irqrestore(&qp->s_lock, flags);
584 if (atomic_dec_and_test(&qp->refcount))
589 /* Report number of packets consumed */
594 * Always write head at end, and setup rcv interrupt, even
595 * if no packets were processed.
597 lval = (u64)rcd->head | dd->rhdrhead_intr_off;
598 dd->f_update_usrhead(rcd, lval, updegr, etail, i);
603 * qib_set_mtu - set the MTU
604 * @ppd: the perport data
607 * We can handle "any" incoming size, the issue here is whether we
608 * need to restrict our outgoing size. For now, we don't do any
609 * sanity checking on this, and we don't deal with what happens to
610 * programs that are already running when the size changes.
611 * NOTE: changing the MTU will usually cause the IBC to go back to
614 int qib_set_mtu(struct qib_pportdata *ppd, u16 arg)
619 if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
624 chk = ib_mtu_enum_to_int(qib_ibmtu);
625 if (chk > 0 && arg > chk) {
630 piosize = ppd->ibmaxlen;
633 if (arg >= (piosize - QIB_PIO_MAXIBHDR)) {
634 /* Only if it's not the initial value (or reset to it) */
635 if (piosize != ppd->init_ibmaxlen) {
636 if (arg > piosize && arg <= ppd->init_ibmaxlen)
637 piosize = ppd->init_ibmaxlen - 2 * sizeof(u32);
638 ppd->ibmaxlen = piosize;
640 } else if ((arg + QIB_PIO_MAXIBHDR) != ppd->ibmaxlen) {
641 piosize = arg + QIB_PIO_MAXIBHDR - 2 * sizeof(u32);
642 ppd->ibmaxlen = piosize;
645 ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_MTU, 0);
653 int qib_set_lid(struct qib_pportdata *ppd, u32 lid, u8 lmc)
655 struct qib_devdata *dd = ppd->dd;
660 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LIDLMC,
661 lid | (~((1U << lmc) - 1)) << 16);
663 qib_devinfo(dd->pcidev, "IB%u:%u got a lid: 0x%x\n",
664 dd->unit, ppd->port, lid);
670 * Following deal with the "obviously simple" task of overriding the state
671 * of the LEDS, which normally indicate link physical and logical status.
672 * The complications arise in dealing with different hardware mappings
673 * and the board-dependent routine being called from interrupts,
674 * and then there's the requirement to _flash_ them.
676 #define LED_OVER_FREQ_SHIFT 8
677 #define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
678 /* Below is "non-zero" to force override, but both actual LEDs are off */
679 #define LED_OVER_BOTH_OFF (8)
681 static void qib_run_led_override(unsigned long opaque)
683 struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
684 struct qib_devdata *dd = ppd->dd;
688 if (!(dd->flags & QIB_INITTED))
691 ph_idx = ppd->led_override_phase++ & 1;
692 ppd->led_override = ppd->led_override_vals[ph_idx];
693 timeoff = ppd->led_override_timeoff;
695 dd->f_setextled(ppd, 1);
697 * don't re-fire the timer if user asked for it to be off; we let
698 * it fire one more time after they turn it off to simplify
700 if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
701 mod_timer(&ppd->led_override_timer, jiffies + timeoff);
704 void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val)
706 struct qib_devdata *dd = ppd->dd;
709 if (!(dd->flags & QIB_INITTED))
712 /* First check if we are blinking. If not, use 1HZ polling */
714 freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
717 /* For blink, set each phase from one nybble of val */
718 ppd->led_override_vals[0] = val & 0xF;
719 ppd->led_override_vals[1] = (val >> 4) & 0xF;
720 timeoff = (HZ << 4)/freq;
722 /* Non-blink set both phases the same. */
723 ppd->led_override_vals[0] = val & 0xF;
724 ppd->led_override_vals[1] = val & 0xF;
726 ppd->led_override_timeoff = timeoff;
729 * If the timer has not already been started, do so. Use a "quick"
730 * timeout so the function will be called soon, to look at our request.
732 if (atomic_inc_return(&ppd->led_override_timer_active) == 1) {
733 /* Need to start timer */
734 init_timer(&ppd->led_override_timer);
735 ppd->led_override_timer.function = qib_run_led_override;
736 ppd->led_override_timer.data = (unsigned long) ppd;
737 ppd->led_override_timer.expires = jiffies + 1;
738 add_timer(&ppd->led_override_timer);
740 if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
741 mod_timer(&ppd->led_override_timer, jiffies + 1);
742 atomic_dec(&ppd->led_override_timer_active);
747 * qib_reset_device - reset the chip if possible
748 * @unit: the device to reset
750 * Whether or not reset is successful, we attempt to re-initialize the chip
751 * (that is, much like a driver unload/reload). We clear the INITTED flag
752 * so that the various entry points will fail until we reinitialize. For
753 * now, we only allow this if no user contexts are open that use chip resources
755 int qib_reset_device(int unit)
758 struct qib_devdata *dd = qib_lookup(unit);
759 struct qib_pportdata *ppd;
768 qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit);
770 if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) {
771 qib_devinfo(dd->pcidev,
772 "Invalid unit number %u or not initialized or not present\n",
778 spin_lock_irqsave(&dd->uctxt_lock, flags);
780 for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
781 if (!dd->rcd[i] || !dd->rcd[i]->cnt)
783 spin_unlock_irqrestore(&dd->uctxt_lock, flags);
787 spin_unlock_irqrestore(&dd->uctxt_lock, flags);
789 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
790 ppd = dd->pport + pidx;
791 if (atomic_read(&ppd->led_override_timer_active)) {
792 /* Need to stop LED timer, _then_ shut off LEDs */
793 del_timer_sync(&ppd->led_override_timer);
794 atomic_set(&ppd->led_override_timer_active, 0);
797 /* Shut off LEDs after we are sure timer is not running */
798 ppd->led_override = LED_OVER_BOTH_OFF;
799 dd->f_setextled(ppd, 0);
800 if (dd->flags & QIB_HAS_SEND_DMA)
801 qib_teardown_sdma(ppd);
804 ret = dd->f_reset(dd);
806 ret = qib_init(dd, 1);
811 "Reinitialize unit %u after reset failed with %d\n",
814 qib_devinfo(dd->pcidev,
815 "Reinitialized unit %u after resetting\n",