staging/rdma/hfi1: Read EFI variable for device description
drivers/staging/rdma/hfi1/chip.c
1 /*
2  *
3  * This file is provided under a dual BSD/GPLv2 license.  When using or
4  * redistributing this file, you may do so under either license.
5  *
6  * GPL LICENSE SUMMARY
7  *
8  * Copyright(c) 2015 Intel Corporation.
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of version 2 of the GNU General Public License as
12  * published by the Free Software Foundation.
13  *
14  * This program is distributed in the hope that it will be useful, but
15  * WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * General Public License for more details.
18  *
19  * BSD LICENSE
20  *
21  * Copyright(c) 2015 Intel Corporation.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  *
27  *  - Redistributions of source code must retain the above copyright
28  *    notice, this list of conditions and the following disclaimer.
29  *  - Redistributions in binary form must reproduce the above copyright
30  *    notice, this list of conditions and the following disclaimer in
31  *    the documentation and/or other materials provided with the
32  *    distribution.
33  *  - Neither the name of Intel Corporation nor the names of its
34  *    contributors may be used to endorse or promote products derived
35  *    from this software without specific prior written permission.
36  *
37  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48  *
49  */
50
51 /*
52  * This file contains all of the code that is specific to the HFI chip
53  */
54
55 #include <linux/pci.h>
56 #include <linux/delay.h>
57 #include <linux/interrupt.h>
58 #include <linux/module.h>
59
60 #include "hfi.h"
61 #include "trace.h"
62 #include "mad.h"
63 #include "pio.h"
64 #include "sdma.h"
65 #include "eprom.h"
66 #include "efivar.h"
67
68 #define NUM_IB_PORTS 1
69
70 uint kdeth_qp;
71 module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
72 MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
73
74 uint num_vls = HFI1_MAX_VLS_SUPPORTED;
75 module_param(num_vls, uint, S_IRUGO);
76 MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)");
77
78 /*
79  * Default time to aggregate two 10K packets from the idle state
80  * (timer not running). The timer starts at the end of the first packet,
81  * so only the time for one 10K packet and header plus a bit extra is needed.
82  * 10 * 1024 + 64 header bytes = 10304 bytes
83  * 10304 bytes / 12.5 GB/s = 824.32 ns
84  */
85 uint rcv_intr_timeout = (824 + 16); /* 16 is for coalescing interrupt */
86 module_param(rcv_intr_timeout, uint, S_IRUGO);
87 MODULE_PARM_DESC(rcv_intr_timeout, "Receive interrupt mitigation timeout in ns");
88
89 uint rcv_intr_count = 16; /* same as qib */
90 module_param(rcv_intr_count, uint, S_IRUGO);
91 MODULE_PARM_DESC(rcv_intr_count, "Receive interrupt mitigation count");
92
93 ushort link_crc_mask = SUPPORTED_CRCS;
94 module_param(link_crc_mask, ushort, S_IRUGO);
95 MODULE_PARM_DESC(link_crc_mask, "CRCs to use on the link");
96
97 uint loopback;
98 module_param_named(loopback, loopback, uint, S_IRUGO);
99 MODULE_PARM_DESC(loopback, "Put into loopback mode (1 = serdes, 3 = external cable)");
100
101 /* Other driver tunables */
102 uint rcv_intr_dynamic = 1; /* enable dynamic mode for rcv int mitigation */
103 static ushort crc_14b_sideband = 1;
104 static uint use_flr = 1;
105 uint quick_linkup; /* skip LNI */
106
107 struct flag_table {
108         u64 flag;       /* the flag */
109         char *str;      /* description string */
110         u16 extra;      /* extra information */
111         u16 unused0;
112         u32 unused1;
113 };
114
115 /* str must be a string constant */
116 #define FLAG_ENTRY(str, extra, flag) {flag, str, extra}
117 #define FLAG_ENTRY0(str, flag) {flag, str, 0}
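/*
 * Illustrative sketch (not part of the original file): one plausible way a
 * flag_table is consumed is to walk the entries, test each flag bit against
 * an error register value, and append the matching description strings.
 * The helper name and buffer handling below are assumptions for clarity only.
 */
#if 0	/* example only */
static void example_decode_flags(char *buf, size_t len, u64 reg,
				 const struct flag_table *table, int nentries)
{
	size_t used = 0;
	int i;

	for (i = 0; i < nentries; i++) {
		if (reg & table[i].flag)	/* bit set in the error CSR? */
			used += scnprintf(buf + used, len - used, "%s%s",
					  used ? "," : "", table[i].str);
	}
}
#endif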
118
119 /* Send Error Consequences */
120 #define SEC_WRITE_DROPPED       0x1
121 #define SEC_PACKET_DROPPED      0x2
122 #define SEC_SC_HALTED           0x4     /* per-context only */
123 #define SEC_SPC_FREEZE          0x8     /* per-HFI only */
124
125 #define MIN_KERNEL_KCTXTS         2
126 #define FIRST_KERNEL_KCTXT        1
127 #define NUM_MAP_REGS             32
128
129 /* Bit offset into the GUID which carries HFI id information */
130 #define GUID_HFI_INDEX_SHIFT     39
131
132 /* extract the emulation revision */
133 #define emulator_rev(dd) ((dd)->irev >> 8)
134 /* parallel and serial emulation versions are 3 and 4 respectively */
135 #define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
136 #define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)
137
138 /* RSM fields */
139
140 /* packet type */
141 #define IB_PACKET_TYPE         2ull
142 #define QW_SHIFT               6ull
143 /* QPN[7..1] */
144 #define QPN_WIDTH              7ull
145
146 /* LRH.BTH: QW 0, OFFSET 48 - for match */
147 #define LRH_BTH_QW             0ull
148 #define LRH_BTH_BIT_OFFSET     48ull
149 #define LRH_BTH_OFFSET(off)    ((LRH_BTH_QW << QW_SHIFT) | (off))
150 #define LRH_BTH_MATCH_OFFSET   LRH_BTH_OFFSET(LRH_BTH_BIT_OFFSET)
151 #define LRH_BTH_SELECT
152 #define LRH_BTH_MASK           3ull
153 #define LRH_BTH_VALUE          2ull
154
155 /* LRH.SC[3..0] QW 0, OFFSET 56 - for match */
156 #define LRH_SC_QW              0ull
157 #define LRH_SC_BIT_OFFSET      56ull
158 #define LRH_SC_OFFSET(off)     ((LRH_SC_QW << QW_SHIFT) | (off))
159 #define LRH_SC_MATCH_OFFSET    LRH_SC_OFFSET(LRH_SC_BIT_OFFSET)
160 #define LRH_SC_MASK            128ull
161 #define LRH_SC_VALUE           0ull
162
163 /* SC[n..0] QW 0, OFFSET 60 - for select */
164 #define LRH_SC_SELECT_OFFSET  ((LRH_SC_QW << QW_SHIFT) | (60ull))
165
166 /* QPN[m+n:1] QW 1, OFFSET 1 */
167 #define QPN_SELECT_OFFSET      ((1ull << QW_SHIFT) | (1ull))
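/*
 * Worked expansion of the encodings above (illustration only): an offset
 * packs the quadword index above QW_SHIFT and the bit offset below it, so
 *   LRH_BTH_MATCH_OFFSET = (0 << 6) | 48 = 48
 *   LRH_SC_MATCH_OFFSET  = (0 << 6) | 56 = 56
 *   QPN_SELECT_OFFSET    = (1 << 6) |  1 = 65
 */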
168
169 /* defines to build power on SC2VL table */
170 #define SC2VL_VAL( \
171         num, \
172         sc0, sc0val, \
173         sc1, sc1val, \
174         sc2, sc2val, \
175         sc3, sc3val, \
176         sc4, sc4val, \
177         sc5, sc5val, \
178         sc6, sc6val, \
179         sc7, sc7val) \
180 ( \
181         ((u64)(sc0val) << SEND_SC2VLT##num##_SC##sc0##_SHIFT) | \
182         ((u64)(sc1val) << SEND_SC2VLT##num##_SC##sc1##_SHIFT) | \
183         ((u64)(sc2val) << SEND_SC2VLT##num##_SC##sc2##_SHIFT) | \
184         ((u64)(sc3val) << SEND_SC2VLT##num##_SC##sc3##_SHIFT) | \
185         ((u64)(sc4val) << SEND_SC2VLT##num##_SC##sc4##_SHIFT) | \
186         ((u64)(sc5val) << SEND_SC2VLT##num##_SC##sc5##_SHIFT) | \
187         ((u64)(sc6val) << SEND_SC2VLT##num##_SC##sc6##_SHIFT) | \
188         ((u64)(sc7val) << SEND_SC2VLT##num##_SC##sc7##_SHIFT)   \
189 )
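/*
 * Illustrative use of SC2VL_VAL (the values are an example, not the driver's
 * actual power-on mapping): a call such as
 *   SC2VL_VAL(0, 0, 0, 1, 0, 2, 1, 3, 1, 4, 2, 5, 2, 6, 3, 7, 3)
 * ORs each VL value into the SC-specific shift of the SendSC2VLT0 CSR,
 * producing the 64-bit register image in a single constant expression.
 */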
190
191 #define DC_SC_VL_VAL( \
192         range, \
193         e0, e0val, \
194         e1, e1val, \
195         e2, e2val, \
196         e3, e3val, \
197         e4, e4val, \
198         e5, e5val, \
199         e6, e6val, \
200         e7, e7val, \
201         e8, e8val, \
202         e9, e9val, \
203         e10, e10val, \
204         e11, e11val, \
205         e12, e12val, \
206         e13, e13val, \
207         e14, e14val, \
208         e15, e15val) \
209 ( \
210         ((u64)(e0val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e0##_SHIFT) | \
211         ((u64)(e1val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e1##_SHIFT) | \
212         ((u64)(e2val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e2##_SHIFT) | \
213         ((u64)(e3val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e3##_SHIFT) | \
214         ((u64)(e4val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e4##_SHIFT) | \
215         ((u64)(e5val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e5##_SHIFT) | \
216         ((u64)(e6val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e6##_SHIFT) | \
217         ((u64)(e7val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e7##_SHIFT) | \
218         ((u64)(e8val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e8##_SHIFT) | \
219         ((u64)(e9val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e9##_SHIFT) | \
220         ((u64)(e10val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e10##_SHIFT) | \
221         ((u64)(e11val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e11##_SHIFT) | \
222         ((u64)(e12val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e12##_SHIFT) | \
223         ((u64)(e13val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e13##_SHIFT) | \
224         ((u64)(e14val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e14##_SHIFT) | \
225         ((u64)(e15val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e15##_SHIFT) \
226 )
227
228 /* all CceStatus sub-block freeze bits */
229 #define ALL_FROZE (CCE_STATUS_SDMA_FROZE_SMASK \
230                         | CCE_STATUS_RXE_FROZE_SMASK \
231                         | CCE_STATUS_TXE_FROZE_SMASK \
232                         | CCE_STATUS_TXE_PIO_FROZE_SMASK)
233 /* all CceStatus sub-block TXE pause bits */
234 #define ALL_TXE_PAUSE (CCE_STATUS_TXE_PIO_PAUSED_SMASK \
235                         | CCE_STATUS_TXE_PAUSED_SMASK \
236                         | CCE_STATUS_SDMA_PAUSED_SMASK)
237 /* all CceStatus sub-block RXE pause bits */
238 #define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK
239
240 /*
241  * CCE Error flags.
242  */
243 static struct flag_table cce_err_status_flags[] = {
244 /* 0*/  FLAG_ENTRY0("CceCsrParityErr",
245                 CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK),
246 /* 1*/  FLAG_ENTRY0("CceCsrReadBadAddrErr",
247                 CCE_ERR_STATUS_CCE_CSR_READ_BAD_ADDR_ERR_SMASK),
248 /* 2*/  FLAG_ENTRY0("CceCsrWriteBadAddrErr",
249                 CCE_ERR_STATUS_CCE_CSR_WRITE_BAD_ADDR_ERR_SMASK),
250 /* 3*/  FLAG_ENTRY0("CceTrgtAsyncFifoParityErr",
251                 CCE_ERR_STATUS_CCE_TRGT_ASYNC_FIFO_PARITY_ERR_SMASK),
252 /* 4*/  FLAG_ENTRY0("CceTrgtAccessErr",
253                 CCE_ERR_STATUS_CCE_TRGT_ACCESS_ERR_SMASK),
254 /* 5*/  FLAG_ENTRY0("CceRspdDataParityErr",
255                 CCE_ERR_STATUS_CCE_RSPD_DATA_PARITY_ERR_SMASK),
256 /* 6*/  FLAG_ENTRY0("CceCli0AsyncFifoParityErr",
257                 CCE_ERR_STATUS_CCE_CLI0_ASYNC_FIFO_PARITY_ERR_SMASK),
258 /* 7*/  FLAG_ENTRY0("CceCsrCfgBusParityErr",
259                 CCE_ERR_STATUS_CCE_CSR_CFG_BUS_PARITY_ERR_SMASK),
260 /* 8*/  FLAG_ENTRY0("CceCli2AsyncFifoParityErr",
261                 CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK),
262 /* 9*/  FLAG_ENTRY0("CceCli1AsyncFifoPioCrdtParityErr",
263             CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_PIO_CRDT_PARITY_ERR_SMASK),
264 /*10*/  FLAG_ENTRY0("CceCli1AsyncFifoSdmaHdParityErr",
265             CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_SDMA_HD_PARITY_ERR_SMASK),
266 /*11*/  FLAG_ENTRY0("CceCli1AsyncFifoRxdmaParityError",
267             CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_RXDMA_PARITY_ERROR_SMASK),
268 /*12*/  FLAG_ENTRY0("CceCli1AsyncFifoDbgParityError",
269                 CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_DBG_PARITY_ERROR_SMASK),
270 /*13*/  FLAG_ENTRY0("PcicRetryMemCorErr",
271                 CCE_ERR_STATUS_PCIC_RETRY_MEM_COR_ERR_SMASK),
272 /*14*/  FLAG_ENTRY0("PcicRetrySotMemCorErr",
273                 CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_COR_ERR_SMASK),
274 /*15*/  FLAG_ENTRY0("PcicPostHdQCorErr",
275                 CCE_ERR_STATUS_PCIC_POST_HD_QCOR_ERR_SMASK),
276 /*16*/  FLAG_ENTRY0("PcicPostDatQCorErr",
277                 CCE_ERR_STATUS_PCIC_POST_DAT_QCOR_ERR_SMASK),
278 /*17*/  FLAG_ENTRY0("PcicCplHdQCorErr",
279                 CCE_ERR_STATUS_PCIC_CPL_HD_QCOR_ERR_SMASK),
280 /*18*/  FLAG_ENTRY0("PcicCplDatQCorErr",
281                 CCE_ERR_STATUS_PCIC_CPL_DAT_QCOR_ERR_SMASK),
282 /*19*/  FLAG_ENTRY0("PcicNPostHQParityErr",
283                 CCE_ERR_STATUS_PCIC_NPOST_HQ_PARITY_ERR_SMASK),
284 /*20*/  FLAG_ENTRY0("PcicNPostDatQParityErr",
285                 CCE_ERR_STATUS_PCIC_NPOST_DAT_QPARITY_ERR_SMASK),
286 /*21*/  FLAG_ENTRY0("PcicRetryMemUncErr",
287                 CCE_ERR_STATUS_PCIC_RETRY_MEM_UNC_ERR_SMASK),
288 /*22*/  FLAG_ENTRY0("PcicRetrySotMemUncErr",
289                 CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_UNC_ERR_SMASK),
290 /*23*/  FLAG_ENTRY0("PcicPostHdQUncErr",
291                 CCE_ERR_STATUS_PCIC_POST_HD_QUNC_ERR_SMASK),
292 /*24*/  FLAG_ENTRY0("PcicPostDatQUncErr",
293                 CCE_ERR_STATUS_PCIC_POST_DAT_QUNC_ERR_SMASK),
294 /*25*/  FLAG_ENTRY0("PcicCplHdQUncErr",
295                 CCE_ERR_STATUS_PCIC_CPL_HD_QUNC_ERR_SMASK),
296 /*26*/  FLAG_ENTRY0("PcicCplDatQUncErr",
297                 CCE_ERR_STATUS_PCIC_CPL_DAT_QUNC_ERR_SMASK),
298 /*27*/  FLAG_ENTRY0("PcicTransmitFrontParityErr",
299                 CCE_ERR_STATUS_PCIC_TRANSMIT_FRONT_PARITY_ERR_SMASK),
300 /*28*/  FLAG_ENTRY0("PcicTransmitBackParityErr",
301                 CCE_ERR_STATUS_PCIC_TRANSMIT_BACK_PARITY_ERR_SMASK),
302 /*29*/  FLAG_ENTRY0("PcicReceiveParityErr",
303                 CCE_ERR_STATUS_PCIC_RECEIVE_PARITY_ERR_SMASK),
304 /*30*/  FLAG_ENTRY0("CceTrgtCplTimeoutErr",
305                 CCE_ERR_STATUS_CCE_TRGT_CPL_TIMEOUT_ERR_SMASK),
306 /*31*/  FLAG_ENTRY0("LATriggered",
307                 CCE_ERR_STATUS_LA_TRIGGERED_SMASK),
308 /*32*/  FLAG_ENTRY0("CceSegReadBadAddrErr",
309                 CCE_ERR_STATUS_CCE_SEG_READ_BAD_ADDR_ERR_SMASK),
310 /*33*/  FLAG_ENTRY0("CceSegWriteBadAddrErr",
311                 CCE_ERR_STATUS_CCE_SEG_WRITE_BAD_ADDR_ERR_SMASK),
312 /*34*/  FLAG_ENTRY0("CceRcplAsyncFifoParityErr",
313                 CCE_ERR_STATUS_CCE_RCPL_ASYNC_FIFO_PARITY_ERR_SMASK),
314 /*35*/  FLAG_ENTRY0("CceRxdmaConvFifoParityErr",
315                 CCE_ERR_STATUS_CCE_RXDMA_CONV_FIFO_PARITY_ERR_SMASK),
316 /*36*/  FLAG_ENTRY0("CceMsixTableCorErr",
317                 CCE_ERR_STATUS_CCE_MSIX_TABLE_COR_ERR_SMASK),
318 /*37*/  FLAG_ENTRY0("CceMsixTableUncErr",
319                 CCE_ERR_STATUS_CCE_MSIX_TABLE_UNC_ERR_SMASK),
320 /*38*/  FLAG_ENTRY0("CceIntMapCorErr",
321                 CCE_ERR_STATUS_CCE_INT_MAP_COR_ERR_SMASK),
322 /*39*/  FLAG_ENTRY0("CceIntMapUncErr",
323                 CCE_ERR_STATUS_CCE_INT_MAP_UNC_ERR_SMASK),
324 /*40*/  FLAG_ENTRY0("CceMsixCsrParityErr",
325                 CCE_ERR_STATUS_CCE_MSIX_CSR_PARITY_ERR_SMASK),
326 /*41-63 reserved*/
327 };
328
329 /*
330  * Misc Error flags
331  */
332 #define MES(text) MISC_ERR_STATUS_MISC_##text##_ERR_SMASK
333 static struct flag_table misc_err_status_flags[] = {
334 /* 0*/  FLAG_ENTRY0("CSR_PARITY", MES(CSR_PARITY)),
335 /* 1*/  FLAG_ENTRY0("CSR_READ_BAD_ADDR", MES(CSR_READ_BAD_ADDR)),
336 /* 2*/  FLAG_ENTRY0("CSR_WRITE_BAD_ADDR", MES(CSR_WRITE_BAD_ADDR)),
337 /* 3*/  FLAG_ENTRY0("SBUS_WRITE_FAILED", MES(SBUS_WRITE_FAILED)),
338 /* 4*/  FLAG_ENTRY0("KEY_MISMATCH", MES(KEY_MISMATCH)),
339 /* 5*/  FLAG_ENTRY0("FW_AUTH_FAILED", MES(FW_AUTH_FAILED)),
340 /* 6*/  FLAG_ENTRY0("EFUSE_CSR_PARITY", MES(EFUSE_CSR_PARITY)),
341 /* 7*/  FLAG_ENTRY0("EFUSE_READ_BAD_ADDR", MES(EFUSE_READ_BAD_ADDR)),
342 /* 8*/  FLAG_ENTRY0("EFUSE_WRITE", MES(EFUSE_WRITE)),
343 /* 9*/  FLAG_ENTRY0("EFUSE_DONE_PARITY", MES(EFUSE_DONE_PARITY)),
344 /*10*/  FLAG_ENTRY0("INVALID_EEP_CMD", MES(INVALID_EEP_CMD)),
345 /*11*/  FLAG_ENTRY0("MBIST_FAIL", MES(MBIST_FAIL)),
346 /*12*/  FLAG_ENTRY0("PLL_LOCK_FAIL", MES(PLL_LOCK_FAIL))
347 };
348
349 /*
350  * TXE PIO Error flags and consequences
351  */
352 static struct flag_table pio_err_status_flags[] = {
353 /* 0*/  FLAG_ENTRY("PioWriteBadCtxt",
354         SEC_WRITE_DROPPED,
355         SEND_PIO_ERR_STATUS_PIO_WRITE_BAD_CTXT_ERR_SMASK),
356 /* 1*/  FLAG_ENTRY("PioWriteAddrParity",
357         SEC_SPC_FREEZE,
358         SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK),
359 /* 2*/  FLAG_ENTRY("PioCsrParity",
360         SEC_SPC_FREEZE,
361         SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK),
362 /* 3*/  FLAG_ENTRY("PioSbMemFifo0",
363         SEC_SPC_FREEZE,
364         SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK),
365 /* 4*/  FLAG_ENTRY("PioSbMemFifo1",
366         SEC_SPC_FREEZE,
367         SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK),
368 /* 5*/  FLAG_ENTRY("PioPccFifoParity",
369         SEC_SPC_FREEZE,
370         SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK),
371 /* 6*/  FLAG_ENTRY("PioPecFifoParity",
372         SEC_SPC_FREEZE,
373         SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK),
374 /* 7*/  FLAG_ENTRY("PioSbrdctlCrrelParity",
375         SEC_SPC_FREEZE,
376         SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK),
377 /* 8*/  FLAG_ENTRY("PioSbrdctrlCrrelFifoParity",
378         SEC_SPC_FREEZE,
379         SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK),
380 /* 9*/  FLAG_ENTRY("PioPktEvictFifoParityErr",
381         SEC_SPC_FREEZE,
382         SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK),
383 /*10*/  FLAG_ENTRY("PioSmPktResetParity",
384         SEC_SPC_FREEZE,
385         SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK),
386 /*11*/  FLAG_ENTRY("PioVlLenMemBank0Unc",
387         SEC_SPC_FREEZE,
388         SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK),
389 /*12*/  FLAG_ENTRY("PioVlLenMemBank1Unc",
390         SEC_SPC_FREEZE,
391         SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK),
392 /*13*/  FLAG_ENTRY("PioVlLenMemBank0Cor",
393         0,
394         SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_COR_ERR_SMASK),
395 /*14*/  FLAG_ENTRY("PioVlLenMemBank1Cor",
396         0,
397         SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_COR_ERR_SMASK),
398 /*15*/  FLAG_ENTRY("PioCreditRetFifoParity",
399         SEC_SPC_FREEZE,
400         SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK),
401 /*16*/  FLAG_ENTRY("PioPpmcPblFifo",
402         SEC_SPC_FREEZE,
403         SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK),
404 /*17*/  FLAG_ENTRY("PioInitSmIn",
405         0,
406         SEND_PIO_ERR_STATUS_PIO_INIT_SM_IN_ERR_SMASK),
407 /*18*/  FLAG_ENTRY("PioPktEvictSmOrArbSm",
408         SEC_SPC_FREEZE,
409         SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK),
410 /*19*/  FLAG_ENTRY("PioHostAddrMemUnc",
411         SEC_SPC_FREEZE,
412         SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK),
413 /*20*/  FLAG_ENTRY("PioHostAddrMemCor",
414         0,
415         SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_COR_ERR_SMASK),
416 /*21*/  FLAG_ENTRY("PioWriteDataParity",
417         SEC_SPC_FREEZE,
418         SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK),
419 /*22*/  FLAG_ENTRY("PioStateMachine",
420         SEC_SPC_FREEZE,
421         SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
422 /*23*/  FLAG_ENTRY("PioWriteQwValidParity",
423         SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
424         SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
425 /*24*/  FLAG_ENTRY("PioBlockQwCountParity",
426         SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
427         SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
428 /*25*/  FLAG_ENTRY("PioVlfVlLenParity",
429         SEC_SPC_FREEZE,
430         SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK),
431 /*26*/  FLAG_ENTRY("PioVlfSopParity",
432         SEC_SPC_FREEZE,
433         SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK),
434 /*27*/  FLAG_ENTRY("PioVlFifoParity",
435         SEC_SPC_FREEZE,
436         SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK),
437 /*28*/  FLAG_ENTRY("PioPpmcBqcMemParity",
438         SEC_SPC_FREEZE,
439         SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK),
440 /*29*/  FLAG_ENTRY("PioPpmcSopLen",
441         SEC_SPC_FREEZE,
442         SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK),
443 /*30-31 reserved*/
444 /*32*/  FLAG_ENTRY("PioCurrentFreeCntParity",
445         SEC_SPC_FREEZE,
446         SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK),
447 /*33*/  FLAG_ENTRY("PioLastReturnedCntParity",
448         SEC_SPC_FREEZE,
449         SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK),
450 /*34*/  FLAG_ENTRY("PioPccSopHeadParity",
451         SEC_SPC_FREEZE,
452         SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK),
453 /*35*/  FLAG_ENTRY("PioPecSopHeadParityErr",
454         SEC_SPC_FREEZE,
455         SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK),
456 /*36-63 reserved*/
457 };
458
459 /* TXE PIO errors that cause an SPC freeze */
460 #define ALL_PIO_FREEZE_ERR \
461         (SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK \
462         | SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK \
463         | SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK \
464         | SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK \
465         | SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK \
466         | SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK \
467         | SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK \
468         | SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK \
469         | SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK \
470         | SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK \
471         | SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK \
472         | SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK \
473         | SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK \
474         | SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK \
475         | SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK \
476         | SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK \
477         | SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK \
478         | SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK \
479         | SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK \
480         | SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK \
481         | SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK \
482         | SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK \
483         | SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK \
484         | SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK \
485         | SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK \
486         | SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK \
487         | SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK \
488         | SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK \
489         | SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK)
490
491 /*
492  * TXE SDMA Error flags
493  */
494 static struct flag_table sdma_err_status_flags[] = {
495 /* 0*/  FLAG_ENTRY0("SDmaRpyTagErr",
496                 SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK),
497 /* 1*/  FLAG_ENTRY0("SDmaCsrParityErr",
498                 SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK),
499 /* 2*/  FLAG_ENTRY0("SDmaPcieReqTrackingUncErr",
500                 SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK),
501 /* 3*/  FLAG_ENTRY0("SDmaPcieReqTrackingCorErr",
502                 SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_COR_ERR_SMASK),
503 /*04-63 reserved*/
504 };
505
506 /* TXE SDMA errors that cause an SPC freeze */
507 #define ALL_SDMA_FREEZE_ERR  \
508                 (SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK \
509                 | SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
510                 | SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)
511
512 /*
513  * TXE Egress Error flags
514  */
515 #define SEES(text) SEND_EGRESS_ERR_STATUS_##text##_ERR_SMASK
516 static struct flag_table egress_err_status_flags[] = {
517 /* 0*/  FLAG_ENTRY0("TxPktIntegrityMemCorErr", SEES(TX_PKT_INTEGRITY_MEM_COR)),
518 /* 1*/  FLAG_ENTRY0("TxPktIntegrityMemUncErr", SEES(TX_PKT_INTEGRITY_MEM_UNC)),
519 /* 2 reserved */
520 /* 3*/  FLAG_ENTRY0("TxEgressFifoUnderrunOrParityErr",
521                 SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY)),
522 /* 4*/  FLAG_ENTRY0("TxLinkdownErr", SEES(TX_LINKDOWN)),
523 /* 5*/  FLAG_ENTRY0("TxIncorrectLinkStateErr", SEES(TX_INCORRECT_LINK_STATE)),
524 /* 6 reserved */
525 /* 7*/  FLAG_ENTRY0("TxPioLaunchIntfParityErr",
526                 SEES(TX_PIO_LAUNCH_INTF_PARITY)),
527 /* 8*/  FLAG_ENTRY0("TxSdmaLaunchIntfParityErr",
528                 SEES(TX_SDMA_LAUNCH_INTF_PARITY)),
529 /* 9-10 reserved */
530 /*11*/  FLAG_ENTRY0("TxSbrdCtlStateMachineParityErr",
531                 SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY)),
532 /*12*/  FLAG_ENTRY0("TxIllegalVLErr", SEES(TX_ILLEGAL_VL)),
533 /*13*/  FLAG_ENTRY0("TxLaunchCsrParityErr", SEES(TX_LAUNCH_CSR_PARITY)),
534 /*14*/  FLAG_ENTRY0("TxSbrdCtlCsrParityErr", SEES(TX_SBRD_CTL_CSR_PARITY)),
535 /*15*/  FLAG_ENTRY0("TxConfigParityErr", SEES(TX_CONFIG_PARITY)),
536 /*16*/  FLAG_ENTRY0("TxSdma0DisallowedPacketErr",
537                 SEES(TX_SDMA0_DISALLOWED_PACKET)),
538 /*17*/  FLAG_ENTRY0("TxSdma1DisallowedPacketErr",
539                 SEES(TX_SDMA1_DISALLOWED_PACKET)),
540 /*18*/  FLAG_ENTRY0("TxSdma2DisallowedPacketErr",
541                 SEES(TX_SDMA2_DISALLOWED_PACKET)),
542 /*19*/  FLAG_ENTRY0("TxSdma3DisallowedPacketErr",
543                 SEES(TX_SDMA3_DISALLOWED_PACKET)),
544 /*20*/  FLAG_ENTRY0("TxSdma4DisallowedPacketErr",
545                 SEES(TX_SDMA4_DISALLOWED_PACKET)),
546 /*21*/  FLAG_ENTRY0("TxSdma5DisallowedPacketErr",
547                 SEES(TX_SDMA5_DISALLOWED_PACKET)),
548 /*22*/  FLAG_ENTRY0("TxSdma6DisallowedPacketErr",
549                 SEES(TX_SDMA6_DISALLOWED_PACKET)),
550 /*23*/  FLAG_ENTRY0("TxSdma7DisallowedPacketErr",
551                 SEES(TX_SDMA7_DISALLOWED_PACKET)),
552 /*24*/  FLAG_ENTRY0("TxSdma8DisallowedPacketErr",
553                 SEES(TX_SDMA8_DISALLOWED_PACKET)),
554 /*25*/  FLAG_ENTRY0("TxSdma9DisallowedPacketErr",
555                 SEES(TX_SDMA9_DISALLOWED_PACKET)),
556 /*26*/  FLAG_ENTRY0("TxSdma10DisallowedPacketErr",
557                 SEES(TX_SDMA10_DISALLOWED_PACKET)),
558 /*27*/  FLAG_ENTRY0("TxSdma11DisallowedPacketErr",
559                 SEES(TX_SDMA11_DISALLOWED_PACKET)),
560 /*28*/  FLAG_ENTRY0("TxSdma12DisallowedPacketErr",
561                 SEES(TX_SDMA12_DISALLOWED_PACKET)),
562 /*29*/  FLAG_ENTRY0("TxSdma13DisallowedPacketErr",
563                 SEES(TX_SDMA13_DISALLOWED_PACKET)),
564 /*30*/  FLAG_ENTRY0("TxSdma14DisallowedPacketErr",
565                 SEES(TX_SDMA14_DISALLOWED_PACKET)),
566 /*31*/  FLAG_ENTRY0("TxSdma15DisallowedPacketErr",
567                 SEES(TX_SDMA15_DISALLOWED_PACKET)),
568 /*32*/  FLAG_ENTRY0("TxLaunchFifo0UncOrParityErr",
569                 SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY)),
570 /*33*/  FLAG_ENTRY0("TxLaunchFifo1UncOrParityErr",
571                 SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY)),
572 /*34*/  FLAG_ENTRY0("TxLaunchFifo2UncOrParityErr",
573                 SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY)),
574 /*35*/  FLAG_ENTRY0("TxLaunchFifo3UncOrParityErr",
575                 SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY)),
576 /*36*/  FLAG_ENTRY0("TxLaunchFifo4UncOrParityErr",
577                 SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY)),
578 /*37*/  FLAG_ENTRY0("TxLaunchFifo5UncOrParityErr",
579                 SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY)),
580 /*38*/  FLAG_ENTRY0("TxLaunchFifo6UncOrParityErr",
581                 SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY)),
582 /*39*/  FLAG_ENTRY0("TxLaunchFifo7UncOrParityErr",
583                 SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY)),
584 /*40*/  FLAG_ENTRY0("TxLaunchFifo8UncOrParityErr",
585                 SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY)),
586 /*41*/  FLAG_ENTRY0("TxCreditReturnParityErr", SEES(TX_CREDIT_RETURN_PARITY)),
587 /*42*/  FLAG_ENTRY0("TxSbHdrUncErr", SEES(TX_SB_HDR_UNC)),
588 /*43*/  FLAG_ENTRY0("TxReadSdmaMemoryUncErr", SEES(TX_READ_SDMA_MEMORY_UNC)),
589 /*44*/  FLAG_ENTRY0("TxReadPioMemoryUncErr", SEES(TX_READ_PIO_MEMORY_UNC)),
590 /*45*/  FLAG_ENTRY0("TxEgressFifoUncErr", SEES(TX_EGRESS_FIFO_UNC)),
591 /*46*/  FLAG_ENTRY0("TxHcrcInsertionErr", SEES(TX_HCRC_INSERTION)),
592 /*47*/  FLAG_ENTRY0("TxCreditReturnVLErr", SEES(TX_CREDIT_RETURN_VL)),
593 /*48*/  FLAG_ENTRY0("TxLaunchFifo0CorErr", SEES(TX_LAUNCH_FIFO0_COR)),
594 /*49*/  FLAG_ENTRY0("TxLaunchFifo1CorErr", SEES(TX_LAUNCH_FIFO1_COR)),
595 /*50*/  FLAG_ENTRY0("TxLaunchFifo2CorErr", SEES(TX_LAUNCH_FIFO2_COR)),
596 /*51*/  FLAG_ENTRY0("TxLaunchFifo3CorErr", SEES(TX_LAUNCH_FIFO3_COR)),
597 /*52*/  FLAG_ENTRY0("TxLaunchFifo4CorErr", SEES(TX_LAUNCH_FIFO4_COR)),
598 /*53*/  FLAG_ENTRY0("TxLaunchFifo5CorErr", SEES(TX_LAUNCH_FIFO5_COR)),
599 /*54*/  FLAG_ENTRY0("TxLaunchFifo6CorErr", SEES(TX_LAUNCH_FIFO6_COR)),
600 /*55*/  FLAG_ENTRY0("TxLaunchFifo7CorErr", SEES(TX_LAUNCH_FIFO7_COR)),
601 /*56*/  FLAG_ENTRY0("TxLaunchFifo8CorErr", SEES(TX_LAUNCH_FIFO8_COR)),
602 /*57*/  FLAG_ENTRY0("TxCreditOverrunErr", SEES(TX_CREDIT_OVERRUN)),
603 /*58*/  FLAG_ENTRY0("TxSbHdrCorErr", SEES(TX_SB_HDR_COR)),
604 /*59*/  FLAG_ENTRY0("TxReadSdmaMemoryCorErr", SEES(TX_READ_SDMA_MEMORY_COR)),
605 /*60*/  FLAG_ENTRY0("TxReadPioMemoryCorErr", SEES(TX_READ_PIO_MEMORY_COR)),
606 /*61*/  FLAG_ENTRY0("TxEgressFifoCorErr", SEES(TX_EGRESS_FIFO_COR)),
607 /*62*/  FLAG_ENTRY0("TxReadSdmaMemoryCsrUncErr",
608                 SEES(TX_READ_SDMA_MEMORY_CSR_UNC)),
609 /*63*/  FLAG_ENTRY0("TxReadPioMemoryCsrUncErr",
610                 SEES(TX_READ_PIO_MEMORY_CSR_UNC)),
611 };
612
613 /*
614  * TXE Egress Error Info flags
615  */
616 #define SEEI(text) SEND_EGRESS_ERR_INFO_##text##_ERR_SMASK
617 static struct flag_table egress_err_info_flags[] = {
618 /* 0*/  FLAG_ENTRY0("Reserved", 0ull),
619 /* 1*/  FLAG_ENTRY0("VLErr", SEEI(VL)),
620 /* 2*/  FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
621 /* 3*/  FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
622 /* 4*/  FLAG_ENTRY0("PartitionKeyErr", SEEI(PARTITION_KEY)),
623 /* 5*/  FLAG_ENTRY0("SLIDErr", SEEI(SLID)),
624 /* 6*/  FLAG_ENTRY0("OpcodeErr", SEEI(OPCODE)),
625 /* 7*/  FLAG_ENTRY0("VLMappingErr", SEEI(VL_MAPPING)),
626 /* 8*/  FLAG_ENTRY0("RawErr", SEEI(RAW)),
627 /* 9*/  FLAG_ENTRY0("RawIPv6Err", SEEI(RAW_IPV6)),
628 /*10*/  FLAG_ENTRY0("GRHErr", SEEI(GRH)),
629 /*11*/  FLAG_ENTRY0("BypassErr", SEEI(BYPASS)),
630 /*12*/  FLAG_ENTRY0("KDETHPacketsErr", SEEI(KDETH_PACKETS)),
631 /*13*/  FLAG_ENTRY0("NonKDETHPacketsErr", SEEI(NON_KDETH_PACKETS)),
632 /*14*/  FLAG_ENTRY0("TooSmallIBPacketsErr", SEEI(TOO_SMALL_IB_PACKETS)),
633 /*15*/  FLAG_ENTRY0("TooSmallBypassPacketsErr", SEEI(TOO_SMALL_BYPASS_PACKETS)),
634 /*16*/  FLAG_ENTRY0("PbcTestErr", SEEI(PBC_TEST)),
635 /*17*/  FLAG_ENTRY0("BadPktLenErr", SEEI(BAD_PKT_LEN)),
636 /*18*/  FLAG_ENTRY0("TooLongIBPacketErr", SEEI(TOO_LONG_IB_PACKET)),
637 /*19*/  FLAG_ENTRY0("TooLongBypassPacketsErr", SEEI(TOO_LONG_BYPASS_PACKETS)),
638 /*20*/  FLAG_ENTRY0("PbcStaticRateControlErr", SEEI(PBC_STATIC_RATE_CONTROL)),
639 /*21*/  FLAG_ENTRY0("BypassBadPktLenErr", SEEI(BAD_PKT_LEN)),
640 };
641
642 /* TXE Egress errors that cause an SPC freeze */
643 #define ALL_TXE_EGRESS_FREEZE_ERR \
644         (SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY) \
645         | SEES(TX_PIO_LAUNCH_INTF_PARITY) \
646         | SEES(TX_SDMA_LAUNCH_INTF_PARITY) \
647         | SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY) \
648         | SEES(TX_LAUNCH_CSR_PARITY) \
649         | SEES(TX_SBRD_CTL_CSR_PARITY) \
650         | SEES(TX_CONFIG_PARITY) \
651         | SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY) \
652         | SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY) \
653         | SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY) \
654         | SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY) \
655         | SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY) \
656         | SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY) \
657         | SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY) \
658         | SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY) \
659         | SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY) \
660         | SEES(TX_CREDIT_RETURN_PARITY))
661
662 /*
663  * TXE Send error flags
664  */
665 #define SES(name) SEND_ERR_STATUS_SEND_##name##_ERR_SMASK
666 static struct flag_table send_err_status_flags[] = {
667 /* 0*/  FLAG_ENTRY0("SendCsrParityErr", SES(CSR_PARITY)),
668 /* 1*/  FLAG_ENTRY0("SendCsrReadBadAddrErr", SES(CSR_READ_BAD_ADDR)),
669 /* 2*/  FLAG_ENTRY0("SendCsrWriteBadAddrErr", SES(CSR_WRITE_BAD_ADDR))
670 };
671
672 /*
673  * TXE Send Context Error flags and consequences
674  */
675 static struct flag_table sc_err_status_flags[] = {
676 /* 0*/  FLAG_ENTRY("InconsistentSop",
677                 SEC_PACKET_DROPPED | SEC_SC_HALTED,
678                 SEND_CTXT_ERR_STATUS_PIO_INCONSISTENT_SOP_ERR_SMASK),
679 /* 1*/  FLAG_ENTRY("DisallowedPacket",
680                 SEC_PACKET_DROPPED | SEC_SC_HALTED,
681                 SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK),
682 /* 2*/  FLAG_ENTRY("WriteCrossesBoundary",
683                 SEC_WRITE_DROPPED | SEC_SC_HALTED,
684                 SEND_CTXT_ERR_STATUS_PIO_WRITE_CROSSES_BOUNDARY_ERR_SMASK),
685 /* 3*/  FLAG_ENTRY("WriteOverflow",
686                 SEC_WRITE_DROPPED | SEC_SC_HALTED,
687                 SEND_CTXT_ERR_STATUS_PIO_WRITE_OVERFLOW_ERR_SMASK),
688 /* 4*/  FLAG_ENTRY("WriteOutOfBounds",
689                 SEC_WRITE_DROPPED | SEC_SC_HALTED,
690                 SEND_CTXT_ERR_STATUS_PIO_WRITE_OUT_OF_BOUNDS_ERR_SMASK),
691 /* 5-63 reserved*/
692 };
693
694 /*
695  * RXE Receive Error flags
696  */
697 #define RXES(name) RCV_ERR_STATUS_RX_##name##_ERR_SMASK
698 static struct flag_table rxe_err_status_flags[] = {
699 /* 0*/  FLAG_ENTRY0("RxDmaCsrCorErr", RXES(DMA_CSR_COR)),
700 /* 1*/  FLAG_ENTRY0("RxDcIntfParityErr", RXES(DC_INTF_PARITY)),
701 /* 2*/  FLAG_ENTRY0("RxRcvHdrUncErr", RXES(RCV_HDR_UNC)),
702 /* 3*/  FLAG_ENTRY0("RxRcvHdrCorErr", RXES(RCV_HDR_COR)),
703 /* 4*/  FLAG_ENTRY0("RxRcvDataUncErr", RXES(RCV_DATA_UNC)),
704 /* 5*/  FLAG_ENTRY0("RxRcvDataCorErr", RXES(RCV_DATA_COR)),
705 /* 6*/  FLAG_ENTRY0("RxRcvQpMapTableUncErr", RXES(RCV_QP_MAP_TABLE_UNC)),
706 /* 7*/  FLAG_ENTRY0("RxRcvQpMapTableCorErr", RXES(RCV_QP_MAP_TABLE_COR)),
707 /* 8*/  FLAG_ENTRY0("RxRcvCsrParityErr", RXES(RCV_CSR_PARITY)),
708 /* 9*/  FLAG_ENTRY0("RxDcSopEopParityErr", RXES(DC_SOP_EOP_PARITY)),
709 /*10*/  FLAG_ENTRY0("RxDmaFlagUncErr", RXES(DMA_FLAG_UNC)),
710 /*11*/  FLAG_ENTRY0("RxDmaFlagCorErr", RXES(DMA_FLAG_COR)),
711 /*12*/  FLAG_ENTRY0("RxRcvFsmEncodingErr", RXES(RCV_FSM_ENCODING)),
712 /*13*/  FLAG_ENTRY0("RxRbufFreeListUncErr", RXES(RBUF_FREE_LIST_UNC)),
713 /*14*/  FLAG_ENTRY0("RxRbufFreeListCorErr", RXES(RBUF_FREE_LIST_COR)),
714 /*15*/  FLAG_ENTRY0("RxRbufLookupDesRegUncErr", RXES(RBUF_LOOKUP_DES_REG_UNC)),
715 /*16*/  FLAG_ENTRY0("RxRbufLookupDesRegUncCorErr",
716                 RXES(RBUF_LOOKUP_DES_REG_UNC_COR)),
717 /*17*/  FLAG_ENTRY0("RxRbufLookupDesUncErr", RXES(RBUF_LOOKUP_DES_UNC)),
718 /*18*/  FLAG_ENTRY0("RxRbufLookupDesCorErr", RXES(RBUF_LOOKUP_DES_COR)),
719 /*19*/  FLAG_ENTRY0("RxRbufBlockListReadUncErr",
720                 RXES(RBUF_BLOCK_LIST_READ_UNC)),
721 /*20*/  FLAG_ENTRY0("RxRbufBlockListReadCorErr",
722                 RXES(RBUF_BLOCK_LIST_READ_COR)),
723 /*21*/  FLAG_ENTRY0("RxRbufCsrQHeadBufNumParityErr",
724                 RXES(RBUF_CSR_QHEAD_BUF_NUM_PARITY)),
725 /*22*/  FLAG_ENTRY0("RxRbufCsrQEntCntParityErr",
726                 RXES(RBUF_CSR_QENT_CNT_PARITY)),
727 /*23*/  FLAG_ENTRY0("RxRbufCsrQNextBufParityErr",
728                 RXES(RBUF_CSR_QNEXT_BUF_PARITY)),
729 /*24*/  FLAG_ENTRY0("RxRbufCsrQVldBitParityErr",
730                 RXES(RBUF_CSR_QVLD_BIT_PARITY)),
731 /*25*/  FLAG_ENTRY0("RxRbufCsrQHdPtrParityErr", RXES(RBUF_CSR_QHD_PTR_PARITY)),
732 /*26*/  FLAG_ENTRY0("RxRbufCsrQTlPtrParityErr", RXES(RBUF_CSR_QTL_PTR_PARITY)),
733 /*27*/  FLAG_ENTRY0("RxRbufCsrQNumOfPktParityErr",
734                 RXES(RBUF_CSR_QNUM_OF_PKT_PARITY)),
735 /*28*/  FLAG_ENTRY0("RxRbufCsrQEOPDWParityErr", RXES(RBUF_CSR_QEOPDW_PARITY)),
736 /*29*/  FLAG_ENTRY0("RxRbufCtxIdParityErr", RXES(RBUF_CTX_ID_PARITY)),
737 /*30*/  FLAG_ENTRY0("RxRBufBadLookupErr", RXES(RBUF_BAD_LOOKUP)),
738 /*31*/  FLAG_ENTRY0("RxRbufFullErr", RXES(RBUF_FULL)),
739 /*32*/  FLAG_ENTRY0("RxRbufEmptyErr", RXES(RBUF_EMPTY)),
740 /*33*/  FLAG_ENTRY0("RxRbufFlRdAddrParityErr", RXES(RBUF_FL_RD_ADDR_PARITY)),
741 /*34*/  FLAG_ENTRY0("RxRbufFlWrAddrParityErr", RXES(RBUF_FL_WR_ADDR_PARITY)),
742 /*35*/  FLAG_ENTRY0("RxRbufFlInitdoneParityErr",
743                 RXES(RBUF_FL_INITDONE_PARITY)),
744 /*36*/  FLAG_ENTRY0("RxRbufFlInitWrAddrParityErr",
745                 RXES(RBUF_FL_INIT_WR_ADDR_PARITY)),
746 /*37*/  FLAG_ENTRY0("RxRbufNextFreeBufUncErr", RXES(RBUF_NEXT_FREE_BUF_UNC)),
747 /*38*/  FLAG_ENTRY0("RxRbufNextFreeBufCorErr", RXES(RBUF_NEXT_FREE_BUF_COR)),
748 /*39*/  FLAG_ENTRY0("RxLookupDesPart1UncErr", RXES(LOOKUP_DES_PART1_UNC)),
749 /*40*/  FLAG_ENTRY0("RxLookupDesPart1UncCorErr",
750                 RXES(LOOKUP_DES_PART1_UNC_COR)),
751 /*41*/  FLAG_ENTRY0("RxLookupDesPart2ParityErr",
752                 RXES(LOOKUP_DES_PART2_PARITY)),
753 /*42*/  FLAG_ENTRY0("RxLookupRcvArrayUncErr", RXES(LOOKUP_RCV_ARRAY_UNC)),
754 /*43*/  FLAG_ENTRY0("RxLookupRcvArrayCorErr", RXES(LOOKUP_RCV_ARRAY_COR)),
755 /*44*/  FLAG_ENTRY0("RxLookupCsrParityErr", RXES(LOOKUP_CSR_PARITY)),
756 /*45*/  FLAG_ENTRY0("RxHqIntrCsrParityErr", RXES(HQ_INTR_CSR_PARITY)),
757 /*46*/  FLAG_ENTRY0("RxHqIntrFsmErr", RXES(HQ_INTR_FSM)),
758 /*47*/  FLAG_ENTRY0("RxRbufDescPart1UncErr", RXES(RBUF_DESC_PART1_UNC)),
759 /*48*/  FLAG_ENTRY0("RxRbufDescPart1CorErr", RXES(RBUF_DESC_PART1_COR)),
760 /*49*/  FLAG_ENTRY0("RxRbufDescPart2UncErr", RXES(RBUF_DESC_PART2_UNC)),
761 /*50*/  FLAG_ENTRY0("RxRbufDescPart2CorErr", RXES(RBUF_DESC_PART2_COR)),
762 /*51*/  FLAG_ENTRY0("RxDmaHdrFifoRdUncErr", RXES(DMA_HDR_FIFO_RD_UNC)),
763 /*52*/  FLAG_ENTRY0("RxDmaHdrFifoRdCorErr", RXES(DMA_HDR_FIFO_RD_COR)),
764 /*53*/  FLAG_ENTRY0("RxDmaDataFifoRdUncErr", RXES(DMA_DATA_FIFO_RD_UNC)),
765 /*54*/  FLAG_ENTRY0("RxDmaDataFifoRdCorErr", RXES(DMA_DATA_FIFO_RD_COR)),
766 /*55*/  FLAG_ENTRY0("RxRbufDataUncErr", RXES(RBUF_DATA_UNC)),
767 /*56*/  FLAG_ENTRY0("RxRbufDataCorErr", RXES(RBUF_DATA_COR)),
768 /*57*/  FLAG_ENTRY0("RxDmaCsrParityErr", RXES(DMA_CSR_PARITY)),
769 /*58*/  FLAG_ENTRY0("RxDmaEqFsmEncodingErr", RXES(DMA_EQ_FSM_ENCODING)),
770 /*59*/  FLAG_ENTRY0("RxDmaDqFsmEncodingErr", RXES(DMA_DQ_FSM_ENCODING)),
771 /*60*/  FLAG_ENTRY0("RxDmaCsrUncErr", RXES(DMA_CSR_UNC)),
772 /*61*/  FLAG_ENTRY0("RxCsrReadBadAddrErr", RXES(CSR_READ_BAD_ADDR)),
773 /*62*/  FLAG_ENTRY0("RxCsrWriteBadAddrErr", RXES(CSR_WRITE_BAD_ADDR)),
774 /*63*/  FLAG_ENTRY0("RxCsrParityErr", RXES(CSR_PARITY))
775 };
776
777 /* RXE errors that will trigger an SPC freeze */
778 #define ALL_RXE_FREEZE_ERR  \
779         (RCV_ERR_STATUS_RX_RCV_QP_MAP_TABLE_UNC_ERR_SMASK \
780         | RCV_ERR_STATUS_RX_RCV_CSR_PARITY_ERR_SMASK \
781         | RCV_ERR_STATUS_RX_DMA_FLAG_UNC_ERR_SMASK \
782         | RCV_ERR_STATUS_RX_RCV_FSM_ENCODING_ERR_SMASK \
783         | RCV_ERR_STATUS_RX_RBUF_FREE_LIST_UNC_ERR_SMASK \
784         | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_ERR_SMASK \
785         | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_COR_ERR_SMASK \
786         | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_UNC_ERR_SMASK \
787         | RCV_ERR_STATUS_RX_RBUF_BLOCK_LIST_READ_UNC_ERR_SMASK \
788         | RCV_ERR_STATUS_RX_RBUF_CSR_QHEAD_BUF_NUM_PARITY_ERR_SMASK \
789         | RCV_ERR_STATUS_RX_RBUF_CSR_QENT_CNT_PARITY_ERR_SMASK \
790         | RCV_ERR_STATUS_RX_RBUF_CSR_QNEXT_BUF_PARITY_ERR_SMASK \
791         | RCV_ERR_STATUS_RX_RBUF_CSR_QVLD_BIT_PARITY_ERR_SMASK \
792         | RCV_ERR_STATUS_RX_RBUF_CSR_QHD_PTR_PARITY_ERR_SMASK \
793         | RCV_ERR_STATUS_RX_RBUF_CSR_QTL_PTR_PARITY_ERR_SMASK \
794         | RCV_ERR_STATUS_RX_RBUF_CSR_QNUM_OF_PKT_PARITY_ERR_SMASK \
795         | RCV_ERR_STATUS_RX_RBUF_CSR_QEOPDW_PARITY_ERR_SMASK \
796         | RCV_ERR_STATUS_RX_RBUF_CTX_ID_PARITY_ERR_SMASK \
797         | RCV_ERR_STATUS_RX_RBUF_BAD_LOOKUP_ERR_SMASK \
798         | RCV_ERR_STATUS_RX_RBUF_FULL_ERR_SMASK \
799         | RCV_ERR_STATUS_RX_RBUF_EMPTY_ERR_SMASK \
800         | RCV_ERR_STATUS_RX_RBUF_FL_RD_ADDR_PARITY_ERR_SMASK \
801         | RCV_ERR_STATUS_RX_RBUF_FL_WR_ADDR_PARITY_ERR_SMASK \
802         | RCV_ERR_STATUS_RX_RBUF_FL_INITDONE_PARITY_ERR_SMASK \
803         | RCV_ERR_STATUS_RX_RBUF_FL_INIT_WR_ADDR_PARITY_ERR_SMASK \
804         | RCV_ERR_STATUS_RX_RBUF_NEXT_FREE_BUF_UNC_ERR_SMASK \
805         | RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_ERR_SMASK \
806         | RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_COR_ERR_SMASK \
807         | RCV_ERR_STATUS_RX_LOOKUP_DES_PART2_PARITY_ERR_SMASK \
808         | RCV_ERR_STATUS_RX_LOOKUP_RCV_ARRAY_UNC_ERR_SMASK \
809         | RCV_ERR_STATUS_RX_LOOKUP_CSR_PARITY_ERR_SMASK \
810         | RCV_ERR_STATUS_RX_HQ_INTR_CSR_PARITY_ERR_SMASK \
811         | RCV_ERR_STATUS_RX_HQ_INTR_FSM_ERR_SMASK \
812         | RCV_ERR_STATUS_RX_RBUF_DESC_PART1_UNC_ERR_SMASK \
813         | RCV_ERR_STATUS_RX_RBUF_DESC_PART1_COR_ERR_SMASK \
814         | RCV_ERR_STATUS_RX_RBUF_DESC_PART2_UNC_ERR_SMASK \
815         | RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK \
816         | RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK \
817         | RCV_ERR_STATUS_RX_RBUF_DATA_UNC_ERR_SMASK \
818         | RCV_ERR_STATUS_RX_DMA_CSR_PARITY_ERR_SMASK \
819         | RCV_ERR_STATUS_RX_DMA_EQ_FSM_ENCODING_ERR_SMASK \
820         | RCV_ERR_STATUS_RX_DMA_DQ_FSM_ENCODING_ERR_SMASK \
821         | RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK \
822         | RCV_ERR_STATUS_RX_CSR_PARITY_ERR_SMASK)
823
824 #define RXE_FREEZE_ABORT_MASK \
825         (RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK | \
826         RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK | \
827         RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK)
828
829 /*
830  * DCC Error Flags
831  */
832 #define DCCE(name) DCC_ERR_FLG_##name##_SMASK
833 static struct flag_table dcc_err_flags[] = {
834         FLAG_ENTRY0("bad_l2_err", DCCE(BAD_L2_ERR)),
835         FLAG_ENTRY0("bad_sc_err", DCCE(BAD_SC_ERR)),
836         FLAG_ENTRY0("bad_mid_tail_err", DCCE(BAD_MID_TAIL_ERR)),
837         FLAG_ENTRY0("bad_preemption_err", DCCE(BAD_PREEMPTION_ERR)),
838         FLAG_ENTRY0("preemption_err", DCCE(PREEMPTION_ERR)),
839         FLAG_ENTRY0("preemptionvl15_err", DCCE(PREEMPTIONVL15_ERR)),
840         FLAG_ENTRY0("bad_vl_marker_err", DCCE(BAD_VL_MARKER_ERR)),
841         FLAG_ENTRY0("bad_dlid_target_err", DCCE(BAD_DLID_TARGET_ERR)),
842         FLAG_ENTRY0("bad_lver_err", DCCE(BAD_LVER_ERR)),
843         FLAG_ENTRY0("uncorrectable_err", DCCE(UNCORRECTABLE_ERR)),
844         FLAG_ENTRY0("bad_crdt_ack_err", DCCE(BAD_CRDT_ACK_ERR)),
845         FLAG_ENTRY0("unsup_pkt_type", DCCE(UNSUP_PKT_TYPE)),
846         FLAG_ENTRY0("bad_ctrl_flit_err", DCCE(BAD_CTRL_FLIT_ERR)),
847         FLAG_ENTRY0("event_cntr_parity_err", DCCE(EVENT_CNTR_PARITY_ERR)),
848         FLAG_ENTRY0("event_cntr_rollover_err", DCCE(EVENT_CNTR_ROLLOVER_ERR)),
849         FLAG_ENTRY0("link_err", DCCE(LINK_ERR)),
850         FLAG_ENTRY0("misc_cntr_rollover_err", DCCE(MISC_CNTR_ROLLOVER_ERR)),
851         FLAG_ENTRY0("bad_ctrl_dist_err", DCCE(BAD_CTRL_DIST_ERR)),
852         FLAG_ENTRY0("bad_tail_dist_err", DCCE(BAD_TAIL_DIST_ERR)),
853         FLAG_ENTRY0("bad_head_dist_err", DCCE(BAD_HEAD_DIST_ERR)),
854         FLAG_ENTRY0("nonvl15_state_err", DCCE(NONVL15_STATE_ERR)),
855         FLAG_ENTRY0("vl15_multi_err", DCCE(VL15_MULTI_ERR)),
856         FLAG_ENTRY0("bad_pkt_length_err", DCCE(BAD_PKT_LENGTH_ERR)),
857         FLAG_ENTRY0("unsup_vl_err", DCCE(UNSUP_VL_ERR)),
858         FLAG_ENTRY0("perm_nvl15_err", DCCE(PERM_NVL15_ERR)),
859         FLAG_ENTRY0("slid_zero_err", DCCE(SLID_ZERO_ERR)),
860         FLAG_ENTRY0("dlid_zero_err", DCCE(DLID_ZERO_ERR)),
861         FLAG_ENTRY0("length_mtu_err", DCCE(LENGTH_MTU_ERR)),
862         FLAG_ENTRY0("rx_early_drop_err", DCCE(RX_EARLY_DROP_ERR)),
863         FLAG_ENTRY0("late_short_err", DCCE(LATE_SHORT_ERR)),
864         FLAG_ENTRY0("late_long_err", DCCE(LATE_LONG_ERR)),
865         FLAG_ENTRY0("late_ebp_err", DCCE(LATE_EBP_ERR)),
866         FLAG_ENTRY0("fpe_tx_fifo_ovflw_err", DCCE(FPE_TX_FIFO_OVFLW_ERR)),
867         FLAG_ENTRY0("fpe_tx_fifo_unflw_err", DCCE(FPE_TX_FIFO_UNFLW_ERR)),
868         FLAG_ENTRY0("csr_access_blocked_host", DCCE(CSR_ACCESS_BLOCKED_HOST)),
869         FLAG_ENTRY0("csr_access_blocked_uc", DCCE(CSR_ACCESS_BLOCKED_UC)),
870         FLAG_ENTRY0("tx_ctrl_parity_err", DCCE(TX_CTRL_PARITY_ERR)),
871         FLAG_ENTRY0("tx_ctrl_parity_mbe_err", DCCE(TX_CTRL_PARITY_MBE_ERR)),
872         FLAG_ENTRY0("tx_sc_parity_err", DCCE(TX_SC_PARITY_ERR)),
873         FLAG_ENTRY0("rx_ctrl_parity_mbe_err", DCCE(RX_CTRL_PARITY_MBE_ERR)),
874         FLAG_ENTRY0("csr_parity_err", DCCE(CSR_PARITY_ERR)),
875         FLAG_ENTRY0("csr_inval_addr", DCCE(CSR_INVAL_ADDR)),
876         FLAG_ENTRY0("tx_byte_shft_parity_err", DCCE(TX_BYTE_SHFT_PARITY_ERR)),
877         FLAG_ENTRY0("rx_byte_shft_parity_err", DCCE(RX_BYTE_SHFT_PARITY_ERR)),
878         FLAG_ENTRY0("fmconfig_err", DCCE(FMCONFIG_ERR)),
879         FLAG_ENTRY0("rcvport_err", DCCE(RCVPORT_ERR)),
880 };
881
882 /*
883  * LCB error flags
884  */
885 #define LCBE(name) DC_LCB_ERR_FLG_##name##_SMASK
886 static struct flag_table lcb_err_flags[] = {
887 /* 0*/  FLAG_ENTRY0("CSR_PARITY_ERR", LCBE(CSR_PARITY_ERR)),
888 /* 1*/  FLAG_ENTRY0("INVALID_CSR_ADDR", LCBE(INVALID_CSR_ADDR)),
889 /* 2*/  FLAG_ENTRY0("RST_FOR_FAILED_DESKEW", LCBE(RST_FOR_FAILED_DESKEW)),
890 /* 3*/  FLAG_ENTRY0("ALL_LNS_FAILED_REINIT_TEST",
891                 LCBE(ALL_LNS_FAILED_REINIT_TEST)),
892 /* 4*/  FLAG_ENTRY0("LOST_REINIT_STALL_OR_TOS", LCBE(LOST_REINIT_STALL_OR_TOS)),
893 /* 5*/  FLAG_ENTRY0("TX_LESS_THAN_FOUR_LNS", LCBE(TX_LESS_THAN_FOUR_LNS)),
894 /* 6*/  FLAG_ENTRY0("RX_LESS_THAN_FOUR_LNS", LCBE(RX_LESS_THAN_FOUR_LNS)),
895 /* 7*/  FLAG_ENTRY0("SEQ_CRC_ERR", LCBE(SEQ_CRC_ERR)),
896 /* 8*/  FLAG_ENTRY0("REINIT_FROM_PEER", LCBE(REINIT_FROM_PEER)),
897 /* 9*/  FLAG_ENTRY0("REINIT_FOR_LN_DEGRADE", LCBE(REINIT_FOR_LN_DEGRADE)),
898 /*10*/  FLAG_ENTRY0("CRC_ERR_CNT_HIT_LIMIT", LCBE(CRC_ERR_CNT_HIT_LIMIT)),
899 /*11*/  FLAG_ENTRY0("RCLK_STOPPED", LCBE(RCLK_STOPPED)),
900 /*12*/  FLAG_ENTRY0("UNEXPECTED_REPLAY_MARKER", LCBE(UNEXPECTED_REPLAY_MARKER)),
901 /*13*/  FLAG_ENTRY0("UNEXPECTED_ROUND_TRIP_MARKER",
902                 LCBE(UNEXPECTED_ROUND_TRIP_MARKER)),
903 /*14*/  FLAG_ENTRY0("ILLEGAL_NULL_LTP", LCBE(ILLEGAL_NULL_LTP)),
904 /*15*/  FLAG_ENTRY0("ILLEGAL_FLIT_ENCODING", LCBE(ILLEGAL_FLIT_ENCODING)),
905 /*16*/  FLAG_ENTRY0("FLIT_INPUT_BUF_OFLW", LCBE(FLIT_INPUT_BUF_OFLW)),
906 /*17*/  FLAG_ENTRY0("VL_ACK_INPUT_BUF_OFLW", LCBE(VL_ACK_INPUT_BUF_OFLW)),
907 /*18*/  FLAG_ENTRY0("VL_ACK_INPUT_PARITY_ERR", LCBE(VL_ACK_INPUT_PARITY_ERR)),
908 /*19*/  FLAG_ENTRY0("VL_ACK_INPUT_WRONG_CRC_MODE",
909                 LCBE(VL_ACK_INPUT_WRONG_CRC_MODE)),
910 /*20*/  FLAG_ENTRY0("FLIT_INPUT_BUF_MBE", LCBE(FLIT_INPUT_BUF_MBE)),
911 /*21*/  FLAG_ENTRY0("FLIT_INPUT_BUF_SBE", LCBE(FLIT_INPUT_BUF_SBE)),
912 /*22*/  FLAG_ENTRY0("REPLAY_BUF_MBE", LCBE(REPLAY_BUF_MBE)),
913 /*23*/  FLAG_ENTRY0("REPLAY_BUF_SBE", LCBE(REPLAY_BUF_SBE)),
914 /*24*/  FLAG_ENTRY0("CREDIT_RETURN_FLIT_MBE", LCBE(CREDIT_RETURN_FLIT_MBE)),
915 /*25*/  FLAG_ENTRY0("RST_FOR_LINK_TIMEOUT", LCBE(RST_FOR_LINK_TIMEOUT)),
916 /*26*/  FLAG_ENTRY0("RST_FOR_INCOMPLT_RND_TRIP",
917                 LCBE(RST_FOR_INCOMPLT_RND_TRIP)),
918 /*27*/  FLAG_ENTRY0("HOLD_REINIT", LCBE(HOLD_REINIT)),
919 /*28*/  FLAG_ENTRY0("NEG_EDGE_LINK_TRANSFER_ACTIVE",
920                 LCBE(NEG_EDGE_LINK_TRANSFER_ACTIVE)),
921 /*29*/  FLAG_ENTRY0("REDUNDANT_FLIT_PARITY_ERR",
922                 LCBE(REDUNDANT_FLIT_PARITY_ERR))
923 };
924
925 /*
926  * DC8051 Error Flags
927  */
928 #define D8E(name) DC_DC8051_ERR_FLG_##name##_SMASK
929 static struct flag_table dc8051_err_flags[] = {
930         FLAG_ENTRY0("SET_BY_8051", D8E(SET_BY_8051)),
931         FLAG_ENTRY0("LOST_8051_HEART_BEAT", D8E(LOST_8051_HEART_BEAT)),
932         FLAG_ENTRY0("CRAM_MBE", D8E(CRAM_MBE)),
933         FLAG_ENTRY0("CRAM_SBE", D8E(CRAM_SBE)),
934         FLAG_ENTRY0("DRAM_MBE", D8E(DRAM_MBE)),
935         FLAG_ENTRY0("DRAM_SBE", D8E(DRAM_SBE)),
936         FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)),
937         FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)),
938         FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES",
939                 D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
940         FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)),
941 };
942
943 /*
944  * DC8051 Information Error flags
945  *
946  * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR field.
947  */
948 static struct flag_table dc8051_info_err_flags[] = {
949         FLAG_ENTRY0("Spico ROM check failed",  SPICO_ROM_FAILED),
950         FLAG_ENTRY0("Unknown frame received",  UNKNOWN_FRAME),
951         FLAG_ENTRY0("Target BER not met",      TARGET_BER_NOT_MET),
952         FLAG_ENTRY0("Serdes internal loopback failure",
953                                         FAILED_SERDES_INTERNAL_LOOPBACK),
954         FLAG_ENTRY0("Failed SerDes init",      FAILED_SERDES_INIT),
955         FLAG_ENTRY0("Failed LNI(Polling)",     FAILED_LNI_POLLING),
956         FLAG_ENTRY0("Failed LNI(Debounce)",    FAILED_LNI_DEBOUNCE),
957         FLAG_ENTRY0("Failed LNI(EstbComm)",    FAILED_LNI_ESTBCOMM),
958         FLAG_ENTRY0("Failed LNI(OptEq)",       FAILED_LNI_OPTEQ),
959         FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
960         FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
961         FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT)
962 };
963
964 /*
965  * DC8051 Information Host Information flags
966  *
967  * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG field.
968  */
969 static struct flag_table dc8051_info_host_msg_flags[] = {
970         FLAG_ENTRY0("Host request done", 0x0001),
971         FLAG_ENTRY0("BC SMA message", 0x0002),
972         FLAG_ENTRY0("BC PWR_MGM message", 0x0004),
973         FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008),
974         FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010),
975         FLAG_ENTRY0("External device config request", 0x0020),
976         FLAG_ENTRY0("VerifyCap all frames received", 0x0040),
977         FLAG_ENTRY0("LinkUp achieved", 0x0080),
978         FLAG_ENTRY0("Link going down", 0x0100),
979 };
980
981
982 static u32 encoded_size(u32 size);
983 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate);
984 static int set_physical_link_state(struct hfi1_devdata *dd, u64 state);
985 static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
986                                u8 *continuous);
987 static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
988                                   u8 *vcu, u16 *vl15buf, u8 *crc_sizes);
989 static void read_vc_remote_link_width(struct hfi1_devdata *dd,
990                                       u8 *remote_tx_rate, u16 *link_widths);
991 static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
992                                      u8 *flag_bits, u16 *link_widths);
993 static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
994                                   u8 *device_rev);
995 static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed);
996 static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx);
997 static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx,
998                             u8 *tx_polarity_inversion,
999                             u8 *rx_polarity_inversion, u8 *max_rate);
1000 static void handle_sdma_eng_err(struct hfi1_devdata *dd,
1001                                 unsigned int context, u64 err_status);
1002 static void handle_qsfp_int(struct hfi1_devdata *dd, u32 source, u64 reg);
1003 static void handle_dcc_err(struct hfi1_devdata *dd,
1004                            unsigned int context, u64 err_status);
1005 static void handle_lcb_err(struct hfi1_devdata *dd,
1006                            unsigned int context, u64 err_status);
1007 static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg);
1008 static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1009 static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1010 static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1011 static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1012 static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1013 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1014 static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1015 static void set_partition_keys(struct hfi1_pportdata *);
1016 static const char *link_state_name(u32 state);
1017 static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
1018                                           u32 state);
1019 static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
1020                            u64 *out_data);
1021 static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
1022 static int thermal_init(struct hfi1_devdata *dd);
1023
1024 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
1025                                   int msecs);
1026 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
1027 static void handle_temp_err(struct hfi1_devdata *);
1028 static void dc_shutdown(struct hfi1_devdata *);
1029 static void dc_start(struct hfi1_devdata *);
1030
1031 /*
1032  * Error interrupt table entry.  This is used as input to the interrupt
1033  * "clear down" routine used for all second tier error interrupt register.
1034  * Second tier interrupt registers have a single bit representing them
1035  * in the top-level CceIntStatus.
1036  */
1037 struct err_reg_info {
1038         u32 status;             /* status CSR offset */
1039         u32 clear;              /* clear CSR offset */
1040         u32 mask;               /* mask CSR offset */
1041         void (*handler)(struct hfi1_devdata *dd, u32 source, u64 reg);
1042         const char *desc;
1043 };
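/*
 * Illustrative sketch of how an err_reg_info entry might drive the
 * "clear down" sequence described above: read the second tier status CSR,
 * clear the bits that were seen, then hand the raw value to the per-block
 * handler.  This is an assumption-labeled example, not the driver's routine.
 */
#if 0	/* example only */
static void example_clear_down(struct hfi1_devdata *dd,
			       const struct err_reg_info *eri, u32 source)
{
	u64 reg = read_csr(dd, eri->status);	/* second tier status */

	write_csr(dd, eri->clear, reg);		/* acknowledge what we saw */
	if (eri->handler)
		eri->handler(dd, source, reg);	/* decode/log the details */
}
#endif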
1044
1045 #define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
1046 #define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
1047 #define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
1048
1049 /*
1050  * Helpers for building HFI and DC error interrupt table entries.  Different
1051  * helpers are needed because of inconsistent register names.
1052  */
1053 #define EE(reg, handler, desc) \
1054         { reg##_STATUS, reg##_CLEAR, reg##_MASK, \
1055                 handler, desc }
1056 #define DC_EE1(reg, handler, desc) \
1057         { reg##_FLG, reg##_FLG_CLR, reg##_FLG_EN, handler, desc }
1058 #define DC_EE2(reg, handler, desc) \
1059         { reg##_FLG, reg##_CLR, reg##_EN, handler, desc }
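/*
 * For illustration, EE(CCE_ERR, handle_cce_err, "CceErr") pastes the register
 * name into the three CSR offsets and expands to
 *   { CCE_ERR_STATUS, CCE_ERR_CLEAR, CCE_ERR_MASK, handle_cce_err, "CceErr" }
 * while DC_EE1/DC_EE2 cover the DC blocks whose CSRs use the _FLG/_CLR/_EN
 * style suffixes instead.
 */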
1060
1061 /*
1062  * Table of the "misc" grouping of error interrupts.  Each entry refers to
1063  * another register containing more information.
1064  */
1065 static const struct err_reg_info misc_errs[NUM_MISC_ERRS] = {
1066 /* 0*/  EE(CCE_ERR,             handle_cce_err,    "CceErr"),
1067 /* 1*/  EE(RCV_ERR,             handle_rxe_err,    "RxeErr"),
1068 /* 2*/  EE(MISC_ERR,    handle_misc_err,   "MiscErr"),
1069 /* 3*/  { 0, 0, 0, NULL }, /* reserved */
1070 /* 4*/  EE(SEND_PIO_ERR,    handle_pio_err,    "PioErr"),
1071 /* 5*/  EE(SEND_DMA_ERR,    handle_sdma_err,   "SDmaErr"),
1072 /* 6*/  EE(SEND_EGRESS_ERR, handle_egress_err, "EgressErr"),
1073 /* 7*/  EE(SEND_ERR,    handle_txe_err,    "TxeErr")
1074         /* the rest are reserved */
1075 };
1076
1077 /*
1078  * Index into the Various section of the interrupt sources
1079  * corresponding to the Critical Temperature interrupt.
1080  */
1081 #define TCRIT_INT_SOURCE 4
1082
1083 /*
1084  * SDMA error interrupt entry - refers to another register containing more
1085  * information.
1086  */
1087 static const struct err_reg_info sdma_eng_err =
1088         EE(SEND_DMA_ENG_ERR, handle_sdma_eng_err, "SDmaEngErr");
1089
1090 static const struct err_reg_info various_err[NUM_VARIOUS] = {
1091 /* 0*/  { 0, 0, 0, NULL }, /* PbcInt */
1092 /* 1*/  { 0, 0, 0, NULL }, /* GpioAssertInt */
1093 /* 2*/  EE(ASIC_QSFP1,  handle_qsfp_int,        "QSFP1"),
1094 /* 3*/  EE(ASIC_QSFP2,  handle_qsfp_int,        "QSFP2"),
1095 /* 4*/  { 0, 0, 0, NULL }, /* TCritInt */
1096         /* rest are reserved */
1097 };
1098
1099 /*
1100  * The DC encoding of mtu_cap for 10K MTU in the DCC_CFG_PORT_CONFIG
1101  * register cannot be derived from the MTU value because 10K is not
1102  * a power of 2. Therefore, we need a constant. Everything else can
1103  * be calculated.
1104  */
1105 #define DCC_CFG_PORT_MTU_CAP_10240 7
1106
1107 /*
1108  * Table of the DC grouping of error interrupts.  Each entry refers to
1109  * another register containing more information.
1110  */
1111 static const struct err_reg_info dc_errs[NUM_DC_ERRS] = {
1112 /* 0*/  DC_EE1(DCC_ERR,         handle_dcc_err,        "DCC Err"),
1113 /* 1*/  DC_EE2(DC_LCB_ERR,      handle_lcb_err,        "LCB Err"),
1114 /* 2*/  DC_EE2(DC_DC8051_ERR,   handle_8051_interrupt, "DC8051 Interrupt"),
1115 /* 3*/  /* dc_lbm_int - special, see is_dc_int() */
1116         /* the rest are reserved */
1117 };
1118
1119 struct cntr_entry {
1120         /*
1121          * counter name
1122          */
1123         char *name;
1124
1125         /*
1126          * CSR to read for this counter (if applicable)
1127          */
1128         u64 csr;
1129
1130         /*
1131          * offset into dd or ppd to store the counter's value
1132          */
1133         int offset;
1134
1135         /*
1136          * flags
1137          */
1138         u8 flags;
1139
1140         /*
1141          * accessor for stat element, context either dd or ppd
1142          */
1143         u64 (*rw_cntr)(const struct cntr_entry *,
1144                                void *context,
1145                                int vl,
1146                                int mode,
1147                                u64 data);
1148 };
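/*
 * Usage sketch, for illustration only ("entry" stands for any table
 * element): a counter is read or zeroed through its accessor with the
 * matching context - dd for device counters, ppd for port counters - e.g.
 *
 *   u64 val = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
 *   entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_W, 0);
 *
 * where the second call zeroes the counter.  Per-VL counters pass a real
 * VL number instead of CNTR_INVALID_VL.
 */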
1149
1150 #define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0
1151 #define C_RCV_HDR_OVF_LAST C_RCV_HDR_OVF_159
1152
1153 #define CNTR_ELEM(name, csr, offset, flags, accessor) \
1154 { \
1155         name, \
1156         csr, \
1157         offset, \
1158         flags, \
1159         accessor \
1160 }
1161
1162 /* 32bit RXE */
1163 #define RXE32_PORT_CNTR_ELEM(name, counter, flags) \
1164 CNTR_ELEM(#name, \
1165           (counter * 8 + RCV_COUNTER_ARRAY32), \
1166           0, flags | CNTR_32BIT, \
1167           port_access_u32_csr)
1168
1169 #define RXE32_DEV_CNTR_ELEM(name, counter, flags) \
1170 CNTR_ELEM(#name, \
1171           (counter * 8 + RCV_COUNTER_ARRAY32), \
1172           0, flags | CNTR_32BIT, \
1173           dev_access_u32_csr)
1174
1175 /* 64bit RXE */
1176 #define RXE64_PORT_CNTR_ELEM(name, counter, flags) \
1177 CNTR_ELEM(#name, \
1178           (counter * 8 + RCV_COUNTER_ARRAY64), \
1179           0, flags, \
1180           port_access_u64_csr)
1181
1182 #define RXE64_DEV_CNTR_ELEM(name, counter, flags) \
1183 CNTR_ELEM(#name, \
1184           (counter * 8 + RCV_COUNTER_ARRAY64), \
1185           0, flags, \
1186           dev_access_u64_csr)
1187
1188 #define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx
1189 #define OVR_ELM(ctx) \
1190 CNTR_ELEM("RcvHdrOvr" #ctx, \
1191           (RCV_HDR_OVFL_CNT + ctx*0x100), \
1192           0, CNTR_NORMAL, port_access_u64_csr)
1193
1194 /* 32bit TXE */
1195 #define TXE32_PORT_CNTR_ELEM(name, counter, flags) \
1196 CNTR_ELEM(#name, \
1197           (counter * 8 + SEND_COUNTER_ARRAY32), \
1198           0, flags | CNTR_32BIT, \
1199           port_access_u32_csr)
1200
1201 /* 64bit TXE */
1202 #define TXE64_PORT_CNTR_ELEM(name, counter, flags) \
1203 CNTR_ELEM(#name, \
1204           (counter * 8 + SEND_COUNTER_ARRAY64), \
1205           0, flags, \
1206           port_access_u64_csr)
1207
1208 #define TX64_DEV_CNTR_ELEM(name, counter, flags) \
1209 CNTR_ELEM(#name, \
1210           counter * 8 + SEND_COUNTER_ARRAY64, \
1211           0, \
1212           flags, \
1213           dev_access_u64_csr)
1214
1215 /* CCE */
1216 #define CCE_PERF_DEV_CNTR_ELEM(name, counter, flags) \
1217 CNTR_ELEM(#name, \
1218           (counter * 8 + CCE_COUNTER_ARRAY32), \
1219           0, flags | CNTR_32BIT, \
1220           dev_access_u32_csr)
1221
1222 #define CCE_INT_DEV_CNTR_ELEM(name, counter, flags) \
1223 CNTR_ELEM(#name, \
1224           (counter * 8 + CCE_INT_COUNTER_ARRAY32), \
1225           0, flags | CNTR_32BIT, \
1226           dev_access_u32_csr)
1227
1228 /* DC */
1229 #define DC_PERF_CNTR(name, counter, flags) \
1230 CNTR_ELEM(#name, \
1231           counter, \
1232           0, \
1233           flags, \
1234           dev_access_u64_csr)
1235
1236 #define DC_PERF_CNTR_LCB(name, counter, flags) \
1237 CNTR_ELEM(#name, \
1238           counter, \
1239           0, \
1240           flags, \
1241           dc_access_lcb_cntr)
1242
1243 /* ibp counters */
1244 #define SW_IBP_CNTR(name, cntr) \
1245 CNTR_ELEM(#name, \
1246           0, \
1247           0, \
1248           CNTR_SYNTH, \
1249           access_ibp_##cntr)
1250
1251 u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
1252 {
1253         u64 val;
1254
1255         if (dd->flags & HFI1_PRESENT) {
1256                 val = readq((void __iomem *)dd->kregbase + offset);
1257                 return val;
1258         }
1259         return -1;
1260 }
1261
1262 void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
1263 {
1264         if (dd->flags & HFI1_PRESENT)
1265                 writeq(value, (void __iomem *)dd->kregbase + offset);
1266 }
1267
1268 void __iomem *get_csr_addr(
1269         struct hfi1_devdata *dd,
1270         u32 offset)
1271 {
1272         return (void __iomem *)dd->kregbase + offset;
1273 }
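/*
 * Illustrative only (the CSR and bit names below are hypothetical): a
 * typical read-modify-write through the helpers above looks like
 *
 *   u64 reg = read_csr(dd, SOME_CSR);
 *   reg |= SOME_BIT_SMASK;
 *   write_csr(dd, SOME_CSR, reg);
 *
 * Note that read_csr() returns all ones (-1) when the device is not
 * present, so callers that need to distinguish that case must check
 * dd->flags & HFI1_PRESENT themselves.
 */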
1274
1275 static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
1276                                  int mode, u64 value)
1277 {
1278         u64 ret;
1279
1281         if (mode == CNTR_MODE_R) {
1282                 ret = read_csr(dd, csr);
1283         } else if (mode == CNTR_MODE_W) {
1284                 write_csr(dd, csr, value);
1285                 ret = value;
1286         } else {
1287                 dd_dev_err(dd, "Invalid cntr register access mode");
1288                 return 0;
1289         }
1290
1291         hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, ret, mode);
1292         return ret;
1293 }
1294
1295 /* Dev Access */
1296 static u64 dev_access_u32_csr(const struct cntr_entry *entry,
1297                             void *context, int vl, int mode, u64 data)
1298 {
1299         struct hfi1_devdata *dd = context;
1300
1301         if (vl != CNTR_INVALID_VL)
1302                 return 0;
1303         return read_write_csr(dd, entry->csr, mode, data);
1304 }
1305
1306 static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
1307                             int vl, int mode, u64 data)
1308 {
1309         struct hfi1_devdata *dd = context;
1310
1311         u64 val = 0;
1312         u64 csr = entry->csr;
1313
1314         if (entry->flags & CNTR_VL) {
1315                 if (vl == CNTR_INVALID_VL)
1316                         return 0;
1317                 csr += 8 * vl;
1318         } else {
1319                 if (vl != CNTR_INVALID_VL)
1320                         return 0;
1321         }
1322
1323         val = read_write_csr(dd, csr, mode, data);
1324         return val;
1325 }
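/*
 * Worked example, for illustration: per-VL counters are laid out as a
 * contiguous array of 64-bit CSRs, so a CNTR_VL entry for VL n is read at
 * entry->csr + 8 * n.  Reading VL 3 of C_DC_RX_FLIT_VL in the dev_cntrs
 * table further down therefore reads DCC_PRF_PORT_VL_RCV_DATA_CNT + 24,
 * while calling a non-VL entry with anything other than CNTR_INVALID_VL
 * simply returns 0.
 */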
1326
1327 static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
1328                             int vl, int mode, u64 data)
1329 {
1330         struct hfi1_devdata *dd = context;
1331         u32 csr = entry->csr;
1332         int ret = 0;
1333
1334         if (vl != CNTR_INVALID_VL)
1335                 return 0;
1336         if (mode == CNTR_MODE_R)
1337                 ret = read_lcb_csr(dd, csr, &data);
1338         else if (mode == CNTR_MODE_W)
1339                 ret = write_lcb_csr(dd, csr, data);
1340
1341         if (ret) {
1342                 dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
1343                 return 0;
1344         }
1345
1346         hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, data, mode);
1347         return data;
1348 }
1349
1350 /* Port Access */
1351 static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
1352                              int vl, int mode, u64 data)
1353 {
1354         struct hfi1_pportdata *ppd = context;
1355
1356         if (vl != CNTR_INVALID_VL)
1357                 return 0;
1358         return read_write_csr(ppd->dd, entry->csr, mode, data);
1359 }
1360
1361 static u64 port_access_u64_csr(const struct cntr_entry *entry,
1362                              void *context, int vl, int mode, u64 data)
1363 {
1364         struct hfi1_pportdata *ppd = context;
1365         u64 val;
1366         u64 csr = entry->csr;
1367
1368         if (entry->flags & CNTR_VL) {
1369                 if (vl == CNTR_INVALID_VL)
1370                         return 0;
1371                 csr += 8 * vl;
1372         } else {
1373                 if (vl != CNTR_INVALID_VL)
1374                         return 0;
1375         }
1376         val = read_write_csr(ppd->dd, csr, mode, data);
1377         return val;
1378 }
1379
1380 /* Software defined */
1381 static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode,
1382                                 u64 data)
1383 {
1384         u64 ret;
1385
1386         if (mode == CNTR_MODE_R) {
1387                 ret = *cntr;
1388         } else if (mode == CNTR_MODE_W) {
1389                 *cntr = data;
1390                 ret = data;
1391         } else {
1392                 dd_dev_err(dd, "Invalid cntr sw access mode");
1393                 return 0;
1394         }
1395
1396         hfi1_cdbg(CNTR, "val 0x%llx mode %d", ret, mode);
1397
1398         return ret;
1399 }
1400
1401 static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
1402                                int vl, int mode, u64 data)
1403 {
1404         struct hfi1_pportdata *ppd = context;
1405
1406         if (vl != CNTR_INVALID_VL)
1407                 return 0;
1408         return read_write_sw(ppd->dd, &ppd->link_downed, mode, data);
1409 }
1410
1411 static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context,
1412                                int vl, int mode, u64 data)
1413 {
1414         struct hfi1_pportdata *ppd = context;
1415
1416         if (vl != CNTR_INVALID_VL)
1417                 return 0;
1418         return read_write_sw(ppd->dd, &ppd->link_up, mode, data);
1419 }
1420
1421 static u64 access_sw_xmit_discards(const struct cntr_entry *entry,
1422                                     void *context, int vl, int mode, u64 data)
1423 {
1424         struct hfi1_pportdata *ppd = context;
1425
1426         if (vl != CNTR_INVALID_VL)
1427                 return 0;
1428
1429         return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data);
1430 }
1431
1432 static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
1433                                      void *context, int vl, int mode, u64 data)
1434 {
1435         struct hfi1_pportdata *ppd = context;
1436
1437         if (vl != CNTR_INVALID_VL)
1438                 return 0;
1439
1440         return read_write_sw(ppd->dd, &ppd->port_xmit_constraint_errors,
1441                              mode, data);
1442 }
1443
1444 static u64 access_rcv_constraint_errs(const struct cntr_entry *entry,
1445                                      void *context, int vl, int mode, u64 data)
1446 {
1447         struct hfi1_pportdata *ppd = context;
1448
1449         if (vl != CNTR_INVALID_VL)
1450                 return 0;
1451
1452         return read_write_sw(ppd->dd, &ppd->port_rcv_constraint_errors,
1453                              mode, data);
1454 }
1455
1456 u64 get_all_cpu_total(u64 __percpu *cntr)
1457 {
1458         int cpu;
1459         u64 counter = 0;
1460
1461         for_each_possible_cpu(cpu)
1462                 counter += *per_cpu_ptr(cntr, cpu);
1463         return counter;
1464 }
1465
1466 static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val,
1467                           u64 __percpu *cntr,
1468                           int vl, int mode, u64 data)
1469 {
1470
1471         u64 ret = 0;
1472
1473         if (vl != CNTR_INVALID_VL)
1474                 return 0;
1475
1476         if (mode == CNTR_MODE_R) {
1477                 ret = get_all_cpu_total(cntr) - *z_val;
1478         } else if (mode == CNTR_MODE_W) {
1479                 /* A write can only zero the counter */
1480                 if (data == 0)
1481                         *z_val = get_all_cpu_total(cntr);
1482                 else
1483                         dd_dev_err(dd, "Per CPU cntrs can only be zeroed");
1484         } else {
1485                 dd_dev_err(dd, "Invalid cntr sw cpu access mode");
1486                 return 0;
1487         }
1488
1489         return ret;
1490 }
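/*
 * Sketch of the per-CPU zeroing scheme above (the numbers are illustrative
 * only): the per-CPU counters themselves are never cleared.  Writing 0
 * records the current all-CPU total in *z_val, and every later read
 * reports total - *z_val.  For example, if the summed interrupt count is
 * 1000 when the counter is "zeroed" and later reaches 1250, a read
 * returns 250.
 */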
1491
1492 static u64 access_sw_cpu_intr(const struct cntr_entry *entry,
1493                               void *context, int vl, int mode, u64 data)
1494 {
1495         struct hfi1_devdata *dd = context;
1496
1497         return read_write_cpu(dd, &dd->z_int_counter, dd->int_counter, vl,
1498                               mode, data);
1499 }
1500
1501 static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry,
1502                               void *context, int vl, int mode, u64 data)
1503 {
1504         struct hfi1_devdata *dd = context;
1505
1506         return read_write_cpu(dd, &dd->z_rcv_limit, dd->rcv_limit, vl,
1507                               mode, data);
1508 }
1509
1510 static u64 access_sw_pio_wait(const struct cntr_entry *entry,
1511                               void *context, int vl, int mode, u64 data)
1512 {
1513         struct hfi1_devdata *dd = context;
1514
1515         return dd->verbs_dev.n_piowait;
1516 }
1517
1518 static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
1519                               void *context, int vl, int mode, u64 data)
1520 {
1521         struct hfi1_devdata *dd = context;
1522
1523         return dd->verbs_dev.n_txwait;
1524 }
1525
1526 static u64 access_sw_kmem_wait(const struct cntr_entry *entry,
1527                                void *context, int vl, int mode, u64 data)
1528 {
1529         struct hfi1_devdata *dd = context;
1530
1531         return dd->verbs_dev.n_kmem_wait;
1532 }
1533
1534 static u64 access_sw_send_schedule(const struct cntr_entry *entry,
1535                                void *context, int vl, int mode, u64 data)
1536 {
1537         struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1538
1539         return dd->verbs_dev.n_send_schedule;
1540 }
1541
1542 #define def_access_sw_cpu(cntr) \
1543 static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry,               \
1544                               void *context, int vl, int mode, u64 data)      \
1545 {                                                                             \
1546         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;        \
1547         return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr,           \
1548                               ppd->ibport_data.cntr, vl,                      \
1549                               mode, data);                                    \
1550 }
1551
1552 def_access_sw_cpu(rc_acks);
1553 def_access_sw_cpu(rc_qacks);
1554 def_access_sw_cpu(rc_delayed_comp);
1555
1556 #define def_access_ibp_counter(cntr) \
1557 static u64 access_ibp_##cntr(const struct cntr_entry *entry,                  \
1558                                 void *context, int vl, int mode, u64 data)    \
1559 {                                                                             \
1560         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;        \
1561                                                                               \
1562         if (vl != CNTR_INVALID_VL)                                            \
1563                 return 0;                                                     \
1564                                                                               \
1565         return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr,            \
1566                              mode, data);                                     \
1567 }
1568
1569 def_access_ibp_counter(loop_pkts);
1570 def_access_ibp_counter(rc_resends);
1571 def_access_ibp_counter(rnr_naks);
1572 def_access_ibp_counter(other_naks);
1573 def_access_ibp_counter(rc_timeouts);
1574 def_access_ibp_counter(pkt_drops);
1575 def_access_ibp_counter(dmawait);
1576 def_access_ibp_counter(rc_seqnak);
1577 def_access_ibp_counter(rc_dupreq);
1578 def_access_ibp_counter(rdma_seq);
1579 def_access_ibp_counter(unaligned);
1580 def_access_ibp_counter(seq_naks);
1581
1582 static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
1583 [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
1584 [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
1585                         CNTR_NORMAL),
1586 [C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT,
1587                         CNTR_NORMAL),
1588 [C_RX_TID_FLGMS] = RXE32_DEV_CNTR_ELEM(RxTidFLGMs,
1589                         RCV_TID_FLOW_GEN_MISMATCH_CNT,
1590                         CNTR_NORMAL),
1591 [C_RX_CTX_EGRS] = RXE32_DEV_CNTR_ELEM(RxCtxEgrS, RCV_CONTEXT_EGR_STALL,
1592                         CNTR_NORMAL),
1593 [C_RCV_TID_FLSMS] = RXE32_DEV_CNTR_ELEM(RxTidFLSMs,
1594                         RCV_TID_FLOW_SEQ_MISMATCH_CNT, CNTR_NORMAL),
1595 [C_CCE_PCI_CR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciCrSt,
1596                         CCE_PCIE_POSTED_CRDT_STALL_CNT, CNTR_NORMAL),
1597 [C_CCE_PCI_TR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciTrSt, CCE_PCIE_TRGT_STALL_CNT,
1598                         CNTR_NORMAL),
1599 [C_CCE_PIO_WR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePioWrSt, CCE_PIO_WR_STALL_CNT,
1600                         CNTR_NORMAL),
1601 [C_CCE_ERR_INT] = CCE_INT_DEV_CNTR_ELEM(CceErrInt, CCE_ERR_INT_CNT,
1602                         CNTR_NORMAL),
1603 [C_CCE_SDMA_INT] = CCE_INT_DEV_CNTR_ELEM(CceSdmaInt, CCE_SDMA_INT_CNT,
1604                         CNTR_NORMAL),
1605 [C_CCE_MISC_INT] = CCE_INT_DEV_CNTR_ELEM(CceMiscInt, CCE_MISC_INT_CNT,
1606                         CNTR_NORMAL),
1607 [C_CCE_RCV_AV_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvAvInt, CCE_RCV_AVAIL_INT_CNT,
1608                         CNTR_NORMAL),
1609 [C_CCE_RCV_URG_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvUrgInt,
1610                         CCE_RCV_URGENT_INT_CNT, CNTR_NORMAL),
1611 [C_CCE_SEND_CR_INT] = CCE_INT_DEV_CNTR_ELEM(CceSndCrInt,
1612                         CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL),
1613 [C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT,
1614                               CNTR_SYNTH),
1615 [C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH),
1616 [C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT,
1617                                  CNTR_SYNTH),
1618 [C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT,
1619                                   CNTR_SYNTH),
1620 [C_DC_DROPPED_PKT] = DC_PERF_CNTR(DcDroppedPkt, DCC_ERR_DROPPED_PKT_CNT,
1621                                   CNTR_SYNTH),
1622 [C_DC_MC_XMIT_PKTS] = DC_PERF_CNTR(DcMcXmitPkts,
1623                                    DCC_PRF_PORT_XMIT_MULTICAST_CNT, CNTR_SYNTH),
1624 [C_DC_MC_RCV_PKTS] = DC_PERF_CNTR(DcMcRcvPkts,
1625                                   DCC_PRF_PORT_RCV_MULTICAST_PKT_CNT,
1626                                   CNTR_SYNTH),
1627 [C_DC_XMIT_CERR] = DC_PERF_CNTR(DcXmitCorr,
1628                                 DCC_PRF_PORT_XMIT_CORRECTABLE_CNT, CNTR_SYNTH),
1629 [C_DC_RCV_CERR] = DC_PERF_CNTR(DcRcvCorrCnt, DCC_PRF_PORT_RCV_CORRECTABLE_CNT,
1630                                CNTR_SYNTH),
1631 [C_DC_RCV_FCC] = DC_PERF_CNTR(DcRxFCntl, DCC_PRF_RX_FLOW_CRTL_CNT,
1632                               CNTR_SYNTH),
1633 [C_DC_XMIT_FCC] = DC_PERF_CNTR(DcXmitFCntl, DCC_PRF_TX_FLOW_CRTL_CNT,
1634                                CNTR_SYNTH),
1635 [C_DC_XMIT_FLITS] = DC_PERF_CNTR(DcXmitFlits, DCC_PRF_PORT_XMIT_DATA_CNT,
1636                                  CNTR_SYNTH),
1637 [C_DC_RCV_FLITS] = DC_PERF_CNTR(DcRcvFlits, DCC_PRF_PORT_RCV_DATA_CNT,
1638                                 CNTR_SYNTH),
1639 [C_DC_XMIT_PKTS] = DC_PERF_CNTR(DcXmitPkts, DCC_PRF_PORT_XMIT_PKTS_CNT,
1640                                 CNTR_SYNTH),
1641 [C_DC_RCV_PKTS] = DC_PERF_CNTR(DcRcvPkts, DCC_PRF_PORT_RCV_PKTS_CNT,
1642                                CNTR_SYNTH),
1643 [C_DC_RX_FLIT_VL] = DC_PERF_CNTR(DcRxFlitVl, DCC_PRF_PORT_VL_RCV_DATA_CNT,
1644                                  CNTR_SYNTH | CNTR_VL),
1645 [C_DC_RX_PKT_VL] = DC_PERF_CNTR(DcRxPktVl, DCC_PRF_PORT_VL_RCV_PKTS_CNT,
1646                                 CNTR_SYNTH | CNTR_VL),
1647 [C_DC_RCV_FCN] = DC_PERF_CNTR(DcRcvFcn, DCC_PRF_PORT_RCV_FECN_CNT, CNTR_SYNTH),
1648 [C_DC_RCV_FCN_VL] = DC_PERF_CNTR(DcRcvFcnVl, DCC_PRF_PORT_VL_RCV_FECN_CNT,
1649                                  CNTR_SYNTH | CNTR_VL),
1650 [C_DC_RCV_BCN] = DC_PERF_CNTR(DcRcvBcn, DCC_PRF_PORT_RCV_BECN_CNT, CNTR_SYNTH),
1651 [C_DC_RCV_BCN_VL] = DC_PERF_CNTR(DcRcvBcnVl, DCC_PRF_PORT_VL_RCV_BECN_CNT,
1652                                  CNTR_SYNTH | CNTR_VL),
1653 [C_DC_RCV_BBL] = DC_PERF_CNTR(DcRcvBbl, DCC_PRF_PORT_RCV_BUBBLE_CNT,
1654                               CNTR_SYNTH),
1655 [C_DC_RCV_BBL_VL] = DC_PERF_CNTR(DcRcvBblVl, DCC_PRF_PORT_VL_RCV_BUBBLE_CNT,
1656                                  CNTR_SYNTH | CNTR_VL),
1657 [C_DC_MARK_FECN] = DC_PERF_CNTR(DcMarkFcn, DCC_PRF_PORT_MARK_FECN_CNT,
1658                                 CNTR_SYNTH),
1659 [C_DC_MARK_FECN_VL] = DC_PERF_CNTR(DcMarkFcnVl, DCC_PRF_PORT_VL_MARK_FECN_CNT,
1660                                    CNTR_SYNTH | CNTR_VL),
1661 [C_DC_TOTAL_CRC] =
1662         DC_PERF_CNTR_LCB(DcTotCrc, DC_LCB_ERR_INFO_TOTAL_CRC_ERR,
1663                          CNTR_SYNTH),
1664 [C_DC_CRC_LN0] = DC_PERF_CNTR_LCB(DcCrcLn0, DC_LCB_ERR_INFO_CRC_ERR_LN0,
1665                                   CNTR_SYNTH),
1666 [C_DC_CRC_LN1] = DC_PERF_CNTR_LCB(DcCrcLn1, DC_LCB_ERR_INFO_CRC_ERR_LN1,
1667                                   CNTR_SYNTH),
1668 [C_DC_CRC_LN2] = DC_PERF_CNTR_LCB(DcCrcLn2, DC_LCB_ERR_INFO_CRC_ERR_LN2,
1669                                   CNTR_SYNTH),
1670 [C_DC_CRC_LN3] = DC_PERF_CNTR_LCB(DcCrcLn3, DC_LCB_ERR_INFO_CRC_ERR_LN3,
1671                                   CNTR_SYNTH),
1672 [C_DC_CRC_MULT_LN] =
1673         DC_PERF_CNTR_LCB(DcMultLn, DC_LCB_ERR_INFO_CRC_ERR_MULTI_LN,
1674                          CNTR_SYNTH),
1675 [C_DC_TX_REPLAY] = DC_PERF_CNTR_LCB(DcTxReplay, DC_LCB_ERR_INFO_TX_REPLAY_CNT,
1676                                     CNTR_SYNTH),
1677 [C_DC_RX_REPLAY] = DC_PERF_CNTR_LCB(DcRxReplay, DC_LCB_ERR_INFO_RX_REPLAY_CNT,
1678                                     CNTR_SYNTH),
1679 [C_DC_SEQ_CRC_CNT] =
1680         DC_PERF_CNTR_LCB(DcLinkSeqCrc, DC_LCB_ERR_INFO_SEQ_CRC_CNT,
1681                          CNTR_SYNTH),
1682 [C_DC_ESC0_ONLY_CNT] =
1683         DC_PERF_CNTR_LCB(DcEsc0, DC_LCB_ERR_INFO_ESCAPE_0_ONLY_CNT,
1684                          CNTR_SYNTH),
1685 [C_DC_ESC0_PLUS1_CNT] =
1686         DC_PERF_CNTR_LCB(DcEsc1, DC_LCB_ERR_INFO_ESCAPE_0_PLUS1_CNT,
1687                          CNTR_SYNTH),
1688 [C_DC_ESC0_PLUS2_CNT] =
1689         DC_PERF_CNTR_LCB(DcEsc0Plus2, DC_LCB_ERR_INFO_ESCAPE_0_PLUS2_CNT,
1690                          CNTR_SYNTH),
1691 [C_DC_REINIT_FROM_PEER_CNT] =
1692         DC_PERF_CNTR_LCB(DcReinitPeer, DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT,
1693                          CNTR_SYNTH),
1694 [C_DC_SBE_CNT] = DC_PERF_CNTR_LCB(DcSbe, DC_LCB_ERR_INFO_SBE_CNT,
1695                                   CNTR_SYNTH),
1696 [C_DC_MISC_FLG_CNT] =
1697         DC_PERF_CNTR_LCB(DcMiscFlg, DC_LCB_ERR_INFO_MISC_FLG_CNT,
1698                          CNTR_SYNTH),
1699 [C_DC_PRF_GOOD_LTP_CNT] =
1700         DC_PERF_CNTR_LCB(DcGoodLTP, DC_LCB_PRF_GOOD_LTP_CNT, CNTR_SYNTH),
1701 [C_DC_PRF_ACCEPTED_LTP_CNT] =
1702         DC_PERF_CNTR_LCB(DcAccLTP, DC_LCB_PRF_ACCEPTED_LTP_CNT,
1703                          CNTR_SYNTH),
1704 [C_DC_PRF_RX_FLIT_CNT] =
1705         DC_PERF_CNTR_LCB(DcPrfRxFlit, DC_LCB_PRF_RX_FLIT_CNT, CNTR_SYNTH),
1706 [C_DC_PRF_TX_FLIT_CNT] =
1707         DC_PERF_CNTR_LCB(DcPrfTxFlit, DC_LCB_PRF_TX_FLIT_CNT, CNTR_SYNTH),
1708 [C_DC_PRF_CLK_CNTR] =
1709         DC_PERF_CNTR_LCB(DcPrfClk, DC_LCB_PRF_CLK_CNTR, CNTR_SYNTH),
1710 [C_DC_PG_DBG_FLIT_CRDTS_CNT] =
1711         DC_PERF_CNTR_LCB(DcFltCrdts, DC_LCB_PG_DBG_FLIT_CRDTS_CNT, CNTR_SYNTH),
1712 [C_DC_PG_STS_PAUSE_COMPLETE_CNT] =
1713         DC_PERF_CNTR_LCB(DcPauseComp, DC_LCB_PG_STS_PAUSE_COMPLETE_CNT,
1714                          CNTR_SYNTH),
1715 [C_DC_PG_STS_TX_SBE_CNT] =
1716         DC_PERF_CNTR_LCB(DcStsTxSbe, DC_LCB_PG_STS_TX_SBE_CNT, CNTR_SYNTH),
1717 [C_DC_PG_STS_TX_MBE_CNT] =
1718         DC_PERF_CNTR_LCB(DcStsTxMbe, DC_LCB_PG_STS_TX_MBE_CNT,
1719                          CNTR_SYNTH),
1720 [C_SW_CPU_INTR] = CNTR_ELEM("Intr", 0, 0, CNTR_NORMAL,
1721                             access_sw_cpu_intr),
1722 [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
1723                             access_sw_cpu_rcv_limit),
1724 [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
1725                             access_sw_vtx_wait),
1726 [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
1727                             access_sw_pio_wait),
1728 [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
1729                             access_sw_kmem_wait),
1730 [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
1731                             access_sw_send_schedule),
1732 };
1733
1734 static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
1735 [C_TX_UNSUP_VL] = TXE32_PORT_CNTR_ELEM(TxUnVLErr, SEND_UNSUP_VL_ERR_CNT,
1736                         CNTR_NORMAL),
1737 [C_TX_INVAL_LEN] = TXE32_PORT_CNTR_ELEM(TxInvalLen, SEND_LEN_ERR_CNT,
1738                         CNTR_NORMAL),
1739 [C_TX_MM_LEN_ERR] = TXE32_PORT_CNTR_ELEM(TxMMLenErr, SEND_MAX_MIN_LEN_ERR_CNT,
1740                         CNTR_NORMAL),
1741 [C_TX_UNDERRUN] = TXE32_PORT_CNTR_ELEM(TxUnderrun, SEND_UNDERRUN_CNT,
1742                         CNTR_NORMAL),
1743 [C_TX_FLOW_STALL] = TXE32_PORT_CNTR_ELEM(TxFlowStall, SEND_FLOW_STALL_CNT,
1744                         CNTR_NORMAL),
1745 [C_TX_DROPPED] = TXE32_PORT_CNTR_ELEM(TxDropped, SEND_DROPPED_PKT_CNT,
1746                         CNTR_NORMAL),
1747 [C_TX_HDR_ERR] = TXE32_PORT_CNTR_ELEM(TxHdrErr, SEND_HEADERS_ERR_CNT,
1748                         CNTR_NORMAL),
1749 [C_TX_PKT] = TXE64_PORT_CNTR_ELEM(TxPkt, SEND_DATA_PKT_CNT, CNTR_NORMAL),
1750 [C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
1751 [C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
1752 [C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
1753                         CNTR_SYNTH | CNTR_VL),
1754 [C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
1755                         CNTR_SYNTH | CNTR_VL),
1756 [C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
1757                         CNTR_SYNTH | CNTR_VL),
1758 [C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
1759 [C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
1760 [C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1761                         access_sw_link_dn_cnt),
1762 [C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1763                         access_sw_link_up_cnt),
1764 [C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1765                         access_sw_xmit_discards),
1766 [C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0,
1767                         CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
1768                         access_sw_xmit_discards),
1769 [C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH,
1770                         access_xmit_constraint_errs),
1771 [C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH,
1772                         access_rcv_constraint_errs),
1773 [C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts),
1774 [C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends),
1775 [C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks),
1776 [C_SW_IBP_OTHER_NAKS] = SW_IBP_CNTR(OtherNak, other_naks),
1777 [C_SW_IBP_RC_TIMEOUTS] = SW_IBP_CNTR(RcTimeOut, rc_timeouts),
1778 [C_SW_IBP_PKT_DROPS] = SW_IBP_CNTR(PktDrop, pkt_drops),
1779 [C_SW_IBP_DMA_WAIT] = SW_IBP_CNTR(DmaWait, dmawait),
1780 [C_SW_IBP_RC_SEQNAK] = SW_IBP_CNTR(RcSeqNak, rc_seqnak),
1781 [C_SW_IBP_RC_DUPREQ] = SW_IBP_CNTR(RcDupRew, rc_dupreq),
1782 [C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
1783 [C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
1784 [C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
1785 [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
1786                                access_sw_cpu_rc_acks),
1787 [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
1788                                access_sw_cpu_rc_qacks),
1789 [C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL,
1790                                access_sw_cpu_rc_delayed_comp),
1791 [OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1),
1792 [OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3),
1793 [OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5),
1794 [OVR_LBL(6)] = OVR_ELM(6), [OVR_LBL(7)] = OVR_ELM(7),
1795 [OVR_LBL(8)] = OVR_ELM(8), [OVR_LBL(9)] = OVR_ELM(9),
1796 [OVR_LBL(10)] = OVR_ELM(10), [OVR_LBL(11)] = OVR_ELM(11),
1797 [OVR_LBL(12)] = OVR_ELM(12), [OVR_LBL(13)] = OVR_ELM(13),
1798 [OVR_LBL(14)] = OVR_ELM(14), [OVR_LBL(15)] = OVR_ELM(15),
1799 [OVR_LBL(16)] = OVR_ELM(16), [OVR_LBL(17)] = OVR_ELM(17),
1800 [OVR_LBL(18)] = OVR_ELM(18), [OVR_LBL(19)] = OVR_ELM(19),
1801 [OVR_LBL(20)] = OVR_ELM(20), [OVR_LBL(21)] = OVR_ELM(21),
1802 [OVR_LBL(22)] = OVR_ELM(22), [OVR_LBL(23)] = OVR_ELM(23),
1803 [OVR_LBL(24)] = OVR_ELM(24), [OVR_LBL(25)] = OVR_ELM(25),
1804 [OVR_LBL(26)] = OVR_ELM(26), [OVR_LBL(27)] = OVR_ELM(27),
1805 [OVR_LBL(28)] = OVR_ELM(28), [OVR_LBL(29)] = OVR_ELM(29),
1806 [OVR_LBL(30)] = OVR_ELM(30), [OVR_LBL(31)] = OVR_ELM(31),
1807 [OVR_LBL(32)] = OVR_ELM(32), [OVR_LBL(33)] = OVR_ELM(33),
1808 [OVR_LBL(34)] = OVR_ELM(34), [OVR_LBL(35)] = OVR_ELM(35),
1809 [OVR_LBL(36)] = OVR_ELM(36), [OVR_LBL(37)] = OVR_ELM(37),
1810 [OVR_LBL(38)] = OVR_ELM(38), [OVR_LBL(39)] = OVR_ELM(39),
1811 [OVR_LBL(40)] = OVR_ELM(40), [OVR_LBL(41)] = OVR_ELM(41),
1812 [OVR_LBL(42)] = OVR_ELM(42), [OVR_LBL(43)] = OVR_ELM(43),
1813 [OVR_LBL(44)] = OVR_ELM(44), [OVR_LBL(45)] = OVR_ELM(45),
1814 [OVR_LBL(46)] = OVR_ELM(46), [OVR_LBL(47)] = OVR_ELM(47),
1815 [OVR_LBL(48)] = OVR_ELM(48), [OVR_LBL(49)] = OVR_ELM(49),
1816 [OVR_LBL(50)] = OVR_ELM(50), [OVR_LBL(51)] = OVR_ELM(51),
1817 [OVR_LBL(52)] = OVR_ELM(52), [OVR_LBL(53)] = OVR_ELM(53),
1818 [OVR_LBL(54)] = OVR_ELM(54), [OVR_LBL(55)] = OVR_ELM(55),
1819 [OVR_LBL(56)] = OVR_ELM(56), [OVR_LBL(57)] = OVR_ELM(57),
1820 [OVR_LBL(58)] = OVR_ELM(58), [OVR_LBL(59)] = OVR_ELM(59),
1821 [OVR_LBL(60)] = OVR_ELM(60), [OVR_LBL(61)] = OVR_ELM(61),
1822 [OVR_LBL(62)] = OVR_ELM(62), [OVR_LBL(63)] = OVR_ELM(63),
1823 [OVR_LBL(64)] = OVR_ELM(64), [OVR_LBL(65)] = OVR_ELM(65),
1824 [OVR_LBL(66)] = OVR_ELM(66), [OVR_LBL(67)] = OVR_ELM(67),
1825 [OVR_LBL(68)] = OVR_ELM(68), [OVR_LBL(69)] = OVR_ELM(69),
1826 [OVR_LBL(70)] = OVR_ELM(70), [OVR_LBL(71)] = OVR_ELM(71),
1827 [OVR_LBL(72)] = OVR_ELM(72), [OVR_LBL(73)] = OVR_ELM(73),
1828 [OVR_LBL(74)] = OVR_ELM(74), [OVR_LBL(75)] = OVR_ELM(75),
1829 [OVR_LBL(76)] = OVR_ELM(76), [OVR_LBL(77)] = OVR_ELM(77),
1830 [OVR_LBL(78)] = OVR_ELM(78), [OVR_LBL(79)] = OVR_ELM(79),
1831 [OVR_LBL(80)] = OVR_ELM(80), [OVR_LBL(81)] = OVR_ELM(81),
1832 [OVR_LBL(82)] = OVR_ELM(82), [OVR_LBL(83)] = OVR_ELM(83),
1833 [OVR_LBL(84)] = OVR_ELM(84), [OVR_LBL(85)] = OVR_ELM(85),
1834 [OVR_LBL(86)] = OVR_ELM(86), [OVR_LBL(87)] = OVR_ELM(87),
1835 [OVR_LBL(88)] = OVR_ELM(88), [OVR_LBL(89)] = OVR_ELM(89),
1836 [OVR_LBL(90)] = OVR_ELM(90), [OVR_LBL(91)] = OVR_ELM(91),
1837 [OVR_LBL(92)] = OVR_ELM(92), [OVR_LBL(93)] = OVR_ELM(93),
1838 [OVR_LBL(94)] = OVR_ELM(94), [OVR_LBL(95)] = OVR_ELM(95),
1839 [OVR_LBL(96)] = OVR_ELM(96), [OVR_LBL(97)] = OVR_ELM(97),
1840 [OVR_LBL(98)] = OVR_ELM(98), [OVR_LBL(99)] = OVR_ELM(99),
1841 [OVR_LBL(100)] = OVR_ELM(100), [OVR_LBL(101)] = OVR_ELM(101),
1842 [OVR_LBL(102)] = OVR_ELM(102), [OVR_LBL(103)] = OVR_ELM(103),
1843 [OVR_LBL(104)] = OVR_ELM(104), [OVR_LBL(105)] = OVR_ELM(105),
1844 [OVR_LBL(106)] = OVR_ELM(106), [OVR_LBL(107)] = OVR_ELM(107),
1845 [OVR_LBL(108)] = OVR_ELM(108), [OVR_LBL(109)] = OVR_ELM(109),
1846 [OVR_LBL(110)] = OVR_ELM(110), [OVR_LBL(111)] = OVR_ELM(111),
1847 [OVR_LBL(112)] = OVR_ELM(112), [OVR_LBL(113)] = OVR_ELM(113),
1848 [OVR_LBL(114)] = OVR_ELM(114), [OVR_LBL(115)] = OVR_ELM(115),
1849 [OVR_LBL(116)] = OVR_ELM(116), [OVR_LBL(117)] = OVR_ELM(117),
1850 [OVR_LBL(118)] = OVR_ELM(118), [OVR_LBL(119)] = OVR_ELM(119),
1851 [OVR_LBL(120)] = OVR_ELM(120), [OVR_LBL(121)] = OVR_ELM(121),
1852 [OVR_LBL(122)] = OVR_ELM(122), [OVR_LBL(123)] = OVR_ELM(123),
1853 [OVR_LBL(124)] = OVR_ELM(124), [OVR_LBL(125)] = OVR_ELM(125),
1854 [OVR_LBL(126)] = OVR_ELM(126), [OVR_LBL(127)] = OVR_ELM(127),
1855 [OVR_LBL(128)] = OVR_ELM(128), [OVR_LBL(129)] = OVR_ELM(129),
1856 [OVR_LBL(130)] = OVR_ELM(130), [OVR_LBL(131)] = OVR_ELM(131),
1857 [OVR_LBL(132)] = OVR_ELM(132), [OVR_LBL(133)] = OVR_ELM(133),
1858 [OVR_LBL(134)] = OVR_ELM(134), [OVR_LBL(135)] = OVR_ELM(135),
1859 [OVR_LBL(136)] = OVR_ELM(136), [OVR_LBL(137)] = OVR_ELM(137),
1860 [OVR_LBL(138)] = OVR_ELM(138), [OVR_LBL(139)] = OVR_ELM(139),
1861 [OVR_LBL(140)] = OVR_ELM(140), [OVR_LBL(141)] = OVR_ELM(141),
1862 [OVR_LBL(142)] = OVR_ELM(142), [OVR_LBL(143)] = OVR_ELM(143),
1863 [OVR_LBL(144)] = OVR_ELM(144), [OVR_LBL(145)] = OVR_ELM(145),
1864 [OVR_LBL(146)] = OVR_ELM(146), [OVR_LBL(147)] = OVR_ELM(147),
1865 [OVR_LBL(148)] = OVR_ELM(148), [OVR_LBL(149)] = OVR_ELM(149),
1866 [OVR_LBL(150)] = OVR_ELM(150), [OVR_LBL(151)] = OVR_ELM(151),
1867 [OVR_LBL(152)] = OVR_ELM(152), [OVR_LBL(153)] = OVR_ELM(153),
1868 [OVR_LBL(154)] = OVR_ELM(154), [OVR_LBL(155)] = OVR_ELM(155),
1869 [OVR_LBL(156)] = OVR_ELM(156), [OVR_LBL(157)] = OVR_ELM(157),
1870 [OVR_LBL(158)] = OVR_ELM(158), [OVR_LBL(159)] = OVR_ELM(159),
1871 };
1872
1873 /* ======================================================================== */
1874
1875 /* return true if this is chip revision A */
1876 int is_ax(struct hfi1_devdata *dd)
1877 {
1878         u8 chip_rev_minor =
1879                 dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
1880                         & CCE_REVISION_CHIP_REV_MINOR_MASK;
1881         return (chip_rev_minor & 0xf0) == 0;
1882 }
1883
1884 /* return true if this is chip revision B */
1885 int is_bx(struct hfi1_devdata *dd)
1886 {
1887         u8 chip_rev_minor =
1888                 dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
1889                         & CCE_REVISION_CHIP_REV_MINOR_MASK;
1890         return (chip_rev_minor & 0xf0) == 0x10;
1891 }
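/*
 * Illustrative example: the minor revision field encodes the chip step in
 * its high nibble, so a chip_rev_minor of 0x01 makes is_ax() true, while
 * 0x12 makes is_bx() true and is_ax() false.
 */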
1892
1893 /*
1894  * Append string s to buffer buf.  Arguments curp and lenp are the current
1895  * position and remaining length, respectively.
1896  *
1897  * return 0 on success, 1 on out of room
1898  */
1899 static int append_str(char *buf, char **curp, int *lenp, const char *s)
1900 {
1901         char *p = *curp;
1902         int len = *lenp;
1903         int result = 0; /* success */
1904         char c;
1905
1906         /* add a comma if this is not the first entry in the buffer */
1907         if (p != buf) {
1908                 if (len == 0) {
1909                         result = 1; /* out of room */
1910                         goto done;
1911                 }
1912                 *p++ = ',';
1913                 len--;
1914         }
1915
1916         /* copy the string */
1917         while ((c = *s++) != 0) {
1918                 if (len == 0) {
1919                         result = 1; /* out of room */
1920                         goto done;
1921                 }
1922                 *p++ = c;
1923                 len--;
1924         }
1925
1926 done:
1927         /* write return values */
1928         *curp = p;
1929         *lenp = len;
1930
1931         return result;
1932 }
1933
1934 /*
1935  * Using the given flag table, print a comma separated string into
1936  * the buffer.  End in '*' if the buffer is too short.
1937  */
1938 static char *flag_string(char *buf, int buf_len, u64 flags,
1939                                 struct flag_table *table, int table_size)
1940 {
1941         char extra[32];
1942         char *p = buf;
1943         int len = buf_len;
1944         int no_room = 0;
1945         int i;
1946
1947         /* make sure there are at least 2 bytes so we can form "*" plus a nul */
1948         if (len < 2)
1949                 return "";
1950
1951         len--;  /* leave room for a nul */
1952         for (i = 0; i < table_size; i++) {
1953                 if (flags & table[i].flag) {
1954                         no_room = append_str(buf, &p, &len, table[i].str);
1955                         if (no_room)
1956                                 break;
1957                         flags &= ~table[i].flag;
1958                 }
1959         }
1960
1961         /* any undocumented bits left? */
1962         if (!no_room && flags) {
1963                 snprintf(extra, sizeof(extra), "bits 0x%llx", flags);
1964                 no_room = append_str(buf, &p, &len, extra);
1965         }
1966
1967         /* add '*' if we ran out of room */
1968         if (no_room) {
1969                 /* may need to back up to add space for a '*' */
1970                 if (len == 0)
1971                         --p;
1972                 *p++ = '*';
1973         }
1974
1975         /* add final nul - space already allocated above */
1976         *p = 0;
1977         return buf;
1978 }
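/*
 * Usage sketch (the table contents are hypothetical): given flag_table
 * entries mapping 0x1 to "BitA" and 0x2 to "BitB",
 *
 *   char buf[96];
 *   flag_string(buf, sizeof(buf), 0x7, tbl, ARRAY_SIZE(tbl));
 *
 * produces "BitA,BitB,bits 0x4": named bits first, any undocumented
 * leftover bits in hex, and a trailing '*' only if the buffer was too
 * small to hold everything.
 */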
1979
1980 /* first 8 CCE error interrupt source names */
1981 static const char * const cce_misc_names[] = {
1982         "CceErrInt",            /* 0 */
1983         "RxeErrInt",            /* 1 */
1984         "MiscErrInt",           /* 2 */
1985         "Reserved3",            /* 3 */
1986         "PioErrInt",            /* 4 */
1987         "SDmaErrInt",           /* 5 */
1988         "EgressErrInt",         /* 6 */
1989         "TxeErrInt"             /* 7 */
1990 };
1991
1992 /*
1993  * Return the miscellaneous error interrupt name.
1994  */
1995 static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
1996 {
1997         if (source < ARRAY_SIZE(cce_misc_names))
1998                 strncpy(buf, cce_misc_names[source], bsize);
1999         else
2000                 snprintf(buf,
2001                         bsize,
2002                         "Reserved%u",
2003                         source + IS_GENERAL_ERR_START);
2004
2005         return buf;
2006 }
2007
2008 /*
2009  * Return the SDMA engine error interrupt name.
2010  */
2011 static char *is_sdma_eng_err_name(char *buf, size_t bsize, unsigned int source)
2012 {
2013         snprintf(buf, bsize, "SDmaEngErrInt%u", source);
2014         return buf;
2015 }
2016
2017 /*
2018  * Return the send context error interrupt name.
2019  */
2020 static char *is_sendctxt_err_name(char *buf, size_t bsize, unsigned int source)
2021 {
2022         snprintf(buf, bsize, "SendCtxtErrInt%u", source);
2023         return buf;
2024 }
2025
2026 static const char * const various_names[] = {
2027         "PbcInt",
2028         "GpioAssertInt",
2029         "Qsfp1Int",
2030         "Qsfp2Int",
2031         "TCritInt"
2032 };
2033
2034 /*
2035  * Return the various interrupt name.
2036  */
2037 static char *is_various_name(char *buf, size_t bsize, unsigned int source)
2038 {
2039         if (source < ARRAY_SIZE(various_names))
2040                 strncpy(buf, various_names[source], bsize);
2041         else
2042                 snprintf(buf, bsize, "Reserved%u", source + IS_VARIOUS_START);
2043         return buf;
2044 }
2045
2046 /*
2047  * Return the DC interrupt name.
2048  */
2049 static char *is_dc_name(char *buf, size_t bsize, unsigned int source)
2050 {
2051         static const char * const dc_int_names[] = {
2052                 "common",
2053                 "lcb",
2054                 "8051",
2055                 "lbm"   /* local block merge */
2056         };
2057
2058         if (source < ARRAY_SIZE(dc_int_names))
2059                 snprintf(buf, bsize, "dc_%s_int", dc_int_names[source]);
2060         else
2061                 snprintf(buf, bsize, "DCInt%u", source);
2062         return buf;
2063 }
2064
2065 static const char * const sdma_int_names[] = {
2066         "SDmaInt",
2067         "SdmaIdleInt",
2068         "SdmaProgressInt",
2069 };
2070
2071 /*
2072  * Return the SDMA engine interrupt name.
2073  */
2074 static char *is_sdma_eng_name(char *buf, size_t bsize, unsigned int source)
2075 {
2076         /* what interrupt */
2077         unsigned int what  = source / TXE_NUM_SDMA_ENGINES;
2078         /* which engine */
2079         unsigned int which = source % TXE_NUM_SDMA_ENGINES;
2080
2081         if (likely(what < 3))
2082                 snprintf(buf, bsize, "%s%u", sdma_int_names[what], which);
2083         else
2084                 snprintf(buf, bsize, "Invalid SDMA interrupt %u", source);
2085         return buf;
2086 }
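/*
 * Worked example (assuming TXE_NUM_SDMA_ENGINES is 16): source 17 splits
 * into what = 1 and which = 1, so the name returned is "SdmaIdleInt1";
 * source 2 gives "SDmaInt2".
 */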
2087
2088 /*
2089  * Return the receive available interrupt name.
2090  */
2091 static char *is_rcv_avail_name(char *buf, size_t bsize, unsigned int source)
2092 {
2093         snprintf(buf, bsize, "RcvAvailInt%u", source);
2094         return buf;
2095 }
2096
2097 /*
2098  * Return the receive urgent interrupt name.
2099  */
2100 static char *is_rcv_urgent_name(char *buf, size_t bsize, unsigned int source)
2101 {
2102         snprintf(buf, bsize, "RcvUrgentInt%u", source);
2103         return buf;
2104 }
2105
2106 /*
2107  * Return the send credit interrupt name.
2108  */
2109 static char *is_send_credit_name(char *buf, size_t bsize, unsigned int source)
2110 {
2111         snprintf(buf, bsize, "SendCreditInt%u", source);
2112         return buf;
2113 }
2114
2115 /*
2116  * Return the reserved interrupt name.
2117  */
2118 static char *is_reserved_name(char *buf, size_t bsize, unsigned int source)
2119 {
2120         snprintf(buf, bsize, "Reserved%u", source + IS_RESERVED_START);
2121         return buf;
2122 }
2123
2124 static char *cce_err_status_string(char *buf, int buf_len, u64 flags)
2125 {
2126         return flag_string(buf, buf_len, flags,
2127                         cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags));
2128 }
2129
2130 static char *rxe_err_status_string(char *buf, int buf_len, u64 flags)
2131 {
2132         return flag_string(buf, buf_len, flags,
2133                         rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags));
2134 }
2135
2136 static char *misc_err_status_string(char *buf, int buf_len, u64 flags)
2137 {
2138         return flag_string(buf, buf_len, flags, misc_err_status_flags,
2139                         ARRAY_SIZE(misc_err_status_flags));
2140 }
2141
2142 static char *pio_err_status_string(char *buf, int buf_len, u64 flags)
2143 {
2144         return flag_string(buf, buf_len, flags,
2145                         pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags));
2146 }
2147
2148 static char *sdma_err_status_string(char *buf, int buf_len, u64 flags)
2149 {
2150         return flag_string(buf, buf_len, flags,
2151                         sdma_err_status_flags,
2152                         ARRAY_SIZE(sdma_err_status_flags));
2153 }
2154
2155 static char *egress_err_status_string(char *buf, int buf_len, u64 flags)
2156 {
2157         return flag_string(buf, buf_len, flags,
2158                 egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags));
2159 }
2160
2161 static char *egress_err_info_string(char *buf, int buf_len, u64 flags)
2162 {
2163         return flag_string(buf, buf_len, flags,
2164                 egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags));
2165 }
2166
2167 static char *send_err_status_string(char *buf, int buf_len, u64 flags)
2168 {
2169         return flag_string(buf, buf_len, flags,
2170                         send_err_status_flags,
2171                         ARRAY_SIZE(send_err_status_flags));
2172 }
2173
2174 static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2175 {
2176         char buf[96];
2177
2178         /*
2179          * For most of these errors, there is nothing that can be done except
2180          * report or record it.
2181          */
2182         dd_dev_info(dd, "CCE Error: %s\n",
2183                 cce_err_status_string(buf, sizeof(buf), reg));
2184
2185         if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK) &&
2186             is_ax(dd) && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) {
2187                 /* this error requires a manual drop into SPC freeze mode */
2188                 /* then a fix up */
2189                 start_freeze_handling(dd->pport, FREEZE_SELF);
2190         }
2191 }
2192
2193 /*
2194  * Check counters for receive errors that do not have an interrupt
2195  * associated with them.
2196  */
2197 #define RCVERR_CHECK_TIME 10
2198 static void update_rcverr_timer(unsigned long opaque)
2199 {
2200         struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
2201         struct hfi1_pportdata *ppd = dd->pport;
2202         u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2203
2204         if (dd->rcv_ovfl_cnt < cur_ovfl_cnt &&
2205                 ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
2206                 dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
2207                 set_link_down_reason(ppd,
2208                   OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
2209                         OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
2210                 queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
2211         }
2212         dd->rcv_ovfl_cnt = cur_ovfl_cnt;
2213
2214         mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2215 }
2216
2217 static int init_rcverr(struct hfi1_devdata *dd)
2218 {
2219         setup_timer(&dd->rcverr_timer, update_rcverr_timer, (unsigned long)dd);
2220         /* Assume the hardware counter has been reset */
2221         dd->rcv_ovfl_cnt = 0;
2222         return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2223 }
2224
2225 static void free_rcverr(struct hfi1_devdata *dd)
2226 {
2227         if (dd->rcverr_timer.data)
2228                 del_timer_sync(&dd->rcverr_timer);
2229         dd->rcverr_timer.data = 0;
2230 }
2231
2232 static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2233 {
2234         char buf[96];
2235
2236         dd_dev_info(dd, "Receive Error: %s\n",
2237                 rxe_err_status_string(buf, sizeof(buf), reg));
2238
2239         if (reg & ALL_RXE_FREEZE_ERR) {
2240                 int flags = 0;
2241
2242                 /*
2243                  * Freeze mode recovery is disabled for the errors
2244                  * in RXE_FREEZE_ABORT_MASK
2245                  */
2246                 if (is_ax(dd) && (reg & RXE_FREEZE_ABORT_MASK))
2247                         flags = FREEZE_ABORT;
2248
2249                 start_freeze_handling(dd->pport, flags);
2250         }
2251 }
2252
2253 static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2254 {
2255         char buf[96];
2256
2257         dd_dev_info(dd, "Misc Error: %s",
2258                 misc_err_status_string(buf, sizeof(buf), reg));
2259 }
2260
2261 static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2262 {
2263         char buf[96];
2264
2265         dd_dev_info(dd, "PIO Error: %s\n",
2266                 pio_err_status_string(buf, sizeof(buf), reg));
2267
2268         if (reg & ALL_PIO_FREEZE_ERR)
2269                 start_freeze_handling(dd->pport, 0);
2270 }
2271
2272 static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2273 {
2274         char buf[96];
2275
2276         dd_dev_info(dd, "SDMA Error: %s\n",
2277                 sdma_err_status_string(buf, sizeof(buf), reg));
2278
2279         if (reg & ALL_SDMA_FREEZE_ERR)
2280                 start_freeze_handling(dd->pport, 0);
2281 }
2282
2283 static void count_port_inactive(struct hfi1_devdata *dd)
2284 {
2285         struct hfi1_pportdata *ppd = dd->pport;
2286
2287         if (ppd->port_xmit_discards < ~(u64)0)
2288                 ppd->port_xmit_discards++;
2289 }
2290
2291 /*
2292  * We have had a "disallowed packet" error during egress. Determine the
2293  * integrity check which failed, and update the relevant error counter, etc.
2294  *
2295  * Note that the SEND_EGRESS_ERR_INFO register has only a single
2296  * bit of state per integrity check, and so we can miss the reason for an
2297  * egress error if more than one packet fails the same integrity check
2298  * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO.
2299  */
2300 static void handle_send_egress_err_info(struct hfi1_devdata *dd)
2301 {
2302         struct hfi1_pportdata *ppd = dd->pport;
2303         u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */
2304         u64 info = read_csr(dd, SEND_EGRESS_ERR_INFO);
2305         char buf[96];
2306
2307         /* clear down all observed info as quickly as possible after read */
2308         write_csr(dd, SEND_EGRESS_ERR_INFO, info);
2309
2310         dd_dev_info(dd,
2311                 "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
2312                 info, egress_err_info_string(buf, sizeof(buf), info), src);
2313
2314         /* Eventually add other counters for each bit */
2315
2316         if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) {
2317                 if (ppd->port_xmit_discards < ~(u64)0)
2318                         ppd->port_xmit_discards++;
2319         }
2320 }
2321
2322 /*
2323  * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2324  * register. Does it represent a 'port inactive' error?
2325  */
2326 static inline int port_inactive_err(u64 posn)
2327 {
2328         return (posn >= SEES(TX_LINKDOWN) &&
2329                 posn <= SEES(TX_INCORRECT_LINK_STATE));
2330 }
2331
2332 /*
2333  * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2334  * register. Does it represent a 'disallowed packet' error?
2335  */
2336 static inline int disallowed_pkt_err(u64 posn)
2337 {
2338         return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) &&
2339                 posn <= SEES(TX_SDMA15_DISALLOWED_PACKET));
2340 }
2341
2342 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2343 {
2344         u64 reg_copy = reg, handled = 0;
2345         char buf[96];
2346
2347         if (reg & ALL_TXE_EGRESS_FREEZE_ERR)
2348                 start_freeze_handling(dd->pport, 0);
2349         if (is_ax(dd) && (reg &
2350                     SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK)
2351                     && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
2352                 start_freeze_handling(dd->pport, 0);
2353
2354         while (reg_copy) {
2355                 int posn = fls64(reg_copy);
2356                 /*
2357                  * fls64() returns a 1-based offset, but we generally
2358                  * want 0-based offsets.
2359                  */
2360                 int shift = posn - 1;
2361
2362                 if (port_inactive_err(shift)) {
2363                         count_port_inactive(dd);
2364                         handled |= (1ULL << shift);
2365                 } else if (disallowed_pkt_err(shift)) {
2366                         handle_send_egress_err_info(dd);
2367                         handled |= (1ULL << shift);
2368                 }
2369                 clear_bit(shift, (unsigned long *)&reg_copy);
2370         }
2371
2372         reg &= ~handled;
2373
2374         if (reg)
2375                 dd_dev_info(dd, "Egress Error: %s\n",
2376                         egress_err_status_string(buf, sizeof(buf), reg));
2377 }
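/*
 * Illustrative walk-through of the loop above: with reg_copy == 0x12
 * (bits 4 and 1 set), fls64() first returns 5, so bit position 4 is
 * handled and cleared; the next pass sees 0x02, fls64() returns 2, and
 * bit position 1 is handled.  Any bits not recognized as port-inactive or
 * disallowed-packet errors are left in reg and reported by the
 * dd_dev_info() just above.
 */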
2378
2379 static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2380 {
2381         char buf[96];
2382
2383         dd_dev_info(dd, "Send Error: %s\n",
2384                 send_err_status_string(buf, sizeof(buf), reg));
2385
2386 }
2387
2388 /*
2389  * The maximum number of times the error clear down will loop before
2390  * blocking a repeating error.  This value is arbitrary.
2391  */
2392 #define MAX_CLEAR_COUNT 20
2393
2394 /*
2395  * Clear and handle an error register.  All error interrupts are funneled
2396  * through here to have a central location to correctly handle single-
2397  * or multi-shot errors.
2398  *
2399  * For non per-context registers, call this routine with a context value
2400  * of 0 so the per-context offset is zero.
2401  *
2402  * If the handler loops too many times, assume that something is wrong
2403  * and can't be fixed, so mask the error bits.
2404  */
2405 static void interrupt_clear_down(struct hfi1_devdata *dd,
2406                                  u32 context,
2407                                  const struct err_reg_info *eri)
2408 {
2409         u64 reg;
2410         u32 count;
2411
2412         /* read in a loop until no more errors are seen */
2413         count = 0;
2414         while (1) {
2415                 reg = read_kctxt_csr(dd, context, eri->status);
2416                 if (reg == 0)
2417                         break;
2418                 write_kctxt_csr(dd, context, eri->clear, reg);
2419                 if (likely(eri->handler))
2420                         eri->handler(dd, context, reg);
2421                 count++;
2422                 if (count > MAX_CLEAR_COUNT) {
2423                         u64 mask;
2424
2425                         dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n",
2426                                 eri->desc, reg);
2427                         /*
2428                          * Read-modify-write so any other masked bits
2429                          * remain masked.
2430                          */
2431                         mask = read_kctxt_csr(dd, context, eri->mask);
2432                         mask &= ~reg;
2433                         write_kctxt_csr(dd, context, eri->mask, mask);
2434                         break;
2435                 }
2436         }
2437 }
2438
2439 /*
2440  * CCE block "misc" interrupt.  Source is < 16.
2441  */
2442 static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source)
2443 {
2444         const struct err_reg_info *eri = &misc_errs[source];
2445
2446         if (eri->handler) {
2447                 interrupt_clear_down(dd, 0, eri);
2448         } else {
2449                 dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n",
2450                         source);
2451         }
2452 }
2453
2454 static char *send_context_err_status_string(char *buf, int buf_len, u64 flags)
2455 {
2456         return flag_string(buf, buf_len, flags,
2457                         sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags));
2458 }
2459
2460 /*
2461  * Send context error interrupt.  Source (hw_context) is < 160.
2462  *
2463  * All send context errors cause the send context to halt.  The normal
2464  * clear-down mechanism cannot be used because we cannot clear the
2465  * error bits until several other long-running items are done first.
2466  * This is OK because with the context halted, nothing else is going
2467  * to happen on it anyway.
2468  */
2469 static void is_sendctxt_err_int(struct hfi1_devdata *dd,
2470                                 unsigned int hw_context)
2471 {
2472         struct send_context_info *sci;
2473         struct send_context *sc;
2474         char flags[96];
2475         u64 status;
2476         u32 sw_index;
2477
2478         sw_index = dd->hw_to_sw[hw_context];
2479         if (sw_index >= dd->num_send_contexts) {
2480                 dd_dev_err(dd,
2481                         "out of range sw index %u for send context %u\n",
2482                         sw_index, hw_context);
2483                 return;
2484         }
2485         sci = &dd->send_contexts[sw_index];
2486         sc = sci->sc;
2487         if (!sc) {
2488                 dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
2489                         sw_index, hw_context);
2490                 return;
2491         }
2492
2493         /* tell the software that a halt has begun */
2494         sc_stop(sc, SCF_HALTED);
2495
2496         status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS);
2497
2498         dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context,
2499                 send_context_err_status_string(flags, sizeof(flags), status));
2500
2501         if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK)
2502                 handle_send_egress_err_info(dd);
2503
2504         /*
2505          * Automatically restart halted kernel contexts out of interrupt
2506          * context.  User contexts must ask the driver to restart the context.
2507          */
2508         if (sc->type != SC_USER)
2509                 queue_work(dd->pport->hfi1_wq, &sc->halt_work);
2510 }
2511
2512 static void handle_sdma_eng_err(struct hfi1_devdata *dd,
2513                                 unsigned int source, u64 status)
2514 {
2515         struct sdma_engine *sde;
2516
2517         sde = &dd->per_sdma[source];
2518 #ifdef CONFIG_SDMA_VERBOSITY
2519         dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2520                    slashstrip(__FILE__), __LINE__, __func__);
2521         dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n",
2522                    sde->this_idx, source, (unsigned long long)status);
2523 #endif
2524         sdma_engine_error(sde, status);
2525 }
2526
2527 /*
2528  * CCE block SDMA error interrupt.  Source is < 16.
2529  */
2530 static void is_sdma_eng_err_int(struct hfi1_devdata *dd, unsigned int source)
2531 {
2532 #ifdef CONFIG_SDMA_VERBOSITY
2533         struct sdma_engine *sde = &dd->per_sdma[source];
2534
2535         dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2536                    slashstrip(__FILE__), __LINE__, __func__);
2537         dd_dev_err(dd, "CONFIG SDMA(%u) source: %u\n", sde->this_idx,
2538                    source);
2539         sdma_dumpstate(sde);
2540 #endif
2541         interrupt_clear_down(dd, source, &sdma_eng_err);
2542 }
2543
2544 /*
2545  * CCE block "various" interrupt.  Source is < 8.
2546  */
2547 static void is_various_int(struct hfi1_devdata *dd, unsigned int source)
2548 {
2549         const struct err_reg_info *eri = &various_err[source];
2550
2551         /*
2552          * TCritInt cannot go through interrupt_clear_down()
2553          * because it is not a second tier interrupt. The handler
2554          * should be called directly.
2555          */
2556         if (source == TCRIT_INT_SOURCE)
2557                 handle_temp_err(dd);
2558         else if (eri->handler)
2559                 interrupt_clear_down(dd, 0, eri);
2560         else
2561                 dd_dev_info(dd,
2562                         "%s: Unimplemented/reserved interrupt %d\n",
2563                         __func__, source);
2564 }
2565
2566 static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
2567 {
2568         /* source is always zero */
2569         struct hfi1_pportdata *ppd = dd->pport;
2570         unsigned long flags;
2571         u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
2572
2573         if (reg & QSFP_HFI0_MODPRST_N) {
2574
2575                 dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n",
2576                                 __func__);
2577
2578                 if (!qsfp_mod_present(ppd)) {
2579                         ppd->driver_link_ready = 0;
2580                         /*
2581                          * Cable removed, reset all our information about the
2582                          * cache and cable capabilities
2583                          */
2584
2585                         spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2586                         /*
2587                          * We don't set cache_refresh_required here as we expect
2588                          * an interrupt when a cable is inserted
2589                          */
2590                         ppd->qsfp_info.cache_valid = 0;
2591                         ppd->qsfp_info.qsfp_interrupt_functional = 0;
2592                         spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2593                                                 flags);
2594                         write_csr(dd,
2595                                         dd->hfi1_id ?
2596                                                 ASIC_QSFP2_INVERT :
2597                                                 ASIC_QSFP1_INVERT,
2598                                 qsfp_int_mgmt);
2599                         if (ppd->host_link_state == HLS_DN_POLL) {
2600                                 /*
2601                                  * The link is still in POLL. This means
2602                                  * that the normal link down processing
2603                                  * will not happen. We have to do it here
2604                                  * before turning the DC off.
2605                                  */
2606                                 queue_work(ppd->hfi1_wq, &ppd->link_down_work);
2607                         }
2608                 } else {
2609                         spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2610                         ppd->qsfp_info.cache_valid = 0;
2611                         ppd->qsfp_info.cache_refresh_required = 1;
2612                         spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2613                                                 flags);
2614
2615                         qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N;
2616                         write_csr(dd,
2617                                         dd->hfi1_id ?
2618                                                 ASIC_QSFP2_INVERT :
2619                                                 ASIC_QSFP1_INVERT,
2620                                 qsfp_int_mgmt);
2621                 }
2622         }
2623
2624         if (reg & QSFP_HFI0_INT_N) {
2625
2626                 dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n",
2627                                 __func__);
2628                 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2629                 ppd->qsfp_info.check_interrupt_flags = 1;
2630                 ppd->qsfp_info.qsfp_interrupt_functional = 1;
2631                 spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
2632         }
2633
2634         /* Schedule the QSFP work only if there is a cable attached. */
2635         if (qsfp_mod_present(ppd))
2636                 queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work);
2637 }
2638
2639 static int request_host_lcb_access(struct hfi1_devdata *dd)
2640 {
2641         int ret;
2642
2643         ret = do_8051_command(dd, HCMD_MISC,
2644                 (u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2645                 NULL);
2646         if (ret != HCMD_SUCCESS) {
2647                 dd_dev_err(dd, "%s: command failed with error %d\n",
2648                         __func__, ret);
2649         }
2650         return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2651 }
2652
2653 static int request_8051_lcb_access(struct hfi1_devdata *dd)
2654 {
2655         int ret;
2656
2657         ret = do_8051_command(dd, HCMD_MISC,
2658                 (u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2659                 NULL);
2660         if (ret != HCMD_SUCCESS) {
2661                 dd_dev_err(dd, "%s: command failed with error %d\n",
2662                         __func__, ret);
2663         }
2664         return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2665 }
2666
2667 /*
2668  * Set the LCB selector - allow host access.  The DCC selector always
2669  * points to the host.
2670  */
2671 static inline void set_host_lcb_access(struct hfi1_devdata *dd)
2672 {
2673         write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2674                                 DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK
2675                                 | DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
2676 }
2677
2678 /*
2679  * Clear the LCB selector - allow 8051 access.  The DCC selector always
2680  * points to the host.
2681  */
2682 static inline void set_8051_lcb_access(struct hfi1_devdata *dd)
2683 {
2684         write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2685                                 DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
2686 }
2687
2688 /*
2689  * Acquire LCB access from the 8051.  If the host already has access,
2690  * just increment a counter.  Otherwise, inform the 8051 that the
2691  * host is taking access.
2692  *
2693  * Returns:
2694  *      0 on success
2695  *      -EBUSY if the 8051 has control and cannot be disturbed
2696  *      -errno if unable to acquire access from the 8051
2697  */
2698 int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2699 {
2700         struct hfi1_pportdata *ppd = dd->pport;
2701         int ret = 0;
2702
2703         /*
2704          * Use the host link state lock so the operation of this routine
2705          * { link state check, selector change, count increment } can occur
2706          * as a unit against a link state change.  Otherwise there is a
2707          * race between the state change and the count increment.
2708          */
2709         if (sleep_ok) {
2710                 mutex_lock(&ppd->hls_lock);
2711         } else {
2712                 while (!mutex_trylock(&ppd->hls_lock))
2713                         udelay(1);
2714         }
2715
2716         /* this access is valid only when the link is up */
2717         if ((ppd->host_link_state & HLS_UP) == 0) {
2718                 dd_dev_info(dd, "%s: link state %s not up\n",
2719                         __func__, link_state_name(ppd->host_link_state));
2720                 ret = -EBUSY;
2721                 goto done;
2722         }
2723
2724         if (dd->lcb_access_count == 0) {
2725                 ret = request_host_lcb_access(dd);
2726                 if (ret) {
2727                         dd_dev_err(dd,
2728                                 "%s: unable to acquire LCB access, err %d\n",
2729                                 __func__, ret);
2730                         goto done;
2731                 }
2732                 set_host_lcb_access(dd);
2733         }
2734         dd->lcb_access_count++;
2735 done:
2736         mutex_unlock(&ppd->hls_lock);
2737         return ret;
2738 }
2739
2740 /*
2741  * Release LCB access by decrementing the use count.  If the count is moving
2742  * from 1 to 0, inform 8051 that it has control back.
2743  *
2744  * Returns:
2745  *      0 on success
2746  *      -errno if unable to release access to the 8051
2747  */
2748 int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2749 {
2750         int ret = 0;
2751
2752         /*
2753          * Use the host link state lock because the acquire needed it.
2754          * Here, we only need to keep { selector change, count decrement }
2755          * as a unit.
2756          */
2757         if (sleep_ok) {
2758                 mutex_lock(&dd->pport->hls_lock);
2759         } else {
2760                 while (!mutex_trylock(&dd->pport->hls_lock))
2761                         udelay(1);
2762         }
2763
2764         if (dd->lcb_access_count == 0) {
2765                 dd_dev_err(dd, "%s: LCB access count is zero.  Skipping.\n",
2766                         __func__);
2767                 goto done;
2768         }
2769
2770         if (dd->lcb_access_count == 1) {
2771                 set_8051_lcb_access(dd);
2772                 ret = request_8051_lcb_access(dd);
2773                 if (ret) {
2774                         dd_dev_err(dd,
2775                                 "%s: unable to release LCB access, err %d\n",
2776                                 __func__, ret);
2777                         /* restore host access if the grant didn't work */
2778                         set_host_lcb_access(dd);
2779                         goto done;
2780                 }
2781         }
2782         dd->lcb_access_count--;
2783 done:
2784         mutex_unlock(&dd->pport->hls_lock);
2785         return ret;
2786 }
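
/*
 * Usage sketch (illustrative; not a caller in this file): host reads of
 * LCB CSRs should be bracketed by the acquire/release pair above so the
 * 8051 is not disturbed while it owns the block.
 *
 *	ret = acquire_lcb_access(dd, 1);	(1 == sleeping is allowed)
 *	if (ret == 0) {
 *		reg = read_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT);
 *		release_lcb_access(dd, 1);
 *	}
 *
 * Pass sleep_ok == 0 from contexts that cannot sleep; the routines then
 * spin on the hls_lock instead of blocking.
 */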
2787
2788 /*
2789  * Initialize LCB access variables and state.  Called during driver load,
2790  * after most of the initialization is finished.
2791  *
2792  * The DC default is LCB access on for the host.  The driver defaults to
2793  * leaving access to the 8051.  Assign access now - this constrains the call
2794  * to this routine to be after all LCB set-up is done.  In particular, after
2795  * hfi1_init_dd() -> set_up_interrupts() -> clear_all_interrupts()
2796  */
2797 static void init_lcb_access(struct hfi1_devdata *dd)
2798 {
2799         dd->lcb_access_count = 0;
2800 }
2801
2802 /*
2803  * Write a response back to an 8051 request.
2804  */
2805 static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
2806 {
2807         write_csr(dd, DC_DC8051_CFG_EXT_DEV_0,
2808                 DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK
2809                 | (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT
2810                 | (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
2811 }
2812
2813 /*
2814  * Handle requests from the 8051.
2815  */
2816 static void handle_8051_request(struct hfi1_devdata *dd)
2817 {
2818         u64 reg;
2819         u16 data;
2820         u8 type;
2821
2822         reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
2823         if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
2824                 return; /* no request */
2825
2826         /* zero out COMPLETED so the response is seen */
2827         write_csr(dd, DC_DC8051_CFG_EXT_DEV_0, 0);
2828
2829         /* extract request details */
2830         type = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_SHIFT)
2831                         & DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_MASK;
2832         data = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT)
2833                         & DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_MASK;
2834
2835         switch (type) {
2836         case HREQ_LOAD_CONFIG:
2837         case HREQ_SAVE_CONFIG:
2838         case HREQ_READ_CONFIG:
2839         case HREQ_SET_TX_EQ_ABS:
2840         case HREQ_SET_TX_EQ_REL:
2841         case HREQ_ENABLE:
2842                 dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
2843                         type);
2844                 hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2845                 break;
2846
2847         case HREQ_CONFIG_DONE:
2848                 hreq_response(dd, HREQ_SUCCESS, 0);
2849                 break;
2850
2851         case HREQ_INTERFACE_TEST:
2852                 hreq_response(dd, HREQ_SUCCESS, data);
2853                 break;
2854
2855         default:
2856                 dd_dev_err(dd, "8051 request: unknown request 0x%x\n", type);
2857                 hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2858                 break;
2859         }
2860 }
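
/*
 * Handshake summary (descriptive): the 8051 raises REQ_NEW in
 * DC_DC8051_CFG_EXT_DEV_1 together with a request type and data field.
 * The host clears COMPLETED in DC_DC8051_CFG_EXT_DEV_0, services the
 * request, then hreq_response() sets COMPLETED again along with a
 * return code and optional response data.  For example, the
 * HREQ_INTERFACE_TEST case above simply echoes the request data back
 * with HREQ_SUCCESS.
 */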
2861
2862 static void write_global_credit(struct hfi1_devdata *dd,
2863                                 u8 vau, u16 total, u16 shared)
2864 {
2865         write_csr(dd, SEND_CM_GLOBAL_CREDIT,
2866                 ((u64)total
2867                         << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
2868                 | ((u64)shared
2869                         << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
2870                 | ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
2871 }
2872
2873 /*
2874  * Set up initial VL15 credits of the remote.  Assumes the rest of
2875  * the CM credit registers are zero from a previous global or credit reset.
2876  */
2877 void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
2878 {
2879         /* leave shared count at zero for both global and VL15 */
2880         write_global_credit(dd, vau, vl15buf, 0);
2881
2882         /* We may need some credits for another VL when sending packets
2883          * with the snoop interface. Dividing it down the middle for VL15
2884          * and VL0 should suffice.
2885          */
2886         if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) {
2887                 write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1)
2888                     << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2889                 write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1)
2890                     << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT);
2891         } else {
2892                 write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
2893                         << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2894         }
2895 }
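
/*
 * Worked example (illustrative numbers): with vl15buf == 0x480, normal
 * operation gives all 0x480 dedicated credits to VL15.  In port snoop
 * mode the buffer is split down the middle instead: 0x240 credits go to
 * VL15 and 0x240 to the data VL via SEND_CM_CREDIT_VL.
 */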
2896
2897 /*
2898  * Zero all credit details from the previous connection and
2899  * reset the CM manager's internal counters.
2900  */
2901 void reset_link_credits(struct hfi1_devdata *dd)
2902 {
2903         int i;
2904
2905         /* remove all previous VL credit limits */
2906         for (i = 0; i < TXE_NUM_DATA_VL; i++)
2907                 write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
2908         write_csr(dd, SEND_CM_CREDIT_VL15, 0);
2909         write_global_credit(dd, 0, 0, 0);
2910         /* reset the CM block */
2911         pio_send_control(dd, PSC_CM_RESET);
2912 }
2913
2914 /* convert a vCU to a CU */
2915 static u32 vcu_to_cu(u8 vcu)
2916 {
2917         return 1 << vcu;
2918 }
2919
2920 /* convert a CU to a vCU */
2921 static u8 cu_to_vcu(u32 cu)
2922 {
2923         return ilog2(cu);
2924 }
2925
2926 /* convert a vAU to an AU */
2927 static u32 vau_to_au(u8 vau)
2928 {
2929         return 8 * (1 << vau);
2930 }
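
/*
 * Worked examples (illustrative): the credit and allocation units are
 * exchanged in encoded (vCU/vAU) form.  vcu_to_cu(2) == 1 << 2 == 4,
 * and cu_to_vcu(4) == ilog2(4) == 2 converts back.  For the allocation
 * unit, vau_to_au(3) == 8 * (1 << 3) == 64 bytes.
 */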
2931
2932 static void set_linkup_defaults(struct hfi1_pportdata *ppd)
2933 {
2934         ppd->sm_trap_qp = 0x0;
2935         ppd->sa_qp = 0x1;
2936 }
2937
2938 /*
2939  * Graceful LCB shutdown.  This leaves the LCB FIFOs in reset.
2940  */
2941 static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
2942 {
2943         u64 reg;
2944
2945         /* clear lcb run: LCB_CFG_RUN.EN = 0 */
2946         write_csr(dd, DC_LCB_CFG_RUN, 0);
2947         /* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
2948         write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
2949                 1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
2950         /* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
2951         dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
2952         reg = read_csr(dd, DCC_CFG_RESET);
2953         write_csr(dd, DCC_CFG_RESET,
2954                 reg
2955                 | (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
2956                 | (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
2957         (void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
2958         if (!abort) {
2959                 udelay(1);    /* must hold for the longer of 16cclks or 20ns */
2960                 write_csr(dd, DCC_CFG_RESET, reg);
2961                 write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
2962         }
2963 }
2964
2965 /*
2966  * This routine should be called after the link has been transitioned to
2967  * OFFLINE (OFFLINE state has the side effect of putting the SerDes into
2968  * reset).
2969  *
2970  * The expectation is that the caller of this routine would have taken
2971  * care of properly transitioning the link into the correct state.
2972  */
2973 static void dc_shutdown(struct hfi1_devdata *dd)
2974 {
2975         unsigned long flags;
2976
2977         spin_lock_irqsave(&dd->dc8051_lock, flags);
2978         if (dd->dc_shutdown) {
2979                 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2980                 return;
2981         }
2982         dd->dc_shutdown = 1;
2983         spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2984         /* Shut down the LCB */
2985         lcb_shutdown(dd, 1);
2986         /* Going to OFFLINE would have caused the 8051 to put the
2987          * SerDes into reset already. We just need to shut down the
2988          * 8051 itself. */
2989         write_csr(dd, DC_DC8051_CFG_RST, 0x1);
2990 }
2991
2992 /* Calling this after the DC has been brought out of reset should not
2993  * do any damage. */
2994 static void dc_start(struct hfi1_devdata *dd)
2995 {
2996         unsigned long flags;
2997         int ret;
2998
2999         spin_lock_irqsave(&dd->dc8051_lock, flags);
3000         if (!dd->dc_shutdown)
3001                 goto done;
3002         spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3003         /* Take the 8051 out of reset */
3004         write_csr(dd, DC_DC8051_CFG_RST, 0ull);
3005         /* Wait until 8051 is ready */
3006         ret = wait_fm_ready(dd, TIMEOUT_8051_START);
3007         if (ret) {
3008                 dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
3009                         __func__);
3010         }
3011         /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
3012         write_csr(dd, DCC_CFG_RESET, 0x10);
3013         /* lcb_shutdown() with abort=1 does not restore these */
3014         write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
3015         spin_lock_irqsave(&dd->dc8051_lock, flags);
3016         dd->dc_shutdown = 0;
3017 done:
3018         spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3019 }
3020
3021 /*
3022  * These LCB adjustments are for the Aurora SerDes core in the FPGA.
3023  */
3024 static void adjust_lcb_for_fpga_serdes(struct hfi1_devdata *dd)
3025 {
3026         u64 rx_radr, tx_radr;
3027         u32 version;
3028
3029         if (dd->icode != ICODE_FPGA_EMULATION)
3030                 return;
3031
3032         /*
3033          * These LCB defaults on emulator _s are good, nothing to do here:
3034          *      LCB_CFG_TX_FIFOS_RADR
3035          *      LCB_CFG_RX_FIFOS_RADR
3036          *      LCB_CFG_LN_DCLK
3037          *      LCB_CFG_IGNORE_LOST_RCLK
3038          */
3039         if (is_emulator_s(dd))
3040                 return;
3041         /* else this is _p */
3042
3043         version = emulator_rev(dd);
3044         if (!is_ax(dd))
3045                 version = 0x2d; /* all B0 use 0x2d or higher settings */
3046
3047         if (version <= 0x12) {
3048                 /* release 0x12 and below */
3049
3050                 /*
3051                  * LCB_CFG_RX_FIFOS_RADR.RST_VAL = 0x9
3052                  * LCB_CFG_RX_FIFOS_RADR.OK_TO_JUMP_VAL = 0x9
3053                  * LCB_CFG_RX_FIFOS_RADR.DO_NOT_JUMP_VAL = 0xa
3054                  */
3055                 rx_radr =
3056                       0xaull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3057                     | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3058                     | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3059                 /*
3060                  * LCB_CFG_TX_FIFOS_RADR.ON_REINIT = 0 (default)
3061                  * LCB_CFG_TX_FIFOS_RADR.RST_VAL = 6
3062                  */
3063                 tx_radr = 6ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3064         } else if (version <= 0x18) {
3065                 /* release 0x13 up to 0x18 */
3066                 /* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3067                 rx_radr =
3068                       0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3069                     | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3070                     | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3071                 tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3072         } else if (version == 0x19) {
3073                 /* release 0x19 */
3074                 /* LCB_CFG_RX_FIFOS_RADR = 0xa99 */
3075                 rx_radr =
3076                       0xAull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3077                     | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3078                     | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3079                 tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3080         } else if (version == 0x1a) {
3081                 /* release 0x1a */
3082                 /* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3083                 rx_radr =
3084                       0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3085                     | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3086                     | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3087                 tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3088                 write_csr(dd, DC_LCB_CFG_LN_DCLK, 1ull);
3089         } else {
3090                 /* release 0x1b and higher */
3091                 /* LCB_CFG_RX_FIFOS_RADR = 0x877 */
3092                 rx_radr =
3093                       0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3094                     | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3095                     | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3096                 tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3097         }
3098
3099         write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr);
3100         /* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */
3101         write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
3102                 DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
3103         write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr);
3104 }
3105
3106 /*
3107  * Handle a SMA idle message
3108  *
3109  * This is a work-queue function outside of the interrupt.
3110  */
3111 void handle_sma_message(struct work_struct *work)
3112 {
3113         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3114                                                         sma_message_work);
3115         struct hfi1_devdata *dd = ppd->dd;
3116         u64 msg;
3117         int ret;
3118
3119         /* msg is bytes 1-4 of the 40-bit idle message - the command
3120          * code is stripped off */
3121         ret = read_idle_sma(dd, &msg);
3122         if (ret)
3123                 return;
3124         dd_dev_info(dd, "%s: SMA message 0x%llx\n", __func__, msg);
3125         /*
3126          * React to the SMA message.  Byte[1] (0 for us) is the command.
3127          */
3128         switch (msg & 0xff) {
3129         case SMA_IDLE_ARM:
3130                 /*
3131                  * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3132                  * State Transitions
3133                  *
3134                  * Only expected in INIT or ARMED, discard otherwise.
3135                  */
3136                 if (ppd->host_link_state & (HLS_UP_INIT | HLS_UP_ARMED))
3137                         ppd->neighbor_normal = 1;
3138                 break;
3139         case SMA_IDLE_ACTIVE:
3140                 /*
3141                  * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3142                  * State Transitions
3143                  *
3144                  * Can activate the node.  Discard otherwise.
3145                  */
3146                 if (ppd->host_link_state == HLS_UP_ARMED
3147                                         && ppd->is_active_optimize_enabled) {
3148                         ppd->neighbor_normal = 1;
3149                         ret = set_link_state(ppd, HLS_UP_ACTIVE);
3150                         if (ret)
3151                                 dd_dev_err(
3152                                         dd,
3153                                         "%s: received Active SMA idle message, couldn't set link to Active\n",
3154                                         __func__);
3155                 }
3156                 break;
3157         default:
3158                 dd_dev_err(dd,
3159                         "%s: received unexpected SMA idle message 0x%llx\n",
3160                         __func__, msg);
3161                 break;
3162         }
3163 }
3164
3165 static void adjust_rcvctrl(struct hfi1_devdata *dd, u64 add, u64 clear)
3166 {
3167         u64 rcvctrl;
3168         unsigned long flags;
3169
3170         spin_lock_irqsave(&dd->rcvctrl_lock, flags);
3171         rcvctrl = read_csr(dd, RCV_CTRL);
3172         rcvctrl |= add;
3173         rcvctrl &= ~clear;
3174         write_csr(dd, RCV_CTRL, rcvctrl);
3175         spin_unlock_irqrestore(&dd->rcvctrl_lock, flags);
3176 }
3177
3178 static inline void add_rcvctrl(struct hfi1_devdata *dd, u64 add)
3179 {
3180         adjust_rcvctrl(dd, add, 0);
3181 }
3182
3183 static inline void clear_rcvctrl(struct hfi1_devdata *dd, u64 clear)
3184 {
3185         adjust_rcvctrl(dd, 0, clear);
3186 }
3187
3188 /*
3189  * Called from all interrupt handlers to start handling an SPC freeze.
3190  */
3191 void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
3192 {
3193         struct hfi1_devdata *dd = ppd->dd;
3194         struct send_context *sc;
3195         int i;
3196
3197         if (flags & FREEZE_SELF)
3198                 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3199
3200         /* enter frozen mode */
3201         dd->flags |= HFI1_FROZEN;
3202
3203         /* notify all SDMA engines that they are going into a freeze */
3204         sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN));
3205
3206         /* do halt pre-handling on all enabled send contexts */
3207         for (i = 0; i < dd->num_send_contexts; i++) {
3208                 sc = dd->send_contexts[i].sc;
3209                 if (sc && (sc->flags & SCF_ENABLED))
3210                         sc_stop(sc, SCF_FROZEN | SCF_HALTED);
3211         }
3212
3213         /* Send contexts are frozen. Notify user space */
3214         hfi1_set_uevent_bits(ppd, _HFI1_EVENT_FROZEN_BIT);
3215
3216         if (flags & FREEZE_ABORT) {
3217                 dd_dev_err(dd,
3218                            "Aborted freeze recovery. Please REBOOT system\n");
3219                 return;
3220         }
3221         /* queue non-interrupt handler */
3222         queue_work(ppd->hfi1_wq, &ppd->freeze_work);
3223 }
3224
3225 /*
3226  * Wait until all 4 sub-blocks indicate that they have frozen or unfrozen,
3227  * depending on the "freeze" parameter.
3228  *
3229  * There is no need to return an error if it times out; our only
3230  * option is to proceed anyway.
3231  */
3232 static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze)
3233 {
3234         unsigned long timeout;
3235         u64 reg;
3236
3237         timeout = jiffies + msecs_to_jiffies(FREEZE_STATUS_TIMEOUT);
3238         while (1) {
3239                 reg = read_csr(dd, CCE_STATUS);
3240                 if (freeze) {
3241                         /* waiting until all indicators are set */
3242                         if ((reg & ALL_FROZE) == ALL_FROZE)
3243                                 return; /* all done */
3244                 } else {
3245                         /* waiting until all indicators are clear */
3246                         if ((reg & ALL_FROZE) == 0)
3247                                 return; /* all done */
3248                 }
3249
3250                 if (time_after(jiffies, timeout)) {
3251                         dd_dev_err(dd,
3252                                 "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
3253                                 freeze ? "" : "un",
3254                                 reg & ALL_FROZE,
3255                                 freeze ? ALL_FROZE : 0ull);
3256                         return;
3257                 }
3258                 usleep_range(80, 120);
3259         }
3260 }
3261
3262 /*
3263  * Do all freeze handling for the RXE block.
3264  */
3265 static void rxe_freeze(struct hfi1_devdata *dd)
3266 {
3267         int i;
3268
3269         /* disable port */
3270         clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3271
3272         /* disable all receive contexts */
3273         for (i = 0; i < dd->num_rcv_contexts; i++)
3274                 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i);
3275 }
3276
3277 /*
3278  * Unfreeze handling for the RXE block - kernel contexts only.
3279  * This will also enable the port.  User contexts will do unfreeze
3280  * handling on a per-context basis as they call into the driver.
3281  *
3282  */
3283 static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
3284 {
3285         int i;
3286
3287         /* enable all kernel contexts */
3288         for (i = 0; i < dd->n_krcv_queues; i++)
3289                 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i);
3290
3291         /* enable port */
3292         add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3293 }
3294
3295 /*
3296  * Non-interrupt SPC freeze handling.
3297  *
3298  * This is a work-queue function outside of the triggering interrupt.
3299  */
3300 void handle_freeze(struct work_struct *work)
3301 {
3302         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3303                                                                 freeze_work);
3304         struct hfi1_devdata *dd = ppd->dd;
3305
3306         /* wait for freeze indicators on all affected blocks */
3307         dd_dev_info(dd, "Entering SPC freeze\n");
3308         wait_for_freeze_status(dd, 1);
3309
3310         /* SPC is now frozen */
3311
3312         /* do send PIO freeze steps */
3313         pio_freeze(dd);
3314
3315         /* do send DMA freeze steps */
3316         sdma_freeze(dd);
3317
3318         /* do send egress freeze steps - nothing to do */
3319
3320         /* do receive freeze steps */
3321         rxe_freeze(dd);
3322
3323         /*
3324          * Unfreeze the hardware - clear the freeze, wait for each
3325          * block's frozen bit to clear, then clear the frozen flag.
3326          */
3327         write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3328         wait_for_freeze_status(dd, 0);
3329
3330         if (is_ax(dd)) {
3331                 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3332                 wait_for_freeze_status(dd, 1);
3333                 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3334                 wait_for_freeze_status(dd, 0);
3335         }
3336
3337         /* do send PIO unfreeze steps for kernel contexts */
3338         pio_kernel_unfreeze(dd);
3339
3340         /* do send DMA unfreeze steps */
3341         sdma_unfreeze(dd);
3342
3343         /* do send egress unfreeze steps - nothing to do */
3344
3345         /* do receive unfreeze steps for kernel contexts */
3346         rxe_kernel_unfreeze(dd);
3347
3348         /*
3349          * The unfreeze procedure touches global device registers when
3350          * it disables and re-enables RXE. Mark the device unfrozen
3351          * after all that is done so other parts of the driver waiting
3352          * for the device to unfreeze don't do things out of order.
3353          *
3354          * The above implies that the meaning of HFI1_FROZEN flag is
3355          * "Device has gone into freeze mode and freeze mode handling
3356          * is still in progress."
3357          *
3358          * The flag will be removed when freeze mode processing has
3359          * completed.
3360          */
3361         dd->flags &= ~HFI1_FROZEN;
3362         wake_up(&dd->event_queue);
3363
3364         /* no longer frozen */
3365         dd_dev_err(dd, "Exiting SPC freeze\n");
3366 }
3367
3368 /*
3369  * Handle a link up interrupt from the 8051.
3370  *
3371  * This is a work-queue function outside of the interrupt.
3372  */
3373 void handle_link_up(struct work_struct *work)
3374 {
3375         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3376                                                                 link_up_work);
3377         set_link_state(ppd, HLS_UP_INIT);
3378
3379         /* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
3380         read_ltp_rtt(ppd->dd);
3381         /*
3382          * OPA specifies that certain counters are cleared on a transition
3383          * to link up, so do that.
3384          */
3385         clear_linkup_counters(ppd->dd);
3386         /*
3387          * And (re)set link up default values.
3388          */
3389         set_linkup_defaults(ppd);
3390
3391         /* enforce link speed enabled */
3392         if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
3393                 /* oops - current speed is not enabled, bounce */
3394                 dd_dev_err(ppd->dd,
3395                         "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
3396                         ppd->link_speed_active, ppd->link_speed_enabled);
3397                 set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
3398                         OPA_LINKDOWN_REASON_SPEED_POLICY);
3399                 set_link_state(ppd, HLS_DN_OFFLINE);
3400                 start_link(ppd);
3401         }
3402 }
3403
3404 /* Several pieces of LNI information were cached for SMA in ppd.
3405  * Reset these on link down */
3406 static void reset_neighbor_info(struct hfi1_pportdata *ppd)
3407 {
3408         ppd->neighbor_guid = 0;
3409         ppd->neighbor_port_number = 0;
3410         ppd->neighbor_type = 0;
3411         ppd->neighbor_fm_security = 0;
3412 }
3413
3414 /*
3415  * Handle a link down interrupt from the 8051.
3416  *
3417  * This is a work-queue function outside of the interrupt.
3418  */
3419 void handle_link_down(struct work_struct *work)
3420 {
3421         u8 lcl_reason, neigh_reason = 0;
3422         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3423                                                                 link_down_work);
3424
3425         /* go offline first, then deal with reasons */
3426         set_link_state(ppd, HLS_DN_OFFLINE);
3427
3428         lcl_reason = 0;
3429         read_planned_down_reason_code(ppd->dd, &neigh_reason);
3430
3431         /*
3432          * If no reason, assume peer-initiated but missed
3433          * LinkGoingDown idle flits.
3434          */
3435         if (neigh_reason == 0)
3436                 lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN;
3437
3438         set_link_down_reason(ppd, lcl_reason, neigh_reason, 0);
3439
3440         reset_neighbor_info(ppd);
3441
3442         /* disable the port */
3443         clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3444
3445         /* If there is no cable attached, turn the DC off. Otherwise,
3446          * start the link bring up. */
3447         if (!qsfp_mod_present(ppd))
3448                 dc_shutdown(ppd->dd);
3449         else
3450                 start_link(ppd);
3451 }
3452
3453 void handle_link_bounce(struct work_struct *work)
3454 {
3455         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3456                                                         link_bounce_work);
3457
3458         /*
3459          * Only do something if the link is currently up.
3460          */
3461         if (ppd->host_link_state & HLS_UP) {
3462                 set_link_state(ppd, HLS_DN_OFFLINE);
3463                 start_link(ppd);
3464         } else {
3465                 dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
3466                         __func__, link_state_name(ppd->host_link_state));
3467         }
3468 }
3469
3470 /*
3471  * Mask conversion: Capability exchange to Port LTP.  The capability
3472  * exchange has an implicit 16b CRC that is mandatory.
3473  */
3474 static int cap_to_port_ltp(int cap)
3475 {
3476         int port_ltp = PORT_LTP_CRC_MODE_16; /* this mode is mandatory */
3477
3478         if (cap & CAP_CRC_14B)
3479                 port_ltp |= PORT_LTP_CRC_MODE_14;
3480         if (cap & CAP_CRC_48B)
3481                 port_ltp |= PORT_LTP_CRC_MODE_48;
3482         if (cap & CAP_CRC_12B_16B_PER_LANE)
3483                 port_ltp |= PORT_LTP_CRC_MODE_PER_LANE;
3484
3485         return port_ltp;
3486 }
3487
3488 /*
3489  * Convert an OPA Port LTP mask to capability mask
3490  */
3491 int port_ltp_to_cap(int port_ltp)
3492 {
3493         int cap_mask = 0;
3494
3495         if (port_ltp & PORT_LTP_CRC_MODE_14)
3496                 cap_mask |= CAP_CRC_14B;
3497         if (port_ltp & PORT_LTP_CRC_MODE_48)
3498                 cap_mask |= CAP_CRC_48B;
3499         if (port_ltp & PORT_LTP_CRC_MODE_PER_LANE)
3500                 cap_mask |= CAP_CRC_12B_16B_PER_LANE;
3501
3502         return cap_mask;
3503 }
3504
3505 /*
3506  * Convert a single DC LCB CRC mode to an OPA Port LTP mask.
3507  */
3508 static int lcb_to_port_ltp(int lcb_crc)
3509 {
3510         int port_ltp = 0;
3511
3512         if (lcb_crc == LCB_CRC_12B_16B_PER_LANE)
3513                 port_ltp = PORT_LTP_CRC_MODE_PER_LANE;
3514         else if (lcb_crc == LCB_CRC_48B)
3515                 port_ltp = PORT_LTP_CRC_MODE_48;
3516         else if (lcb_crc == LCB_CRC_14B)
3517                 port_ltp = PORT_LTP_CRC_MODE_14;
3518         else
3519                 port_ltp = PORT_LTP_CRC_MODE_16;
3520
3521         return port_ltp;
3522 }
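
/*
 * Example (illustrative): a capability mask of CAP_CRC_14B | CAP_CRC_48B
 * converts to PORT_LTP_CRC_MODE_16 | PORT_LTP_CRC_MODE_14 |
 * PORT_LTP_CRC_MODE_48 via cap_to_port_ltp() - the 16b mode is always
 * included because it is mandatory - and port_ltp_to_cap() recovers the
 * two optional capability bits.  lcb_to_port_ltp() maps the single
 * negotiated LCB CRC mode (e.g. LCB_CRC_14B) to its one LTP flag.
 */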
3523
3524 /*
3525  * Our neighbor has indicated that we are allowed to act as a fabric
3526  * manager, so place the full management partition key in pkey array
3527  * index 2 (0-based; see OPAv1, section 20.2.2.6.8). Note that we
3528  * should already have the limited management partition key in array
3529  * element 1, and also that the port is not yet up when
3530  * add_full_mgmt_pkey() is invoked.
3531  */
3532 static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
3533 {
3534         struct hfi1_devdata *dd = ppd->dd;
3535
3536         /* Sanity check - ppd->pkeys[2] should be 0 */
3537         if (ppd->pkeys[2] != 0)
3538                 dd_dev_err(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
3539                            __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
3540         ppd->pkeys[2] = FULL_MGMT_P_KEY;
3541         (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
3542 }
3543
3544 /*
3545  * Convert the given link width to the OPA link width bitmask.
3546  */
3547 static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
3548 {
3549         switch (width) {
3550         case 0:
3551                 /*
3552                  * Simulator and quick linkup do not set the width.
3553                  * Just set it to 4x without complaint.
3554                  */
3555                 if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR || quick_linkup)
3556                         return OPA_LINK_WIDTH_4X;
3557                 return 0; /* no lanes up */
3558         case 1: return OPA_LINK_WIDTH_1X;
3559         case 2: return OPA_LINK_WIDTH_2X;
3560         case 3: return OPA_LINK_WIDTH_3X;
3561         default:
3562                 dd_dev_info(dd, "%s: invalid width %d, using 4\n",
3563                         __func__, width);
3564                 /* fall through */
3565         case 4: return OPA_LINK_WIDTH_4X;
3566         }
3567 }
3568
3569 /*
3570  * Do a population count on the bottom nibble.
3571  */
3572 static const u8 bit_counts[16] = {
3573         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
3574 };
3575 static inline u8 nibble_to_count(u8 nibble)
3576 {
3577         return bit_counts[nibble & 0xf];
3578 }
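
/*
 * Example (illustrative): nibble_to_count(0xb) counts the set bits of
 * 0b1011 and returns 3, i.e. three lanes enabled.  Only the bottom
 * nibble is examined, so nibble_to_count(0x1f) returns 4.
 */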
3579
3580 /*
3581  * Read the active lane information from the 8051 registers and return
3582  * their widths.
3583  *
3584  * Active lane information is found in these 8051 registers:
3585  *      enable_lane_tx
3586  *      enable_lane_rx
3587  */
3588 static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
3589                             u16 *rx_width)
3590 {
3591         u16 tx, rx;
3592         u8 enable_lane_rx;
3593         u8 enable_lane_tx;
3594         u8 tx_polarity_inversion;
3595         u8 rx_polarity_inversion;
3596         u8 max_rate;
3597
3598         /* read the active lanes */
3599         read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
3600                                 &rx_polarity_inversion, &max_rate);
3601         read_local_lni(dd, &enable_lane_rx);
3602
3603         /* convert to counts */
3604         tx = nibble_to_count(enable_lane_tx);
3605         rx = nibble_to_count(enable_lane_rx);
3606
3607         /*
3608          * Set link_speed_active here, overriding what was set in
3609          * handle_verify_cap().  The ASIC 8051 firmware does not correctly
3610          * set the max_rate field in handle_verify_cap until v0.19.
3611          */
3612         if ((dd->icode == ICODE_RTL_SILICON)
3613                                 && (dd->dc8051_ver < dc8051_ver(0, 19))) {
3614                 /* max_rate: 0 = 12.5G, 1 = 25G */
3615                 switch (max_rate) {
3616                 case 0:
3617                         dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
3618                         break;
3619                 default:
3620                         dd_dev_err(dd,
3621                                 "%s: unexpected max rate %d, using 25Gb\n",
3622                                 __func__, (int)max_rate);
3623                         /* fall through */
3624                 case 1:
3625                         dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
3626                         break;
3627                 }
3628         }
3629
3630         dd_dev_info(dd,
3631                 "Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
3632                 enable_lane_tx, tx, enable_lane_rx, rx);
3633         *tx_width = link_width_to_bits(dd, tx);
3634         *rx_width = link_width_to_bits(dd, rx);
3635 }
3636
3637 /*
3638  * Read verify_cap_local_fm_link_width[1] to obtain the link widths.
3639  * Valid after the end of VerifyCap and during LinkUp.  Does not change
3640  * after link up.  I.e. look elsewhere for downgrade information.
3641  *
3642  * Bits are:
3643  *      + bits [7:4] contain the number of active transmitters
3644  *      + bits [3:0] contain the number of active receivers
3645  * These are numbers 1 through 4 and can be different values if the
3646  * link is asymmetric.
3647  *
3648  * verify_cap_local_fm_link_width[0] retains its original value.
3649  */
3650 static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width,
3651                               u16 *rx_width)
3652 {
3653         u16 widths, tx, rx;
3654         u8 misc_bits, local_flags;
3655         u16 active_tx, active_rx;
3656
3657         read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths);
3658         tx = widths >> 12;
3659         rx = (widths >> 8) & 0xf;
3660
3661         *tx_width = link_width_to_bits(dd, tx);
3662         *rx_width = link_width_to_bits(dd, rx);
3663
3664         /* print the active widths */
3665         get_link_widths(dd, &active_tx, &active_rx);
3666 }
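
/*
 * Worked example (illustrative): if read_vc_local_link_width() reports
 * widths == 0x4300, field [1] is 0x43, so tx = 0x4300 >> 12 = 4 active
 * transmitters and rx = (0x4300 >> 8) & 0xf = 3 active receivers, which
 * link_width_to_bits() maps to OPA_LINK_WIDTH_4X and OPA_LINK_WIDTH_3X
 * respectively.
 */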
3667
3668 /*
3669  * Set ppd->link_width_active and ppd->link_width_downgrade_active using
3670  * hardware information when the link first comes up.
3671  *
3672  * The link width is not available until after VerifyCap.AllFramesReceived
3673  * (the trigger for handle_verify_cap), so this is outside that routine
3674  * and should be called when the 8051 signals linkup.
3675  */
3676 void get_linkup_link_widths(struct hfi1_pportdata *ppd)
3677 {
3678         u16 tx_width, rx_width;
3679
3680         /* get end-of-LNI link widths */
3681         get_linkup_widths(ppd->dd, &tx_width, &rx_width);
3682
3683         /* use tx_width as the link is supposed to be symmetric on link up */
3684         ppd->link_width_active = tx_width;
3685         /* link width downgrade active (LWD.A) starts out matching LW.A */
3686         ppd->link_width_downgrade_tx_active = ppd->link_width_active;
3687         ppd->link_width_downgrade_rx_active = ppd->link_width_active;
3688         /* per OPA spec, on link up LWD.E resets to LWD.S */
3689         ppd->link_width_downgrade_enabled = ppd->link_width_downgrade_supported;
3690         /* cache the active egress rate (units [10^6 bits/sec]) */
3691         ppd->current_egress_rate = active_egress_rate(ppd);
3692 }
3693
3694 /*
3695  * Handle a verify capabilities interrupt from the 8051.
3696  *
3697  * This is a work-queue function outside of the interrupt.
3698  */
3699 void handle_verify_cap(struct work_struct *work)
3700 {
3701         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3702                                                                 link_vc_work);
3703         struct hfi1_devdata *dd = ppd->dd;
3704         u64 reg;
3705         u8 power_management;
3706         u8 continuous;
3707         u8 vcu;
3708         u8 vau;
3709         u8 z;
3710         u16 vl15buf;
3711         u16 link_widths;
3712         u16 crc_mask;
3713         u16 crc_val;
3714         u16 device_id;
3715         u16 active_tx, active_rx;
3716         u8 partner_supported_crc;
3717         u8 remote_tx_rate;
3718         u8 device_rev;
3719
3720         set_link_state(ppd, HLS_VERIFY_CAP);
3721
3722         lcb_shutdown(dd, 0);
3723         adjust_lcb_for_fpga_serdes(dd);
3724
3725         /*
3726          * These are now valid:
3727          *      remote VerifyCap fields in the general LNI config
3728          *      CSR DC8051_STS_REMOTE_GUID
3729          *      CSR DC8051_STS_REMOTE_NODE_TYPE
3730          *      CSR DC8051_STS_REMOTE_FM_SECURITY
3731          *      CSR DC8051_STS_REMOTE_PORT_NO
3732          */
3733
3734         read_vc_remote_phy(dd, &power_management, &continuous);
3735         read_vc_remote_fabric(
3736                 dd,
3737                 &vau,
3738                 &z,
3739                 &vcu,
3740                 &vl15buf,
3741                 &partner_supported_crc);
3742         read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
3743         read_remote_device_id(dd, &device_id, &device_rev);
3744         /*
3745          * And the 'MgmtAllowed' information, which is exchanged during
3746          * LNI, is also available at this point.
3747          */
3748         read_mgmt_allowed(dd, &ppd->mgmt_allowed);
3749         /* print the active widths */
3750         get_link_widths(dd, &active_tx, &active_rx);
3751         dd_dev_info(dd,
3752                 "Peer PHY: power management 0x%x, continuous updates 0x%x\n",
3753                 (int)power_management, (int)continuous);
3754         dd_dev_info(dd,
3755                 "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
3756                 (int)vau,
3757                 (int)z,
3758                 (int)vcu,
3759                 (int)vl15buf,
3760                 (int)partner_supported_crc);
3761         dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
3762                 (u32)remote_tx_rate, (u32)link_widths);
3763         dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
3764                 (u32)device_id, (u32)device_rev);
3765         /*
3766          * The peer vAU value just read is the peer receiver value.  HFI does
3767          * not support a transmit vAU of 0 (AU == 8).  We advertised that
3768          * with Z=1 in the fabric capabilities sent to the peer.  The peer
3769          * will see our Z=1, and, if it advertised a vAU of 0, will move its
3770          * receive to vAU of 1 (AU == 16).  Do the same here.  We do not care
3771          * about the peer Z value - our sent vAU is 3 (hardwired) and is not
3772          * subject to the Z value exception.
3773          */
3774         if (vau == 0)
3775                 vau = 1;
3776         set_up_vl15(dd, vau, vl15buf);
3777
3778         /* set up the LCB CRC mode */
3779         crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc;
3780
3781         /* order is important: use the lowest bit in common */
3782         if (crc_mask & CAP_CRC_14B)
3783                 crc_val = LCB_CRC_14B;
3784         else if (crc_mask & CAP_CRC_48B)
3785                 crc_val = LCB_CRC_48B;
3786         else if (crc_mask & CAP_CRC_12B_16B_PER_LANE)
3787                 crc_val = LCB_CRC_12B_16B_PER_LANE;
3788         else
3789                 crc_val = LCB_CRC_16B;
3790
3791         dd_dev_info(dd, "Final LCB CRC mode: %d\n", (int)crc_val);
3792         write_csr(dd, DC_LCB_CFG_CRC_MODE,
3793                   (u64)crc_val << DC_LCB_CFG_CRC_MODE_TX_VAL_SHIFT);
3794
3795         /* set (14b only) or clear sideband credit */
3796         reg = read_csr(dd, SEND_CM_CTRL);
3797         if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
3798                 write_csr(dd, SEND_CM_CTRL,
3799                         reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3800         } else {
3801                 write_csr(dd, SEND_CM_CTRL,
3802                         reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3803         }
3804
3805         ppd->link_speed_active = 0;     /* invalid value */
3806         if (dd->dc8051_ver < dc8051_ver(0, 20)) {
3807                 /* remote_tx_rate: 0 = 12.5G, 1 = 25G */
3808                 switch (remote_tx_rate) {
3809                 case 0:
3810                         ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3811                         break;
3812                 case 1:
3813                         ppd->link_speed_active = OPA_LINK_SPEED_25G;
3814                         break;
3815                 }
3816         } else {
3817                 /* actual rate is highest bit of the ANDed rates */
3818                 u8 rate = remote_tx_rate & ppd->local_tx_rate;
3819
3820                 if (rate & 2)
3821                         ppd->link_speed_active = OPA_LINK_SPEED_25G;
3822                 else if (rate & 1)
3823                         ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3824         }
3825         if (ppd->link_speed_active == 0) {
3826                 dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
3827                         __func__, (int)remote_tx_rate);
3828                 ppd->link_speed_active = OPA_LINK_SPEED_25G;
3829         }
3830
3831         /*
3832          * Cache the values of the supported, enabled, and active
3833          * LTP CRC modes to return in 'portinfo' queries. But the bit
3834          * flags that are returned in the portinfo query differ from
3835          * what's in the link_crc_mask, crc_sizes, and crc_val
3836          * variables. Convert these here.
3837          */
3838         ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
3839                 /* supported crc modes */
3840         ppd->port_ltp_crc_mode |=
3841                 cap_to_port_ltp(ppd->port_crc_mode_enabled) << 4;
3842                 /* enabled crc modes */
3843         ppd->port_ltp_crc_mode |= lcb_to_port_ltp(crc_val);
3844                 /* active crc mode */
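
        /*
         * Packed layout sketch (symbolic; example values illustrative):
         *
         *      port_ltp_crc_mode = (supported modes << 8)
         *                        | (enabled modes   << 4)
         *                        | (active mode     << 0)
         *
         * e.g. supported 16B|14B|48B, enabled 16B|14B, active 14B.
         */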
3845
3846         /* set up the remote credit return table */
3847         assign_remote_cm_au_table(dd, vcu);
3848
3849         /*
3850          * The LCB is reset on entry to handle_verify_cap(), so this must
3851          * be applied on every link up.
3852          *
3853          * Adjust LCB error kill enable to kill the link if
3854          * these RBUF errors are seen:
3855          *      REPLAY_BUF_MBE_SMASK
3856          *      FLIT_INPUT_BUF_MBE_SMASK
3857          */
3858         if (is_ax(dd)) {                        /* fixed in B0 */
3859                 reg = read_csr(dd, DC_LCB_CFG_LINK_KILL_EN);
3860                 reg |= DC_LCB_CFG_LINK_KILL_EN_REPLAY_BUF_MBE_SMASK
3861                         | DC_LCB_CFG_LINK_KILL_EN_FLIT_INPUT_BUF_MBE_SMASK;
3862                 write_csr(dd, DC_LCB_CFG_LINK_KILL_EN, reg);
3863         }
3864
3865         /* pull LCB fifos out of reset - all fifo clocks must be stable */
3866         write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
3867
3868         /* give 8051 access to the LCB CSRs */
3869         write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
3870         set_8051_lcb_access(dd);
3871
3872         ppd->neighbor_guid =
3873                 read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
3874         ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
3875                                         DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
3876         ppd->neighbor_type =
3877                 read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
3878                 DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
3879         ppd->neighbor_fm_security =
3880                 read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
3881                 DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
3882         dd_dev_info(dd,
3883                 "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
3884                 ppd->neighbor_guid, ppd->neighbor_type,
3885                 ppd->mgmt_allowed, ppd->neighbor_fm_security);
3886         if (ppd->mgmt_allowed)
3887                 add_full_mgmt_pkey(ppd);
3888
3889         /* tell the 8051 to go to LinkUp */
3890         set_link_state(ppd, HLS_GOING_UP);
3891 }
3892
3893 /*
3894  * Apply the link width downgrade enabled policy against the current active
3895  * link widths.
3896  *
3897  * Called when the enabled policy changes or the active link widths change.
3898  */
3899 void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths)
3900 {
3901         int do_bounce = 0;
3902         int tries;
3903         u16 lwde;
3904         u16 tx, rx;
3905
3906         /* use the hls lock to avoid a race with actual link up */
3907         tries = 0;
3908 retry:
3909         mutex_lock(&ppd->hls_lock);
3910         /* only apply if the link is up */
3911         if (!(ppd->host_link_state & HLS_UP)) {
3912                 /* still going up..wait and retry */
3913                 if (ppd->host_link_state & HLS_GOING_UP) {
3914                         if (++tries < 1000) {
3915                                 mutex_unlock(&ppd->hls_lock);
3916                                 usleep_range(100, 120); /* arbitrary */
3917                                 goto retry;
3918                         }
3919                         dd_dev_err(ppd->dd,
3920                                    "%s: giving up waiting for link state change\n",
3921                                    __func__);
3922                 }
3923                 goto done;
3924         }
3925
3926         lwde = ppd->link_width_downgrade_enabled;
3927
3928         if (refresh_widths) {
3929                 get_link_widths(ppd->dd, &tx, &rx);
3930                 ppd->link_width_downgrade_tx_active = tx;
3931                 ppd->link_width_downgrade_rx_active = rx;
3932         }
3933
3934         if (lwde == 0) {
3935                 /* downgrade is disabled */
3936
3937                 /* bounce if not at starting active width */
3938                 if ((ppd->link_width_active !=
3939                                         ppd->link_width_downgrade_tx_active)
3940                                 || (ppd->link_width_active !=
3941                                         ppd->link_width_downgrade_rx_active)) {
3942                         dd_dev_err(ppd->dd,
3943                                 "Link downgrade is disabled and link has downgraded, downing link\n");
3944                         dd_dev_err(ppd->dd,
3945                                 "  original 0x%x, tx active 0x%x, rx active 0x%x\n",
3946                                 ppd->link_width_active,
3947                                 ppd->link_width_downgrade_tx_active,
3948                                 ppd->link_width_downgrade_rx_active);
3949                         do_bounce = 1;
3950                 }
3951         } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0
3952                 || (lwde & ppd->link_width_downgrade_rx_active) == 0) {
3953                 /* Tx or Rx is outside the enabled policy */
3954                 dd_dev_err(ppd->dd,
3955                         "Link is outside of downgrade allowed, downing link\n");
3956                 dd_dev_err(ppd->dd,
3957                         "  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
3958                         lwde,
3959                         ppd->link_width_downgrade_tx_active,
3960                         ppd->link_width_downgrade_rx_active);
3961                 do_bounce = 1;
3962         }
3963
3964 done:
3965         mutex_unlock(&ppd->hls_lock);
3966
3967         if (do_bounce) {
3968                 set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
3969                   OPA_LINKDOWN_REASON_WIDTH_POLICY);
3970                 set_link_state(ppd, HLS_DN_OFFLINE);
3971                 start_link(ppd);
3972         }
3973 }
3974
3975 /*
3976  * Handle a link downgrade interrupt from the 8051.
3977  *
3978  * This is a work-queue function outside of the interrupt.
3979  */
3980 void handle_link_downgrade(struct work_struct *work)
3981 {
3982         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3983                                                         link_downgrade_work);
3984
3985         dd_dev_info(ppd->dd, "8051: Link width downgrade\n");
3986         apply_link_downgrade_policy(ppd, 1);
3987 }
3988
3989 static char *dcc_err_string(char *buf, int buf_len, u64 flags)
3990 {
3991         return flag_string(buf, buf_len, flags, dcc_err_flags,
3992                 ARRAY_SIZE(dcc_err_flags));
3993 }
3994
3995 static char *lcb_err_string(char *buf, int buf_len, u64 flags)
3996 {
3997         return flag_string(buf, buf_len, flags, lcb_err_flags,
3998                 ARRAY_SIZE(lcb_err_flags));
3999 }
4000
4001 static char *dc8051_err_string(char *buf, int buf_len, u64 flags)
4002 {
4003         return flag_string(buf, buf_len, flags, dc8051_err_flags,
4004                 ARRAY_SIZE(dc8051_err_flags));
4005 }
4006
4007 static char *dc8051_info_err_string(char *buf, int buf_len, u64 flags)
4008 {
4009         return flag_string(buf, buf_len, flags, dc8051_info_err_flags,
4010                 ARRAY_SIZE(dc8051_info_err_flags));
4011 }
4012
4013 static char *dc8051_info_host_msg_string(char *buf, int buf_len, u64 flags)
4014 {
4015         return flag_string(buf, buf_len, flags, dc8051_info_host_msg_flags,
4016                 ARRAY_SIZE(dc8051_info_host_msg_flags));
4017 }
4018
4019 static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
4020 {
4021         struct hfi1_pportdata *ppd = dd->pport;
4022         u64 info, err, host_msg;
4023         int queue_link_down = 0;
4024         char buf[96];
4025
4026         /* look at the flags */
4027         if (reg & DC_DC8051_ERR_FLG_SET_BY_8051_SMASK) {
4028                 /* 8051 information set by firmware */
4029                 /* read DC8051_DBG_ERR_INFO_SET_BY_8051 for details */
4030                 info = read_csr(dd, DC_DC8051_DBG_ERR_INFO_SET_BY_8051);
4031                 err = (info >> DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_SHIFT)
4032                         & DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_MASK;
4033                 host_msg = (info >>
4034                         DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_SHIFT)
4035                         & DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_MASK;
4036
4037                 /*
4038                  * Handle error flags.
4039                  */
4040                 if (err & FAILED_LNI) {
4041                         /*
4042                          * LNI error indications are cleared by the 8051
4043                          * only when starting polling.  Only pay attention
4044                          * to them when in the states that occur during
4045                          * LNI.
4046                          */
4047                         if (ppd->host_link_state
4048                             & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
4049                                 queue_link_down = 1;
4050                                 dd_dev_info(dd, "Link error: %s\n",
4051                                         dc8051_info_err_string(buf,
4052                                                 sizeof(buf),
4053                                                 err & FAILED_LNI));
4054                         }
4055                         err &= ~(u64)FAILED_LNI;
4056                 }
4057                 if (err) {
4058                         /* report remaining errors, but do not do anything */
4059                         dd_dev_err(dd, "8051 info error: %s\n",
4060                                 dc8051_info_err_string(buf, sizeof(buf), err));
4061                 }
4062
4063                 /*
4064                  * Handle host message flags.
4065                  */
4066                 if (host_msg & HOST_REQ_DONE) {
4067                         /*
4068                          * Presently, the driver does a busy wait for
4069                          * host requests to complete.  This is only an
4070                          * informational message.
4071                          * NOTE: The 8051 clears the host message
4072                          * information *on the next 8051 command*.
4073                          * Therefore, when linkup is achieved,
4074                          * this flag will still be set.
4075                          */
4076                         host_msg &= ~(u64)HOST_REQ_DONE;
4077                 }
4078                 if (host_msg & BC_SMA_MSG) {
4079                         queue_work(ppd->hfi1_wq, &ppd->sma_message_work);
4080                         host_msg &= ~(u64)BC_SMA_MSG;
4081                 }
4082                 if (host_msg & LINKUP_ACHIEVED) {
4083                         dd_dev_info(dd, "8051: Link up\n");
4084                         queue_work(ppd->hfi1_wq, &ppd->link_up_work);
4085                         host_msg &= ~(u64)LINKUP_ACHIEVED;
4086                 }
4087                 if (host_msg & EXT_DEVICE_CFG_REQ) {
4088                         handle_8051_request(dd);
4089                         host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
4090                 }
4091                 if (host_msg & VERIFY_CAP_FRAME) {
4092                         queue_work(ppd->hfi1_wq, &ppd->link_vc_work);
4093                         host_msg &= ~(u64)VERIFY_CAP_FRAME;
4094                 }
4095                 if (host_msg & LINK_GOING_DOWN) {
4096                         const char *extra = "";
4097                         /* no downgrade action needed if going down */
4098                         if (host_msg & LINK_WIDTH_DOWNGRADED) {
4099                                 host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4100                                 extra = " (ignoring downgrade)";
4101                         }
4102                         dd_dev_info(dd, "8051: Link down%s\n", extra);
4103                         queue_link_down = 1;
4104                         host_msg &= ~(u64)LINK_GOING_DOWN;
4105                 }
4106                 if (host_msg & LINK_WIDTH_DOWNGRADED) {
4107                         queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work);
4108                         host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4109                 }
4110                 if (host_msg) {
4111                         /* report remaining messages, but do not do anything */
4112                         dd_dev_info(dd, "8051 info host message: %s\n",
4113                                 dc8051_info_host_msg_string(buf, sizeof(buf),
4114                                         host_msg));
4115                 }
4116
4117                 reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK;
4118         }
4119         if (reg & DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK) {
4120                 /*
4121                  * Lost the 8051 heartbeat.  If this happens, we
4122                  * receive constant interrupts about it.  Disable
4123                  * the interrupt after the first.
4124                  */
4125                 dd_dev_err(dd, "Lost 8051 heartbeat\n");
4126                 write_csr(dd, DC_DC8051_ERR_EN,
4127                         read_csr(dd, DC_DC8051_ERR_EN)
4128                           & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
4129
4130                 reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK;
4131         }
4132         if (reg) {
4133                 /* report the error, but do not do anything */
4134                 dd_dev_err(dd, "8051 error: %s\n",
4135                         dc8051_err_string(buf, sizeof(buf), reg));
4136         }
4137
4138         if (queue_link_down) {
4139                 /* if the link is already going down or disabled, do not
4140                  * queue another */
4141                 if ((ppd->host_link_state
4142                                     & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN))
4143                                 || ppd->link_enabled == 0) {
4144                         dd_dev_info(dd, "%s: not queuing link down\n",
4145                                 __func__);
4146                 } else {
4147                         queue_work(ppd->hfi1_wq, &ppd->link_down_work);
4148                 }
4149         }
4150 }
4151
4152 static const char * const fm_config_txt[] = {
4153 [0] =
4154         "BadHeadDist: Distance violation between two head flits",
4155 [1] =
4156         "BadTailDist: Distance violation between two tail flits",
4157 [2] =
4158         "BadCtrlDist: Distance violation between two credit control flits",
4159 [3] =
4160         "BadCrdAck: Credits return for unsupported VL",
4161 [4] =
4162         "UnsupportedVLMarker: Received VL Marker",
4163 [5] =
4164         "BadPreempt: Exceeded the preemption nesting level",
4165 [6] =
4166         "BadControlFlit: Received unsupported control flit",
4167 /* no 7 */
4168 [8] =
4169         "UnsupportedVLMarker: Received VL Marker for unconfigured or disabled VL",
4170 };
4171
4172 static const char * const port_rcv_txt[] = {
4173 [1] =
4174         "BadPktLen: Illegal PktLen",
4175 [2] =
4176         "PktLenTooLong: Packet longer than PktLen",
4177 [3] =
4178         "PktLenTooShort: Packet shorter than PktLen",
4179 [4] =
4180         "BadSLID: Illegal SLID (0, using multicast as SLID, does not include security validation of SLID)",
4181 [5] =
4182         "BadDLID: Illegal DLID (0, doesn't match HFI)",
4183 [6] =
4184         "BadL2: Illegal L2 opcode",
4185 [7] =
4186         "BadSC: Unsupported SC",
4187 [9] =
4188         "BadRC: Illegal RC",
4189 [11] =
4190         "PreemptError: Preempting with same VL",
4191 [12] =
4192         "PreemptVL15: Preempting a VL15 packet",
4193 };
4194
4195 #define OPA_LDR_FMCONFIG_OFFSET 16
4196 #define OPA_LDR_PORTRCV_OFFSET 0
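/*
 * port_error_action carries one "bounce the link on this error" bit per error
 * code: PortRcv codes start at bit OPA_LDR_PORTRCV_OFFSET (0), FMConfig codes
 * at bit OPA_LDR_FMCONFIG_OFFSET (16).
 */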
4197 static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4198 {
4199         u64 info, hdr0, hdr1;
4200         const char *extra;
4201         char buf[96];
4202         struct hfi1_pportdata *ppd = dd->pport;
4203         u8 lcl_reason = 0;
4204         int do_bounce = 0;
4205
4206         if (reg & DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK) {
4207                 if (!(dd->err_info_uncorrectable & OPA_EI_STATUS_SMASK)) {
4208                         info = read_csr(dd, DCC_ERR_INFO_UNCORRECTABLE);
4209                         dd->err_info_uncorrectable = info & OPA_EI_CODE_SMASK;
4210                         /* set status bit */
4211                         dd->err_info_uncorrectable |= OPA_EI_STATUS_SMASK;
4212                 }
4213                 reg &= ~DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK;
4214         }
4215
4216         if (reg & DCC_ERR_FLG_LINK_ERR_SMASK) {
4217                 struct hfi1_pportdata *ppd = dd->pport;
4218                 /* this counter saturates at (2^32) - 1 */
4219                 if (ppd->link_downed < (u32)UINT_MAX)
4220                         ppd->link_downed++;
4221                 reg &= ~DCC_ERR_FLG_LINK_ERR_SMASK;
4222         }
4223
4224         if (reg & DCC_ERR_FLG_FMCONFIG_ERR_SMASK) {
4225                 u8 reason_valid = 1;
4226
4227                 info = read_csr(dd, DCC_ERR_INFO_FMCONFIG);
4228                 if (!(dd->err_info_fmconfig & OPA_EI_STATUS_SMASK)) {
4229                         dd->err_info_fmconfig = info & OPA_EI_CODE_SMASK;
4230                         /* set status bit */
4231                         dd->err_info_fmconfig |= OPA_EI_STATUS_SMASK;
4232                 }
4233                 switch (info) {
4234                 case 0:
4235                 case 1:
4236                 case 2:
4237                 case 3:
4238                 case 4:
4239                 case 5:
4240                 case 6:
4241                         extra = fm_config_txt[info];
4242                         break;
4243                 case 8:
4244                         extra = fm_config_txt[info];
4245                         if (ppd->port_error_action &
4246                             OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER) {
4247                                 do_bounce = 1;
4248                                 /*
4249                                  * lcl_reason cannot be derived from info
4250                                  * for this error
4251                                  */
4252                                 lcl_reason =
4253                                   OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER;
4254                         }
4255                         break;
4256                 default:
4257                         reason_valid = 0;
4258                         snprintf(buf, sizeof(buf), "reserved%lld", info);
4259                         extra = buf;
4260                         break;
4261                 }
4262
4263                 if (reason_valid && !do_bounce) {
4264                         do_bounce = ppd->port_error_action &
4265                                         (1 << (OPA_LDR_FMCONFIG_OFFSET + info));
4266                         lcl_reason = info + OPA_LINKDOWN_REASON_BAD_HEAD_DIST;
4267                 }
4268
4269                 /* just report this */
4270                 dd_dev_info(dd, "DCC Error: fmconfig error: %s\n", extra);
4271                 reg &= ~DCC_ERR_FLG_FMCONFIG_ERR_SMASK;
4272         }
4273
4274         if (reg & DCC_ERR_FLG_RCVPORT_ERR_SMASK) {
4275                 u8 reason_valid = 1;
4276
4277                 info = read_csr(dd, DCC_ERR_INFO_PORTRCV);
4278                 hdr0 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR0);
4279                 hdr1 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR1);
4280                 if (!(dd->err_info_rcvport.status_and_code &
4281                       OPA_EI_STATUS_SMASK)) {
4282                         dd->err_info_rcvport.status_and_code =
4283                                 info & OPA_EI_CODE_SMASK;
4284                         /* set status bit */
4285                         dd->err_info_rcvport.status_and_code |=
4286                                 OPA_EI_STATUS_SMASK;
4287                         /* save first 2 flits in the packet that caused
4288                          * the error */
4289                         dd->err_info_rcvport.packet_flit1 = hdr0;
4290                         dd->err_info_rcvport.packet_flit2 = hdr1;
4291                 }
4292                 switch (info) {
4293                 case 1:
4294                 case 2:
4295                 case 3:
4296                 case 4:
4297                 case 5:
4298                 case 6:
4299                 case 7:
4300                 case 9:
4301                 case 11:
4302                 case 12:
4303                         extra = port_rcv_txt[info];
4304                         break;
4305                 default:
4306                         reason_valid = 0;
4307                         snprintf(buf, sizeof(buf), "reserved%lld", info);
4308                         extra = buf;
4309                         break;
4310                 }
4311
4312                 if (reason_valid && !do_bounce) {
4313                         do_bounce = ppd->port_error_action &
4314                                         (1 << (OPA_LDR_PORTRCV_OFFSET + info));
4315                         lcl_reason = info + OPA_LINKDOWN_REASON_RCV_ERROR_0;
4316                 }
4317
4318                 /* just report this */
4319                 dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
4320                 dd_dev_info(dd, "           hdr0 0x%llx, hdr1 0x%llx\n",
4321                         hdr0, hdr1);
4322
4323                 reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
4324         }
4325
4326         if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK) {
4327                 /* informative only */
4328                 dd_dev_info(dd, "8051 access to LCB blocked\n");
4329                 reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK;
4330         }
4331         if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK) {
4332                 /* informative only */
4333                 dd_dev_info(dd, "host access to LCB blocked\n");
4334                 reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
4335         }
4336
4337         /* report any remaining errors */
4338         if (reg)
4339                 dd_dev_info(dd, "DCC Error: %s\n",
4340                         dcc_err_string(buf, sizeof(buf), reg));
4341
4342         if (lcl_reason == 0)
4343                 lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
4344
4345         if (do_bounce) {
4346                 dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
4347                 set_link_down_reason(ppd, lcl_reason, 0, lcl_reason);
4348                 queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
4349         }
4350 }
4351
4352 static void handle_lcb_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4353 {
4354         char buf[96];
4355
4356         dd_dev_info(dd, "LCB Error: %s\n",
4357                 lcb_err_string(buf, sizeof(buf), reg));
4358 }
4359
4360 /*
4361  * CCE block DC interrupt.  Source is < 8.
4362  */
4363 static void is_dc_int(struct hfi1_devdata *dd, unsigned int source)
4364 {
4365         const struct err_reg_info *eri = &dc_errs[source];
4366
4367         if (eri->handler) {
4368                 interrupt_clear_down(dd, 0, eri);
4369         } else if (source == 3 /* dc_lbm_int */) {
4370                 /*
4371                  * This indicates that a parity error has occurred on the
4372                  * address/control lines presented to the LBM.  The error
4373                  * is a single pulse, there is no associated error flag,
4374                  * and it is non-maskable.  This is because if a parity
4375                  * error occurs on the request the request is dropped.
4376                  * This should never occur, but it is nice to know if it
4377                  * ever does.
4378                  */
4379                 dd_dev_err(dd, "Parity error in DC LBM block\n");
4380         } else {
4381                 dd_dev_err(dd, "Invalid DC interrupt %u\n", source);
4382         }
4383 }
4384
4385 /*
4386  * TX block send credit interrupt.  Source is < 160.
4387  */
4388 static void is_send_credit_int(struct hfi1_devdata *dd, unsigned int source)
4389 {
4390         sc_group_release_update(dd, source);
4391 }
4392
4393 /*
4394  * TX block SDMA interrupt.  Source is < 48.
4395  *
4396  * SDMA interrupts are grouped by type:
4397  *
4398  *       0 -  N-1 = SDma
4399  *       N - 2N-1 = SDmaProgress
4400  *      2N - 3N-1 = SDmaIdle
4401  */
4402 static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source)
4403 {
4404         /* what interrupt */
4405         unsigned int what  = source / TXE_NUM_SDMA_ENGINES;
4406         /* which engine */
4407         unsigned int which = source % TXE_NUM_SDMA_ENGINES;
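        /*
         * e.g. with 16 engines (3 groups x 16 = the 48 sources noted above),
         * source 17 decodes to what = 1 (SDmaProgress) on engine 1.
         */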
4408
4409 #ifdef CONFIG_SDMA_VERBOSITY
4410         dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", which,
4411                    slashstrip(__FILE__), __LINE__, __func__);
4412         sdma_dumpstate(&dd->per_sdma[which]);
4413 #endif
4414
4415         if (likely(what < 3 && which < dd->num_sdma)) {
4416                 sdma_engine_interrupt(&dd->per_sdma[which], 1ull << source);
4417         } else {
4418                 /* should not happen */
4419                 dd_dev_err(dd, "Invalid SDMA interrupt 0x%x\n", source);
4420         }
4421 }
4422
4423 /*
4424  * RX block receive available interrupt.  Source is < 160.
4425  */
4426 static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
4427 {
4428         struct hfi1_ctxtdata *rcd;
4429         char *err_detail;
4430
4431         if (likely(source < dd->num_rcv_contexts)) {
4432                 rcd = dd->rcd[source];
4433                 if (rcd) {
4434                         if (source < dd->first_user_ctxt)
4435                                 rcd->do_interrupt(rcd, 0);
4436                         else
4437                                 handle_user_interrupt(rcd);
4438                         return; /* OK */
4439                 }
4440                 /* received an interrupt, but no rcd */
4441                 err_detail = "dataless";
4442         } else {
4443                 /* received an interrupt, but are not using that context */
4444                 err_detail = "out of range";
4445         }
4446         dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n",
4447                 err_detail, source);
4448 }
4449
4450 /*
4451  * RX block receive urgent interrupt.  Source is < 160.
4452  */
4453 static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
4454 {
4455         struct hfi1_ctxtdata *rcd;
4456         char *err_detail;
4457
4458         if (likely(source < dd->num_rcv_contexts)) {
4459                 rcd = dd->rcd[source];
4460                 if (rcd) {
4461                         /* only pay attention to user urgent interrupts */
4462                         if (source >= dd->first_user_ctxt)
4463                                 handle_user_interrupt(rcd);
4464                         return; /* OK */
4465                 }
4466                 /* received an interrupt, but no rcd */
4467                 err_detail = "dataless";
4468         } else {
4469                 /* received an interrupt, but are not using that context */
4470                 err_detail = "out of range";
4471         }
4472         dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n",
4473                 err_detail, source);
4474 }
4475
4476 /*
4477  * Reserved range interrupt.  Should not be called in normal operation.
4478  */
4479 static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source)
4480 {
4481         char name[64];
4482
4483         dd_dev_err(dd, "unexpected %s interrupt\n",
4484                                 is_reserved_name(name, sizeof(name), source));
4485 }
4486
4487 static const struct is_table is_table[] = {
4488 /* start                     end
4489                                 name func               interrupt func */
4490 { IS_GENERAL_ERR_START,  IS_GENERAL_ERR_END,
4491                                 is_misc_err_name,       is_misc_err_int },
4492 { IS_SDMAENG_ERR_START,  IS_SDMAENG_ERR_END,
4493                                 is_sdma_eng_err_name,   is_sdma_eng_err_int },
4494 { IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
4495                                 is_sendctxt_err_name,   is_sendctxt_err_int },
4496 { IS_SDMA_START,             IS_SDMA_END,
4497                                 is_sdma_eng_name,       is_sdma_eng_int },
4498 { IS_VARIOUS_START,          IS_VARIOUS_END,
4499                                 is_various_name,        is_various_int },
4500 { IS_DC_START,       IS_DC_END,
4501                                 is_dc_name,             is_dc_int },
4502 { IS_RCVAVAIL_START,     IS_RCVAVAIL_END,
4503                                 is_rcv_avail_name,      is_rcv_avail_int },
4504 { IS_RCVURGENT_START,    IS_RCVURGENT_END,
4505                                 is_rcv_urgent_name,     is_rcv_urgent_int },
4506 { IS_SENDCREDIT_START,   IS_SENDCREDIT_END,
4507                                 is_send_credit_name,    is_send_credit_int},
4508 { IS_RESERVED_START,     IS_RESERVED_END,
4509                                 is_reserved_name,       is_reserved_int},
4510 };
4511
4512 /*
4513  * Interrupt source interrupt - called when the given source has an interrupt.
4514  * Source is a bit index into an array of 64-bit integers.
4515  */
4516 static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
4517 {
4518         const struct is_table *entry;
4519
4520         /* avoids a double compare by walking the table in-order */
4521         for (entry = &is_table[0]; entry->is_name; entry++) {
4522                 if (source < entry->end) {
4523                         trace_hfi1_interrupt(dd, entry, source);
4524                         entry->is_int(dd, source - entry->start);
4525                         return;
4526                 }
4527         }
4528         /* fell off the end */
4529         dd_dev_err(dd, "invalid interrupt source %u\n", source);
4530 }
4531
4532 /*
4533  * General interrupt handler.  This is able to correctly handle
4534  * all interrupts in case INTx is used.
4535  */
4536 static irqreturn_t general_interrupt(int irq, void *data)
4537 {
4538         struct hfi1_devdata *dd = data;
4539         u64 regs[CCE_NUM_INT_CSRS];
4540         u32 bit;
4541         int i;
4542
4543         this_cpu_inc(*dd->int_counter);
4544
4545         /* phase 1: scan and clear all handled interrupts */
4546         for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
4547                 if (dd->gi_mask[i] == 0) {
4548                         regs[i] = 0;    /* used later */
4549                         continue;
4550                 }
4551                 regs[i] = read_csr(dd, CCE_INT_STATUS + (8 * i)) &
4552                                 dd->gi_mask[i];
4553                 /* only clear if anything is set */
4554                 if (regs[i])
4555                         write_csr(dd, CCE_INT_CLEAR + (8 * i), regs[i]);
4556         }
4557
4558         /* phase 2: call the appropriate handler */
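        /*
         * regs[] is scanned as one contiguous bitmap, so 'bit' is the global
         * interrupt source number that is_interrupt() expects.
         */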
4559         for_each_set_bit(bit, (unsigned long *)&regs[0],
4560                                                 CCE_NUM_INT_CSRS*64) {
4561                 is_interrupt(dd, bit);
4562         }
4563
4564         return IRQ_HANDLED;
4565 }
4566
4567 static irqreturn_t sdma_interrupt(int irq, void *data)
4568 {
4569         struct sdma_engine *sde = data;
4570         struct hfi1_devdata *dd = sde->dd;
4571         u64 status;
4572
4573 #ifdef CONFIG_SDMA_VERBOSITY
4574         dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
4575                    slashstrip(__FILE__), __LINE__, __func__);
4576         sdma_dumpstate(sde);
4577 #endif
4578
4579         this_cpu_inc(*dd->int_counter);
4580
4581         /* This read_csr is really bad in the hot path */
4582         status = read_csr(dd,
4583                         CCE_INT_STATUS + (8*(IS_SDMA_START/64)))
4584                         & sde->imask;
4585         if (likely(status)) {
4586                 /* clear the interrupt(s) */
4587                 write_csr(dd,
4588                         CCE_INT_CLEAR + (8*(IS_SDMA_START/64)),
4589                         status);
4590
4591                 /* handle the interrupt(s) */
4592                 sdma_engine_interrupt(sde, status);
4593         } else
4594                 dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
4595                         sde->this_idx);
4596
4597         return IRQ_HANDLED;
4598 }
4599
4600 /*
4601  * Clear the receive interrupt, forcing the write and making sure
4602  * we have data from the chip, pushing everything in front of it
4603  * back to the host.
4604  */
4605 static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
4606 {
4607         struct hfi1_devdata *dd = rcd->dd;
4608         u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg);
4609
4610         mmiowb();       /* make sure everything before is written */
4611         write_csr(dd, addr, rcd->imask);
4612         /* force the above write on the chip and get a value back */
4613         (void)read_csr(dd, addr);
4614 }
4615
4616 /* force the receive interrupt */
4617 static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
4618 {
4619         write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
4620 }
4621
4622 /* return non-zero if a packet is present */
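/*
 * Without the DMA_RTAIL capability, the expected RHF sequence number marks a
 * new packet; with DMA_RTAIL, the software head is compared to the DMA'ed tail.
 */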
4623 static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
4624 {
4625         if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
4626                 return (rcd->seq_cnt ==
4627                                 rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
4628
4629         /* else is RDMA rtail */
4630         return (rcd->head != get_rcvhdrtail(rcd));
4631 }
4632
4633 /*
4634  * Receive packet IRQ handler.  This routine expects to be on its own IRQ.
4635  * This routine will try to handle packets immediately (latency), but if
4636  * it finds too many, it will invoke the thread handler (bandwitdh).  The
4637  * it finds too many, it will invoke the thread handler (bandwidth).  The
4638  * chip receive interrupt is *not* cleared down until this or the thread (if
4639  * are processing packets anyway.
4640  */
4641 static irqreturn_t receive_context_interrupt(int irq, void *data)
4642 {
4643         struct hfi1_ctxtdata *rcd = data;
4644         struct hfi1_devdata *dd = rcd->dd;
4645         int disposition;
4646         int present;
4647
4648         trace_hfi1_receive_interrupt(dd, rcd->ctxt);
4649         this_cpu_inc(*dd->int_counter);
4650
4651         /* receive interrupt remains blocked while processing packets */
4652         disposition = rcd->do_interrupt(rcd, 0);
4653
4654         /*
4655          * Too many packets were seen while processing packets in this
4656          * IRQ handler.  Invoke the handler thread.  The receive interrupt
4657          * remains blocked.
4658          */
4659         if (disposition == RCV_PKT_LIMIT)
4660                 return IRQ_WAKE_THREAD;
4661
4662         /*
4663          * The packet processor detected no more packets.  Clear the receive
4664          * interrupt and recheck for a packet that may have arrived
4665          * after the previous check and interrupt clear.  If a packet arrived,
4666          * force another interrupt.
4667          */
4668         clear_recv_intr(rcd);
4669         present = check_packet_present(rcd);
4670         if (present)
4671                 force_recv_intr(rcd);
4672
4673         return IRQ_HANDLED;
4674 }
4675
4676 /*
4677  * Receive packet thread handler.  This expects to be invoked with the
4678  * receive interrupt still blocked.
4679  */
4680 static irqreturn_t receive_context_thread(int irq, void *data)
4681 {
4682         struct hfi1_ctxtdata *rcd = data;
4683         int present;
4684
4685         /* receive interrupt is still blocked from the IRQ handler */
4686         (void)rcd->do_interrupt(rcd, 1);
4687
4688         /*
4689          * The packet processor will only return if it detected no more
4690          * packets.  Hold IRQs here so we can safely clear the interrupt and
4691          * recheck for a packet that may have arrived after the previous
4692          * check and the interrupt clear.  If a packet arrived, force another
4693          * interrupt.
4694          */
4695         local_irq_disable();
4696         clear_recv_intr(rcd);
4697         present = check_packet_present(rcd);
4698         if (present)
4699                 force_recv_intr(rcd);
4700         local_irq_enable();
4701
4702         return IRQ_HANDLED;
4703 }
4704
4705 /* ========================================================================= */
4706
4707 u32 read_physical_state(struct hfi1_devdata *dd)
4708 {
4709         u64 reg;
4710
4711         reg = read_csr(dd, DC_DC8051_STS_CUR_STATE);
4712         return (reg >> DC_DC8051_STS_CUR_STATE_PORT_SHIFT)
4713                                 & DC_DC8051_STS_CUR_STATE_PORT_MASK;
4714 }
4715
4716 static u32 read_logical_state(struct hfi1_devdata *dd)
4717 {
4718         u64 reg;
4719
4720         reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4721         return (reg >> DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT)
4722                                 & DCC_CFG_PORT_CONFIG_LINK_STATE_MASK;
4723 }
4724
4725 static void set_logical_state(struct hfi1_devdata *dd, u32 chip_lstate)
4726 {
4727         u64 reg;
4728
4729         reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4730         /* clear current state, set new state */
4731         reg &= ~DCC_CFG_PORT_CONFIG_LINK_STATE_SMASK;
4732         reg |= (u64)chip_lstate << DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT;
4733         write_csr(dd, DCC_CFG_PORT_CONFIG, reg);
4734 }
4735
4736 /*
4737  * Use the 8051 to read an LCB CSR.
4738  */
4739 static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
4740 {
4741         u32 regno;
4742         int ret;
4743
4744         if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
4745                 if (acquire_lcb_access(dd, 0) == 0) {
4746                         *data = read_csr(dd, addr);
4747                         release_lcb_access(dd, 0);
4748                         return 0;
4749                 }
4750                 return -EBUSY;
4751         }
4752
4753         /* register is an index of LCB registers: (offset - base) / 8 */
4754         regno = (addr - DC_LCB_CFG_RUN) >> 3;
4755         ret = do_8051_command(dd, HCMD_READ_LCB_CSR, regno, data);
4756         if (ret != HCMD_SUCCESS)
4757                 return -EBUSY;
4758         return 0;
4759 }
4760
4761 /*
4762  * Read an LCB CSR.  Access may not be in host control, so check.
4763  * Return 0 on success, -EBUSY on failure.
4764  */
4765 int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
4766 {
4767         struct hfi1_pportdata *ppd = dd->pport;
4768
4769         /* if up, go through the 8051 for the value */
4770         if (ppd->host_link_state & HLS_UP)
4771                 return read_lcb_via_8051(dd, addr, data);
4772         /* if going up or down, no access */
4773         if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4774                 return -EBUSY;
4775         /* otherwise, host has access */
4776         *data = read_csr(dd, addr);
4777         return 0;
4778 }
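
/*
 * Example caller pattern (illustrative only): LCB access depends on the
 * current link state, so callers must be prepared for -EBUSY, e.g.
 *
 *      u64 val;
 *
 *      if (read_lcb_csr(dd, DC_LCB_CFG_LINK_KILL_EN, &val) == 0)
 *              ... use val ...
 */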
4779
4780 /*
4781  * Use the 8051 to write an LCB CSR.
4782  */
4783 static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
4784 {
4785         u32 regno;
4786         int ret;
4787
4788         if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR ||
4789             (dd->dc8051_ver < dc8051_ver(0, 20))) {
4790                 if (acquire_lcb_access(dd, 0) == 0) {
4791                         write_csr(dd, addr, data);
4792                         release_lcb_access(dd, 0);
4793                         return 0;
4794                 }
4795                 return -EBUSY;
4796         }
4797
4798         /* register is an index of LCB registers: (offset - base) / 8 */
4799         regno = (addr - DC_LCB_CFG_RUN) >> 3;
4800         ret = do_8051_command(dd, HCMD_WRITE_LCB_CSR, regno, &data);
4801         if (ret != HCMD_SUCCESS)
4802                 return -EBUSY;
4803         return 0;
4804 }
4805
4806 /*
4807  * Write an LCB CSR.  Access may not be in host control, so check.
4808  * Return 0 on success, -EBUSY on failure.
4809  */
4810 int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
4811 {
4812         struct hfi1_pportdata *ppd = dd->pport;
4813
4814         /* if up, go through the 8051 for the value */
4815         if (ppd->host_link_state & HLS_UP)
4816                 return write_lcb_via_8051(dd, addr, data);
4817         /* if going up or down, no access */
4818         if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4819                 return -EBUSY;
4820         /* otherwise, host has access */
4821         write_csr(dd, addr, data);
4822         return 0;
4823 }
4824
4825 /*
4826  * Returns:
4827  *      < 0 = Linux error, not able to get access
4828  *      > 0 = 8051 command RETURN_CODE
4829  */
4830 static int do_8051_command(
4831         struct hfi1_devdata *dd,
4832         u32 type,
4833         u64 in_data,
4834         u64 *out_data)
4835 {
4836         u64 reg, completed;
4837         int return_code;
4838         unsigned long flags;
4839         unsigned long timeout;
4840
4841         hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
4842
4843         /*
4844          * Alternative to holding the lock for a long time:
4845          * - keep busy wait - have other users bounce off
4846          */
4847         spin_lock_irqsave(&dd->dc8051_lock, flags);
4848
4849         /* We can't send any commands to the 8051 if it's in reset */
4850         if (dd->dc_shutdown) {
4851                 return_code = -ENODEV;
4852                 goto fail;
4853         }
4854
4855         /*
4856          * If an 8051 host command timed out previously, then the 8051 is
4857          * stuck.
4858          *
4859          * On first timeout, attempt to reset and restart the entire DC
4860          * block (including 8051). (Is this too big of a hammer?)
4861          *
4862          * If the 8051 times out a second time, the reset did not bring it
4863          * back to healthy life. In that case, fail any subsequent commands.
4864          */
4865         if (dd->dc8051_timed_out) {
4866                 if (dd->dc8051_timed_out > 1) {
4867                         dd_dev_err(dd,
4868                                    "Previous 8051 host command timed out, skipping command %u\n",
4869                                    type);
4870                         return_code = -ENXIO;
4871                         goto fail;
4872                 }
4873                 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4874                 dc_shutdown(dd);
4875                 dc_start(dd);
4876                 spin_lock_irqsave(&dd->dc8051_lock, flags);
4877         }
4878
4879         /*
4880          * If there is no timeout, then the 8051 command interface is
4881          * waiting for a command.
4882          */
4883
4884         /*
4885          * When writing an LCB CSR, out_data contains the full value
4886          * to be written, while in_data contains the relative LCB
4887          * address in 7:0.  Do the work here, rather than in the caller,
4888          * of distributing the write data to where it needs to go:
4889          *
4890          * Write data
4891          *   39:00 -> in_data[47:8]
4892          *   47:40 -> DC8051_CFG_EXT_DEV_0.RETURN_CODE
4893          *   63:48 -> DC8051_CFG_EXT_DEV_0.RSP_DATA
4894          */
4895         if (type == HCMD_WRITE_LCB_CSR) {
4896                 in_data |= ((*out_data) & 0xffffffffffull) << 8;
4897                 reg = ((((*out_data) >> 40) & 0xff) <<
4898                                 DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT)
4899                       | ((((*out_data) >> 48) & 0xffff) <<
4900                                 DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
4901                 write_csr(dd, DC_DC8051_CFG_EXT_DEV_0, reg);
4902         }
4903
4904         /*
4905          * Do two writes: the first to stabilize the type and req_data, the
4906          * second to activate.
4907          */
4908         reg = ((u64)type & DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_MASK)
4909                         << DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_SHIFT
4910                 | (in_data & DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_MASK)
4911                         << DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_SHIFT;
4912         write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4913         reg |= DC_DC8051_CFG_HOST_CMD_0_REQ_NEW_SMASK;
4914         write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4915
4916         /* wait for completion, alternate: interrupt */
4917         timeout = jiffies + msecs_to_jiffies(DC8051_COMMAND_TIMEOUT);
4918         while (1) {
4919                 reg = read_csr(dd, DC_DC8051_CFG_HOST_CMD_1);
4920                 completed = reg & DC_DC8051_CFG_HOST_CMD_1_COMPLETED_SMASK;
4921                 if (completed)
4922                         break;
4923                 if (time_after(jiffies, timeout)) {
4924                         dd->dc8051_timed_out++;
4925                         dd_dev_err(dd, "8051 host command %u timeout\n", type);
4926                         if (out_data)
4927                                 *out_data = 0;
4928                         return_code = -ETIMEDOUT;
4929                         goto fail;
4930                 }
4931                 udelay(2);
4932         }
4933
4934         if (out_data) {
4935                 *out_data = (reg >> DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_SHIFT)
4936                                 & DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_MASK;
4937                 if (type == HCMD_READ_LCB_CSR) {
4938                         /* top 16 bits are in a different register */
4939                         *out_data |= (read_csr(dd, DC_DC8051_CFG_EXT_DEV_1)
4940                                 & DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SMASK)
4941                                 << (48
4942                                     - DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT);
4943                 }
4944         }
4945         return_code = (reg >> DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_SHIFT)
4946                                 & DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_MASK;
4947         dd->dc8051_timed_out = 0;
4948         /*
4949          * Clear command for next user.
4950          */
4951         write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
4952
4953 fail:
4954         spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4955
4956         return return_code;
4957 }
4958
4959 static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
4960 {
4961         return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
4962 }
4963
4964 static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
4965                             u8 lane_id, u32 config_data)
4966 {
4967         u64 data;
4968         int ret;
4969
4970         data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
4971                 | (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
4972                 | (u64)config_data << LOAD_DATA_DATA_SHIFT;
4973         ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
4974         if (ret != HCMD_SUCCESS) {
4975                 dd_dev_err(dd,
4976                         "load 8051 config: field id %d, lane %d, err %d\n",
4977                         (int)field_id, (int)lane_id, ret);
4978         }
4979         return ret;
4980 }
4981
4982 /*
4983  * Read the 8051 firmware "registers".  Use the RAM directly.  Always
4984  * set the result, even on error.
4985  * Return 0 on success, -errno on failure
4986  */
4987 static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
4988                             u32 *result)
4989 {
4990         u64 big_data;
4991         u32 addr;
4992         int ret;
4993
4994         /* address start depends on the lane_id */
4995         if (lane_id < 4)
4996                 addr = (4 * NUM_GENERAL_FIELDS)
4997                         + (lane_id * 4 * NUM_LANE_FIELDS);
4998         else
4999                 addr = 0;
5000         addr += field_id * 4;
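        /*
         * RAM layout implied above: the general fields occupy the start of
         * the RAM (selected when lane_id >= 4, which is presumably how the
         * GENERAL_CONFIG callers reach them), with per-lane blocks following;
         * each field is 4 bytes wide.
         */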
5001
5002         /* read is in 8-byte chunks, hardware will truncate the address down */
5003         ret = read_8051_data(dd, addr, 8, &big_data);
5004
5005         if (ret == 0) {
5006                 /* extract the 4 bytes we want */
5007                 if (addr & 0x4)
5008                         *result = (u32)(big_data >> 32);
5009                 else
5010                         *result = (u32)big_data;
5011         } else {
5012                 *result = 0;
5013                 dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n",
5014                         __func__, lane_id, field_id);
5015         }
5016
5017         return ret;
5018 }
5019
5020 static int write_vc_local_phy(struct hfi1_devdata *dd, u8 power_management,
5021                               u8 continuous)
5022 {
5023         u32 frame;
5024
5025         frame = continuous << CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT
5026                 | power_management << POWER_MANAGEMENT_SHIFT;
5027         return load_8051_config(dd, VERIFY_CAP_LOCAL_PHY,
5028                                 GENERAL_CONFIG, frame);
5029 }
5030
5031 static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu,
5032                                  u16 vl15buf, u8 crc_sizes)
5033 {
5034         u32 frame;
5035
5036         frame = (u32)vau << VAU_SHIFT
5037                 | (u32)z << Z_SHIFT
5038                 | (u32)vcu << VCU_SHIFT
5039                 | (u32)vl15buf << VL15BUF_SHIFT
5040                 | (u32)crc_sizes << CRC_SIZES_SHIFT;
5041         return load_8051_config(dd, VERIFY_CAP_LOCAL_FABRIC,
5042                                 GENERAL_CONFIG, frame);
5043 }
5044
5045 static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
5046                                      u8 *flag_bits, u16 *link_widths)
5047 {
5048         u32 frame;
5049
5050         read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
5051                                 &frame);
5052         *misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
5053         *flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
5054         *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5055 }
5056
5057 static int write_vc_local_link_width(struct hfi1_devdata *dd,
5058                                      u8 misc_bits,
5059                                      u8 flag_bits,
5060                                      u16 link_widths)
5061 {
5062         u32 frame;
5063
5064         frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT
5065                 | (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT
5066                 | (u32)link_widths << LINK_WIDTH_SHIFT;
5067         return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
5068                      frame);
5069 }
5070
5071 static int write_local_device_id(struct hfi1_devdata *dd, u16 device_id,
5072                                  u8 device_rev)
5073 {
5074         u32 frame;
5075
5076         frame = ((u32)device_id << LOCAL_DEVICE_ID_SHIFT)
5077                 | ((u32)device_rev << LOCAL_DEVICE_REV_SHIFT);
5078         return load_8051_config(dd, LOCAL_DEVICE_ID, GENERAL_CONFIG, frame);
5079 }
5080
5081 static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
5082                                   u8 *device_rev)
5083 {
5084         u32 frame;
5085
5086         read_8051_config(dd, REMOTE_DEVICE_ID, GENERAL_CONFIG, &frame);
5087         *device_id = (frame >> REMOTE_DEVICE_ID_SHIFT) & REMOTE_DEVICE_ID_MASK;
5088         *device_rev = (frame >> REMOTE_DEVICE_REV_SHIFT)
5089                         & REMOTE_DEVICE_REV_MASK;
5090 }
5091
5092 void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
5093 {
5094         u32 frame;
5095
5096         read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
5097         *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
5098         *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
5099 }
5100
5101 static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
5102                                u8 *continuous)
5103 {
5104         u32 frame;
5105
5106         read_8051_config(dd, VERIFY_CAP_REMOTE_PHY, GENERAL_CONFIG, &frame);
5107         *power_management = (frame >> POWER_MANAGEMENT_SHIFT)
5108                                         & POWER_MANAGEMENT_MASK;
5109         *continuous = (frame >> CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT)
5110                                         & CONTINIOUS_REMOTE_UPDATE_SUPPORT_MASK;
5111 }
5112
5113 static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
5114                                   u8 *vcu, u16 *vl15buf, u8 *crc_sizes)
5115 {
5116         u32 frame;
5117
5118         read_8051_config(dd, VERIFY_CAP_REMOTE_FABRIC, GENERAL_CONFIG, &frame);
5119         *vau = (frame >> VAU_SHIFT) & VAU_MASK;
5120         *z = (frame >> Z_SHIFT) & Z_MASK;
5121         *vcu = (frame >> VCU_SHIFT) & VCU_MASK;
5122         *vl15buf = (frame >> VL15BUF_SHIFT) & VL15BUF_MASK;
5123         *crc_sizes = (frame >> CRC_SIZES_SHIFT) & CRC_SIZES_MASK;
5124 }
5125
5126 static void read_vc_remote_link_width(struct hfi1_devdata *dd,
5127                                       u8 *remote_tx_rate,
5128                                       u16 *link_widths)
5129 {
5130         u32 frame;
5131
5132         read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG,
5133                                 &frame);
5134         *remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT)
5135                                 & REMOTE_TX_RATE_MASK;
5136         *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5137 }
5138
5139 static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx)
5140 {
5141         u32 frame;
5142
5143         read_8051_config(dd, LOCAL_LNI_INFO, GENERAL_CONFIG, &frame);
5144         *enable_lane_rx = (frame >> ENABLE_LANE_RX_SHIFT) & ENABLE_LANE_RX_MASK;
5145 }
5146
5147 static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed)
5148 {
5149         u32 frame;
5150
5151         read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame);
5152         *mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) & MGMT_ALLOWED_MASK;
5153 }
5154
5155 static void read_last_local_state(struct hfi1_devdata *dd, u32 *lls)
5156 {
5157         read_8051_config(dd, LAST_LOCAL_STATE_COMPLETE, GENERAL_CONFIG, lls);
5158 }
5159
5160 static void read_last_remote_state(struct hfi1_devdata *dd, u32 *lrs)
5161 {
5162         read_8051_config(dd, LAST_REMOTE_STATE_COMPLETE, GENERAL_CONFIG, lrs);
5163 }
5164
5165 void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality)
5166 {
5167         u32 frame;
5168         int ret;
5169
5170         *link_quality = 0;
5171         if (dd->pport->host_link_state & HLS_UP) {
5172                 ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG,
5173                                         &frame);
5174                 if (ret == 0)
5175                         *link_quality = (frame >> LINK_QUALITY_SHIFT)
5176                                                 & LINK_QUALITY_MASK;
5177         }
5178 }
5179
5180 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc)
5181 {
5182         u32 frame;
5183
5184         read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG, &frame);
5185         *pdrrc = (frame >> DOWN_REMOTE_REASON_SHIFT) & DOWN_REMOTE_REASON_MASK;
5186 }
5187
5188 static int read_tx_settings(struct hfi1_devdata *dd,
5189                             u8 *enable_lane_tx,
5190                             u8 *tx_polarity_inversion,
5191                             u8 *rx_polarity_inversion,
5192                             u8 *max_rate)
5193 {
5194         u32 frame;
5195         int ret;
5196
5197         ret = read_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, &frame);
5198         *enable_lane_tx = (frame >> ENABLE_LANE_TX_SHIFT)
5199                                 & ENABLE_LANE_TX_MASK;
5200         *tx_polarity_inversion = (frame >> TX_POLARITY_INVERSION_SHIFT)
5201                                 & TX_POLARITY_INVERSION_MASK;
5202         *rx_polarity_inversion = (frame >> RX_POLARITY_INVERSION_SHIFT)
5203                                 & RX_POLARITY_INVERSION_MASK;
5204         *max_rate = (frame >> MAX_RATE_SHIFT) & MAX_RATE_MASK;
5205         return ret;
5206 }
5207
5208 static int write_tx_settings(struct hfi1_devdata *dd,
5209                              u8 enable_lane_tx,
5210                              u8 tx_polarity_inversion,
5211                              u8 rx_polarity_inversion,
5212                              u8 max_rate)
5213 {
5214         u32 frame;
5215
5216         /* no need to mask, all variable sizes match field widths */
5217         frame = enable_lane_tx << ENABLE_LANE_TX_SHIFT
5218                 | tx_polarity_inversion << TX_POLARITY_INVERSION_SHIFT
5219                 | rx_polarity_inversion << RX_POLARITY_INVERSION_SHIFT
5220                 | max_rate << MAX_RATE_SHIFT;
5221         return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame);
5222 }
5223
5224 static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
5225 {
5226         u32 frame, version, prod_id;
5227         int ret, lane;
5228
5229         /* 4 lanes */
5230         for (lane = 0; lane < 4; lane++) {
5231                 ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
5232                 if (ret) {
5233                         dd_dev_err(
5234                                 dd,
5235                                 "Unable to read lane %d firmware details\n",
5236                                 lane);
5237                         continue;
5238                 }
5239                 version = (frame >> SPICO_ROM_VERSION_SHIFT)
5240                                         & SPICO_ROM_VERSION_MASK;
5241                 prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
5242                                         & SPICO_ROM_PROD_ID_MASK;
5243                 dd_dev_info(dd,
5244                         "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
5245                         lane, version, prod_id);
5246         }
5247 }
5248
5249 /*
5250  * Read an idle LCB message.
5251  *
5252  * Returns 0 on success, -EINVAL on error
5253  */
5254 static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
5255 {
5256         int ret;
5257
5258         ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG,
5259                 type, data_out);
5260         if (ret != HCMD_SUCCESS) {
5261                 dd_dev_err(dd, "read idle message: type %d, err %d\n",
5262                         (u32)type, ret);
5263                 return -EINVAL;
5264         }
5265         dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out);
5266         /* return only the payload as we already know the type */
5267         *data_out >>= IDLE_PAYLOAD_SHIFT;
5268         return 0;
5269 }
5270
5271 /*
5272  * Read an idle SMA message.  To be done in response to a notification from
5273  * the 8051.
5274  *
5275  * Returns 0 on success, -EINVAL on error
5276  */
5277 static int read_idle_sma(struct hfi1_devdata *dd, u64 *data)
5278 {
5279         return read_idle_message(dd,
5280                         (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data);
5281 }
5282
5283 /*
5284  * Send an idle LCB message.
5285  *
5286  * Returns 0 on success, -EINVAL on error
5287  */
5288 static int send_idle_message(struct hfi1_devdata *dd, u64 data)
5289 {
5290         int ret;
5291
5292         dd_dev_info(dd, "%s: sending idle message 0x%llx\n", __func__, data);
5293         ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL);
5294         if (ret != HCMD_SUCCESS) {
5295                 dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n",
5296                         data, ret);
5297                 return -EINVAL;
5298         }
5299         return 0;
5300 }
5301
5302 /*
5303  * Send an idle SMA message.
5304  *
5305  * Returns 0 on success, -EINVAL on error
5306  */
5307 int send_idle_sma(struct hfi1_devdata *dd, u64 message)
5308 {
5309         u64 data;
5310
5311         data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
5312                 | ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
5313         return send_idle_message(dd, data);
5314 }
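
/*
 * Illustrative note on the idle message layout used above: the SMA payload
 * is masked and placed at IDLE_PAYLOAD_SHIFT and the message type (IDLE_SMA)
 * at IDLE_MSG_TYPE_SHIFT.  On the receive side, read_idle_message() shifts
 * the payload back down before returning, so callers of read_idle_sma()
 * see only the payload bits.
 */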
5315
5316 /*
5317  * Initialize the LCB then do a quick link up.  This may or may not be
5318  * in loopback.
5319  *
5320  * return 0 on success, -errno on error
5321  */
5322 static int do_quick_linkup(struct hfi1_devdata *dd)
5323 {
5324         u64 reg;
5325         unsigned long timeout;
5326         int ret;
5327
5328         lcb_shutdown(dd, 0);
5329
5330         if (loopback) {
5331                 /* LCB_CFG_LOOPBACK.VAL = 2 */
5332                 /* LCB_CFG_LANE_WIDTH.VAL = 0 */
5333                 write_csr(dd, DC_LCB_CFG_LOOPBACK,
5334                         IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
5335                 write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
5336         }
5337
5338         /* start the LCBs */
5339         /* LCB_CFG_TX_FIFOS_RESET.VAL = 0 */
5340         write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
5341
5342         /* simulator only loopback steps */
5343         if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5344                 /* LCB_CFG_RUN.EN = 1 */
5345                 write_csr(dd, DC_LCB_CFG_RUN,
5346                         1ull << DC_LCB_CFG_RUN_EN_SHIFT);
5347
5348                 /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
5349                 timeout = jiffies + msecs_to_jiffies(10);
5350                 while (1) {
5351                         reg = read_csr(dd,
5352                                 DC_LCB_STS_LINK_TRANSFER_ACTIVE);
5353                         if (reg)
5354                                 break;
5355                         if (time_after(jiffies, timeout)) {
5356                                 dd_dev_err(dd,
5357                                         "timeout waiting for LINK_TRANSFER_ACTIVE\n");
5358                                 return -ETIMEDOUT;
5359                         }
5360                         udelay(2);
5361                 }
5362
5363                 write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
5364                         1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
5365         }
5366
5367         if (!loopback) {
5368                 /*
5369                  * When doing quick linkup and not in loopback, both
5370                  * sides must be done with LCB set-up before either
5371                  * starts the quick linkup.  Put a delay here so that
5372                  * both sides can be started and have a chance to be
5373                  * done with LCB set up before resuming.
5374                  */
5375                 dd_dev_err(dd,
5376                         "Pausing for peer to be finished with LCB set up\n");
5377                 msleep(5000);
5378                 dd_dev_err(dd,
5379                         "Continuing with quick linkup\n");
5380         }
5381
5382         write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
5383         set_8051_lcb_access(dd);
5384
5385         /*
5386          * State "quick" LinkUp request sets the physical link state to
5387          * LinkUp without a verify capability sequence.
5388          * This state is in simulator v37 and later.
5389          */
5390         ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
5391         if (ret != HCMD_SUCCESS) {
5392                 dd_dev_err(dd,
5393                         "%s: set physical link state to quick LinkUp failed with return %d\n",
5394                         __func__, ret);
5395
5396                 set_host_lcb_access(dd);
5397                 write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
5398
5399                 if (ret >= 0)
5400                         ret = -EINVAL;
5401                 return ret;
5402         }
5403
5404         return 0; /* success */
5405 }
5406
5407 /*
5408  * Set the SerDes to internal loopback mode.
5409  * Returns 0 on success, -errno on error.
5410  */
5411 static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
5412 {
5413         int ret;
5414
5415         ret = set_physical_link_state(dd, PLS_INTERNAL_SERDES_LOOPBACK);
5416         if (ret == HCMD_SUCCESS)
5417                 return 0;
5418         dd_dev_err(dd,
5419                 "Set physical link state to SerDes Loopback failed with return %d\n",
5420                 ret);
5421         if (ret >= 0)
5422                 ret = -EINVAL;
5423         return ret;
5424 }
5425
5426 /*
5427  * Do all special steps to set up loopback.
5428  */
5429 static int init_loopback(struct hfi1_devdata *dd)
5430 {
5431         dd_dev_info(dd, "Entering loopback mode\n");
5432
5433         /* all loopbacks should disable self GUID check */
5434         write_csr(dd, DC_DC8051_CFG_MODE,
5435                 (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
5436
5437         /*
5438          * The simulator has only one loopback option - LCB.  Switch
5439          * to that option, which includes quick link up.
5440          *
5441          * Accept all valid loopback values.
5442          */
5443         if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5444                 && (loopback == LOOPBACK_SERDES
5445                         || loopback == LOOPBACK_LCB
5446                         || loopback == LOOPBACK_CABLE)) {
5447                 loopback = LOOPBACK_LCB;
5448                 quick_linkup = 1;
5449                 return 0;
5450         }
5451
5452         /* handle serdes loopback */
5453         if (loopback == LOOPBACK_SERDES) {
5454                 /* internal serdes loopback needs quick linkup on RTL */
5455                 if (dd->icode == ICODE_RTL_SILICON)
5456                         quick_linkup = 1;
5457                 return set_serdes_loopback_mode(dd);
5458         }
5459
5460         /* LCB loopback - handled at poll time */
5461         if (loopback == LOOPBACK_LCB) {
5462                 quick_linkup = 1; /* LCB is always quick linkup */
5463
5464                 /* not supported in emulation due to emulation RTL changes */
5465                 if (dd->icode == ICODE_FPGA_EMULATION) {
5466                         dd_dev_err(dd,
5467                                 "LCB loopback not supported in emulation\n");
5468                         return -EINVAL;
5469                 }
5470                 return 0;
5471         }
5472
5473         /* external cable loopback requires no extra steps */
5474         if (loopback == LOOPBACK_CABLE)
5475                 return 0;
5476
5477         dd_dev_err(dd, "Invalid loopback mode %d\n", loopback);
5478         return -EINVAL;
5479 }
5480
5481 /*
5482  * Translate from the OPA_LINK_WIDTH handed to us by the FM to bits
5483  * used in the Verify Capability link width attribute.
5484  */
5485 static u16 opa_to_vc_link_widths(u16 opa_widths)
5486 {
5487         int i;
5488         u16 result = 0;
5489
5490         static const struct link_bits {
5491                 u16 from;
5492                 u16 to;
5493         } opa_link_xlate[] = {
5494                 { OPA_LINK_WIDTH_1X, 1 << (1-1)  },
5495                 { OPA_LINK_WIDTH_2X, 1 << (2-1)  },
5496                 { OPA_LINK_WIDTH_3X, 1 << (3-1)  },
5497                 { OPA_LINK_WIDTH_4X, 1 << (4-1)  },
5498         };
5499
5500         for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
5501                 if (opa_widths & opa_link_xlate[i].from)
5502                         result |= opa_link_xlate[i].to;
5503         }
5504         return result;
5505 }
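
/*
 * Illustrative example of the translation above: an opa_widths value of
 * (OPA_LINK_WIDTH_1X | OPA_LINK_WIDTH_4X) maps to 0b1001, i.e. bit 0 for
 * the 1X width and bit 3 for the 4X width in the Verify Capability link
 * width attribute.
 */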
5506
5507 /*
5508  * Set link attributes before moving to polling.
5509  */
5510 static int set_local_link_attributes(struct hfi1_pportdata *ppd)
5511 {
5512         struct hfi1_devdata *dd = ppd->dd;
5513         u8 enable_lane_tx;
5514         u8 tx_polarity_inversion;
5515         u8 rx_polarity_inversion;
5516         int ret;
5517
5518         /* reset our fabric serdes to clear any lingering problems */
5519         fabric_serdes_reset(dd);
5520
5521         /* set the local tx rate - need to read-modify-write */
5522         ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
5523                 &rx_polarity_inversion, &ppd->local_tx_rate);
5524         if (ret)
5525                 goto set_local_link_attributes_fail;
5526
5527         if (dd->dc8051_ver < dc8051_ver(0, 20)) {
5528                 /* set the tx rate to the fastest enabled */
5529                 if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5530                         ppd->local_tx_rate = 1;
5531                 else
5532                         ppd->local_tx_rate = 0;
5533         } else {
5534                 /* set the tx rate to all enabled */
5535                 ppd->local_tx_rate = 0;
5536                 if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5537                         ppd->local_tx_rate |= 2;
5538                 if (ppd->link_speed_enabled & OPA_LINK_SPEED_12_5G)
5539                         ppd->local_tx_rate |= 1;
5540         }
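        /*
         * Illustrative result of the rate selection above: with both 12.5G
         * and 25G enabled on 8051 firmware 0.20 or later, local_tx_rate ends
         * up as 3 (bit 0 = 12.5G, bit 1 = 25G); older firmware encodes only
         * the single fastest enabled rate (1 for 25G, otherwise 0).
         */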
5541
5542         enable_lane_tx = 0xF; /* enable all four lanes */
5543         ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion,
5544                      rx_polarity_inversion, ppd->local_tx_rate);
5545         if (ret != HCMD_SUCCESS)
5546                 goto set_local_link_attributes_fail;
5547
5548         /*
5549          * DC supports continuous updates.
5550          */
5551         ret = write_vc_local_phy(dd, 0 /* no power management */,
5552                                      1 /* continuous updates */);
5553         if (ret != HCMD_SUCCESS)
5554                 goto set_local_link_attributes_fail;
5555
5556         /* z=1 in the next call: AU of 0 is not supported by the hardware */
5557         ret = write_vc_local_fabric(dd, dd->vau, 1, dd->vcu, dd->vl15_init,
5558                                     ppd->port_crc_mode_enabled);
5559         if (ret != HCMD_SUCCESS)
5560                 goto set_local_link_attributes_fail;
5561
5562         ret = write_vc_local_link_width(dd, 0, 0,
5563                      opa_to_vc_link_widths(ppd->link_width_enabled));
5564         if (ret != HCMD_SUCCESS)
5565                 goto set_local_link_attributes_fail;
5566
5567         /* let peer know who we are */
5568         ret = write_local_device_id(dd, dd->pcidev->device, dd->minrev);
5569         if (ret == HCMD_SUCCESS)
5570                 return 0;
5571
5572 set_local_link_attributes_fail:
5573         dd_dev_err(dd,
5574                 "Failed to set local link attributes, return 0x%x\n",
5575                 ret);
5576         return ret;
5577 }
5578
5579 /*
5580  * Call this to start the link.  Schedule a retry if the cable is not
5581  * present or if unable to start polling.  Do not do anything if the
5582  * link is disabled.  Returns 0 if link is disabled or moved to polling
5583  */
5584 int start_link(struct hfi1_pportdata *ppd)
5585 {
5586         if (!ppd->link_enabled) {
5587                 dd_dev_info(ppd->dd,
5588                         "%s: stopping link start because link is disabled\n",
5589                         __func__);
5590                 return 0;
5591         }
5592         if (!ppd->driver_link_ready) {
5593                 dd_dev_info(ppd->dd,
5594                         "%s: stopping link start because driver is not ready\n",
5595                         __func__);
5596                 return 0;
5597         }
5598
5599         if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
5600                         loopback == LOOPBACK_LCB ||
5601                         ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5602                 return set_link_state(ppd, HLS_DN_POLL);
5603
5604         dd_dev_info(ppd->dd,
5605                 "%s: stopping link start because no cable is present\n",
5606                 __func__);
5607         return -EAGAIN;
5608 }
5609
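/*
 * Pulse the QSFP module's RESET_N pin: make sure the pin is enabled as an
 * output, drive it low, hold it for the short delay below, then release it
 * back high.
 */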
5610 static void reset_qsfp(struct hfi1_pportdata *ppd)
5611 {
5612         struct hfi1_devdata *dd = ppd->dd;
5613         u64 mask, qsfp_mask;
5614
5615         mask = (u64)QSFP_HFI0_RESET_N;
5616         qsfp_mask = read_csr(dd,
5617                 dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
5618         qsfp_mask |= mask;
5619         write_csr(dd,
5620                 dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
5621                 qsfp_mask);
5622
5623         qsfp_mask = read_csr(dd,
5624                 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
5625         qsfp_mask &= ~mask;
5626         write_csr(dd,
5627                 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5628                 qsfp_mask);
5629
5630         udelay(10);
5631
5632         qsfp_mask |= mask;
5633         write_csr(dd,
5634                 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5635                 qsfp_mask);
5636 }
5637
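/*
 * Check the QSFP alarm/warning status bytes and log anything of interest.
 * Byte 0 covers module temperature, byte 1 supply voltage, bytes 3-4 RX
 * power per channel pair, bytes 5-6 TX bias and bytes 7-8 TX power;
 * byte 2 and bytes 13-15 are vendor specific, bytes 9-12 are reserved.
 */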
5638 static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
5639                                         u8 *qsfp_interrupt_status)
5640 {
5641         struct hfi1_devdata *dd = ppd->dd;
5642
5643         if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
5644                 (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
5645                 dd_dev_info(dd,
5646                         "%s: QSFP cable temperature too high\n",
5647                         __func__);
5648
5649         if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
5650                 (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
5651                 dd_dev_info(dd,
5652                         "%s: QSFP cable temperature too low\n",
5653                         __func__);
5654
5655         if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
5656                 (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
5657                 dd_dev_info(dd,
5658                         "%s: QSFP supply voltage too high\n",
5659                         __func__);
5660
5661         if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
5662                 (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
5663                 dd_dev_info(dd,
5664                         "%s: QSFP supply voltage too low\n",
5665                         __func__);
5666
5667         /* Byte 2 is vendor specific */
5668
5669         if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
5670                 (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
5671                 dd_dev_info(dd,
5672                         "%s: Cable RX channel 1/2 power too high\n",
5673                         __func__);
5674
5675         if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
5676                 (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
5677                 dd_dev_info(dd,
5678                         "%s: Cable RX channel 1/2 power too low\n",
5679                         __func__);
5680
5681         if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
5682                 (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
5683                 dd_dev_info(dd,
5684                         "%s: Cable RX channel 3/4 power too high\n",
5685                         __func__);
5686
5687         if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
5688                 (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
5689                 dd_dev_info(dd,
5690                         "%s: Cable RX channel 3/4 power too low\n",
5691                         __func__);
5692
5693         if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
5694                 (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
5695                 dd_dev_info(dd,
5696                         "%s: Cable TX channel 1/2 bias too high\n",
5697                         __func__);
5698
5699         if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
5700                 (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
5701                 dd_dev_info(dd,
5702                         "%s: Cable TX channel 1/2 bias too low\n",
5703                         __func__);
5704
5705         if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
5706                 (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
5707                 dd_dev_info(dd,
5708                         "%s: Cable TX channel 3/4 bias too high\n",
5709                         __func__);
5710
5711         if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
5712                 (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
5713                 dd_dev_info(dd,
5714                         "%s: Cable TX channel 3/4 bias too low\n",
5715                         __func__);
5716
5717         if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
5718                 (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
5719                 dd_dev_info(dd,
5720                         "%s: Cable TX channel 1/2 power too high\n",
5721                         __func__);
5722
5723         if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
5724                 (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
5725                 dd_dev_info(dd,
5726                         "%s: Cable TX channel 1/2 power too low\n",
5727                         __func__);
5728
5729         if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
5730                 (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
5731                 dd_dev_info(dd,
5732                         "%s: Cable TX channel 3/4 power too high\n",
5733                         __func__);
5734
5735         if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
5736                 (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
5737                 dd_dev_info(dd,
5738                         "%s: Cable TX channel 3/4 power too low\n",
5739                         __func__);
5740
5741         /* Bytes 9-10 and 11-12 are reserved */
5742         /* Bytes 13-15 are vendor specific */
5743
5744         return 0;
5745 }
5746
5747 static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
5748 {
5749         refresh_qsfp_cache(ppd, &ppd->qsfp_info);
5750
5751         return 0;
5752 }
5753
5754 static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
5755 {
5756         struct hfi1_devdata *dd = ppd->dd;
5757         u8 qsfp_interrupt_status = 0;
5758
5759         if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
5760                 != 1) {
5761                 dd_dev_info(dd,
5762                         "%s: Failed to read status of QSFP module\n",
5763                         __func__);
5764                 return -EIO;
5765         }
5766
5767         /* We don't care about alarms & warnings with a non-functional INT_N */
5768         if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
5769                 do_pre_lni_host_behaviors(ppd);
5770
5771         return 0;
5772 }
5773
5774 /* This routine will only be scheduled if the QSFP module is present */
5775 static void qsfp_event(struct work_struct *work)
5776 {
5777         struct qsfp_data *qd;
5778         struct hfi1_pportdata *ppd;
5779         struct hfi1_devdata *dd;
5780
5781         qd = container_of(work, struct qsfp_data, qsfp_work);
5782         ppd = qd->ppd;
5783         dd = ppd->dd;
5784
5785         /* Sanity check */
5786         if (!qsfp_mod_present(ppd))
5787                 return;
5788
5789         /*
5790          * Turn DC back on after the cable has been
5791          * re-inserted. Up until now, the DC has been in
5792          * reset to save power.
5793          */
5794         dc_start(dd);
5795
5796         if (qd->cache_refresh_required) {
5797                 msleep(3000);
5798                 reset_qsfp(ppd);
5799
5800                 /* Check for QSFP interrupt after t_init (SFF 8679)
5801                  * + extra
5802                  */
5803                 msleep(3000);
5804                 if (!qd->qsfp_interrupt_functional) {
5805                         if (do_qsfp_intr_fallback(ppd) < 0)
5806                                 dd_dev_info(dd, "%s: QSFP fallback failed\n",
5807                                         __func__);
5808                         ppd->driver_link_ready = 1;
5809                         start_link(ppd);
5810                 }
5811         }
5812
5813         if (qd->check_interrupt_flags) {
5814                 u8 qsfp_interrupt_status[16] = {0,};
5815
5816                 if (qsfp_read(ppd, dd->hfi1_id, 6,
5817                               &qsfp_interrupt_status[0], 16) != 16) {
5818                         dd_dev_info(dd,
5819                                 "%s: Failed to read status of QSFP module\n",
5820                                 __func__);
5821                 } else {
5822                         unsigned long flags;
5823                         u8 data_status;
5824
5825                         spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
5826                         ppd->qsfp_info.check_interrupt_flags = 0;
5827                         spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
5828                                                                 flags);
5829
5830                         if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1)
5831                                  != 1) {
5832                                 dd_dev_info(dd,
5833                                 "%s: Failed to read status of QSFP module\n",
5834                                         __func__);
5835                         }
5836                         if (!(data_status & QSFP_DATA_NOT_READY)) {
5837                                 do_pre_lni_host_behaviors(ppd);
5838                                 start_link(ppd);
5839                         } else
5840                                 handle_qsfp_error_conditions(ppd,
5841                                                 qsfp_interrupt_status);
5842                 }
5843         }
5844 }
5845
5846 void init_qsfp(struct hfi1_pportdata *ppd)
5847 {
5848         struct hfi1_devdata *dd = ppd->dd;
5849         u64 qsfp_mask;
5850
5851         if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
5852                         ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5853                 ppd->driver_link_ready = 1;
5854                 return;
5855         }
5856
5857         ppd->qsfp_info.ppd = ppd;
5858         INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
5859
5860         qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
5861         /* Clear current status to avoid spurious interrupts */
5862         write_csr(dd,
5863                         dd->hfi1_id ?
5864                                 ASIC_QSFP2_CLEAR :
5865                                 ASIC_QSFP1_CLEAR,
5866                 qsfp_mask);
5867
5868         /* Handle active low nature of INT_N and MODPRST_N pins */
5869         if (qsfp_mod_present(ppd))
5870                 qsfp_mask &= ~(u64)QSFP_HFI0_MODPRST_N;
5871         write_csr(dd,
5872                   dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
5873                   qsfp_mask);
5874
5875         /* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
5876         qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
5877         write_csr(dd,
5878                 dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
5879                 qsfp_mask);
5880
5881         if (qsfp_mod_present(ppd)) {
5882                 msleep(3000);
5883                 reset_qsfp(ppd);
5884
5885                 /* Check for QSFP interrupt after t_init (SFF 8679)
5886                  * + extra
5887                  */
5888                 msleep(3000);
5889                 if (!ppd->qsfp_info.qsfp_interrupt_functional) {
5890                         if (do_qsfp_intr_fallback(ppd) < 0)
5891                                 dd_dev_info(dd,
5892                                         "%s: QSFP fallback failed\n",
5893                                         __func__);
5894                         ppd->driver_link_ready = 1;
5895                 }
5896         }
5897 }
5898
5899 int bringup_serdes(struct hfi1_pportdata *ppd)
5900 {
5901         struct hfi1_devdata *dd = ppd->dd;
5902         u64 guid;
5903         int ret;
5904
5905         if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
5906                 add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);
5907
5908         guid = ppd->guid;
5909         if (!guid) {
5910                 if (dd->base_guid)
5911                         guid = dd->base_guid + ppd->port - 1;
5912                 ppd->guid = guid;
5913         }
5914
5915         /* the link defaults to enabled */
5916         ppd->link_enabled = 1;
5917         /* Set linkinit_reason on power up per OPA spec */
5918         ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;
5919
5920         if (loopback) {
5921                 ret = init_loopback(dd);
5922                 if (ret < 0)
5923                         return ret;
5924         }
5925
5926         return start_link(ppd);
5927 }
5928
5929 void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
5930 {
5931         struct hfi1_devdata *dd = ppd->dd;
5932
5933         /*
5934          * Shut down the link and keep it down.  First clear the flag that
5935          * allows the driver to bring the link up (driver_link_ready).
5936          * Then make sure the link is not automatically restarted
5937          * (link_enabled).  Cancel any pending restart.  And finally
5938          * go offline.
5939          */
5940         ppd->driver_link_ready = 0;
5941         ppd->link_enabled = 0;
5942
5943         set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
5944           OPA_LINKDOWN_REASON_SMA_DISABLED);
5945         set_link_state(ppd, HLS_DN_OFFLINE);
5946
5947         /* disable the port */
5948         clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
5949 }
5950
5951 static inline int init_cpu_counters(struct hfi1_devdata *dd)
5952 {
5953         struct hfi1_pportdata *ppd;
5954         int i;
5955
5956         ppd = (struct hfi1_pportdata *)(dd + 1);
5957         for (i = 0; i < dd->num_pports; i++, ppd++) {
5958                 ppd->ibport_data.rc_acks = NULL;
5959                 ppd->ibport_data.rc_qacks = NULL;
5960                 ppd->ibport_data.rc_acks = alloc_percpu(u64);
5961                 ppd->ibport_data.rc_qacks = alloc_percpu(u64);
5962                 ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
5963                 if ((ppd->ibport_data.rc_acks == NULL) ||
5964                     (ppd->ibport_data.rc_delayed_comp == NULL) ||
5965                     (ppd->ibport_data.rc_qacks == NULL))
5966                         return -ENOMEM;
5967         }
5968
5969         return 0;
5970 }
5971
5972 static const char * const pt_names[] = {
5973         "expected",
5974         "eager",
5975         "invalid"
5976 };
5977
5978 static const char *pt_name(u32 type)
5979 {
5980         return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
5981 }
5982
5983 /*
5984  * index is the index into the receive array
5985  */
5986 void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
5987                   u32 type, unsigned long pa, u16 order)
5988 {
5989         u64 reg;
5990         void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
5991                               (dd->kregbase + RCV_ARRAY));
5992
5993         if (!(dd->flags & HFI1_PRESENT))
5994                 goto done;
5995
5996         if (type == PT_INVALID) {
5997                 pa = 0;
5998         } else if (type > PT_INVALID) {
5999                 dd_dev_err(dd,
6000                         "unexpected receive array type %u for index %u, not handled\n",
6001                         type, index);
6002                 goto done;
6003         }
6004
6005         hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
6006                   pt_name(type), index, pa, (unsigned long)order);
6007
6008 #define RT_ADDR_SHIFT 12        /* 4KB kernel address boundary */
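        /*
         * Each receive array entry packs the write-enable bit, the buffer
         * size (order) and the 4KB-aligned physical buffer address into a
         * single 64-bit register value, written below.
         */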
6009         reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
6010                 | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
6011                 | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
6012                                         << RCV_ARRAY_RT_ADDR_SHIFT;
6013         writeq(reg, base + (index * 8));
6014
6015         if (type == PT_EAGER)
6016                 /*
6017                  * Eager entries are written one-by-one so we have to push them
6018                  * after we write the entry.
6019                  */
6020                 flush_wc();
6021 done:
6022         return;
6023 }
6024
6025 void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
6026 {
6027         struct hfi1_devdata *dd = rcd->dd;
6028         u32 i;
6029
6030         /* this could be optimized */
6031         for (i = rcd->eager_base; i < rcd->eager_base +
6032                      rcd->egrbufs.alloced; i++)
6033                 hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
6034
6035         for (i = rcd->expected_base;
6036                         i < rcd->expected_base + rcd->expected_count; i++)
6037                 hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
6038 }
6039
6040 int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd,
6041                         struct hfi1_ctxt_info *kinfo)
6042 {
6043         kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) |
6044                 HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U);
6045         return 0;
6046 }
6047
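/*
 * Locate the start of the packet header within a receive header queue
 * entry: step back from the RHF to the beginning of the entry
 * (dd->rhf_offset dwords), then forward by the header offset encoded in
 * the RHF itself.
 */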
6048 struct hfi1_message_header *hfi1_get_msgheader(
6049                                 struct hfi1_devdata *dd, __le32 *rhf_addr)
6050 {
6051         u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
6052
6053         return (struct hfi1_message_header *)
6054                 (rhf_addr - dd->rhf_offset + offset);
6055 }
6056
6057 static const char * const ib_cfg_name_strings[] = {
6058         "HFI1_IB_CFG_LIDLMC",
6059         "HFI1_IB_CFG_LWID_DG_ENB",
6060         "HFI1_IB_CFG_LWID_ENB",
6061         "HFI1_IB_CFG_LWID",
6062         "HFI1_IB_CFG_SPD_ENB",
6063         "HFI1_IB_CFG_SPD",
6064         "HFI1_IB_CFG_RXPOL_ENB",
6065         "HFI1_IB_CFG_LREV_ENB",
6066         "HFI1_IB_CFG_LINKLATENCY",
6067         "HFI1_IB_CFG_HRTBT",
6068         "HFI1_IB_CFG_OP_VLS",
6069         "HFI1_IB_CFG_VL_HIGH_CAP",
6070         "HFI1_IB_CFG_VL_LOW_CAP",
6071         "HFI1_IB_CFG_OVERRUN_THRESH",
6072         "HFI1_IB_CFG_PHYERR_THRESH",
6073         "HFI1_IB_CFG_LINKDEFAULT",
6074         "HFI1_IB_CFG_PKEYS",
6075         "HFI1_IB_CFG_MTU",
6076         "HFI1_IB_CFG_LSTATE",
6077         "HFI1_IB_CFG_VL_HIGH_LIMIT",
6078         "HFI1_IB_CFG_PMA_TICKS",
6079         "HFI1_IB_CFG_PORT"
6080 };
6081
6082 static const char *ib_cfg_name(int which)
6083 {
6084         if (which < 0 || which >= ARRAY_SIZE(ib_cfg_name_strings))
6085                 return "invalid";
6086         return ib_cfg_name_strings[which];
6087 }
6088
6089 int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
6090 {
6091         struct hfi1_devdata *dd = ppd->dd;
6092         int val = 0;
6093
6094         switch (which) {
6095         case HFI1_IB_CFG_LWID_ENB: /* allowed Link-width */
6096                 val = ppd->link_width_enabled;
6097                 break;
6098         case HFI1_IB_CFG_LWID: /* currently active Link-width */
6099                 val = ppd->link_width_active;
6100                 break;
6101         case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6102                 val = ppd->link_speed_enabled;
6103                 break;
6104         case HFI1_IB_CFG_SPD: /* current Link speed */
6105                 val = ppd->link_speed_active;
6106                 break;
6107
6108         case HFI1_IB_CFG_RXPOL_ENB: /* Auto-RX-polarity enable */
6109         case HFI1_IB_CFG_LREV_ENB: /* Auto-Lane-reversal enable */
6110         case HFI1_IB_CFG_LINKLATENCY:
6111                 goto unimplemented;
6112
6113         case HFI1_IB_CFG_OP_VLS:
6114                 val = ppd->vls_operational;
6115                 break;
6116         case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */
6117                 val = VL_ARB_HIGH_PRIO_TABLE_SIZE;
6118                 break;
6119         case HFI1_IB_CFG_VL_LOW_CAP: /* VL arb low priority table size */
6120                 val = VL_ARB_LOW_PRIO_TABLE_SIZE;
6121                 break;
6122         case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6123                 val = ppd->overrun_threshold;
6124                 break;
6125         case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6126                 val = ppd->phy_error_threshold;
6127                 break;
6128         case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6129                 val = dd->link_default;
6130                 break;
6131
6132         case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */
6133         case HFI1_IB_CFG_PMA_TICKS:
6134         default:
6135 unimplemented:
6136                 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6137                         dd_dev_info(
6138                                 dd,
6139                                 "%s: which %s: not implemented\n",
6140                                 __func__,
6141                                 ib_cfg_name(which));
6142                 break;
6143         }
6144
6145         return val;
6146 }
6147
6148 /*
6149  * The largest MAD packet size.
6150  */
6151 #define MAX_MAD_PACKET 2048
6152
6153 /*
6154  * Return the maximum header bytes that can go on the _wire_
6155  * for this device. This count includes the ICRC which is
6156  * not part of the packet held in memory but it is appended
6157  * not part of the packet held in memory but is appended
6158  * This is dependent on the device's receive header entry size.
6159  * HFI allows this to be set per-receive context, but the
6160  * driver presently enforces a global value.
6161  */
6162 u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
6163 {
6164         /*
6165          * The maximum non-payload (MTU) bytes in LRH.PktLen are
6166          * the Receive Header Entry Size minus the PBC (or RHF) size
6167          * plus one DW for the ICRC appended by HW.
6168          *
6169          * dd->rcd[0].rcvhdrqentsize is in DW.
6170          * We use rcd[0] as all contexts will have the same value. Also,
6171          * the first kernel context would have been allocated by now so
6172          * we are guaranteed a valid value.
6173          */
6174         return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
6175 }
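
/*
 * Illustrative example, assuming a receive header entry size of 32 DW:
 * the maximum wire header is (32 - 2 + 1) * 4 = 124 bytes.
 */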
6176
6177 /*
6178  * Set Send Length
6179  * @ppd - per port data
6180  *
6181  * Set the MTU by limiting how many DWs may be sent.  The SendLenCheck*
6182  * registers compare against LRH.PktLen, so use the max bytes included
6183  * in the LRH.
6184  *
6185  * This routine changes all VL values except VL15, which it maintains at
6186  * the same value.
6187  */
6188 static void set_send_length(struct hfi1_pportdata *ppd)
6189 {
6190         struct hfi1_devdata *dd = ppd->dd;
6191         u32 max_hb = lrh_max_header_bytes(dd), maxvlmtu = 0, dcmtu;
6192         u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2)
6193                               & SEND_LEN_CHECK1_LEN_VL15_MASK) <<
6194                 SEND_LEN_CHECK1_LEN_VL15_SHIFT;
6195         int i;
6196
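        /*
         * SEND_LEN_CHECK0 (len1) holds the limits for VL0-VL3 and
         * SEND_LEN_CHECK1 (len2) holds VL4-VL7 plus VL15, which was
         * pre-loaded above.
         */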
6197         for (i = 0; i < ppd->vls_supported; i++) {
6198                 if (dd->vld[i].mtu > maxvlmtu)
6199                         maxvlmtu = dd->vld[i].mtu;
6200                 if (i <= 3)
6201                         len1 |= (((dd->vld[i].mtu + max_hb) >> 2)
6202                                  & SEND_LEN_CHECK0_LEN_VL0_MASK) <<
6203                                 ((i % 4) * SEND_LEN_CHECK0_LEN_VL1_SHIFT);
6204                 else
6205                         len2 |= (((dd->vld[i].mtu + max_hb) >> 2)
6206                                  & SEND_LEN_CHECK1_LEN_VL4_MASK) <<
6207                                 ((i % 4) * SEND_LEN_CHECK1_LEN_VL5_SHIFT);
6208         }
6209         write_csr(dd, SEND_LEN_CHECK0, len1);
6210         write_csr(dd, SEND_LEN_CHECK1, len2);
6211         /* adjust kernel credit return thresholds based on new MTUs */
6212         /* all kernel receive contexts have the same hdrqentsize */
6213         for (i = 0; i < ppd->vls_supported; i++) {
6214                 sc_set_cr_threshold(dd->vld[i].sc,
6215                         sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu,
6216                                 dd->rcd[0]->rcvhdrqentsize));
6217         }
6218         sc_set_cr_threshold(dd->vld[15].sc,
6219                 sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu,
6220                         dd->rcd[0]->rcvhdrqentsize));
6221
6222         /* Adjust maximum MTU for the port in DC */
6223         dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
6224                 (ilog2(maxvlmtu >> 8) + 1);
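        /*
         * For example, an assumed maxvlmtu of 4096 gives
         * dcmtu = ilog2(4096 >> 8) + 1 = ilog2(16) + 1 = 5, while a
         * 10240-byte MTU uses the dedicated DCC_CFG_PORT_MTU_CAP_10240
         * encoding.
         */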
6225         len1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG);
6226         len1 &= ~DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK;
6227         len1 |= ((u64)dcmtu & DCC_CFG_PORT_CONFIG_MTU_CAP_MASK) <<
6228                 DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT;
6229         write_csr(ppd->dd, DCC_CFG_PORT_CONFIG, len1);
6230 }
6231
6232 static void set_lidlmc(struct hfi1_pportdata *ppd)
6233 {
6234         int i;
6235         u64 sreg = 0;
6236         struct hfi1_devdata *dd = ppd->dd;
6237         u32 mask = ~((1U << ppd->lmc) - 1);
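        /*
         * e.g., an assumed lmc of 2 gives mask 0xfffffffc: the low two LID
         * bits become don't-care in the DLID/SLID checks programmed below.
         */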
6238         u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
6239
6240         if (dd->hfi1_snoop.mode_flag)
6241                 dd_dev_info(dd, "Set lid/lmc while snooping");
6242
6243         c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
6244                 | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
6245         c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
6246                         << DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT)|
6247               ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
6248                         << DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
6249         write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1);
6250
6251         /*
6252          * Iterate over all the send contexts and set their SLID check
6253          */
6254         sreg = ((mask & SEND_CTXT_CHECK_SLID_MASK_MASK) <<
6255                         SEND_CTXT_CHECK_SLID_MASK_SHIFT) |
6256                (((ppd->lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
6257                         SEND_CTXT_CHECK_SLID_VALUE_SHIFT);
6258
6259         for (i = 0; i < dd->chip_send_contexts; i++) {
6260                 hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x",
6261                           i, (u32)sreg);
6262                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg);
6263         }
6264
6265         /* Now we have to do the same thing for the sdma engines */
6266         sdma_update_lmc(dd, mask, ppd->lid);
6267 }
6268
6269 static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
6270 {
6271         unsigned long timeout;
6272         u32 curr_state;
6273
6274         timeout = jiffies + msecs_to_jiffies(msecs);
6275         while (1) {
6276                 curr_state = read_physical_state(dd);
6277                 if (curr_state == state)
6278                         break;
6279                 if (time_after(jiffies, timeout)) {
6280                         dd_dev_err(dd,
6281                                 "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
6282                                 state, curr_state);
6283                         return -ETIMEDOUT;
6284                 }
6285                 usleep_range(1950, 2050); /* sleep 2ms-ish */
6286         }
6287
6288         return 0;
6289 }
6290
6291 /*
6292  * Helper for set_link_state().  Do not call except from that routine.
6293  * Expects ppd->hls_mutex to be held.
6294  *
6295  * @rem_reason value to be sent to the neighbor
6296  *
6297  * LinkDownReasons only set if transition succeeds.
6298  */
6299 static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
6300 {
6301         struct hfi1_devdata *dd = ppd->dd;
6302         u32 pstate, previous_state;
6303         u32 last_local_state;
6304         u32 last_remote_state;
6305         int ret;
6306         int do_transition;
6307         int do_wait;
6308
6309         previous_state = ppd->host_link_state;
6310         ppd->host_link_state = HLS_GOING_OFFLINE;
6311         pstate = read_physical_state(dd);
6312         if (pstate == PLS_OFFLINE) {
6313                 do_transition = 0;      /* in right state */
6314                 do_wait = 0;            /* ...no need to wait */
6315         } else if ((pstate & 0xff) == PLS_OFFLINE) {
6316                 do_transition = 0;      /* in an offline transient state */
6317                 do_wait = 1;            /* ...wait for it to settle */
6318         } else {
6319                 do_transition = 1;      /* need to move to offline */
6320                 do_wait = 1;            /* ...will need to wait */
6321         }
6322
6323         if (do_transition) {
6324                 ret = set_physical_link_state(dd,
6325                         PLS_OFFLINE | (rem_reason << 8));
6326
6327                 if (ret != HCMD_SUCCESS) {
6328                         dd_dev_err(dd,
6329                                 "Failed to transition to Offline link state, return %d\n",
6330                                 ret);
6331                         return -EINVAL;
6332                 }
6333                 if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
6334                         ppd->offline_disabled_reason =
6335                         OPA_LINKDOWN_REASON_TRANSIENT;
6336         }
6337
6338         if (do_wait) {
6339                 /* it can take a while for the link to go down */
6340                 ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000);
6341                 if (ret < 0)
6342                         return ret;
6343         }
6344
6345         /* make sure the logical state is also down */
6346         wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
6347
6348         /*
6349          * Now in charge of LCB - must be after the physical state is
6350          * Offline.Quiet and before host_link_state is changed.
6351          */
6352         set_host_lcb_access(dd);
6353         write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
6354         ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
6355
6356         /*
6357          * The LNI has a mandatory wait time after the physical state
6358          * moves to Offline.Quiet.  The wait time may be different
6359          * depending on how the link went down.  The 8051 firmware
6360          * will observe the needed wait time and only move to ready
6361          * when that is completed.  The largest of the quiet timeouts
6362          * is 2.5s, so wait that long and then a bit more.
6363          */
6364         ret = wait_fm_ready(dd, 3000);
6365         if (ret) {
6366                 dd_dev_err(dd,
6367                         "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
6368                 /* state is really offline, so make it so */
6369                 ppd->host_link_state = HLS_DN_OFFLINE;
6370                 return ret;
6371         }
6372
6373         /*
6374          * The state is now offline and the 8051 is ready to accept host
6375          * requests.
6376          *      - change our state
6377          *      - notify others if we were previously in a linkup state
6378          */
6379         ppd->host_link_state = HLS_DN_OFFLINE;
6380         if (previous_state & HLS_UP) {
6381                 /* went down while link was up */
6382                 handle_linkup_change(dd, 0);
6383         } else if (previous_state
6384                         & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
6385                 /* went down while attempting link up */
6386                 /* byte 1 of last_*_state is the failure reason */
6387                 read_last_local_state(dd, &last_local_state);
6388                 read_last_remote_state(dd, &last_remote_state);
6389                 dd_dev_err(dd,
6390                         "LNI failure last states: local 0x%08x, remote 0x%08x\n",
6391                         last_local_state, last_remote_state);
6392         }
6393
6394         /* the active link width (downgrade) is 0 on link down */
6395         ppd->link_width_active = 0;
6396         ppd->link_width_downgrade_tx_active = 0;
6397         ppd->link_width_downgrade_rx_active = 0;
6398         ppd->current_egress_rate = 0;
6399         return 0;
6400 }
6401
6402 /* return the link state name */
6403 static const char *link_state_name(u32 state)
6404 {
6405         const char *name;
6406         int n = ilog2(state);
6407         static const char * const names[] = {
6408                 [__HLS_UP_INIT_BP]       = "INIT",
6409                 [__HLS_UP_ARMED_BP]      = "ARMED",
6410                 [__HLS_UP_ACTIVE_BP]     = "ACTIVE",
6411                 [__HLS_DN_DOWNDEF_BP]    = "DOWNDEF",
6412                 [__HLS_DN_POLL_BP]       = "POLL",
6413                 [__HLS_DN_DISABLE_BP]    = "DISABLE",
6414                 [__HLS_DN_OFFLINE_BP]    = "OFFLINE",
6415                 [__HLS_VERIFY_CAP_BP]    = "VERIFY_CAP",
6416                 [__HLS_GOING_UP_BP]      = "GOING_UP",
6417                 [__HLS_GOING_OFFLINE_BP] = "GOING_OFFLINE",
6418                 [__HLS_LINK_COOLDOWN_BP] = "LINK_COOLDOWN"
6419         };
6420
6421         name = n < ARRAY_SIZE(names) ? names[n] : NULL;
6422         return name ? name : "unknown";
6423 }
6424
6425 /* return the link state reason name */
6426 static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state)
6427 {
6428         if (state == HLS_UP_INIT) {
6429                 switch (ppd->linkinit_reason) {
6430                 case OPA_LINKINIT_REASON_LINKUP:
6431                         return "(LINKUP)";
6432                 case OPA_LINKINIT_REASON_FLAPPING:
6433                         return "(FLAPPING)";
6434                 case OPA_LINKINIT_OUTSIDE_POLICY:
6435                         return "(OUTSIDE_POLICY)";
6436                 case OPA_LINKINIT_QUARANTINED:
6437                         return "(QUARANTINED)";
6438                 case OPA_LINKINIT_INSUFIC_CAPABILITY:
6439                         return "(INSUFIC_CAPABILITY)";
6440                 default:
6441                         break;
6442                 }
6443         }
6444         return "";
6445 }
6446
6447 /*
6448  * driver_physical_state - convert the driver's notion of a port's
6449  * state (an HLS_*) into a physical state (a {IB,OPA}_PORTPHYSSTATE_*).
6450  * Return -1 (converted to a u32) to indicate error.
6451  */
6452 u32 driver_physical_state(struct hfi1_pportdata *ppd)
6453 {
6454         switch (ppd->host_link_state) {
6455         case HLS_UP_INIT:
6456         case HLS_UP_ARMED:
6457         case HLS_UP_ACTIVE:
6458                 return IB_PORTPHYSSTATE_LINKUP;
6459         case HLS_DN_POLL:
6460                 return IB_PORTPHYSSTATE_POLLING;
6461         case HLS_DN_DISABLE:
6462                 return IB_PORTPHYSSTATE_DISABLED;
6463         case HLS_DN_OFFLINE:
6464                 return OPA_PORTPHYSSTATE_OFFLINE;
6465         case HLS_VERIFY_CAP:
6466                 return IB_PORTPHYSSTATE_POLLING;
6467         case HLS_GOING_UP:
6468                 return IB_PORTPHYSSTATE_POLLING;
6469         case HLS_GOING_OFFLINE:
6470                 return OPA_PORTPHYSSTATE_OFFLINE;
6471         case HLS_LINK_COOLDOWN:
6472                 return OPA_PORTPHYSSTATE_OFFLINE;
6473         case HLS_DN_DOWNDEF:
6474         default:
6475                 dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6476                            ppd->host_link_state);
6477                 return -1;
6478         }
6479 }
6480
6481 /*
6482  * driver_logical_state - convert the driver's notion of a port's
6483  * state (an HLS_*) into a logical state (a IB_PORT_*). Return -1
6484  * (converted to a u32) to indicate error.
6485  */
6486 u32 driver_logical_state(struct hfi1_pportdata *ppd)
6487 {
6488         if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP))
6489                 return IB_PORT_DOWN;
6490
6491         switch (ppd->host_link_state & HLS_UP) {
6492         case HLS_UP_INIT:
6493                 return IB_PORT_INIT;
6494         case HLS_UP_ARMED:
6495                 return IB_PORT_ARMED;
6496         case HLS_UP_ACTIVE:
6497                 return IB_PORT_ACTIVE;
6498         default:
6499                 dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6500                            ppd->host_link_state);
6501                 return -1;
6502         }
6503 }
6504
6505 void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
6506                           u8 neigh_reason, u8 rem_reason)
6507 {
6508         if (ppd->local_link_down_reason.latest == 0 &&
6509             ppd->neigh_link_down_reason.latest == 0) {
6510                 ppd->local_link_down_reason.latest = lcl_reason;
6511                 ppd->neigh_link_down_reason.latest = neigh_reason;
6512                 ppd->remote_link_down_reason = rem_reason;
6513         }
6514 }
6515
6516 /*
6517  * Change the physical and/or logical link state.
6518  *
6519  * Do not call this routine while inside an interrupt.  It contains
6520  * calls to routines that can take multiple seconds to finish.
6521  *
6522  * Returns 0 on success, -errno on failure.
6523  */
6524 int set_link_state(struct hfi1_pportdata *ppd, u32 state)
6525 {
6526         struct hfi1_devdata *dd = ppd->dd;
6527         struct ib_event event = {.device = NULL};
6528         int ret1, ret = 0;
6529         int was_up, is_down;
6530         int orig_new_state, poll_bounce;
6531
6532         mutex_lock(&ppd->hls_lock);
6533
6534         orig_new_state = state;
6535         if (state == HLS_DN_DOWNDEF)
6536                 state = dd->link_default;
6537
6538         /* interpret poll -> poll as a link bounce */
6539         poll_bounce = ppd->host_link_state == HLS_DN_POLL
6540                                 && state == HLS_DN_POLL;
6541
6542         dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
6543                 link_state_name(ppd->host_link_state),
6544                 link_state_name(orig_new_state),
6545                 poll_bounce ? "(bounce) " : "",
6546                 link_state_reason_name(ppd, state));
6547
6548         was_up = !!(ppd->host_link_state & HLS_UP);
6549
6550         /*
6551          * If we're going to a (HLS_*) link state that implies the logical
6552          * link state is neither of (IB_PORT_ARMED, IB_PORT_ACTIVE), then
6553          * reset is_sm_config_started to 0.
6554          */
6555         if (!(state & (HLS_UP_ARMED | HLS_UP_ACTIVE)))
6556                 ppd->is_sm_config_started = 0;
6557
6558         /*
6559          * Do nothing if the states match.  Let a poll to poll link bounce
6560          * go through.
6561          */
6562         if (ppd->host_link_state == state && !poll_bounce)
6563                 goto done;
6564
6565         switch (state) {
6566         case HLS_UP_INIT:
6567                 if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
6568                             || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
6569                         /*
6570                          * Quick link up jumps from polling to here.
6571                          *
6572                          * Whether in normal or loopback mode, the
6573                          * simulator jumps from polling to link up.
6574                          * Accept that here.
6575                          */
6576                         /* OK */;
6577                 } else if (ppd->host_link_state != HLS_GOING_UP) {
6578                         goto unexpected;
6579                 }
6580
6581                 ppd->host_link_state = HLS_UP_INIT;
6582                 ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
6583                 if (ret) {
6584                         /* logical state didn't change, stay at going_up */
6585                         ppd->host_link_state = HLS_GOING_UP;
6586                         dd_dev_err(dd,
6587                                 "%s: logical state did not change to INIT\n",
6588                                 __func__);
6589                 } else {
6590                         /* clear old transient LINKINIT_REASON code */
6591                         if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
6592                                 ppd->linkinit_reason =
6593                                         OPA_LINKINIT_REASON_LINKUP;
6594
6595                         /* enable the port */
6596                         add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
6597
6598                         handle_linkup_change(dd, 1);
6599                 }
6600                 break;
6601         case HLS_UP_ARMED:
6602                 if (ppd->host_link_state != HLS_UP_INIT)
6603                         goto unexpected;
6604
6605                 ppd->host_link_state = HLS_UP_ARMED;
6606                 set_logical_state(dd, LSTATE_ARMED);
6607                 ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000);
6608                 if (ret) {
6609                         /* logical state didn't change, stay at init */
6610                         ppd->host_link_state = HLS_UP_INIT;
6611                         dd_dev_err(dd,
6612                                 "%s: logical state did not change to ARMED\n",
6613                                 __func__);
6614                 }
6615                 /*
6616                  * The simulator does not currently implement SMA messages,
6617                  * so neighbor_normal is not set.  Set it here when we first
6618                  * move to Armed.
6619                  */
6620                 if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
6621                         ppd->neighbor_normal = 1;
6622                 break;
6623         case HLS_UP_ACTIVE:
6624                 if (ppd->host_link_state != HLS_UP_ARMED)
6625                         goto unexpected;
6626
6627                 ppd->host_link_state = HLS_UP_ACTIVE;
6628                 set_logical_state(dd, LSTATE_ACTIVE);
6629                 ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000);
6630                 if (ret) {
6631                         /* logical state didn't change, stay at armed */
6632                         ppd->host_link_state = HLS_UP_ARMED;
6633                         dd_dev_err(dd,
6634                                 "%s: logical state did not change to ACTIVE\n",
6635                                 __func__);
6636                 } else {
6637
6638                         /* tell all engines to go running */
6639                         sdma_all_running(dd);
6640
6641                         /* Signal the IB layer that the port has gone active */
6642                         event.device = &dd->verbs_dev.ibdev;
6643                         event.element.port_num = ppd->port;
6644                         event.event = IB_EVENT_PORT_ACTIVE;
6645                 }
6646                 break;
6647         case HLS_DN_POLL:
6648                 if ((ppd->host_link_state == HLS_DN_DISABLE ||
6649                      ppd->host_link_state == HLS_DN_OFFLINE) &&
6650                     dd->dc_shutdown)
6651                         dc_start(dd);
6652                 /* Hand LED control to the DC */
6653                 write_csr(dd, DCC_CFG_LED_CNTRL, 0);
6654
6655                 if (ppd->host_link_state != HLS_DN_OFFLINE) {
6656                         u8 tmp = ppd->link_enabled;
6657
6658                         ret = goto_offline(ppd, ppd->remote_link_down_reason);
6659                         if (ret) {
6660                                 ppd->link_enabled = tmp;
6661                                 break;
6662                         }
6663                         ppd->remote_link_down_reason = 0;
6664
6665                         if (ppd->driver_link_ready)
6666                                 ppd->link_enabled = 1;
6667                 }
6668
6669                 ret = set_local_link_attributes(ppd);
6670                 if (ret)
6671                         break;
6672
6673                 ppd->port_error_action = 0;
6674                 ppd->host_link_state = HLS_DN_POLL;
6675
6676                 if (quick_linkup) {
6677                         /* quick linkup does not go into polling */
6678                         ret = do_quick_linkup(dd);
6679                 } else {
6680                         ret1 = set_physical_link_state(dd, PLS_POLLING);
6681                         if (ret1 != HCMD_SUCCESS) {
6682                                 dd_dev_err(dd,
6683                                         "Failed to transition to Polling link state, return 0x%x\n",
6684                                         ret1);
6685                                 ret = -EINVAL;
6686                         }
6687                 }
6688                 ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
6689                 /*
6690                  * If an error occurred above, go back to offline.  The
6691                  * caller may reschedule another attempt.
6692                  */
6693                 if (ret)
6694                         goto_offline(ppd, 0);
6695                 break;
6696         case HLS_DN_DISABLE:
6697                 /* link is disabled */
6698                 ppd->link_enabled = 0;
6699
6700                 /* allow any state to transition to disabled */
6701
6702                 /* must transition to offline first */
6703                 if (ppd->host_link_state != HLS_DN_OFFLINE) {
6704                         ret = goto_offline(ppd, ppd->remote_link_down_reason);
6705                         if (ret)
6706                                 break;
6707                         ppd->remote_link_down_reason = 0;
6708                 }
6709
6710                 ret1 = set_physical_link_state(dd, PLS_DISABLED);
6711                 if (ret1 != HCMD_SUCCESS) {
6712                         dd_dev_err(dd,
6713                                 "Failed to transition to Disabled link state, return 0x%x\n",
6714                                 ret1);
6715                         ret = -EINVAL;
6716                         break;
6717                 }
6718                 ppd->host_link_state = HLS_DN_DISABLE;
6719                 dc_shutdown(dd);
6720                 break;
6721         case HLS_DN_OFFLINE:
6722                 if (ppd->host_link_state == HLS_DN_DISABLE)
6723                         dc_start(dd);
6724
6725                 /* allow any state to transition to offline */
6726                 ret = goto_offline(ppd, ppd->remote_link_down_reason);
6727                 if (!ret)
6728                         ppd->remote_link_down_reason = 0;
6729                 break;
6730         case HLS_VERIFY_CAP:
6731                 if (ppd->host_link_state != HLS_DN_POLL)
6732                         goto unexpected;
6733                 ppd->host_link_state = HLS_VERIFY_CAP;
6734                 break;
6735         case HLS_GOING_UP:
6736                 if (ppd->host_link_state != HLS_VERIFY_CAP)
6737                         goto unexpected;
6738
6739                 ret1 = set_physical_link_state(dd, PLS_LINKUP);
6740                 if (ret1 != HCMD_SUCCESS) {
6741                         dd_dev_err(dd,
6742                                 "Failed to transition to link up state, return 0x%x\n",
6743                                 ret1);
6744                         ret = -EINVAL;
6745                         break;
6746                 }
6747                 ppd->host_link_state = HLS_GOING_UP;
6748                 break;
6749
6750         case HLS_GOING_OFFLINE:         /* transient within goto_offline() */
6751         case HLS_LINK_COOLDOWN:         /* transient within goto_offline() */
6752         default:
6753                 dd_dev_info(dd, "%s: state 0x%x: not supported\n",
6754                         __func__, state);
6755                 ret = -EINVAL;
6756                 break;
6757         }
6758
6759         is_down = !!(ppd->host_link_state & (HLS_DN_POLL |
6760                         HLS_DN_DISABLE | HLS_DN_OFFLINE));
6761
6762         if (was_up && is_down && ppd->local_link_down_reason.sma == 0 &&
6763             ppd->neigh_link_down_reason.sma == 0) {
6764                 ppd->local_link_down_reason.sma =
6765                   ppd->local_link_down_reason.latest;
6766                 ppd->neigh_link_down_reason.sma =
6767                   ppd->neigh_link_down_reason.latest;
6768         }
6769
6770         goto done;
6771
6772 unexpected:
6773         dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
6774                 __func__, link_state_name(ppd->host_link_state),
6775                 link_state_name(state));
6776         ret = -EINVAL;
6777
6778 done:
6779         mutex_unlock(&ppd->hls_lock);
6780
6781         if (event.device)
6782                 ib_dispatch_event(&event);
6783
6784         return ret;
6785 }
6786
6787 int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
6788 {
6789         u64 reg;
6790         int ret = 0;
6791
6792         switch (which) {
6793         case HFI1_IB_CFG_LIDLMC:
6794                 set_lidlmc(ppd);
6795                 break;
6796         case HFI1_IB_CFG_VL_HIGH_LIMIT:
6797                 /*
6798                  * The VL Arbitrator high limit is sent in units of 4k
6799                  * bytes, while HFI stores it in units of 64 bytes.
6800                  */
6801                 val *= 4096/64;
6802                 reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
6803                         << SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
6804                 write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
6805                 break;
6806         case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6807                 /* HFI only supports POLL as the default link down state */
6808                 if (val != HLS_DN_POLL)
6809                         ret = -EINVAL;
6810                 break;
6811         case HFI1_IB_CFG_OP_VLS:
6812                 if (ppd->vls_operational != val) {
6813                         ppd->vls_operational = val;
6814                         if (!ppd->port)
6815                                 ret = -EINVAL;
6816                         else
6817                                 ret = sdma_map_init(
6818                                         ppd->dd,
6819                                         ppd->port - 1,
6820                                         val,
6821                                         NULL);
6822                 }
6823                 break;
6824         /*
6825          * For link width, link width downgrade, and speed enable, always AND
6826          * the setting with what is actually supported.  This has two benefits.
6827          * First, enabled can't have unsupported values, no matter what the
6828          * SM or FM might want.  Second, the ALL_SUPPORTED wildcards that mean
6829          * "fill in with your supported value" have all the bits in the
6830          * field set, so simply ANDing with supported has the desired result.
6831          */
6832         case HFI1_IB_CFG_LWID_ENB: /* set allowed Link-width */
6833                 ppd->link_width_enabled = val & ppd->link_width_supported;
6834                 break;
6835         case HFI1_IB_CFG_LWID_DG_ENB: /* set allowed link width downgrade */
6836                 ppd->link_width_downgrade_enabled =
6837                                 val & ppd->link_width_downgrade_supported;
6838                 break;
6839         case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6840                 ppd->link_speed_enabled = val & ppd->link_speed_supported;
6841                 break;
6842         case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6843                 /*
6844                  * HFI does not follow IB specs; save this value
6845                  * so we can report it, if asked.
6846                  */
6847                 ppd->overrun_threshold = val;
6848                 break;
6849         case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6850                 /*
6851                  * HFI does not follow IB specs; save this value
6852                  * so we can report it, if asked.
6853                  */
6854                 ppd->phy_error_threshold = val;
6855                 break;
6856
6857         case HFI1_IB_CFG_MTU:
6858                 set_send_length(ppd);
6859                 break;
6860
6861         case HFI1_IB_CFG_PKEYS:
6862                 if (HFI1_CAP_IS_KSET(PKEY_CHECK))
6863                         set_partition_keys(ppd);
6864                 break;
6865
6866         default:
6867                 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6868                         dd_dev_info(ppd->dd,
6869                           "%s: which %s, val 0x%x: not implemented\n",
6870                           __func__, ib_cfg_name(which), val);
6871                 break;
6872         }
6873         return ret;
6874 }
6875
6876 /* begin functions related to vl arbitration table caching */
6877 static void init_vl_arb_caches(struct hfi1_pportdata *ppd)
6878 {
6879         int i;
6880
6881         BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6882                         VL_ARB_LOW_PRIO_TABLE_SIZE);
6883         BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6884                         VL_ARB_HIGH_PRIO_TABLE_SIZE);
6885
6886         /*
6887          * Note that we always return values directly from the
6888          * 'vl_arb_cache' (and do no CSR reads) in response to a
6889          * 'Get(VLArbTable)'. This is obviously correct after a
6890          * 'Set(VLArbTable)', since the cache will then be up to
6891          * date. But it's also correct prior to any 'Set(VLArbTable)'
6892          * since then both the cache, and the relevant h/w registers
6893          * will be zeroed.
6894          */
6895
6896         for (i = 0; i < MAX_PRIO_TABLE; i++)
6897                 spin_lock_init(&ppd->vl_arb_cache[i].lock);
6898 }
6899
6900 /*
6901  * vl_arb_lock_cache
6902  *
6903  * All other vl_arb_* functions should be called only after locking
6904  * the cache.
6905  */
6906 static inline struct vl_arb_cache *
6907 vl_arb_lock_cache(struct hfi1_pportdata *ppd, int idx)
6908 {
6909         if (idx != LO_PRIO_TABLE && idx != HI_PRIO_TABLE)
6910                 return NULL;
6911         spin_lock(&ppd->vl_arb_cache[idx].lock);
6912         return &ppd->vl_arb_cache[idx];
6913 }
6914
6915 static inline void vl_arb_unlock_cache(struct hfi1_pportdata *ppd, int idx)
6916 {
6917         spin_unlock(&ppd->vl_arb_cache[idx].lock);
6918 }
6919
6920 static void vl_arb_get_cache(struct vl_arb_cache *cache,
6921                              struct ib_vl_weight_elem *vl)
6922 {
6923         memcpy(vl, cache->table, VL_ARB_TABLE_SIZE * sizeof(*vl));
6924 }
6925
6926 static void vl_arb_set_cache(struct vl_arb_cache *cache,
6927                              struct ib_vl_weight_elem *vl)
6928 {
6929         memcpy(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6930 }
6931
6932 static int vl_arb_match_cache(struct vl_arb_cache *cache,
6933                               struct ib_vl_weight_elem *vl)
6934 {
6935         return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6936 }
6937 /* end functions related to vl arbitration table caching */
6938
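/*
 * Illustrative sketch only, not part of the driver: the helpers above are
 * meant to be used in a lock -> get/set/match -> unlock pattern, e.g. to
 * answer a Get(VLArbTable) for the low priority table straight from the
 * cache.  The function and buffer names below are hypothetical; resp must
 * hold VL_ARB_TABLE_SIZE elements.
 */
static void __maybe_unused example_get_low_prio_vl_arb(
				struct hfi1_pportdata *ppd,
				struct ib_vl_weight_elem *resp)
{
        struct vl_arb_cache *vlc;

        /* lock the low priority cache, copy it out, then unlock */
        vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
        vl_arb_get_cache(vlc, resp);
        vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
}
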
6939 static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target,
6940                           u32 size, struct ib_vl_weight_elem *vl)
6941 {
6942         struct hfi1_devdata *dd = ppd->dd;
6943         u64 reg;
6944         unsigned int i, is_up = 0;
6945         int drain, ret = 0;
6946
6947         mutex_lock(&ppd->hls_lock);
6948
6949         if (ppd->host_link_state & HLS_UP)
6950                 is_up = 1;
6951
6952         drain = !is_ax(dd) && is_up;
6953
6954         if (drain)
6955                 /*
6956                  * Before adjusting VL arbitration weights, empty per-VL
6957                  * FIFOs, otherwise a packet whose VL weight is being
6958                  * set to 0 could get stuck in a FIFO with no chance to
6959                  * egress.
6960                  */
6961                 ret = stop_drain_data_vls(dd);
6962
6963         if (ret) {
6964                 dd_dev_err(
6965                         dd,
6966                         "%s: cannot stop/drain VLs - refusing to change VL arbitration weights\n",
6967                         __func__);
6968                 goto err;
6969         }
6970
6971         for (i = 0; i < size; i++, vl++) {
6972                 /*
6973                  * NOTE: The low priority shift and mask are used here, but
6974                  * they are the same for both the low and high registers.
6975                  */
6976                 reg = (((u64)vl->vl & SEND_LOW_PRIORITY_LIST_VL_MASK)
6977                                 << SEND_LOW_PRIORITY_LIST_VL_SHIFT)
6978                       | (((u64)vl->weight
6979                                 & SEND_LOW_PRIORITY_LIST_WEIGHT_MASK)
6980                                 << SEND_LOW_PRIORITY_LIST_WEIGHT_SHIFT);
6981                 write_csr(dd, target + (i * 8), reg);
6982         }
6983         pio_send_control(dd, PSC_GLOBAL_VLARB_ENABLE);
6984
6985         if (drain)
6986                 open_fill_data_vls(dd); /* reopen all VLs */
6987
6988 err:
6989         mutex_unlock(&ppd->hls_lock);
6990
6991         return ret;
6992 }
6993
6994 /*
6995  * Read one credit merge VL register.
6996  */
6997 static void read_one_cm_vl(struct hfi1_devdata *dd, u32 csr,
6998                            struct vl_limit *vll)
6999 {
7000         u64 reg = read_csr(dd, csr);
7001
7002         vll->dedicated = cpu_to_be16(
7003                 (reg >> SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT)
7004                 & SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_MASK);
7005         vll->shared = cpu_to_be16(
7006                 (reg >> SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT)
7007                 & SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_MASK);
7008 }
7009
7010 /*
7011  * Read the current credit merge limits.
7012  */
7013 static int get_buffer_control(struct hfi1_devdata *dd,
7014                               struct buffer_control *bc, u16 *overall_limit)
7015 {
7016         u64 reg;
7017         int i;
7018
7019         /* not all entries are filled in */
7020         memset(bc, 0, sizeof(*bc));
7021
7022         /* OPA and HFI have a 1-1 mapping */
7023         for (i = 0; i < TXE_NUM_DATA_VL; i++)
7024                 read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]);
7025
7026         /* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */
7027         read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]);
7028
7029         reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7030         bc->overall_shared_limit = cpu_to_be16(
7031                 (reg >> SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
7032                 & SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK);
7033         if (overall_limit)
7034                 *overall_limit = (reg
7035                         >> SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
7036                         & SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_MASK;
7037         return sizeof(struct buffer_control);
7038 }
7039
7040 static int get_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
7041 {
7042         u64 reg;
7043         int i;
7044
7045         /* each register contains 16 SC->VLnt mappings, 4 bits each */
7046         reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_15_0);
7047         for (i = 0; i < sizeof(u64); i++) {
7048                 u8 byte = *(((u8 *)&reg) + i);
7049
7050                 dp->vlnt[2 * i] = byte & 0xf;
7051                 dp->vlnt[(2 * i) + 1] = (byte & 0xf0) >> 4;
7052         }
7053
7054         reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_31_16);
7055         for (i = 0; i < sizeof(u64); i++) {
7056                 u8 byte = *(((u8 *)&reg) + i);
7057
7058                 dp->vlnt[16 + (2 * i)] = byte & 0xf;
7059                 dp->vlnt[16 + (2 * i) + 1] = (byte & 0xf0) >> 4;
7060         }
7061         return sizeof(struct sc2vlnt);
7062 }
7063
7064 static void get_vlarb_preempt(struct hfi1_devdata *dd, u32 nelems,
7065                               struct ib_vl_weight_elem *vl)
7066 {
7067         unsigned int i;
7068
7069         for (i = 0; i < nelems; i++, vl++) {
7070                 vl->vl = 0xf;
7071                 vl->weight = 0;
7072         }
7073 }
7074
7075 static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
7076 {
7077         write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0,
7078                 DC_SC_VL_VAL(15_0,
7079                 0, dp->vlnt[0] & 0xf,
7080                 1, dp->vlnt[1] & 0xf,
7081                 2, dp->vlnt[2] & 0xf,
7082                 3, dp->vlnt[3] & 0xf,
7083                 4, dp->vlnt[4] & 0xf,
7084                 5, dp->vlnt[5] & 0xf,
7085                 6, dp->vlnt[6] & 0xf,
7086                 7, dp->vlnt[7] & 0xf,
7087                 8, dp->vlnt[8] & 0xf,
7088                 9, dp->vlnt[9] & 0xf,
7089                 10, dp->vlnt[10] & 0xf,
7090                 11, dp->vlnt[11] & 0xf,
7091                 12, dp->vlnt[12] & 0xf,
7092                 13, dp->vlnt[13] & 0xf,
7093                 14, dp->vlnt[14] & 0xf,
7094                 15, dp->vlnt[15] & 0xf));
7095         write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16,
7096                 DC_SC_VL_VAL(31_16,
7097                 16, dp->vlnt[16] & 0xf,
7098                 17, dp->vlnt[17] & 0xf,
7099                 18, dp->vlnt[18] & 0xf,
7100                 19, dp->vlnt[19] & 0xf,
7101                 20, dp->vlnt[20] & 0xf,
7102                 21, dp->vlnt[21] & 0xf,
7103                 22, dp->vlnt[22] & 0xf,
7104                 23, dp->vlnt[23] & 0xf,
7105                 24, dp->vlnt[24] & 0xf,
7106                 25, dp->vlnt[25] & 0xf,
7107                 26, dp->vlnt[26] & 0xf,
7108                 27, dp->vlnt[27] & 0xf,
7109                 28, dp->vlnt[28] & 0xf,
7110                 29, dp->vlnt[29] & 0xf,
7111                 30, dp->vlnt[30] & 0xf,
7112                 31, dp->vlnt[31] & 0xf));
7113 }
7114
7115 static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
7116                         u16 limit)
7117 {
7118         if (limit != 0)
7119                 dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n",
7120                         what, (int)limit, idx);
7121 }
7122
7123 /* change only the shared limit portion of SendCmGlobalCredit */
7124 static void set_global_shared(struct hfi1_devdata *dd, u16 limit)
7125 {
7126         u64 reg;
7127
7128         reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7129         reg &= ~SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK;
7130         reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT;
7131         write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7132 }
7133
7134 /* change only the total credit limit portion of SendCmGlobalCredit */
7135 static void set_global_limit(struct hfi1_devdata *dd, u16 limit)
7136 {
7137         u64 reg;
7138
7139         reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7140         reg &= ~SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK;
7141         reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT;
7142         write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7143 }
7144
7145 /* set the given per-VL shared limit */
7146 static void set_vl_shared(struct hfi1_devdata *dd, int vl, u16 limit)
7147 {
7148         u64 reg;
7149         u32 addr;
7150
7151         if (vl < TXE_NUM_DATA_VL)
7152                 addr = SEND_CM_CREDIT_VL + (8 * vl);
7153         else
7154                 addr = SEND_CM_CREDIT_VL15;
7155
7156         reg = read_csr(dd, addr);
7157         reg &= ~SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SMASK;
7158         reg |= (u64)limit << SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT;
7159         write_csr(dd, addr, reg);
7160 }
7161
7162 /* set the given per-VL dedicated limit */
7163 static void set_vl_dedicated(struct hfi1_devdata *dd, int vl, u16 limit)
7164 {
7165         u64 reg;
7166         u32 addr;
7167
7168         if (vl < TXE_NUM_DATA_VL)
7169                 addr = SEND_CM_CREDIT_VL + (8 * vl);
7170         else
7171                 addr = SEND_CM_CREDIT_VL15;
7172
7173         reg = read_csr(dd, addr);
7174         reg &= ~SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SMASK;
7175         reg |= (u64)limit << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT;
7176         write_csr(dd, addr, reg);
7177 }
7178
7179 /* spin until the given per-VL status mask bits clear */
7180 static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
7181                                      const char *which)
7182 {
7183         unsigned long timeout;
7184         u64 reg;
7185
7186         timeout = jiffies + msecs_to_jiffies(VL_STATUS_CLEAR_TIMEOUT);
7187         while (1) {
7188                 reg = read_csr(dd, SEND_CM_CREDIT_USED_STATUS) & mask;
7189
7190                 if (reg == 0)
7191                         return; /* success */
7192                 if (time_after(jiffies, timeout))
7193                         break;          /* timed out */
7194                 udelay(1);
7195         }
7196
7197         dd_dev_err(dd,
7198                 "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
7199                 which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
7200         /*
7201          * If this occurs, it is likely there was a credit loss on the link.
7202          * The only recovery from that is a link bounce.
7203          */
7204         dd_dev_err(dd,
7205                 "Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
7206 }
7207
7208 /*
7209  * The number of credits on the VLs may be changed while everything
7210  * is "live", but the following algorithm must be followed due to
7211  * how the hardware is actually implemented.  In particular,
7212  * Return_Credit_Status[] is the only correct status check.
7213  *
7214  * if (reducing Global_Shared_Credit_Limit or any shared limit changing)
7215  *     set Global_Shared_Credit_Limit = 0
7216  *     use_all_vl = 1
7217  * mask0 = all VLs that are changing either dedicated or shared limits
7218  * set Shared_Limit[mask0] = 0
7219  * spin until Return_Credit_Status[use_all_vl ? all VL : mask0] == 0
7220  * if (changing any dedicated limit)
7221  *     mask1 = all VLs that are lowering dedicated limits
7222  *     lower Dedicated_Limit[mask1]
7223  *     spin until Return_Credit_Status[mask1] == 0
7224  *     raise Dedicated_Limits
7225  * raise Shared_Limits
7226  * raise Global_Shared_Credit_Limit
7227  *
7228  * lower = if the new limit is lower, set the limit to the new value
7229  * raise = if the new limit is higher than the current value (may be changed
7230  *      earlier in the algorithm), set the limit to the new value
7231  */
7232 static int set_buffer_control(struct hfi1_devdata *dd,
7233                               struct buffer_control *new_bc)
7234 {
7235         u64 changing_mask, ld_mask, stat_mask;
7236         int change_count;
7237         int i, use_all_mask;
7238         int this_shared_changing;
7239         /*
7240          * A0: add the variable any_shared_limit_changing below and in the
7241          * algorithm above.  If removing A0 support, it can be removed.
7242          */
7243         int any_shared_limit_changing;
7244         struct buffer_control cur_bc;
7245         u8 changing[OPA_MAX_VLS];
7246         u8 lowering_dedicated[OPA_MAX_VLS];
7247         u16 cur_total;
7248         u32 new_total = 0;
7249         const u64 all_mask =
7250         SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK
7251          | SEND_CM_CREDIT_USED_STATUS_VL1_RETURN_CREDIT_STATUS_SMASK
7252          | SEND_CM_CREDIT_USED_STATUS_VL2_RETURN_CREDIT_STATUS_SMASK
7253          | SEND_CM_CREDIT_USED_STATUS_VL3_RETURN_CREDIT_STATUS_SMASK
7254          | SEND_CM_CREDIT_USED_STATUS_VL4_RETURN_CREDIT_STATUS_SMASK
7255          | SEND_CM_CREDIT_USED_STATUS_VL5_RETURN_CREDIT_STATUS_SMASK
7256          | SEND_CM_CREDIT_USED_STATUS_VL6_RETURN_CREDIT_STATUS_SMASK
7257          | SEND_CM_CREDIT_USED_STATUS_VL7_RETURN_CREDIT_STATUS_SMASK
7258          | SEND_CM_CREDIT_USED_STATUS_VL15_RETURN_CREDIT_STATUS_SMASK;
7259
7260 #define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
7261 #define NUM_USABLE_VLS 16       /* look at VL15 and less */
7262
7264         /* find the new total credits, do sanity check on unused VLs */
7265         for (i = 0; i < OPA_MAX_VLS; i++) {
7266                 if (valid_vl(i)) {
7267                         new_total += be16_to_cpu(new_bc->vl[i].dedicated);
7268                         continue;
7269                 }
7270                 nonzero_msg(dd, i, "dedicated",
7271                         be16_to_cpu(new_bc->vl[i].dedicated));
7272                 nonzero_msg(dd, i, "shared",
7273                         be16_to_cpu(new_bc->vl[i].shared));
7274                 new_bc->vl[i].dedicated = 0;
7275                 new_bc->vl[i].shared = 0;
7276         }
7277         new_total += be16_to_cpu(new_bc->overall_shared_limit);
7278         if (new_total > (u32)dd->link_credits)
7279                 return -EINVAL;
7280         /* fetch the current values */
7281         get_buffer_control(dd, &cur_bc, &cur_total);
7282
7283         /*
7284          * Create the masks we will use.
7285          */
7286         memset(changing, 0, sizeof(changing));
7287         memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
7288         /* NOTE: Assumes that the individual VL bits are adjacent and in
7289            increasing order */
7290         stat_mask =
7291                 SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
7292         changing_mask = 0;
7293         ld_mask = 0;
7294         change_count = 0;
7295         any_shared_limit_changing = 0;
7296         for (i = 0; i < NUM_USABLE_VLS; i++, stat_mask <<= 1) {
7297                 if (!valid_vl(i))
7298                         continue;
7299                 this_shared_changing = new_bc->vl[i].shared
7300                                                 != cur_bc.vl[i].shared;
7301                 if (this_shared_changing)
7302                         any_shared_limit_changing = 1;
7303                 if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
7304                                 || this_shared_changing) {
7305                         changing[i] = 1;
7306                         changing_mask |= stat_mask;
7307                         change_count++;
7308                 }
7309                 if (be16_to_cpu(new_bc->vl[i].dedicated) <
7310                                         be16_to_cpu(cur_bc.vl[i].dedicated)) {
7311                         lowering_dedicated[i] = 1;
7312                         ld_mask |= stat_mask;
7313                 }
7314         }
7315
7316         /* bracket the credit change with a total adjustment */
7317         if (new_total > cur_total)
7318                 set_global_limit(dd, new_total);
7319
7320         /*
7321          * Start the credit change algorithm.
7322          */
7323         use_all_mask = 0;
7324         if ((be16_to_cpu(new_bc->overall_shared_limit) <
7325              be16_to_cpu(cur_bc.overall_shared_limit)) ||
7326             (is_ax(dd) && any_shared_limit_changing)) {
7327                 set_global_shared(dd, 0);
7328                 cur_bc.overall_shared_limit = 0;
7329                 use_all_mask = 1;
7330         }
7331
7332         for (i = 0; i < NUM_USABLE_VLS; i++) {
7333                 if (!valid_vl(i))
7334                         continue;
7335
7336                 if (changing[i]) {
7337                         set_vl_shared(dd, i, 0);
7338                         cur_bc.vl[i].shared = 0;
7339                 }
7340         }
7341
7342         wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
7343                 "shared");
7344
7345         if (change_count > 0) {
7346                 for (i = 0; i < NUM_USABLE_VLS; i++) {
7347                         if (!valid_vl(i))
7348                                 continue;
7349
7350                         if (lowering_dedicated[i]) {
7351                                 set_vl_dedicated(dd, i,
7352                                         be16_to_cpu(new_bc->vl[i].dedicated));
7353                                 cur_bc.vl[i].dedicated =
7354                                                 new_bc->vl[i].dedicated;
7355                         }
7356                 }
7357
7358                 wait_for_vl_status_clear(dd, ld_mask, "dedicated");
7359
7360                 /* now raise all dedicated that are going up */
7361                 for (i = 0; i < NUM_USABLE_VLS; i++) {
7362                         if (!valid_vl(i))
7363                                 continue;
7364
7365                         if (be16_to_cpu(new_bc->vl[i].dedicated) >
7366                                         be16_to_cpu(cur_bc.vl[i].dedicated))
7367                                 set_vl_dedicated(dd, i,
7368                                         be16_to_cpu(new_bc->vl[i].dedicated));
7369                 }
7370         }
7371
7372         /* next raise all shared that are going up */
7373         for (i = 0; i < NUM_USABLE_VLS; i++) {
7374                 if (!valid_vl(i))
7375                         continue;
7376
7377                 if (be16_to_cpu(new_bc->vl[i].shared) >
7378                                 be16_to_cpu(cur_bc.vl[i].shared))
7379                         set_vl_shared(dd, i, be16_to_cpu(new_bc->vl[i].shared));
7380         }
7381
7382         /* finally raise the global shared */
7383         if (be16_to_cpu(new_bc->overall_shared_limit) >
7384                         be16_to_cpu(cur_bc.overall_shared_limit))
7385                 set_global_shared(dd,
7386                         be16_to_cpu(new_bc->overall_shared_limit));
7387
7388         /* bracket the credit change with a total adjustment */
7389         if (new_total < cur_total)
7390                 set_global_limit(dd, new_total);
7391         return 0;
7392 }
7393
7394 /*
7395  * Read the given fabric manager table. Return the size of the
7396  * table (in bytes) on success, and a negative error code on
7397  * failure.
7398  */
7399 int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t)
7401 {
7402         int size;
7403         struct vl_arb_cache *vlc;
7404
7405         switch (which) {
7406         case FM_TBL_VL_HIGH_ARB:
7407                 size = 256;
7408                 /*
7409                  * OPA specifies 128 elements (of 2 bytes each), though
7410                  * HFI supports only 16 elements in h/w.
7411                  */
7412                 vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7413                 vl_arb_get_cache(vlc, t);
7414                 vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7415                 break;
7416         case FM_TBL_VL_LOW_ARB:
7417                 size = 256;
7418                 /*
7419                  * OPA specifies 128 elements (of 2 bytes each), though
7420                  * HFI supports only 16 elements in h/w.
7421                  */
7422                 vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7423                 vl_arb_get_cache(vlc, t);
7424                 vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7425                 break;
7426         case FM_TBL_BUFFER_CONTROL:
7427                 size = get_buffer_control(ppd->dd, t, NULL);
7428                 break;
7429         case FM_TBL_SC2VLNT:
7430                 size = get_sc2vlnt(ppd->dd, t);
7431                 break;
7432         case FM_TBL_VL_PREEMPT_ELEMS:
7433                 size = 256;
7434                 /* OPA specifies 128 elements, of 2 bytes each */
7435                 get_vlarb_preempt(ppd->dd, OPA_MAX_VLS, t);
7436                 break;
7437         case FM_TBL_VL_PREEMPT_MATRIX:
7438                 size = 256;
7439                 /*
7440                  * OPA specifies that this is the same size as the VL
7441                  * arbitration tables (i.e., 256 bytes).
7442                  */
7443                 break;
7444         default:
7445                 return -EINVAL;
7446         }
7447         return size;
7448 }
7449
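/*
 * Illustrative sketch only, not part of the driver: a caller reading one of
 * the 256-byte OPA tables above sizes its buffer accordingly and uses the
 * returned length.  The function and buffer names are hypothetical.
 */
static int __maybe_unused example_read_low_arb_table(struct hfi1_pportdata *ppd)
{
        u8 tbl[256];    /* OPA VL arbitration tables are 256 bytes */
        int size;

        size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, tbl);
        if (size < 0)
                return size;    /* unknown table */
        /* 'size' is the OPA-defined table length in bytes */
        return 0;
}
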
7450 /*
7451  * Write the given fabric manager table.
7452  */
7453 int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t)
7454 {
7455         int ret = 0;
7456         struct vl_arb_cache *vlc;
7457
7458         switch (which) {
7459         case FM_TBL_VL_HIGH_ARB:
7460                 vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7461                 if (vl_arb_match_cache(vlc, t)) {
7462                         vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7463                         break;
7464                 }
7465                 vl_arb_set_cache(vlc, t);
7466                 vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7467                 ret = set_vl_weights(ppd, SEND_HIGH_PRIORITY_LIST,
7468                                      VL_ARB_HIGH_PRIO_TABLE_SIZE, t);
7469                 break;
7470         case FM_TBL_VL_LOW_ARB:
7471                 vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7472                 if (vl_arb_match_cache(vlc, t)) {
7473                         vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7474                         break;
7475                 }
7476                 vl_arb_set_cache(vlc, t);
7477                 vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7478                 ret = set_vl_weights(ppd, SEND_LOW_PRIORITY_LIST,
7479                                      VL_ARB_LOW_PRIO_TABLE_SIZE, t);
7480                 break;
7481         case FM_TBL_BUFFER_CONTROL:
7482                 ret = set_buffer_control(ppd->dd, t);
7483                 break;
7484         case FM_TBL_SC2VLNT:
7485                 set_sc2vlnt(ppd->dd, t);
7486                 break;
7487         default:
7488                 ret = -EINVAL;
7489         }
7490         return ret;
7491 }
7492
7493 /*
7494  * Disable all data VLs.
7495  *
7496  * Return 0 if disabled, non-zero if the VLs cannot be disabled.
7497  */
7498 static int disable_data_vls(struct hfi1_devdata *dd)
7499 {
7500         if (is_ax(dd))
7501                 return 1;
7502
7503         pio_send_control(dd, PSC_DATA_VL_DISABLE);
7504
7505         return 0;
7506 }
7507
7508 /*
7509  * open_fill_data_vls() - the counterpart to stop_drain_data_vls().
7510  * Just re-enables all data VLs (the "fill" part happens
7511  * automatically - the name was chosen for symmetry with
7512  * stop_drain_data_vls()).
7513  *
7514  * Return 0 if successful, non-zero if the VLs cannot be enabled.
7515  */
7516 int open_fill_data_vls(struct hfi1_devdata *dd)
7517 {
7518         if (is_ax(dd))
7519                 return 1;
7520
7521         pio_send_control(dd, PSC_DATA_VL_ENABLE);
7522
7523         return 0;
7524 }
7525
7526 /*
7527  * drain_data_vls() - assumes that disable_data_vls() has been called;
7528  * waits for the occupancy of the per-VL FIFOs (for all contexts) and the
7529  * SDMA engines to drop to 0.
7530  */
7531 static void drain_data_vls(struct hfi1_devdata *dd)
7532 {
7533         sc_wait(dd);
7534         sdma_wait(dd);
7535         pause_for_credit_return(dd);
7536 }
7537
7538 /*
7539  * stop_drain_data_vls() - disable, then drain all per-VL fifos.
7540  *
7541  * Use open_fill_data_vls() to resume using data VLs.  This pair is
7542  * meant to be used like this:
7543  *
7544  * stop_drain_data_vls(dd);
7545  * // do things with per-VL resources
7546  * open_fill_data_vls(dd);
7547  */
7548 int stop_drain_data_vls(struct hfi1_devdata *dd)
7549 {
7550         int ret;
7551
7552         ret = disable_data_vls(dd);
7553         if (ret == 0)
7554                 drain_data_vls(dd);
7555
7556         return ret;
7557 }
7558
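/*
 * Illustrative sketch only, not part of the driver: the bracketing described
 * above, with error handling.  The function and callback names are
 * hypothetical.
 */
static int __maybe_unused example_adjust_per_vl_resources(
				struct hfi1_devdata *dd,
				void (*adjust)(struct hfi1_devdata *))
{
        int ret;

        ret = stop_drain_data_vls(dd);
        if (ret)
                return ret;     /* cannot stop/drain (e.g. A0 hardware) */

        adjust(dd);             /* do things with per-VL resources */

        return open_fill_data_vls(dd);
}
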
7559 /*
7560  * Convert a nanosecond time to a cclock count.  No matter how slow
7561  * the cclock, a non-zero ns will always have a non-zero result.
7562  */
7563 u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns)
7564 {
7565         u32 cclocks;
7566
7567         if (dd->icode == ICODE_FPGA_EMULATION)
7568                 cclocks = (ns * 1000) / FPGA_CCLOCK_PS;
7569         else  /* simulation pretends to be ASIC */
7570                 cclocks = (ns * 1000) / ASIC_CCLOCK_PS;
7571         if (ns && !cclocks)     /* if ns nonzero, must be at least 1 */
7572                 cclocks = 1;
7573         return cclocks;
7574 }
7575
7576 /*
7577  * Convert a cclock count to nanoseconds.  No matter how slow
7578  * the cclock, a non-zero cclocks will always have a non-zero result.
7579  */
7580 u32 cclock_to_ns(struct hfi1_devdata *dd, u32 cclocks)
7581 {
7582         u32 ns;
7583
7584         if (dd->icode == ICODE_FPGA_EMULATION)
7585                 ns = (cclocks * FPGA_CCLOCK_PS) / 1000;
7586         else  /* simulation pretends to be ASIC */
7587                 ns = (cclocks * ASIC_CCLOCK_PS) / 1000;
7588         if (cclocks && !ns)
7589                 ns = 1;
7590         return ns;
7591 }
7592
7593 /*
7594  * Dynamically adjust the receive interrupt timeout for a context based on
7595  * incoming packet rate.
7596  *
7597  * NOTE: Dynamic adjustment does not allow rcv_intr_count to be zero.
7598  */
7599 static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts)
7600 {
7601         struct hfi1_devdata *dd = rcd->dd;
7602         u32 timeout = rcd->rcvavail_timeout;
7603
7604         /*
7605          * This algorithm doubles or halves the timeout depending on whether
7606          * the number of packets received in this interrupt was less than,
7607          * or greater than or equal to, the interrupt count.
7608          *
7609          * The calculations below do not allow a steady state to be achieved.
7610          * Only at the endpoints is it possible to have an unchanging
7611          * timeout.
7612          */
7613         if (npkts < rcv_intr_count) {
7614                 /*
7615                  * Not enough packets arrived before the timeout, adjust
7616                  * timeout downward.
7617                  */
7618                 if (timeout < 2) /* already at minimum? */
7619                         return;
7620                 timeout >>= 1;
7621         } else {
7622                 /*
7623                  * More than enough packets arrived before the timeout, adjust
7624                  * timeout upward.
7625                  */
7626                 if (timeout >= dd->rcv_intr_timeout_csr) /* already at max? */
7627                         return;
7628                 timeout = min(timeout << 1, dd->rcv_intr_timeout_csr);
7629         }
7630
7631         rcd->rcvavail_timeout = timeout;
7632         /* timeout cannot be larger than rcv_intr_timeout_csr which has already
7633            been verified to be in range */
7634         write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
7635                 (u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7636 }
7637
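/*
 * Illustrative example of the doubling/halving above, with assumed values
 * (rcv_intr_count = 16, rcv_intr_timeout_csr = 1024): a run of slow
 * interrupts (npkts < 16) halves the timeout 1024 -> 512 -> ... -> 1, while
 * a run of fast interrupts doubles it 1 -> 2 -> 4 -> ..., clamped at 1024
 * by the min() above.
 */
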
7638 void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
7639                     u32 intr_adjust, u32 npkts)
7640 {
7641         struct hfi1_devdata *dd = rcd->dd;
7642         u64 reg;
7643         u32 ctxt = rcd->ctxt;
7644
7645         /*
7646          * Need to write timeout register before updating RcvHdrHead to ensure
7647          * that a new value is used when the HW decides to restart counting.
7648          */
7649         if (intr_adjust)
7650                 adjust_rcv_timeout(rcd, npkts);
7651         if (updegr) {
7652                 reg = (egrhd & RCV_EGR_INDEX_HEAD_HEAD_MASK)
7653                         << RCV_EGR_INDEX_HEAD_HEAD_SHIFT;
7654                 write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg);
7655         }
7656         mmiowb();
7657         reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) |
7658                 (((u64)hd & RCV_HDR_HEAD_HEAD_MASK)
7659                         << RCV_HDR_HEAD_HEAD_SHIFT);
7660         write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7661         mmiowb();
7662 }
7663
7664 u32 hdrqempty(struct hfi1_ctxtdata *rcd)
7665 {
7666         u32 head, tail;
7667
7668         head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
7669                 & RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
7670
7671         if (rcd->rcvhdrtail_kvaddr)
7672                 tail = get_rcvhdrtail(rcd);
7673         else
7674                 tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
7675
7676         return head == tail;
7677 }
7678
7679 /*
7680  * Context Control and Receive Array encoding for buffer size:
7681  *      0x0 invalid
7682  *      0x1   4 KB
7683  *      0x2   8 KB
7684  *      0x3  16 KB
7685  *      0x4  32 KB
7686  *      0x5  64 KB
7687  *      0x6 128 KB
7688  *      0x7 256 KB
7689  *      0x8 512 KB (Receive Array only)
7690  *      0x9   1 MB (Receive Array only)
7691  *      0xa   2 MB (Receive Array only)
7692  *
7693  *      0xB-0xF - reserved (Receive Array only)
7694  *
7695  *
7696  * This routine assumes that the value has already been sanity checked.
7697  */
7698 static u32 encoded_size(u32 size)
7699 {
7700         switch (size) {
7701         case   4*1024: return 0x1;
7702         case   8*1024: return 0x2;
7703         case  16*1024: return 0x3;
7704         case  32*1024: return 0x4;
7705         case  64*1024: return 0x5;
7706         case 128*1024: return 0x6;
7707         case 256*1024: return 0x7;
7708         case 512*1024: return 0x8;
7709         case   1*1024*1024: return 0x9;
7710         case   2*1024*1024: return 0xa;
7711         }
7712         return 0x1;     /* if invalid, go with the minimum size */
7713 }
7714
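/*
 * For example, encoded_size(64 * 1024) returns 0x5; hfi1_rcvctrl() below
 * places that encoding into RCV_CTXT_CTRL using
 * RCV_CTXT_CTRL_EGR_BUF_SIZE_SHIFT and its mask.
 */
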
7715 void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
7716 {
7717         struct hfi1_ctxtdata *rcd;
7718         u64 rcvctrl, reg;
7719         int did_enable = 0;
7720
7721         rcd = dd->rcd[ctxt];
7722         if (!rcd)
7723                 return;
7724
7725         hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op);
7726
7727         rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
7728         /* if the context is already enabled, don't do the extra steps */
7729         if ((op & HFI1_RCVCTRL_CTXT_ENB)
7730                         && !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
7731                 /* reset the tail and hdr addresses, and sequence count */
7732                 write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
7733                                 rcd->rcvhdrq_phys);
7734                 if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
7735                         write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
7736                                         rcd->rcvhdrqtailaddr_phys);
7737                 rcd->seq_cnt = 1;
7738
7739                 /* reset the cached receive header queue head value */
7740                 rcd->head = 0;
7741
7742                 /*
7743                  * Zero the receive header queue so we don't get false
7744                  * positives when checking the sequence number.  The
7745                  * sequence numbers could land exactly on the same spot.
7746                  * E.g., an rcd restart before the receive header queue wrapped.
7747                  */
7748                 memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
7749
7750                 /* starting timeout */
7751                 rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr;
7752
7753                 /* enable the context */
7754                 rcvctrl |= RCV_CTXT_CTRL_ENABLE_SMASK;
7755
7756                 /* clean the egr buffer size first */
7757                 rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7758                 rcvctrl |= ((u64)encoded_size(rcd->egrbufs.rcvtid_size)
7759                                 & RCV_CTXT_CTRL_EGR_BUF_SIZE_MASK)
7760                                         << RCV_CTXT_CTRL_EGR_BUF_SIZE_SHIFT;
7761
7762                 /* zero RcvHdrHead - set RcvHdrHead.Counter after enable */
7763                 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0);
7764                 did_enable = 1;
7765
7766                 /* zero RcvEgrIndexHead */
7767                 write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, 0);
7768
7769                 /* set eager count and base index */
7770                 reg = (((u64)(rcd->egrbufs.alloced >> RCV_SHIFT)
7771                         & RCV_EGR_CTRL_EGR_CNT_MASK)
7772                        << RCV_EGR_CTRL_EGR_CNT_SHIFT) |
7773                         (((rcd->eager_base >> RCV_SHIFT)
7774                           & RCV_EGR_CTRL_EGR_BASE_INDEX_MASK)
7775                          << RCV_EGR_CTRL_EGR_BASE_INDEX_SHIFT);
7776                 write_kctxt_csr(dd, ctxt, RCV_EGR_CTRL, reg);
7777
7778                 /*
7779                  * Set TID (expected) count and base index.
7780                  * rcd->expected_count is set to individual RcvArray entries,
7781                  * not pairs, and the CSR takes a pair-count in groups of
7782                  * four, so divide by 8.
7783                  */
7784                 reg = (((rcd->expected_count >> RCV_SHIFT)
7785                                         & RCV_TID_CTRL_TID_PAIR_CNT_MASK)
7786                                 << RCV_TID_CTRL_TID_PAIR_CNT_SHIFT) |
7787                       (((rcd->expected_base >> RCV_SHIFT)
7788                                         & RCV_TID_CTRL_TID_BASE_INDEX_MASK)
7789                                 << RCV_TID_CTRL_TID_BASE_INDEX_SHIFT);
7790                 write_kctxt_csr(dd, ctxt, RCV_TID_CTRL, reg);
7791                 if (ctxt == HFI1_CTRL_CTXT)
7792                         write_csr(dd, RCV_VL15, HFI1_CTRL_CTXT);
7793         }
7794         if (op & HFI1_RCVCTRL_CTXT_DIS) {
7795                 write_csr(dd, RCV_VL15, 0);
7796                 /*
7797                  * When receive context is being disabled turn on tail
7798                  * update with a dummy tail address and then disable
7799                  * receive context.
7800                  */
7801                 if (dd->rcvhdrtail_dummy_physaddr) {
7802                         write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
7803                                         dd->rcvhdrtail_dummy_physaddr);
7804                         rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7805                 }
7806
7807                 rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
7808         }
7809         if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
7810                 rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7811         if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
7812                 rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7813         if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
7814                 rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7815         if (op & HFI1_RCVCTRL_TAILUPD_DIS)
7816                 rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7817         if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
7818                 rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7819         if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
7820                 rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7821         if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
7822                 /* In one-packet-per-eager mode, the size comes from
7823                    the RcvArray entry. */
7824                 rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7825                 rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7826         }
7827         if (op & HFI1_RCVCTRL_ONE_PKT_EGR_DIS)
7828                 rcvctrl &= ~RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7829         if (op & HFI1_RCVCTRL_NO_RHQ_DROP_ENB)
7830                 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7831         if (op & HFI1_RCVCTRL_NO_RHQ_DROP_DIS)
7832                 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7833         if (op & HFI1_RCVCTRL_NO_EGR_DROP_ENB)
7834                 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7835         if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
7836                 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7837         rcd->rcvctrl = rcvctrl;
7838         hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
7839         write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
7840
7841         /* work around sticky RcvCtxtStatus.BlockedRHQFull */
7842         if (did_enable
7843             && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
7844                 reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7845                 if (reg != 0) {
7846                         dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
7847                                 ctxt, reg);
7848                         read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7849                         write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
7850                         write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
7851                         read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7852                         reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7853                         dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
7854                                 ctxt, reg, reg == 0 ? "not" : "still");
7855                 }
7856         }
7857
7858         if (did_enable) {
7859                 /*
7860                  * The interrupt timeout and count must be set after
7861                  * the context is enabled to take effect.
7862                  */
7863                 /* set interrupt timeout */
7864                 write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
7865                         (u64)rcd->rcvavail_timeout <<
7866                                 RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7867
7868                 /* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
7869                 reg = (u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT;
7870                 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7871         }
7872
7873         if (op & (HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_CTXT_DIS))
7874                 /*
7875                  * If the context has been disabled and the Tail Update has
7876                  * been cleared, set the RCV_HDR_TAIL_ADDR CSR to dummy address
7877                  * so it doesn't contain an address that is invalid.
7878                  */
7879                 write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
7880                                 dd->rcvhdrtail_dummy_physaddr);
7881 }
7882
7883 u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
7884                     u64 **cntrp)
7885 {
7886         int ret;
7887         u64 val = 0;
7888
7889         if (namep) {
7890                 ret = dd->cntrnameslen;
7891                 if (pos != 0) {
7892                         dd_dev_err(dd, "read_cntrs does not support indexing");
7893                         return 0;
7894                 }
7895                 *namep = dd->cntrnames;
7896         } else {
7897                 const struct cntr_entry *entry;
7898                 int i, j;
7899
7900                 ret = (dd->ndevcntrs) * sizeof(u64);
7901                 if (pos != 0) {
7902                         dd_dev_err(dd, "read_cntrs does not support indexing");
7903                         return 0;
7904                 }
7905
7906                 /* Get the start of the block of counters */
7907                 *cntrp = dd->cntrs;
7908
7909                 /*
7910                  * Now go and fill in each counter in the block.
7911                  */
7912                 for (i = 0; i < DEV_CNTR_LAST; i++) {
7913                         entry = &dev_cntrs[i];
7914                         hfi1_cdbg(CNTR, "reading %s", entry->name);
7915                         if (entry->flags & CNTR_DISABLED) {
7916                                 /* Nothing */
7917                                 hfi1_cdbg(CNTR, "\tDisabled\n");
7918                         } else {
7919                                 if (entry->flags & CNTR_VL) {
7920                                         hfi1_cdbg(CNTR, "\tPer VL\n");
7921                                         for (j = 0; j < C_VL_COUNT; j++) {
7922                                                 val = entry->rw_cntr(entry,
7923                                                                   dd, j,
7924                                                                   CNTR_MODE_R,
7925                                                                   0);
7926                                                 hfi1_cdbg(
7927                                                    CNTR,
7928                                                    "\t\tRead 0x%llx for %d\n",
7929                                                    val, j);
7930                                                 dd->cntrs[entry->offset + j] =
7931                                                                             val;
7932                                         }
7933                                 } else {
7934                                         val = entry->rw_cntr(entry, dd,
7935                                                         CNTR_INVALID_VL,
7936                                                         CNTR_MODE_R, 0);
7937                                         dd->cntrs[entry->offset] = val;
7938                                         hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7939                                 }
7940                         }
7941                 }
7942         }
7943         return ret;
7944 }
7945
7946 /*
7947  * Used by sysfs to create files for hfi stats to read
7948  */
7949 u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
7950                         char **namep, u64 **cntrp)
7951 {
7952         int ret;
7953         u64 val = 0;
7954
7955         if (namep) {
7956                 ret = dd->portcntrnameslen;
7957                 if (pos != 0) {
7958                         dd_dev_err(dd, "index not supported");
7959                         return 0;
7960                 }
7961                 *namep = dd->portcntrnames;
7962         } else {
7963                 const struct cntr_entry *entry;
7964                 struct hfi1_pportdata *ppd;
7965                 int i, j;
7966
7967                 ret = (dd->nportcntrs) * sizeof(u64);
7968                 if (pos != 0) {
7969                         dd_dev_err(dd, "indexing not supported");
7970                         return 0;
7971                 }
7972                 ppd = (struct hfi1_pportdata *)(dd + 1 + port);
7973                 *cntrp = ppd->cntrs;
7974
7975                 for (i = 0; i < PORT_CNTR_LAST; i++) {
7976                         entry = &port_cntrs[i];
7977                         hfi1_cdbg(CNTR, "reading %s", entry->name);
7978                         if (entry->flags & CNTR_DISABLED) {
7979                                 /* Nothing */
7980                                 hfi1_cdbg(CNTR, "\tDisabled\n");
7981                                 continue;
7982                         }
7983
7984                         if (entry->flags & CNTR_VL) {
7985                                 hfi1_cdbg(CNTR, "\tPer VL");
7986                                 for (j = 0; j < C_VL_COUNT; j++) {
7987                                         val = entry->rw_cntr(entry, ppd, j,
7988                                                                CNTR_MODE_R,
7989                                                                0);
7990                                         hfi1_cdbg(
7991                                            CNTR,
7992                                            "\t\tRead 0x%llx for %d",
7993                                            val, j);
7994                                         ppd->cntrs[entry->offset + j] = val;
7995                                 }
7996                         } else {
7997                                 val = entry->rw_cntr(entry, ppd,
7998                                                        CNTR_INVALID_VL,
7999                                                        CNTR_MODE_R,
8000                                                        0);
8001                                 ppd->cntrs[entry->offset] = val;
8002                                 hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
8003                         }
8004                 }
8005         }
8006         return ret;
8007 }
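
/*
 * Editor's usage sketch (not driver code; the caller below is hypothetical):
 * sysfs consumers make two kinds of calls into the function above - one with
 * namep set to fetch the flat, newline-separated name list, and one with
 * cntrp set to fetch the array of u64 values, e.g.:
 *
 *	char *names;
 *	u64 *vals;
 *	u32 name_bytes = hfi1_read_portcntrs(dd, 0, 0, &names, NULL);
 *	u32 val_bytes = hfi1_read_portcntrs(dd, 0, 0, NULL, &vals);
 */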
8008
8009 static void free_cntrs(struct hfi1_devdata *dd)
8010 {
8011         struct hfi1_pportdata *ppd;
8012         int i;
8013
8014         if (dd->synth_stats_timer.data)
8015                 del_timer_sync(&dd->synth_stats_timer);
8016         dd->synth_stats_timer.data = 0;
8017         ppd = (struct hfi1_pportdata *)(dd + 1);
8018         for (i = 0; i < dd->num_pports; i++, ppd++) {
8019                 kfree(ppd->cntrs);
8020                 kfree(ppd->scntrs);
8021                 free_percpu(ppd->ibport_data.rc_acks);
8022                 free_percpu(ppd->ibport_data.rc_qacks);
8023                 free_percpu(ppd->ibport_data.rc_delayed_comp);
8024                 ppd->cntrs = NULL;
8025                 ppd->scntrs = NULL;
8026                 ppd->ibport_data.rc_acks = NULL;
8027                 ppd->ibport_data.rc_qacks = NULL;
8028                 ppd->ibport_data.rc_delayed_comp = NULL;
8029         }
8030         kfree(dd->portcntrnames);
8031         dd->portcntrnames = NULL;
8032         kfree(dd->cntrs);
8033         dd->cntrs = NULL;
8034         kfree(dd->scntrs);
8035         dd->scntrs = NULL;
8036         kfree(dd->cntrnames);
8037         dd->cntrnames = NULL;
8038 }
8039
8040 #define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL
8041 #define CNTR_32BIT_MAX 0x00000000FFFFFFFF
8042
8043 static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
8044                               u64 *psval, void *context, int vl)
8045 {
8046         u64 val;
8047         u64 sval = *psval;
8048
8049         if (entry->flags & CNTR_DISABLED) {
8050                 dd_dev_err(dd, "Counter %s not enabled", entry->name);
8051                 return 0;
8052         }
8053
8054         hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
8055
8056         val = entry->rw_cntr(entry, context, vl, CNTR_MODE_R, 0);
8057
8058         /* If it's a synthetic counter there is more work we need to do */
8059         if (entry->flags & CNTR_SYNTH) {
8060                 if (sval == CNTR_MAX) {
8061                         /* No need to read, already saturated */
8062                         return CNTR_MAX;
8063                 }
8064
8065                 if (entry->flags & CNTR_32BIT) {
8066                         /* 32bit counters can wrap multiple times */
8067                         u64 upper = sval >> 32;
8068                         u64 lower = (sval << 32) >> 32;
8069
8070                         if (lower > val) { /* hw wrapped */
8071                                 if (upper == CNTR_32BIT_MAX)
8072                                         val = CNTR_MAX;
8073                                 else
8074                                         upper++;
8075                         }
8076
8077                         if (val != CNTR_MAX)
8078                                 val = (upper << 32) | val;
8079
8080                 } else {
8081                         /* If we rolled we are saturated */
8082                         if ((val < sval) || (val > CNTR_MAX))
8083                                 val = CNTR_MAX;
8084                 }
8085         }
8086
8087         *psval = val;
8088
8089         hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
8090
8091         return val;
8092 }
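
/*
 * Editor's illustrative sketch (not driver code): the CNTR_32BIT path above
 * keeps a 64-bit software value whose upper half counts hardware wraps.  A
 * minimal standalone version of that arithmetic, ignoring the CNTR_MAX
 * saturation handling, could look like this (the function name is made up):
 */
static __maybe_unused u64 example_extend_32bit_cntr(u64 saved, u32 hw_val)
{
        u64 upper = saved >> 32;
        u64 lower = (u32)saved;

        /* a hardware value below the saved low half means the counter wrapped */
        if (hw_val < lower)
                upper++;

        return (upper << 32) | hw_val;
}

/*
 * For example, saved = 0x1F0000000 and hw_val = 0x1000 yields 0x200001000:
 * one more wrap is recorded and the low half tracks the hardware register.
 */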
8093
8094 static u64 write_dev_port_cntr(struct hfi1_devdata *dd,
8095                                struct cntr_entry *entry,
8096                                u64 *psval, void *context, int vl, u64 data)
8097 {
8098         u64 val;
8099
8100         if (entry->flags & CNTR_DISABLED) {
8101                 dd_dev_err(dd, "Counter %s not enabled", entry->name);
8102                 return 0;
8103         }
8104
8105         hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
8106
8107         if (entry->flags & CNTR_SYNTH) {
8108                 *psval = data;
8109                 if (entry->flags & CNTR_32BIT) {
8110                         val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
8111                                              (data << 32) >> 32);
8112                         val = data; /* return the full 64bit value */
8113                 } else {
8114                         val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
8115                                              data);
8116                 }
8117         } else {
8118                 val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W, data);
8119         }
8120
8121         *psval = val;
8122
8123         hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
8124
8125         return val;
8126 }
8127
8128 u64 read_dev_cntr(struct hfi1_devdata *dd, int index, int vl)
8129 {
8130         struct cntr_entry *entry;
8131         u64 *sval;
8132
8133         entry = &dev_cntrs[index];
8134         sval = dd->scntrs + entry->offset;
8135
8136         if (vl != CNTR_INVALID_VL)
8137                 sval += vl;
8138
8139         return read_dev_port_cntr(dd, entry, sval, dd, vl);
8140 }
8141
8142 u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data)
8143 {
8144         struct cntr_entry *entry;
8145         u64 *sval;
8146
8147         entry = &dev_cntrs[index];
8148         sval = dd->scntrs + entry->offset;
8149
8150         if (vl != CNTR_INVALID_VL)
8151                 sval += vl;
8152
8153         return write_dev_port_cntr(dd, entry, sval, dd, vl, data);
8154 }
8155
8156 u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl)
8157 {
8158         struct cntr_entry *entry;
8159         u64 *sval;
8160
8161         entry = &port_cntrs[index];
8162         sval = ppd->scntrs + entry->offset;
8163
8164         if (vl != CNTR_INVALID_VL)
8165                 sval += vl;
8166
8167         if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8168             (index <= C_RCV_HDR_OVF_LAST)) {
8169                 /* We do not want to bother for disabled contexts */
8170                 return 0;
8171         }
8172
8173         return read_dev_port_cntr(ppd->dd, entry, sval, ppd, vl);
8174 }
8175
8176 u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
8177 {
8178         struct cntr_entry *entry;
8179         u64 *sval;
8180
8181         entry = &port_cntrs[index];
8182         sval = ppd->scntrs + entry->offset;
8183
8184         if (vl != CNTR_INVALID_VL)
8185                 sval += vl;
8186
8187         if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8188             (index <= C_RCV_HDR_OVF_LAST)) {
8189                 /* We do not want to bother for disabled contexts */
8190                 return 0;
8191         }
8192
8193         return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
8194 }
8195
8196 static void update_synth_timer(unsigned long opaque)
8197 {
8198         u64 cur_tx;
8199         u64 cur_rx;
8200         u64 total_flits;
8201         u8 update = 0;
8202         int i, j, vl;
8203         struct hfi1_pportdata *ppd;
8204         struct cntr_entry *entry;
8205
8206         struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
8207
8208         /*
8209          * Rather than keep beating on the CSRs, pick a minimal set that we can
8210          * check to watch for potential rollover. We can do this by looking at
8211          * the number of flits sent/recv. If the total flits exceeds 32 bits then
8212          * we have to iterate all the counters and update.
8213          */
8214         entry = &dev_cntrs[C_DC_RCV_FLITS];
8215         cur_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8216
8217         entry = &dev_cntrs[C_DC_XMIT_FLITS];
8218         cur_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8219
8220         hfi1_cdbg(
8221             CNTR,
8222             "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx\n",
8223             dd->unit, cur_tx, cur_rx, dd->last_tx, dd->last_rx);
8224
8225         if ((cur_tx < dd->last_tx) || (cur_rx < dd->last_rx)) {
8226                 /*
8227                  * May not be strictly necessary to update but it won't hurt and
8228                  * simplifies the logic here.
8229                  */
8230                 update = 1;
8231                 hfi1_cdbg(CNTR, "[%d] Tripwire counter rolled, updating",
8232                           dd->unit);
8233         } else {
8234                 total_flits = (cur_tx - dd->last_tx) + (cur_rx - dd->last_rx);
8235                 hfi1_cdbg(CNTR,
8236                           "[%d] total flits 0x%llx limit 0x%llx\n", dd->unit,
8237                           total_flits, (u64)CNTR_32BIT_MAX);
8238                 if (total_flits >= CNTR_32BIT_MAX) {
8239                         hfi1_cdbg(CNTR, "[%d] 32bit limit hit, updating",
8240                                   dd->unit);
8241                         update = 1;
8242                 }
8243         }
8244
8245         if (update) {
8246                 hfi1_cdbg(CNTR, "[%d] Updating dd and ppd counters", dd->unit);
8247                 for (i = 0; i < DEV_CNTR_LAST; i++) {
8248                         entry = &dev_cntrs[i];
8249                         if (entry->flags & CNTR_VL) {
8250                                 for (vl = 0; vl < C_VL_COUNT; vl++)
8251                                         read_dev_cntr(dd, i, vl);
8252                         } else {
8253                                 read_dev_cntr(dd, i, CNTR_INVALID_VL);
8254                         }
8255                 }
8256                 ppd = (struct hfi1_pportdata *)(dd + 1);
8257                 for (i = 0; i < dd->num_pports; i++, ppd++) {
8258                         for (j = 0; j < PORT_CNTR_LAST; j++) {
8259                                 entry = &port_cntrs[j];
8260                                 if (entry->flags & CNTR_VL) {
8261                                         for (vl = 0; vl < C_VL_COUNT; vl++)
8262                                                 read_port_cntr(ppd, j, vl);
8263                                 } else {
8264                                         read_port_cntr(ppd, j, CNTR_INVALID_VL);
8265                                 }
8266                         }
8267                 }
8268
8269                 /*
8270                  * We want the value in the register. The goal is to keep track
8271                  * of the number of "ticks" not the counter value. In other
8272                  * words if the register rolls we want to notice it and go ahead
8273                  * and force an update.
8274                  */
8275                 entry = &dev_cntrs[C_DC_XMIT_FLITS];
8276                 dd->last_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8277                                                 CNTR_MODE_R, 0);
8278
8279                 entry = &dev_cntrs[C_DC_RCV_FLITS];
8280                 dd->last_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8281                                                 CNTR_MODE_R, 0);
8282
8283                 hfi1_cdbg(CNTR, "[%d] setting last tx/rx to 0x%llx 0x%llx",
8284                           dd->unit, dd->last_tx, dd->last_rx);
8285
8286         } else {
8287                 hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
8288         }
8289
8290         mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8291 }
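
/*
 * Editor's illustrative sketch (not driver code): the "tripwire" decision
 * above, reduced to a standalone predicate.  A raw register rollover always
 * forces a full sweep; otherwise a sweep is needed only once roughly 2^32
 * flits have moved since the last one (the function name is made up):
 */
static __maybe_unused bool example_needs_full_update(u64 cur_tx, u64 last_tx,
                                                     u64 cur_rx, u64 last_rx)
{
        if (cur_tx < last_tx || cur_rx < last_rx)
                return true;

        return (cur_tx - last_tx) + (cur_rx - last_rx) >= CNTR_32BIT_MAX;
}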
8292
8293 #define C_MAX_NAME 13 /* 12 chars + one for \0 */
8294 static int init_cntrs(struct hfi1_devdata *dd)
8295 {
8296         int i, rcv_ctxts, index, j;
8297         size_t sz;
8298         char *p;
8299         char name[C_MAX_NAME];
8300         struct hfi1_pportdata *ppd;
8301
8302         /* set up the stats timer; the add_timer is done at the end */
8303         setup_timer(&dd->synth_stats_timer, update_synth_timer,
8304                     (unsigned long)dd);
8305
8306         /***********************/
8307         /* per device counters */
8308         /***********************/
8309
8310         /* size names and determine how many we have */
8311         dd->ndevcntrs = 0;
8312         sz = 0;
8313         index = 0;
8314
8315         for (i = 0; i < DEV_CNTR_LAST; i++) {
8316                 hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
8317                 if (dev_cntrs[i].flags & CNTR_DISABLED) {
8318                         hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
8319                         continue;
8320                 }
8321
8322                 if (dev_cntrs[i].flags & CNTR_VL) {
8323                         hfi1_dbg_early("\tProcessing VL cntr\n");
8324                         dev_cntrs[i].offset = index;
8325                         for (j = 0; j < C_VL_COUNT; j++) {
8326                                 memset(name, '\0', C_MAX_NAME);
8327                                 snprintf(name, C_MAX_NAME, "%s%d",
8328                                         dev_cntrs[i].name,
8329                                         vl_from_idx(j));
8330                                 sz += strlen(name);
8331                                 sz++;
8332                                 hfi1_dbg_early("\t\t%s\n", name);
8333                                 dd->ndevcntrs++;
8334                                 index++;
8335                         }
8336                 } else {
8337                         /* +1 for newline  */
8338                         sz += strlen(dev_cntrs[i].name) + 1;
8339                         dd->ndevcntrs++;
8340                         dev_cntrs[i].offset = index;
8341                         index++;
8342                         hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
8343                 }
8344         }
8345
8346         /* allocate space for the counter values */
8347         dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8348         if (!dd->cntrs)
8349                 goto bail;
8350
8351         dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8352         if (!dd->scntrs)
8353                 goto bail;
8354
8355
8356         /* allocate space for the counter names */
8357         dd->cntrnameslen = sz;
8358         dd->cntrnames = kmalloc(sz, GFP_KERNEL);
8359         if (!dd->cntrnames)
8360                 goto bail;
8361
8362         /* fill in the names */
8363         for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
8364                 if (dev_cntrs[i].flags & CNTR_DISABLED) {
8365                         /* Nothing */
8366                 } else {
8367                         if (dev_cntrs[i].flags & CNTR_VL) {
8368                                 for (j = 0; j < C_VL_COUNT; j++) {
8369                                         memset(name, '\0', C_MAX_NAME);
8370                                         snprintf(name, C_MAX_NAME, "%s%d",
8371                                                 dev_cntrs[i].name,
8372                                                 vl_from_idx(j));
8373                                         memcpy(p, name, strlen(name));
8374                                         p += strlen(name);
8375                                         *p++ = '\n';
8376                                 }
8377                         } else {
8378                                 memcpy(p, dev_cntrs[i].name,
8379                                        strlen(dev_cntrs[i].name));
8380                                 p += strlen(dev_cntrs[i].name);
8381                                 *p++ = '\n';
8382                         }
8383                         index++;
8384                 }
8385         }
8386
8387         /*********************/
8388         /* per port counters */
8389         /*********************/
8390
8391         /*
8392          * Go through the counters for the overflows and disable the ones we
8393          * don't need. This varies based on platform so we need to do it
8394          * dynamically here.
8395          */
8396         rcv_ctxts = dd->num_rcv_contexts;
8397         for (i = C_RCV_HDR_OVF_FIRST + rcv_ctxts;
8398              i <= C_RCV_HDR_OVF_LAST; i++) {
8399                 port_cntrs[i].flags |= CNTR_DISABLED;
8400         }
8401
8402         /* size port counter names and determine how many we have */
8403         sz = 0;
8404         dd->nportcntrs = 0;
8405         for (i = 0; i < PORT_CNTR_LAST; i++) {
8406                 hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
8407                 if (port_cntrs[i].flags & CNTR_DISABLED) {
8408                         hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
8409                         continue;
8410                 }
8411
8412                 if (port_cntrs[i].flags & CNTR_VL) {
8413                         hfi1_dbg_early("\tProcessing VL cntr\n");
8414                         port_cntrs[i].offset = dd->nportcntrs;
8415                         for (j = 0; j < C_VL_COUNT; j++) {
8416                                 memset(name, '\0', C_MAX_NAME);
8417                                 snprintf(name, C_MAX_NAME, "%s%d",
8418                                         port_cntrs[i].name,
8419                                         vl_from_idx(j));
8420                                 sz += strlen(name);
8421                                 sz++;
8422                                 hfi1_dbg_early("\t\t%s\n", name);
8423                                 dd->nportcntrs++;
8424                         }
8425                 } else {
8426                         /* +1 for newline  */
8427                         sz += strlen(port_cntrs[i].name) + 1;
8428                         port_cntrs[i].offset = dd->nportcntrs;
8429                         dd->nportcntrs++;
8430                         hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
8431                 }
8432         }
8433
8434         /* allocate space for the counter names */
8435         dd->portcntrnameslen = sz;
8436         dd->portcntrnames = kmalloc(sz, GFP_KERNEL);
8437         if (!dd->portcntrnames)
8438                 goto bail;
8439
8440         /* fill in port cntr names */
8441         for (p = dd->portcntrnames, i = 0; i < PORT_CNTR_LAST; i++) {
8442                 if (port_cntrs[i].flags & CNTR_DISABLED)
8443                         continue;
8444
8445                 if (port_cntrs[i].flags & CNTR_VL) {
8446                         for (j = 0; j < C_VL_COUNT; j++) {
8447                                 memset(name, '\0', C_MAX_NAME);
8448                                 snprintf(name, C_MAX_NAME, "%s%d",
8449                                         port_cntrs[i].name,
8450                                         vl_from_idx(j));
8451                                 memcpy(p, name, strlen(name));
8452                                 p += strlen(name);
8453                                 *p++ = '\n';
8454                         }
8455                 } else {
8456                         memcpy(p, port_cntrs[i].name,
8457                                strlen(port_cntrs[i].name));
8458                         p += strlen(port_cntrs[i].name);
8459                         *p++ = '\n';
8460                 }
8461         }
8462
8463         /* allocate per port storage for counter values */
8464         ppd = (struct hfi1_pportdata *)(dd + 1);
8465         for (i = 0; i < dd->num_pports; i++, ppd++) {
8466                 ppd->cntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8467                 if (!ppd->cntrs)
8468                         goto bail;
8469
8470                 ppd->scntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8471                 if (!ppd->scntrs)
8472                         goto bail;
8473         }
8474
8475         /* CPU counters need to be allocated and zeroed */
8476         if (init_cpu_counters(dd))
8477                 goto bail;
8478
8479         mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8480         return 0;
8481 bail:
8482         free_cntrs(dd);
8483         return -ENOMEM;
8484 }
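
/*
 * Editor's illustrative sketch (not driver code): the name buffers built in
 * init_cntrs() above are flat, newline-separated lists sized in a first pass
 * (strlen(name) + 1 per entry) and filled in a second.  The append step,
 * pulled out as a standalone helper (the name is made up), amounts to:
 */
static __maybe_unused char *example_append_cntr_name(char *p, const char *name)
{
        size_t len = strlen(name);

        memcpy(p, name, len);
        p += len;
        *p++ = '\n';    /* entries are newline separated, not NUL terminated */
        return p;
}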
8485
8486
8487 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
8488 {
8489         switch (chip_lstate) {
8490         default:
8491                 dd_dev_err(dd,
8492                          "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
8493                          chip_lstate);
8494                 /* fall through */
8495         case LSTATE_DOWN:
8496                 return IB_PORT_DOWN;
8497         case LSTATE_INIT:
8498                 return IB_PORT_INIT;
8499         case LSTATE_ARMED:
8500                 return IB_PORT_ARMED;
8501         case LSTATE_ACTIVE:
8502                 return IB_PORT_ACTIVE;
8503         }
8504 }
8505
8506 u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
8507 {
8508         /* look at the HFI meta-states only */
8509         switch (chip_pstate & 0xf0) {
8510         default:
8511                 dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
8512                         chip_pstate);
8513                 /* fall through */
8514         case PLS_DISABLED:
8515                 return IB_PORTPHYSSTATE_DISABLED;
8516         case PLS_OFFLINE:
8517                 return OPA_PORTPHYSSTATE_OFFLINE;
8518         case PLS_POLLING:
8519                 return IB_PORTPHYSSTATE_POLLING;
8520         case PLS_CONFIGPHY:
8521                 return IB_PORTPHYSSTATE_TRAINING;
8522         case PLS_LINKUP:
8523                 return IB_PORTPHYSSTATE_LINKUP;
8524         case PLS_PHYTEST:
8525                 return IB_PORTPHYSSTATE_PHY_TEST;
8526         }
8527 }
8528
8529 /* return the OPA port logical state name */
8530 const char *opa_lstate_name(u32 lstate)
8531 {
8532         static const char * const port_logical_names[] = {
8533                 "PORT_NOP",
8534                 "PORT_DOWN",
8535                 "PORT_INIT",
8536                 "PORT_ARMED",
8537                 "PORT_ACTIVE",
8538                 "PORT_ACTIVE_DEFER",
8539         };
8540         if (lstate < ARRAY_SIZE(port_logical_names))
8541                 return port_logical_names[lstate];
8542         return "unknown";
8543 }
8544
8545 /* return the OPA port physical state name */
8546 const char *opa_pstate_name(u32 pstate)
8547 {
8548         static const char * const port_physical_names[] = {
8549                 "PHYS_NOP",
8550                 "reserved1",
8551                 "PHYS_POLL",
8552                 "PHYS_DISABLED",
8553                 "PHYS_TRAINING",
8554                 "PHYS_LINKUP",
8555                 "PHYS_LINK_ERR_RECOVER",
8556                 "PHYS_PHY_TEST",
8557                 "reserved8",
8558                 "PHYS_OFFLINE",
8559                 "PHYS_GANGED",
8560                 "PHYS_TEST",
8561         };
8562         if (pstate < ARRAY_SIZE(port_physical_names))
8563                 return port_physical_names[pstate];
8564         return "unknown";
8565 }
8566
8567 /*
8568  * Read the hardware link state and set the driver's cached value of it.
8569  * Return the (new) current value.
8570  */
8571 u32 get_logical_state(struct hfi1_pportdata *ppd)
8572 {
8573         u32 new_state;
8574
8575         new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
8576         if (new_state != ppd->lstate) {
8577                 dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
8578                         opa_lstate_name(new_state), new_state);
8579                 ppd->lstate = new_state;
8580         }
8581         /*
8582          * Set port status flags in the page mapped into userspace
8583          * memory. Do it here to ensure a reliable state - this is
8584          * the only function called by all state handling code.
8585          * Always set the flags because the cache value might
8586          * have been changed explicitly outside of this
8587          * function.
8588          */
8589         if (ppd->statusp) {
8590                 switch (ppd->lstate) {
8591                 case IB_PORT_DOWN:
8592                 case IB_PORT_INIT:
8593                         *ppd->statusp &= ~(HFI1_STATUS_IB_CONF |
8594                                            HFI1_STATUS_IB_READY);
8595                         break;
8596                 case IB_PORT_ARMED:
8597                         *ppd->statusp |= HFI1_STATUS_IB_CONF;
8598                         break;
8599                 case IB_PORT_ACTIVE:
8600                         *ppd->statusp |= HFI1_STATUS_IB_READY;
8601                         break;
8602                 }
8603         }
8604         return ppd->lstate;
8605 }
8606
8607 /**
8608  * wait_logical_linkstate - wait for an IB link state change to occur
8609  * @ppd: port device
8610  * @state: the state to wait for
8611  * @msecs: the number of milliseconds to wait
8612  *
8613  * Wait up to msecs milliseconds for IB link state change to occur.
8614  * For now, take the easy polling route.
8615  * Returns 0 if state reached, otherwise -ETIMEDOUT.
8616  */
8617 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
8618                                   int msecs)
8619 {
8620         unsigned long timeout;
8621
8622         timeout = jiffies + msecs_to_jiffies(msecs);
8623         while (1) {
8624                 if (get_logical_state(ppd) == state)
8625                         return 0;
8626                 if (time_after(jiffies, timeout))
8627                         break;
8628                 msleep(20);
8629         }
8630         dd_dev_err(ppd->dd, "timeout waiting for link state 0x%x\n", state);
8631
8632         return -ETIMEDOUT;
8633 }
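
/*
 * Editor's usage sketch (hypothetical caller): link transition code
 * typically polls for a specific logical state afterwards, e.g. waiting
 * up to one second for the link to go ACTIVE:
 *
 *	if (wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000))
 *		... handle the -ETIMEDOUT case ...
 */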
8634
8635 u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
8636 {
8637         static u32 remembered_state = 0xff;
8638         u32 pstate;
8639         u32 ib_pstate;
8640
8641         pstate = read_physical_state(ppd->dd);
8642         ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
8643         if (remembered_state != ib_pstate) {
8644                 dd_dev_info(ppd->dd,
8645                         "%s: physical state changed to %s (0x%x), phy 0x%x\n",
8646                         __func__, opa_pstate_name(ib_pstate), ib_pstate,
8647                         pstate);
8648                 remembered_state = ib_pstate;
8649         }
8650         return ib_pstate;
8651 }
8652
8653 /*
8654  * Read/modify/write ASIC_QSFP register bits as selected by mask
8655  * data: 0 or 1 in the positions depending on what needs to be written
8656  * dir: 0 for read, 1 for write
8657  * mask: select by setting
8658  *      I2CCLK  (bit 0)
8659  *      I2CDATA (bit 1)
8660  */
8661 u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
8662                   u32 mask)
8663 {
8664         u64 qsfp_oe, target_oe;
8665
8666         target_oe = target ? ASIC_QSFP2_OE : ASIC_QSFP1_OE;
8667         if (mask) {
8668                 /* We are writing register bits, so lock access */
8669                 dir &= mask;
8670                 data &= mask;
8671
8672                 qsfp_oe = read_csr(dd, target_oe);
8673                 qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir;
8674                 write_csr(dd, target_oe, qsfp_oe);
8675         }
8676         /* We are exclusively reading bits here, but it is unlikely
8677          * we'll get valid data when we set the direction of the pin
8678          * in the same call, so the reader should call this function
8679          * again to get valid data.
8680          */
8681         return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN);
8682 }
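
/*
 * Editor's usage sketch (not driver code; the helper name is made up): per
 * the comment above, bit 0 selects I2CCLK and bit 1 selects I2CDATA.  A
 * caller could make I2CCLK an output and then re-read the pins with a
 * second, mask-0 call, which leaves the output enables untouched:
 */
static __maybe_unused u64 example_qsfp_pin_read(struct hfi1_devdata *dd,
                                                u32 target)
{
        /* dir/mask select bit 0 (I2CCLK) as an output */
        hfi1_gpio_mod(dd, target, 0, 0x1, 0x1);

        /* mask == 0: pure read of the QSFP IN register */
        return hfi1_gpio_mod(dd, target, 0, 0, 0);
}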
8683
8684 #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
8685 (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8686
8687 #define SET_STATIC_RATE_CONTROL_SMASK(r) \
8688 (r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8689
8690 int hfi1_init_ctxt(struct send_context *sc)
8691 {
8692         if (sc != NULL) {
8693                 struct hfi1_devdata *dd = sc->dd;
8694                 u64 reg;
8695                 u8 set = (sc->type == SC_USER ?
8696                           HFI1_CAP_IS_USET(STATIC_RATE_CTRL) :
8697                           HFI1_CAP_IS_KSET(STATIC_RATE_CTRL));
8698                 reg = read_kctxt_csr(dd, sc->hw_context,
8699                                      SEND_CTXT_CHECK_ENABLE);
8700                 if (set)
8701                         CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
8702                 else
8703                         SET_STATIC_RATE_CONTROL_SMASK(reg);
8704                 write_kctxt_csr(dd, sc->hw_context,
8705                                 SEND_CTXT_CHECK_ENABLE, reg);
8706         }
8707         return 0;
8708 }
8709
8710 int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
8711 {
8712         int ret = 0;
8713         u64 reg;
8714
8715         if (dd->icode != ICODE_RTL_SILICON) {
8716                 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
8717                         dd_dev_info(dd, "%s: tempsense not supported by HW\n",
8718                                     __func__);
8719                 return -EINVAL;
8720         }
8721         reg = read_csr(dd, ASIC_STS_THERM);
8722         temp->curr = ((reg >> ASIC_STS_THERM_CURR_TEMP_SHIFT) &
8723                       ASIC_STS_THERM_CURR_TEMP_MASK);
8724         temp->lo_lim = ((reg >> ASIC_STS_THERM_LO_TEMP_SHIFT) &
8725                         ASIC_STS_THERM_LO_TEMP_MASK);
8726         temp->hi_lim = ((reg >> ASIC_STS_THERM_HI_TEMP_SHIFT) &
8727                         ASIC_STS_THERM_HI_TEMP_MASK);
8728         temp->crit_lim = ((reg >> ASIC_STS_THERM_CRIT_TEMP_SHIFT) &
8729                           ASIC_STS_THERM_CRIT_TEMP_MASK);
8730         /* triggers is a 3-bit value - 1 bit per trigger. */
8731         temp->triggers = (u8)((reg >> ASIC_STS_THERM_LOW_SHIFT) & 0x7);
8732
8733         return ret;
8734 }
8735
8736 /* ========================================================================= */
8737
8738 /*
8739  * Enable/disable chip from delivering interrupts.
8740  */
8741 void set_intr_state(struct hfi1_devdata *dd, u32 enable)
8742 {
8743         int i;
8744
8745         /*
8746          * In HFI, the mask needs to be 1 to allow interrupts.
8747          */
8748         if (enable) {
8749                 u64 cce_int_mask;
8750                 const int qsfp1_int_smask = QSFP1_INT % 64;
8751                 const int qsfp2_int_smask = QSFP2_INT % 64;
8752
8753                 /* enable all interrupts */
8754                 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8755                         write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);
8756
8757                 /*
8758                  * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
8759                  * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
8760                  * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
8761                  * the index of the appropriate CSR in the CCEIntMask CSR array
8762                  */
8763                 cce_int_mask = read_csr(dd, CCE_INT_MASK +
8764                                                 (8*(QSFP1_INT/64)));
8765                 if (dd->hfi1_id) {
8766                         cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
8767                         write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
8768                                         cce_int_mask);
8769                 } else {
8770                         cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
8771                         write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
8772                                         cce_int_mask);
8773                 }
8774         } else {
8775                 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8776                         write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
8777         }
8778 }
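
/*
 * Editor's illustrative sketch (not driver code): interrupt sources are
 * spread across CCE_NUM_INT_CSRS 64-bit mask CSRs, so source N lives in
 * CSR (N / 64) at bit (N % 64), with the CSRs 8 bytes apart.  Masking a
 * single source, mirroring the QSFP handling above (the function name is
 * made up), amounts to:
 */
static __maybe_unused void example_mask_one_int_source(struct hfi1_devdata *dd,
                                                       int source)
{
        u64 mask = read_csr(dd, CCE_INT_MASK + (8 * (source / 64)));

        mask &= ~((u64)1 << (source % 64));
        write_csr(dd, CCE_INT_MASK + (8 * (source / 64)), mask);
}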
8779
8780 /*
8781  * Clear all interrupt sources on the chip.
8782  */
8783 static void clear_all_interrupts(struct hfi1_devdata *dd)
8784 {
8785         int i;
8786
8787         for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8788                 write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);
8789
8790         write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
8791         write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
8792         write_csr(dd, RCV_ERR_CLEAR, ~(u64)0);
8793         write_csr(dd, SEND_ERR_CLEAR, ~(u64)0);
8794         write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0);
8795         write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0);
8796         write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0);
8797         for (i = 0; i < dd->chip_send_contexts; i++)
8798                 write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0);
8799         for (i = 0; i < dd->chip_sdma_engines; i++)
8800                 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0);
8801
8802         write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0);
8803         write_csr(dd, DC_LCB_ERR_CLR, ~(u64)0);
8804         write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
8805 }
8806
8807 /* Move to pcie.c? */
8808 static void disable_intx(struct pci_dev *pdev)
8809 {
8810         pci_intx(pdev, 0);
8811 }
8812
8813 static void clean_up_interrupts(struct hfi1_devdata *dd)
8814 {
8815         int i;
8816
8817         /* remove irqs - must happen before disabling/turning off */
8818         if (dd->num_msix_entries) {
8819                 /* MSI-X */
8820                 struct hfi1_msix_entry *me = dd->msix_entries;
8821
8822                 for (i = 0; i < dd->num_msix_entries; i++, me++) {
8823                         if (me->arg == NULL) /* => no irq, no affinity */
8824                                 break;
8825                         irq_set_affinity_hint(dd->msix_entries[i].msix.vector,
8826                                         NULL);
8827                         free_irq(me->msix.vector, me->arg);
8828                 }
8829         } else {
8830                 /* INTx */
8831                 if (dd->requested_intx_irq) {
8832                         free_irq(dd->pcidev->irq, dd);
8833                         dd->requested_intx_irq = 0;
8834                 }
8835         }
8836
8837         /* turn off interrupts */
8838         if (dd->num_msix_entries) {
8839                 /* MSI-X */
8840                 pci_disable_msix(dd->pcidev);
8841         } else {
8842                 /* INTx */
8843                 disable_intx(dd->pcidev);
8844         }
8845
8846         /* clean structures */
8847         for (i = 0; i < dd->num_msix_entries; i++)
8848                 free_cpumask_var(dd->msix_entries[i].mask);
8849         kfree(dd->msix_entries);
8850         dd->msix_entries = NULL;
8851         dd->num_msix_entries = 0;
8852 }
8853
8854 /*
8855  * Remap the interrupt source from the general handler to the given MSI-X
8856  * interrupt.
8857  */
8858 static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
8859 {
8860         u64 reg;
8861         int m, n;
8862
8863         /* clear from the handled mask of the general interrupt */
8864         m = isrc / 64;
8865         n = isrc % 64;
8866         dd->gi_mask[m] &= ~((u64)1 << n);
8867
8868         /* direct the chip source to the given MSI-X interrupt */
8869         m = isrc / 8;
8870         n = isrc % 8;
8871         reg = read_csr(dd, CCE_INT_MAP + (8*m));
8872         reg &= ~((u64)0xff << (8*n));
8873         reg |= ((u64)msix_intr & 0xff) << (8*n);
8874         write_csr(dd, CCE_INT_MAP + (8*m), reg);
8875 }
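
/*
 * Editor's worked example for the mapping above: each CCE_INT_MAP CSR packs
 * eight 8-bit MSI-X vector numbers, so chip source isrc lands in map CSR
 * (isrc / 8), byte (isrc % 8).  For isrc = 70 that is the CSR at offset
 * 8 * 8 = 64 from CCE_INT_MAP, bits 48..55, and bit 6 of general-interrupt
 * mask word 1 is cleared so the general handler no longer claims it.
 */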
8876
8877 static void remap_sdma_interrupts(struct hfi1_devdata *dd,
8878                                   int engine, int msix_intr)
8879 {
8880         /*
8881          * SDMA engine interrupt sources grouped by type, rather than
8882          * engine.  Per-engine interrupts are as follows:
8883          *      SDMA
8884          *      SDMAProgress
8885          *      SDMAIdle
8886          */
8887         remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine,
8888                 msix_intr);
8889         remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine,
8890                 msix_intr);
8891         remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine,
8892                 msix_intr);
8893 }
8894
8895 static int request_intx_irq(struct hfi1_devdata *dd)
8896 {
8897         int ret;
8898
8899         snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME "_%d",
8900                  dd->unit);
8901         ret = request_irq(dd->pcidev->irq, general_interrupt,
8902                                   IRQF_SHARED, dd->intx_name, dd);
8903         if (ret)
8904                 dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
8905                                 ret);
8906         else
8907                 dd->requested_intx_irq = 1;
8908         return ret;
8909 }
8910
8911 static int request_msix_irqs(struct hfi1_devdata *dd)
8912 {
8913         const struct cpumask *local_mask;
8914         cpumask_var_t def, rcv;
8915         bool def_ret, rcv_ret;
8916         int first_general, last_general;
8917         int first_sdma, last_sdma;
8918         int first_rx, last_rx;
8919         int first_cpu, curr_cpu;
8920         int rcv_cpu, sdma_cpu;
8921         int i, ret = 0, possible;
8922         int ht;
8923
8924         /* calculate the ranges we are going to use */
8925         first_general = 0;
8926         first_sdma = last_general = first_general + 1;
8927         first_rx = last_sdma = first_sdma + dd->num_sdma;
8928         last_rx = first_rx + dd->n_krcv_queues;
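        /*
         * Editor's worked example: with, say, 16 SDMA engines and 8 kernel
         * receive queues this gives vector 0 to the general interrupt,
         * vectors 1..16 to SDMA and vectors 17..24 to receive contexts
         * (the last_* values are exclusive bounds).
         */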
8929
8930         /*
8931          * Interrupt affinity.
8932          *
8933          * Non-receive interrupts get a default mask that starts as the
8934          * possible CPUs on the local node, with hyperthread siblings
8935          * and each receive-context CPU removed.
8936          *
8937          * Receive-context interrupts get node-relative CPU 1 onward,
8938          * wrapping back to node-relative CPU 1 as necessary.
8939          *
8940          */
8941         local_mask = cpumask_of_pcibus(dd->pcidev->bus);
8942         /* if first cpu is invalid, use NUMA 0 */
8943         if (cpumask_first(local_mask) >= nr_cpu_ids)
8944                 local_mask = topology_core_cpumask(0);
8945
8946         def_ret = zalloc_cpumask_var(&def, GFP_KERNEL);
8947         rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL);
8948         if (!def_ret || !rcv_ret)
8949                 goto bail;
8950         /* use local mask as default */
8951         cpumask_copy(def, local_mask);
8952         possible = cpumask_weight(def);
8953         /* disarm threads from default */
8954         ht = cpumask_weight(
8955                         topology_sibling_cpumask(cpumask_first(local_mask)));
8956         for (i = possible/ht; i < possible; i++)
8957                 cpumask_clear_cpu(i, def);
8958         /* def now has full cores on chosen node */
8959         first_cpu = cpumask_first(def);
8960         if (nr_cpu_ids >= first_cpu)
8961                 first_cpu++;
8962         curr_cpu = first_cpu;
8963
8964         /*  One context is reserved as control context */
8965         for (i = first_cpu; i < dd->n_krcv_queues + first_cpu - 1; i++) {
8966                 cpumask_clear_cpu(curr_cpu, def);
8967                 cpumask_set_cpu(curr_cpu, rcv);
8968                 curr_cpu = cpumask_next(curr_cpu, def);
8969                 if (curr_cpu >= nr_cpu_ids)
8970                         break;
8971         }
8972         /* def mask has non-rcv, rcv has recv mask */
8973         rcv_cpu = cpumask_first(rcv);
8974         sdma_cpu = cpumask_first(def);
8975
8976         /*
8977          * Sanity check - the code expects all SDMA chip source
8978          * interrupts to be in the same CSR, starting at bit 0.  Verify
8979          * that this is true by checking the bit location of the start.
8980          */
8981         BUILD_BUG_ON(IS_SDMA_START % 64);
8982
8983         for (i = 0; i < dd->num_msix_entries; i++) {
8984                 struct hfi1_msix_entry *me = &dd->msix_entries[i];
8985                 const char *err_info;
8986                 irq_handler_t handler;
8987                 irq_handler_t thread = NULL;
8988                 void *arg;
8989                 int idx;
8990                 struct hfi1_ctxtdata *rcd = NULL;
8991                 struct sdma_engine *sde = NULL;
8992
8993                 /* obtain the arguments to request_irq */
8994                 if (first_general <= i && i < last_general) {
8995                         idx = i - first_general;
8996                         handler = general_interrupt;
8997                         arg = dd;
8998                         snprintf(me->name, sizeof(me->name),
8999                                  DRIVER_NAME "_%d", dd->unit);
9000                         err_info = "general";
9001                 } else if (first_sdma <= i && i < last_sdma) {
9002                         idx = i - first_sdma;
9003                         sde = &dd->per_sdma[idx];
9004                         handler = sdma_interrupt;
9005                         arg = sde;
9006                         snprintf(me->name, sizeof(me->name),
9007                                  DRIVER_NAME "_%d sdma%d", dd->unit, idx);
9008                         err_info = "sdma";
9009                         remap_sdma_interrupts(dd, idx, i);
9010                 } else if (first_rx <= i && i < last_rx) {
9011                         idx = i - first_rx;
9012                         rcd = dd->rcd[idx];
9013                         /* no interrupt if no rcd */
9014                         if (!rcd)
9015                                 continue;
9016                         /*
9017                          * Set the interrupt register and mask for this
9018                          * context's interrupt.
9019                          */
9020                         rcd->ireg = (IS_RCVAVAIL_START+idx) / 64;
9021                         rcd->imask = ((u64)1) <<
9022                                         ((IS_RCVAVAIL_START+idx) % 64);
9023                         handler = receive_context_interrupt;
9024                         thread = receive_context_thread;
9025                         arg = rcd;
9026                         snprintf(me->name, sizeof(me->name),
9027                                  DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
9028                         err_info = "receive context";
9029                         remap_intr(dd, IS_RCVAVAIL_START + idx, i);
9030                 } else {
9031                         /* not in our expected range - complain, then
9032                            ignore it */
9033                         dd_dev_err(dd,
9034                                 "Unexpected extra MSI-X interrupt %d\n", i);
9035                         continue;
9036                 }
9037                 /* no argument, no interrupt */
9038                 if (arg == NULL)
9039                         continue;
9040                 /* make sure the name is terminated */
9041                 me->name[sizeof(me->name)-1] = 0;
9042
9043                 ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
9044                                                 me->name, arg);
9045                 if (ret) {
9046                         dd_dev_err(dd,
9047                                 "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
9048                                  err_info, me->msix.vector, idx, ret);
9049                         return ret;
9050                 }
9051                 /*
9052                  * assign arg after request_irq call, so it will be
9053                  * cleaned up
9054                  */
9055                 me->arg = arg;
9056
9057                 if (!zalloc_cpumask_var(
9058                         &dd->msix_entries[i].mask,
9059                         GFP_KERNEL))
9060                         goto bail;
9061                 if (handler == sdma_interrupt) {
9062                         dd_dev_info(dd, "sdma engine %d cpu %d\n",
9063                                 sde->this_idx, sdma_cpu);
9064                         sde->cpu = sdma_cpu;
9065                         cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask);
9066                         sdma_cpu = cpumask_next(sdma_cpu, def);
9067                         if (sdma_cpu >= nr_cpu_ids)
9068                                 sdma_cpu = cpumask_first(def);
9069                 } else if (handler == receive_context_interrupt) {
9070                         dd_dev_info(dd, "rcv ctxt %d cpu %d\n", rcd->ctxt,
9071                                     (rcd->ctxt == HFI1_CTRL_CTXT) ?
9072                                             cpumask_first(def) : rcv_cpu);
9073                         if (rcd->ctxt == HFI1_CTRL_CTXT) {
9074                                 /* map to first default */
9075                                 cpumask_set_cpu(cpumask_first(def),
9076                                                 dd->msix_entries[i].mask);
9077                         } else {
9078                                 cpumask_set_cpu(rcv_cpu,
9079                                                 dd->msix_entries[i].mask);
9080                                 rcv_cpu = cpumask_next(rcv_cpu, rcv);
9081                                 if (rcv_cpu >= nr_cpu_ids)
9082                                         rcv_cpu = cpumask_first(rcv);
9083                         }
9084                 } else {
9085                         /* otherwise first def */
9086                         dd_dev_info(dd, "%s cpu %d\n",
9087                                 err_info, cpumask_first(def));
9088                         cpumask_set_cpu(
9089                                 cpumask_first(def), dd->msix_entries[i].mask);
9090                 }
9091                 irq_set_affinity_hint(
9092                         dd->msix_entries[i].msix.vector,
9093                         dd->msix_entries[i].mask);
9094         }
9095
9096 out:
9097         free_cpumask_var(def);
9098         free_cpumask_var(rcv);
9099         return ret;
9100 bail:
9101         ret = -ENOMEM;
9102         goto  out;
9103 }
9104
9105 /*
9106  * Set the general handler to accept all interrupts, remap all
9107  * chip interrupts back to MSI-X 0.
9108  */
9109 static void reset_interrupts(struct hfi1_devdata *dd)
9110 {
9111         int i;
9112
9113         /* all interrupts handled by the general handler */
9114         for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9115                 dd->gi_mask[i] = ~(u64)0;
9116
9117         /* all chip interrupts map to MSI-X 0 */
9118         for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9119                 write_csr(dd, CCE_INT_MAP + (8*i), 0);
9120 }
9121
9122 static int set_up_interrupts(struct hfi1_devdata *dd)
9123 {
9124         struct hfi1_msix_entry *entries;
9125         u32 total, request;
9126         int i, ret;
9127         int single_interrupt = 0; /* we expect to have all the interrupts */
9128
9129         /*
9130          * Interrupt count:
9131          *      1 general, "slow path" interrupt (includes the SDMA engines
9132          *              slow source, SDMACleanupDone)
9133          *      N interrupts - one per used SDMA engine
9134          *      M interrupts - one per kernel receive context
9135          */
9136         total = 1 + dd->num_sdma + dd->n_krcv_queues;
9137
9138         entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
9139         if (!entries) {
9140                 ret = -ENOMEM;
9141                 goto fail;
9142         }
9143         /* 1-1 MSI-X entry assignment */
9144         for (i = 0; i < total; i++)
9145                 entries[i].msix.entry = i;
9146
9147         /* ask for MSI-X interrupts */
9148         request = total;
9149         request_msix(dd, &request, entries);
9150
9151         if (request == 0) {
9152                 /* using INTx */
9153                 /* dd->num_msix_entries already zero */
9154                 kfree(entries);
9155                 single_interrupt = 1;
9156                 dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
9157         } else {
9158                 /* using MSI-X */
9159                 dd->num_msix_entries = request;
9160                 dd->msix_entries = entries;
9161
9162                 if (request != total) {
9163                         /* using MSI-X, with reduced interrupts */
9164                         dd_dev_err(
9165                                 dd,
9166                                 "cannot handle reduced interrupt case, want %u, got %u\n",
9167                                 total, request);
9168                         ret = -EINVAL;
9169                         goto fail;
9170                 }
9171                 dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
9172         }
9173
9174         /* mask all interrupts */
9175         set_intr_state(dd, 0);
9176         /* clear all pending interrupts */
9177         clear_all_interrupts(dd);
9178
9179         /* reset general handler mask, chip MSI-X mappings */
9180         reset_interrupts(dd);
9181
9182         if (single_interrupt)
9183                 ret = request_intx_irq(dd);
9184         else
9185                 ret = request_msix_irqs(dd);
9186         if (ret)
9187                 goto fail;
9188
9189         return 0;
9190
9191 fail:
9192         clean_up_interrupts(dd);
9193         return ret;
9194 }
9195
9196 /*
9197  * Set up context values in dd.  Sets:
9198  *
9199  *      num_rcv_contexts - number of contexts being used
9200  *      n_krcv_queues - number of kernel contexts
9201  *      first_user_ctxt - first non-kernel context in array of contexts
9202  *      freectxts  - number of free user contexts
9203  *      num_send_contexts - number of PIO send contexts being used
9204  */
9205 static int set_up_context_variables(struct hfi1_devdata *dd)
9206 {
9207         int num_kernel_contexts;
9208         int num_user_contexts;
9209         int total_contexts;
9210         int ret;
9211         unsigned ngroups;
9212
9213         /*
9214          * Kernel contexts: (to be fixed later):
9215          * - min of 2 or 1 context/numa
9216          * - Context 0 - control context (VL15/multicast/error)
9217          * - Context 1 - default context
9218          */
9219         if (n_krcvqs)
9220                 /*
9221                  * Don't count context 0 in n_krcvqs since
9222                  * it isn't used for normal verbs traffic.
9223                  *
9224                  * krcvqs will reflect number of kernel
9225                  * receive contexts above 0.
9226                  */
9227                 num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS - 1;
9228         else
9229                 num_kernel_contexts = num_online_nodes();
9230         num_kernel_contexts =
9231                 max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
9232         /*
9233          * Every kernel receive context needs an ACK send context.
9234          * one send context is allocated for each VL{0-7} and VL15
9235          */
9236         if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
9237                 dd_dev_err(dd,
9238                            "Reducing # kernel rcv contexts to: %d, from %d\n",
9239                            (int)(dd->chip_send_contexts - num_vls - 1),
9240                            (int)num_kernel_contexts);
9241                 num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
9242         }
9243         /*
9244          * User contexts: (to be fixed later)
9245          *      - set to num_rcv_contexts if non-zero
9246          *      - default to 1 user context per CPU
9247          */
9248         if (num_rcv_contexts)
9249                 num_user_contexts = num_rcv_contexts;
9250         else
9251                 num_user_contexts = num_online_cpus();
9252
9253         total_contexts = num_kernel_contexts + num_user_contexts;
9254
9255         /*
9256          * Adjust the counts given a global max.
9257          */
9258         if (total_contexts > dd->chip_rcv_contexts) {
9259                 dd_dev_err(dd,
9260                            "Reducing # user receive contexts to: %d, from %d\n",
9261                            (int)(dd->chip_rcv_contexts - num_kernel_contexts),
9262                            (int)num_user_contexts);
9263                 num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts;
9264                 /* recalculate */
9265                 total_contexts = num_kernel_contexts + num_user_contexts;
9266         }
9267
9268         /* the first N are kernel contexts, the rest are user contexts */
9269         dd->num_rcv_contexts = total_contexts;
9270         dd->n_krcv_queues = num_kernel_contexts;
9271         dd->first_user_ctxt = num_kernel_contexts;
9272         dd->freectxts = num_user_contexts;
9273         dd_dev_info(dd,
9274                 "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
9275                 (int)dd->chip_rcv_contexts,
9276                 (int)dd->num_rcv_contexts,
9277                 (int)dd->n_krcv_queues,
9278                 (int)dd->num_rcv_contexts - dd->n_krcv_queues);
9279
9280         /*
9281          * Receive array allocation:
9282          *   All RcvArray entries are divided into groups of 8. This
9283          *   is required by the hardware and will speed up writes to
9284          *   consecutive entries by using write-combining of the entire
9285          *   cacheline.
9286          *
9287          *   The number of groups is evenly divided among all contexts.
9288          *   Any left over groups will be given to the first N user
9289          *   contexts.
9290          */
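        /*
         * Editor's worked example: with, say, a 32768-entry RcvArray and 80
         * receive contexts, there are 32768 / 8 = 4096 groups, 51 groups per
         * context, and 16 left-over groups handed to the first user contexts.
         */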
9291         dd->rcv_entries.group_size = RCV_INCREMENT;
9292         ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size;
9293         dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts;
9294         dd->rcv_entries.nctxt_extra = ngroups -
9295                 (dd->num_rcv_contexts * dd->rcv_entries.ngroups);
9296         dd_dev_info(dd, "RcvArray groups %u, ctxts extra %u\n",
9297                     dd->rcv_entries.ngroups,
9298                     dd->rcv_entries.nctxt_extra);
9299         if (dd->rcv_entries.ngroups * dd->rcv_entries.group_size >
9300             MAX_EAGER_ENTRIES * 2) {
9301                 dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
9302                         dd->rcv_entries.group_size;
9303                 dd_dev_info(dd,
9304                    "RcvArray group count too high, change to %u\n",
9305                    dd->rcv_entries.ngroups);
9306                 dd->rcv_entries.nctxt_extra = 0;
9307         }
9308         /*
9309          * PIO send contexts
9310          */
9311         ret = init_sc_pools_and_sizes(dd);
9312         if (ret >= 0) { /* success */
9313                 dd->num_send_contexts = ret;
9314                 dd_dev_info(
9315                         dd,
9316                         "send contexts: chip %d, used %d (kernel %d, ack %d, user %d)\n",
9317                         dd->chip_send_contexts,
9318                         dd->num_send_contexts,
9319                         dd->sc_sizes[SC_KERNEL].count,
9320                         dd->sc_sizes[SC_ACK].count,
9321                         dd->sc_sizes[SC_USER].count);
9322                 ret = 0;        /* success */
9323         }
9324
9325         return ret;
9326 }
9327
9328 /*
9329  * Set the device/port partition key table. The MAD code
9330  * will ensure that, at least, the partial management
9331  * partition key is present in the table.
9332  */
9333 static void set_partition_keys(struct hfi1_pportdata *ppd)
9334 {
9335         struct hfi1_devdata *dd = ppd->dd;
9336         u64 reg = 0;
9337         int i;
9338
9339         dd_dev_info(dd, "Setting partition keys\n");
9340         for (i = 0; i < hfi1_get_npkeys(dd); i++) {
9341                 reg |= (ppd->pkeys[i] &
9342                         RCV_PARTITION_KEY_PARTITION_KEY_A_MASK) <<
9343                         ((i % 4) *
9344                          RCV_PARTITION_KEY_PARTITION_KEY_B_SHIFT);
9345                 /* Each register holds 4 PKey values. */
9346                 if ((i % 4) == 3) {
9347                         write_csr(dd, RCV_PARTITION_KEY +
9348                                   ((i - 3) * 2), reg);
9349                         reg = 0;
9350                 }
9351         }
9352
9353         /* Always enable HW pkeys check when pkeys table is set */
9354         add_rcvctrl(dd, RCV_CTRL_RCV_PARTITION_KEY_ENABLE_SMASK);
9355 }
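
/*
 * Illustrative sketch, not driver code: how set_partition_keys() packs
 * four PKeys into one 64-bit RcvPartitionKey register.  The 16-bit
 * per-entry spacing is an assumption for illustration; the driver uses
 * the RCV_PARTITION_KEY_* mask/shift constants for the real layout.
 */
unsigned long long sketch_pack_four_pkeys(const unsigned short pkeys[4])
{
	unsigned long long reg = 0;
	int i;

	for (i = 0; i < 4; i++)
		reg |= (unsigned long long)pkeys[i] << (i * 16);
	return reg;	/* one such value per RcvPartitionKey register */
}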
9356
9357 /*
9358  * These CSRs and memories are uninitialized on reset and must be
9359  * written before reading to set the ECC/parity bits.
9360  *
9361  * NOTE: All user context CSRs that are not mmapped write-only
9362  * (e.g. the TID flows) must be initialized even if the driver never
9363  * reads them.
9364  */
9365 static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
9366 {
9367         int i, j;
9368
9369         /* CceIntMap */
9370         for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9371                 write_csr(dd, CCE_INT_MAP+(8*i), 0);
9372
9373         /* SendCtxtCreditReturnAddr */
9374         for (i = 0; i < dd->chip_send_contexts; i++)
9375                 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9376
9377         /* PIO Send buffers */
9378         /* SDMA Send buffers */
9379         /* These are not normally read, and (presently) have no method
9380            to be read, so are not pre-initialized */
9381
9382         /* RcvHdrAddr */
9383         /* RcvHdrTailAddr */
9384         /* RcvTidFlowTable */
9385         for (i = 0; i < dd->chip_rcv_contexts; i++) {
9386                 write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9387                 write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9388                 for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
9389                         write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0);
9390         }
9391
9392         /* RcvArray */
9393         for (i = 0; i < dd->chip_rcv_array_count; i++)
9394                 write_csr(dd, RCV_ARRAY + (8*i),
9395                                         RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
9396
9397         /* RcvQPMapTable */
9398         for (i = 0; i < 32; i++)
9399                 write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9400 }
9401
9402 /*
9403  * Use the ctrl_bits in CceCtrl to clear the status_bits in CceStatus.
9404  */
9405 static void clear_cce_status(struct hfi1_devdata *dd, u64 status_bits,
9406                              u64 ctrl_bits)
9407 {
9408         unsigned long timeout;
9409         u64 reg;
9410
9411         /* is the condition present? */
9412         reg = read_csr(dd, CCE_STATUS);
9413         if ((reg & status_bits) == 0)
9414                 return;
9415
9416         /* clear the condition */
9417         write_csr(dd, CCE_CTRL, ctrl_bits);
9418
9419         /* wait for the condition to clear */
9420         timeout = jiffies + msecs_to_jiffies(CCE_STATUS_TIMEOUT);
9421         while (1) {
9422                 reg = read_csr(dd, CCE_STATUS);
9423                 if ((reg & status_bits) == 0)
9424                         return;
9425                 if (time_after(jiffies, timeout)) {
9426                         dd_dev_err(dd,
9427                                 "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
9428                                 status_bits, reg & status_bits);
9429                         return;
9430                 }
9431                 udelay(1);
9432         }
9433 }
9434
9435 /* set CCE CSRs to chip reset defaults */
9436 static void reset_cce_csrs(struct hfi1_devdata *dd)
9437 {
9438         int i;
9439
9440         /* CCE_REVISION read-only */
9441         /* CCE_REVISION2 read-only */
9442         /* CCE_CTRL - bits clear automatically */
9443         /* CCE_STATUS read-only, use CceCtrl to clear */
9444         clear_cce_status(dd, ALL_FROZE, CCE_CTRL_SPC_UNFREEZE_SMASK);
9445         clear_cce_status(dd, ALL_TXE_PAUSE, CCE_CTRL_TXE_RESUME_SMASK);
9446         clear_cce_status(dd, ALL_RXE_PAUSE, CCE_CTRL_RXE_RESUME_SMASK);
9447         for (i = 0; i < CCE_NUM_SCRATCH; i++)
9448                 write_csr(dd, CCE_SCRATCH + (8 * i), 0);
9449         /* CCE_ERR_STATUS read-only */
9450         write_csr(dd, CCE_ERR_MASK, 0);
9451         write_csr(dd, CCE_ERR_CLEAR, ~0ull);
9452         /* CCE_ERR_FORCE leave alone */
9453         for (i = 0; i < CCE_NUM_32_BIT_COUNTERS; i++)
9454                 write_csr(dd, CCE_COUNTER_ARRAY32 + (8 * i), 0);
9455         write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_RESETCSR);
9456         /* CCE_PCIE_CTRL leave alone */
9457         for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) {
9458                 write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0);
9459                 write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i),
9460                                         CCE_MSIX_TABLE_UPPER_RESETCSR);
9461         }
9462         for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) {
9463                 /* CCE_MSIX_PBA read-only */
9464                 write_csr(dd, CCE_MSIX_INT_GRANTED, ~0ull);
9465                 write_csr(dd, CCE_MSIX_VEC_CLR_WITHOUT_INT, ~0ull);
9466         }
9467         for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9468                 write_csr(dd, CCE_INT_MAP + (8 * i), 0);
9469         for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
9470                 /* CCE_INT_STATUS read-only */
9471                 write_csr(dd, CCE_INT_MASK + (8 * i), 0);
9472                 write_csr(dd, CCE_INT_CLEAR + (8 * i), ~0ull);
9473                 /* CCE_INT_FORCE leave alone */
9474                 /* CCE_INT_BLOCKED read-only */
9475         }
9476         for (i = 0; i < CCE_NUM_32_BIT_INT_COUNTERS; i++)
9477                 write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0);
9478 }
9479
9480 /* set ASIC CSRs to chip reset defaults */
9481 static void reset_asic_csrs(struct hfi1_devdata *dd)
9482 {
9483         int i;
9484
9485         /*
9486          * If the HFIs are shared between separate nodes or VMs,
9487          * then more will need to be done here.  One idea is a module
9488          * parameter that returns early, letting the first power-on or
9489          * a known first load do the reset and blocking all others.
9490          */
9491
9492         if (!(dd->flags & HFI1_DO_INIT_ASIC))
9493                 return;
9494
9495         if (dd->icode != ICODE_FPGA_EMULATION) {
9496                 /* emulation does not have an SBus - leave these alone */
9497                 /*
9498                  * All writes to ASIC_CFG_SBUS_REQUEST do something.
9499                  * Notes:
9500                  * o The reset is not zero if aimed at the core.  See the
9501                  *   SBus documentation for details.
9502                  * o If the SBus firmware has been updated (e.g. by the BIOS),
9503                  *   will the reset revert that?
9504                  */
9505                 /* ASIC_CFG_SBUS_REQUEST leave alone */
9506                 write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0);
9507         }
9508         /* ASIC_SBUS_RESULT read-only */
9509         write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0);
9510         for (i = 0; i < ASIC_NUM_SCRATCH; i++)
9511                 write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0);
9512         write_csr(dd, ASIC_CFG_MUTEX, 0);       /* this will clear it */
9513
9514         /* We might want to retain this state across FLR if we ever use it */
9515         write_csr(dd, ASIC_CFG_DRV_STR, 0);
9516
9517         /* ASIC_CFG_THERM_POLL_EN leave alone */
9518         /* ASIC_STS_THERM read-only */
9519         /* ASIC_CFG_RESET leave alone */
9520
9521         write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0);
9522         /* ASIC_PCIE_SD_HOST_STATUS read-only */
9523         write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0);
9524         write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0);
9525         /* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */
9526         write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */
9527         /* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */
9528         /* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */
9529         for (i = 0; i < 16; i++)
9530                 write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0);
9531
9532         /* ASIC_GPIO_IN read-only */
9533         write_csr(dd, ASIC_GPIO_OE, 0);
9534         write_csr(dd, ASIC_GPIO_INVERT, 0);
9535         write_csr(dd, ASIC_GPIO_OUT, 0);
9536         write_csr(dd, ASIC_GPIO_MASK, 0);
9537         /* ASIC_GPIO_STATUS read-only */
9538         write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
9539         /* ASIC_GPIO_FORCE leave alone */
9540
9541         /* ASIC_QSFP1_IN read-only */
9542         write_csr(dd, ASIC_QSFP1_OE, 0);
9543         write_csr(dd, ASIC_QSFP1_INVERT, 0);
9544         write_csr(dd, ASIC_QSFP1_OUT, 0);
9545         write_csr(dd, ASIC_QSFP1_MASK, 0);
9546         /* ASIC_QSFP1_STATUS read-only */
9547         write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
9548         /* ASIC_QSFP1_FORCE leave alone */
9549
9550         /* ASIC_QSFP2_IN read-only */
9551         write_csr(dd, ASIC_QSFP2_OE, 0);
9552         write_csr(dd, ASIC_QSFP2_INVERT, 0);
9553         write_csr(dd, ASIC_QSFP2_OUT, 0);
9554         write_csr(dd, ASIC_QSFP2_MASK, 0);
9555         /* ASIC_QSFP2_STATUS read-only */
9556         write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
9557         /* ASIC_QSFP2_FORCE leave alone */
9558
9559         write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
9560         /* this also writes a NOP command, clearing paging mode */
9561         write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
9562         write_csr(dd, ASIC_EEP_DATA, 0);
9563 }
9564
9565 /* set MISC CSRs to chip reset defaults */
9566 static void reset_misc_csrs(struct hfi1_devdata *dd)
9567 {
9568         int i;
9569
9570         for (i = 0; i < 32; i++) {
9571                 write_csr(dd, MISC_CFG_RSA_R2 + (8 * i), 0);
9572                 write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
9573                 write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
9574         }
9575         /* MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
9576            only be written in 128-byte chunks */
9577         /* init RSA engine to clear lingering errors */
9578         write_csr(dd, MISC_CFG_RSA_CMD, 1);
9579         write_csr(dd, MISC_CFG_RSA_MU, 0);
9580         write_csr(dd, MISC_CFG_FW_CTRL, 0);
9581         /* MISC_STS_8051_DIGEST read-only */
9582         /* MISC_STS_SBM_DIGEST read-only */
9583         /* MISC_STS_PCIE_DIGEST read-only */
9584         /* MISC_STS_FAB_DIGEST read-only */
9585         /* MISC_ERR_STATUS read-only */
9586         write_csr(dd, MISC_ERR_MASK, 0);
9587         write_csr(dd, MISC_ERR_CLEAR, ~0ull);
9588         /* MISC_ERR_FORCE leave alone */
9589 }
9590
9591 /* set TXE CSRs to chip reset defaults */
9592 static void reset_txe_csrs(struct hfi1_devdata *dd)
9593 {
9594         int i;
9595
9596         /*
9597          * TXE Kernel CSRs
9598          */
9599         write_csr(dd, SEND_CTRL, 0);
9600         __cm_reset(dd, 0);      /* reset CM internal state */
9601         /* SEND_CONTEXTS read-only */
9602         /* SEND_DMA_ENGINES read-only */
9603         /* SEND_PIO_MEM_SIZE read-only */
9604         /* SEND_DMA_MEM_SIZE read-only */
9605         write_csr(dd, SEND_HIGH_PRIORITY_LIMIT, 0);
9606         pio_reset_all(dd);      /* SEND_PIO_INIT_CTXT */
9607         /* SEND_PIO_ERR_STATUS read-only */
9608         write_csr(dd, SEND_PIO_ERR_MASK, 0);
9609         write_csr(dd, SEND_PIO_ERR_CLEAR, ~0ull);
9610         /* SEND_PIO_ERR_FORCE leave alone */
9611         /* SEND_DMA_ERR_STATUS read-only */
9612         write_csr(dd, SEND_DMA_ERR_MASK, 0);
9613         write_csr(dd, SEND_DMA_ERR_CLEAR, ~0ull);
9614         /* SEND_DMA_ERR_FORCE leave alone */
9615         /* SEND_EGRESS_ERR_STATUS read-only */
9616         write_csr(dd, SEND_EGRESS_ERR_MASK, 0);
9617         write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~0ull);
9618         /* SEND_EGRESS_ERR_FORCE leave alone */
9619         write_csr(dd, SEND_BTH_QP, 0);
9620         write_csr(dd, SEND_STATIC_RATE_CONTROL, 0);
9621         write_csr(dd, SEND_SC2VLT0, 0);
9622         write_csr(dd, SEND_SC2VLT1, 0);
9623         write_csr(dd, SEND_SC2VLT2, 0);
9624         write_csr(dd, SEND_SC2VLT3, 0);
9625         write_csr(dd, SEND_LEN_CHECK0, 0);
9626         write_csr(dd, SEND_LEN_CHECK1, 0);
9627         /* SEND_ERR_STATUS read-only */
9628         write_csr(dd, SEND_ERR_MASK, 0);
9629         write_csr(dd, SEND_ERR_CLEAR, ~0ull);
9630         /* SEND_ERR_FORCE read-only */
9631         for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
9632                 write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
9633         for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
9634                 write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
9635         for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
9636                 write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
9637         for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
9638                 write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
9639         for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
9640                 write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
9641         write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
9642         write_csr(dd, SEND_CM_GLOBAL_CREDIT,
9643                                         SEND_CM_GLOBAL_CREDIT_RESETCSR);
9644         /* SEND_CM_CREDIT_USED_STATUS read-only */
9645         write_csr(dd, SEND_CM_TIMER_CTRL, 0);
9646         write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
9647         write_csr(dd, SEND_CM_LOCAL_AU_TABLE4_TO7, 0);
9648         write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
9649         write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
9650         for (i = 0; i < TXE_NUM_DATA_VL; i++)
9651                 write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
9652         write_csr(dd, SEND_CM_CREDIT_VL15, 0);
9653         /* SEND_CM_CREDIT_USED_VL read-only */
9654         /* SEND_CM_CREDIT_USED_VL15 read-only */
9655         /* SEND_EGRESS_CTXT_STATUS read-only */
9656         /* SEND_EGRESS_SEND_DMA_STATUS read-only */
9657         write_csr(dd, SEND_EGRESS_ERR_INFO, ~0ull);
9658         /* SEND_EGRESS_ERR_INFO read-only */
9659         /* SEND_EGRESS_ERR_SOURCE read-only */
9660
9661         /*
9662          * TXE Per-Context CSRs
9663          */
9664         for (i = 0; i < dd->chip_send_contexts; i++) {
9665                 write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9666                 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0);
9667                 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9668                 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_FORCE, 0);
9669                 write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, 0);
9670                 write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~0ull);
9671                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_ENABLE, 0);
9672                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_VL, 0);
9673                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_JOB_KEY, 0);
9674                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_PARTITION_KEY, 0);
9675                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, 0);
9676                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_OPCODE, 0);
9677         }
9678
9679         /*
9680          * TXE Per-SDMA CSRs
9681          */
9682         for (i = 0; i < dd->chip_sdma_engines; i++) {
9683                 write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9684                 /* SEND_DMA_STATUS read-only */
9685                 write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0);
9686                 write_kctxt_csr(dd, i, SEND_DMA_LEN_GEN, 0);
9687                 write_kctxt_csr(dd, i, SEND_DMA_TAIL, 0);
9688                 /* SEND_DMA_HEAD read-only */
9689                 write_kctxt_csr(dd, i, SEND_DMA_HEAD_ADDR, 0);
9690                 write_kctxt_csr(dd, i, SEND_DMA_PRIORITY_THLD, 0);
9691                 /* SEND_DMA_IDLE_CNT read-only */
9692                 write_kctxt_csr(dd, i, SEND_DMA_RELOAD_CNT, 0);
9693                 write_kctxt_csr(dd, i, SEND_DMA_DESC_CNT, 0);
9694                 /* SEND_DMA_DESC_FETCHED_CNT read-only */
9695                 /* SEND_DMA_ENG_ERR_STATUS read-only */
9696                 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, 0);
9697                 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~0ull);
9698                 /* SEND_DMA_ENG_ERR_FORCE leave alone */
9699                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_ENABLE, 0);
9700                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_VL, 0);
9701                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_JOB_KEY, 0);
9702                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_PARTITION_KEY, 0);
9703                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_SLID, 0);
9704                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_OPCODE, 0);
9705                 write_kctxt_csr(dd, i, SEND_DMA_MEMORY, 0);
9706         }
9707 }
9708
9709 /*
9710  * Expect on entry:
9711  * o Packet ingress is disabled, i.e. RcvCtrl.RcvPortEnable == 0
9712  */
9713 static void init_rbufs(struct hfi1_devdata *dd)
9714 {
9715         u64 reg;
9716         int count;
9717
9718         /*
9719          * Wait for DMA to stop: RxRbufPktPending and RxPktInProgress are
9720          * clear.
9721          */
9722         count = 0;
9723         while (1) {
9724                 reg = read_csr(dd, RCV_STATUS);
9725                 if ((reg & (RCV_STATUS_RX_RBUF_PKT_PENDING_SMASK
9726                             | RCV_STATUS_RX_PKT_IN_PROGRESS_SMASK)) == 0)
9727                         break;
9728                 /*
9729                  * Give up after 1ms - maximum wait time.
9730                  *
9731                  * RBuf size is 148KiB.  Slowest possible is PCIe Gen1 x1 at
9732                  * 250MB/s bandwidth.  Lower rate to 66% for overhead to get:
9733                  *      148 KB / (66% * 250MB/s) = 920us
9734                  */
9735                 if (count++ > 500) {
9736                         dd_dev_err(dd,
9737                                 "%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
9738                                 __func__, reg);
9739                         break;
9740                 }
9741                 udelay(2); /* do not busy-wait the CSR */
9742         }
9743
9744         /* start the init - expect RcvCtrl to be 0 */
9745         write_csr(dd, RCV_CTRL, RCV_CTRL_RX_RBUF_INIT_SMASK);
9746
9747         /*
9748          * Read to force the write of RcvCtrl.RxRbufInit.  There is a brief
9749          * period after the write before RcvStatus.RxRbufInitDone is valid.
9750          * The delay in the first run through the loop below is sufficient and
9751          * required before the first read of RcvStatus.RxRbufInitDone.
9752          */
9753         read_csr(dd, RCV_CTRL);
9754
9755         /* wait for the init to finish */
9756         count = 0;
9757         while (1) {
9758                 /* delay is required first time through - see above */
9759                 udelay(2); /* do not busy-wait the CSR */
9760                 reg = read_csr(dd, RCV_STATUS);
9761                 if (reg & (RCV_STATUS_RX_RBUF_INIT_DONE_SMASK))
9762                         break;
9763
9764                 /* give up after 100us - slowest possible at 33MHz is 73us */
9765                 if (count++ > 50) {
9766                         dd_dev_err(dd,
9767                                 "%s: RcvStatus.RxRbufInitDone not set, continuing\n",
9768                                 __func__);
9769                         break;
9770                 }
9771         }
9772 }
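
/*
 * Worked numbers for the wait budgets above (illustrative only): the
 * drain loop polls at most 500 times with udelay(2), roughly 1000us,
 * which covers the ~920us worst case computed in the comment; the init
 * loop polls at most 50 times with udelay(2), roughly 100us, covering
 * the quoted 73us worst case at 33MHz.
 */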
9773
9774 /* set RXE CSRs to chip reset defaults */
9775 static void reset_rxe_csrs(struct hfi1_devdata *dd)
9776 {
9777         int i, j;
9778
9779         /*
9780          * RXE Kernel CSRs
9781          */
9782         write_csr(dd, RCV_CTRL, 0);
9783         init_rbufs(dd);
9784         /* RCV_STATUS read-only */
9785         /* RCV_CONTEXTS read-only */
9786         /* RCV_ARRAY_CNT read-only */
9787         /* RCV_BUF_SIZE read-only */
9788         write_csr(dd, RCV_BTH_QP, 0);
9789         write_csr(dd, RCV_MULTICAST, 0);
9790         write_csr(dd, RCV_BYPASS, 0);
9791         write_csr(dd, RCV_VL15, 0);
9792         /* this is a clear-down */
9793         write_csr(dd, RCV_ERR_INFO,
9794                         RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
9795         /* RCV_ERR_STATUS read-only */
9796         write_csr(dd, RCV_ERR_MASK, 0);
9797         write_csr(dd, RCV_ERR_CLEAR, ~0ull);
9798         /* RCV_ERR_FORCE leave alone */
9799         for (i = 0; i < 32; i++)
9800                 write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9801         for (i = 0; i < 4; i++)
9802                 write_csr(dd, RCV_PARTITION_KEY + (8 * i), 0);
9803         for (i = 0; i < RXE_NUM_32_BIT_COUNTERS; i++)
9804                 write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
9805         for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
9806                 write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
9807         for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
9808                 write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
9809                 write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
9810                 write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
9811         }
9812         for (i = 0; i < 32; i++)
9813                 write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
9814
9815         /*
9816          * RXE Kernel and User Per-Context CSRs
9817          */
9818         for (i = 0; i < dd->chip_rcv_contexts; i++) {
9819                 /* kernel */
9820                 write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
9821                 /* RCV_CTXT_STATUS read-only */
9822                 write_kctxt_csr(dd, i, RCV_EGR_CTRL, 0);
9823                 write_kctxt_csr(dd, i, RCV_TID_CTRL, 0);
9824                 write_kctxt_csr(dd, i, RCV_KEY_CTRL, 0);
9825                 write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9826                 write_kctxt_csr(dd, i, RCV_HDR_CNT, 0);
9827                 write_kctxt_csr(dd, i, RCV_HDR_ENT_SIZE, 0);
9828                 write_kctxt_csr(dd, i, RCV_HDR_SIZE, 0);
9829                 write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9830                 write_kctxt_csr(dd, i, RCV_AVAIL_TIME_OUT, 0);
9831                 write_kctxt_csr(dd, i, RCV_HDR_OVFL_CNT, 0);
9832
9833                 /* user */
9834                 /* RCV_HDR_TAIL read-only */
9835                 write_uctxt_csr(dd, i, RCV_HDR_HEAD, 0);
9836                 /* RCV_EGR_INDEX_TAIL read-only */
9837                 write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
9838                 /* RCV_EGR_OFFSET_TAIL read-only */
9839                 for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
9840                         write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
9841                                 0);
9842                 }
9843         }
9844 }
9845
9846 /*
9847  * Set sc2vl tables.
9848  *
9849  * They power on to zeros, so to avoid send context errors
9850  * they need to be set:
9851  *
9852  * SC 0-7 -> VL 0-7 (respectively)
9853  * SC 15  -> VL 15
9854  * otherwise
9855  *        -> VL 0
9856  */
9857 static void init_sc2vl_tables(struct hfi1_devdata *dd)
9858 {
9859         int i;
9860         /* init per architecture spec, constrained by hardware capability */
9861
9862         /* HFI maps sent packets */
9863         write_csr(dd, SEND_SC2VLT0, SC2VL_VAL(
9864                 0,
9865                 0, 0, 1, 1,
9866                 2, 2, 3, 3,
9867                 4, 4, 5, 5,
9868                 6, 6, 7, 7));
9869         write_csr(dd, SEND_SC2VLT1, SC2VL_VAL(
9870                 1,
9871                 8, 0, 9, 0,
9872                 10, 0, 11, 0,
9873                 12, 0, 13, 0,
9874                 14, 0, 15, 15));
9875         write_csr(dd, SEND_SC2VLT2, SC2VL_VAL(
9876                 2,
9877                 16, 0, 17, 0,
9878                 18, 0, 19, 0,
9879                 20, 0, 21, 0,
9880                 22, 0, 23, 0));
9881         write_csr(dd, SEND_SC2VLT3, SC2VL_VAL(
9882                 3,
9883                 24, 0, 25, 0,
9884                 26, 0, 27, 0,
9885                 28, 0, 29, 0,
9886                 30, 0, 31, 0));
9887
9888         /* DC maps received packets */
9889         write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0, DC_SC_VL_VAL(
9890                 15_0,
9891                 0, 0, 1, 1,  2, 2,  3, 3,  4, 4,  5, 5,  6, 6,  7,  7,
9892                 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 15));
9893         write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16, DC_SC_VL_VAL(
9894                 31_16,
9895                 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, 0,
9896                 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31, 0));
9897
9898         /* initialize the cached sc2vl values consistently with h/w */
9899         for (i = 0; i < 32; i++) {
9900                 if (i < 8 || i == 15)
9901                         *((u8 *)(dd->sc2vl) + i) = (u8)i;
9902                 else
9903                         *((u8 *)(dd->sc2vl) + i) = 0;
9904         }
9905 }
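
/*
 * Illustrative sketch, not driver code: the default SC-to-VL map that
 * init_sc2vl_tables() programs, written out as a plain array.  SCs 0-7
 * map to VLs 0-7, SC 15 maps to VL 15, and every other SC maps to VL 0.
 */
void sketch_default_sc2vl(unsigned char sc2vl[32])
{
	int sc;

	for (sc = 0; sc < 32; sc++) {
		if (sc < 8)
			sc2vl[sc] = sc;
		else if (sc == 15)
			sc2vl[sc] = 15;
		else
			sc2vl[sc] = 0;
	}
}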
9906
9907 /*
9908  * Read chip sizes and then reset parts to sane, disabled, values.  We cannot
9909  * depend on the chip going through a power-on reset - a driver may be loaded
9910  * and unloaded many times.
9911  *
9912  * Do not write any CSR values to the chip in this routine - there may be
9913  * a reset following the (possible) FLR in this routine.
9914  *
9915  */
9916 static void init_chip(struct hfi1_devdata *dd)
9917 {
9918         int i;
9919
9920         /*
9921          * Put the HFI CSRs in a known state.
9922          * Combine this with a DC reset.
9923          *
9924          * Stop the device from doing anything while we do a
9925          * reset.  We know there are no other active users of
9926          * the device since we are now in charge.  Turn off
9927          * all outbound and inbound traffic and make sure
9928          * the device does not generate any interrupts.
9929          */
9930
9931         /* disable send contexts and SDMA engines */
9932         write_csr(dd, SEND_CTRL, 0);
9933         for (i = 0; i < dd->chip_send_contexts; i++)
9934                 write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9935         for (i = 0; i < dd->chip_sdma_engines; i++)
9936                 write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9937         /* disable port (turn off RXE inbound traffic) and contexts */
9938         write_csr(dd, RCV_CTRL, 0);
9939         for (i = 0; i < dd->chip_rcv_contexts; i++)
9940                 write_csr(dd, RCV_CTXT_CTRL, 0);
9941         /* mask all interrupt sources */
9942         for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9943                 write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
9944
9945         /*
9946          * DC Reset: do a full DC reset before the register clear.
9947          * A recommended length of time to hold is one CSR read,
9948          * so reread the CceDcCtrl.  Then, hold the DC in reset
9949          * across the clear.
9950          */
9951         write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
9952         (void) read_csr(dd, CCE_DC_CTRL);
9953
9954         if (use_flr) {
9955                 /*
9956                  * A FLR will reset the SPC core and part of the PCIe.
9957                  * The parts that need to be restored have already been
9958                  * saved.
9959                  */
9960                 dd_dev_info(dd, "Resetting CSRs with FLR\n");
9961
9962                 /* do the FLR, the DC reset will remain */
9963                 hfi1_pcie_flr(dd);
9964
9965                 /* restore command and BARs */
9966                 restore_pci_variables(dd);
9967
9968                 if (is_ax(dd)) {
9969                         dd_dev_info(dd, "Resetting CSRs with FLR\n");
9970                         hfi1_pcie_flr(dd);
9971                         restore_pci_variables(dd);
9972                 }
9973
9974                 reset_asic_csrs(dd);
9975         } else {
9976                 dd_dev_info(dd, "Resetting CSRs with writes\n");
9977                 reset_cce_csrs(dd);
9978                 reset_txe_csrs(dd);
9979                 reset_rxe_csrs(dd);
9980                 reset_asic_csrs(dd);
9981                 reset_misc_csrs(dd);
9982         }
9983         /* clear the DC reset */
9984         write_csr(dd, CCE_DC_CTRL, 0);
9985
9986         /* Set the LED off */
9987         if (is_ax(dd))
9988                 setextled(dd, 0);
9989         /*
9990          * Clear the QSFP reset.
9991          * An FLR enforces a 0 on all out pins. The driver does not touch
9992          * ASIC_QSFPn_OUT otherwise.  This leaves RESET_N low, holding
9993          * anything plugged in constantly in reset if it pays attention
9994          * to RESET_N.
9995          * Prime examples of this are optical cables. Set all pins high.
9996          * I2CCLK and I2CDAT will change per direction, and INT_N and
9997          * MODPRS_N are input only and their value is ignored.
9998          */
9999         write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
10000         write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
10001 }
10002
10003 static void init_early_variables(struct hfi1_devdata *dd)
10004 {
10005         int i;
10006
10007         /* assign link credit variables */
10008         dd->vau = CM_VAU;
10009         dd->link_credits = CM_GLOBAL_CREDITS;
10010         if (is_ax(dd))
10011                 dd->link_credits--;
10012         dd->vcu = cu_to_vcu(hfi1_cu);
10013         /* enough room for 8 MAD packets plus header - 17K */
10014         dd->vl15_init = (8 * (2048 + 128)) / vau_to_au(dd->vau);
10015         if (dd->vl15_init > dd->link_credits)
10016                 dd->vl15_init = dd->link_credits;
10017
10018         write_uninitialized_csrs_and_memories(dd);
10019
10020         if (HFI1_CAP_IS_KSET(PKEY_CHECK))
10021                 for (i = 0; i < dd->num_pports; i++) {
10022                         struct hfi1_pportdata *ppd = &dd->pport[i];
10023
10024                         set_partition_keys(ppd);
10025                 }
10026         init_sc2vl_tables(dd);
10027 }
10028
10029 static void init_kdeth_qp(struct hfi1_devdata *dd)
10030 {
10031         /* user changed the KDETH_QP */
10032         if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
10033                 /* out of range or illegal value */
10034                 dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring");
10035                 kdeth_qp = 0;
10036         }
10037         if (kdeth_qp == 0)      /* not set, or failed range check */
10038                 kdeth_qp = DEFAULT_KDETH_QP;
10039
10040         write_csr(dd, SEND_BTH_QP,
10041                         (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK)
10042                                 << SEND_BTH_QP_KDETH_QP_SHIFT);
10043
10044         write_csr(dd, RCV_BTH_QP,
10045                         (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK)
10046                                 << RCV_BTH_QP_KDETH_QP_SHIFT);
10047 }
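
/*
 * Illustrative sketch, not driver code: the kind of prefix match the
 * KDETH QP CSRs above enable in hardware.  Treating the prefix as bits
 * 23:16 of a 24-bit QPN is an assumption for illustration; the real
 * placement is defined by the *_KDETH_QP_MASK/SHIFT constants.
 */
int sketch_is_kdeth_qpn(unsigned int qpn, unsigned int prefix)
{
	return ((qpn >> 16) & 0xff) == (prefix & 0xff);
}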
10048
10049 /**
10050  * init_qpmap_table
10051  * @dd - device data
10052  * @first_ctxt - first context
10053  * @last_ctxt - last context
10054  *
10055  * This routine sets the qpn mapping table that
10056  * is indexed by qpn[8:1].
10057  *
10058  * The routine will round robin the 256 settings
10059  * from first_ctxt to last_ctxt.
10060  *
10061  * The first/last arguments look ahead to having specialized
10062  * receive contexts for mgmt and bypass.  Normal
10063  * verbs traffic is assumed to be on a range
10064  * of receive contexts.
10065  */
10066 static void init_qpmap_table(struct hfi1_devdata *dd,
10067                              u32 first_ctxt,
10068                              u32 last_ctxt)
10069 {
10070         u64 reg = 0;
10071         u64 regno = RCV_QP_MAP_TABLE;
10072         int i;
10073         u64 ctxt = first_ctxt;
10074
10075         for (i = 0; i < 256;) {
10076                 reg |= ctxt << (8 * (i % 8));
10077                 i++;
10078                 ctxt++;
10079                 if (ctxt > last_ctxt)
10080                         ctxt = first_ctxt;
10081                 if (i % 8 == 0) {
10082                         write_csr(dd, regno, reg);
10083                         reg = 0;
10084                         regno += 8;
10085                 }
10086         }
10087         if (i % 8)
10088                 write_csr(dd, regno, reg);
10089
10090         add_rcvctrl(dd, RCV_CTRL_RCV_QP_MAP_ENABLE_SMASK
10091                         | RCV_CTRL_RCV_BYPASS_ENABLE_SMASK);
10092 }
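
/*
 * Illustrative sketch, not driver code: the round-robin fill performed
 * by init_qpmap_table(), shown as a flat 256-entry byte table.  The
 * hardware packs 8 one-byte entries per 64-bit RcvQPMapTable register.
 */
void sketch_fill_qpmap(unsigned char map[256],
		       unsigned int first_ctxt, unsigned int last_ctxt)
{
	unsigned int ctxt = first_ctxt;
	int i;

	for (i = 0; i < 256; i++) {
		map[i] = ctxt++;	/* entry i serves qpn[8:1] == i */
		if (ctxt > last_ctxt)
			ctxt = first_ctxt;
	}
}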
10093
10094 /**
10095  * init_qos - init RX qos
10096  * @dd - device data
10097  * @first_ctxt - first context to use for the qos mapping
10098  *
10099  * This routine initializes Rule 0 and the
10100  * RSM map table to implement qos.
10101  *
10102  * If all of the limit tests succeed,
10103  * qos is applied based on the array
10104  * interpretation of krcvqs where
10105  * entry 0 is VL0.
10106  *
10107  * The number of vl bits (n) and the number of qpn
10108  * bits (m) are computed to feed both the RSM map table
10109  * and the single rule.
10110  *
10111  */
10112 static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt)
10113 {
10114         u8 max_by_vl = 0;
10115         unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m;
10116         u64 *rsmmap;
10117         u64 reg;
10118         u8  rxcontext = is_ax(dd) ? 0 : 0xff;  /* 0 is default if a0 ver. */
10119
10120         /* validate */
10121         if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
10122             num_vls == 1 ||
10123             krcvqsset <= 1)
10124                 goto bail;
10125         for (i = 0; i < min_t(unsigned, num_vls, krcvqsset); i++)
10126                 if (krcvqs[i] > max_by_vl)
10127                         max_by_vl = krcvqs[i];
10128         if (max_by_vl > 32)
10129                 goto bail;
10130         qpns_per_vl = __roundup_pow_of_two(max_by_vl);
10131         /* determine bits vl */
10132         n = ilog2(num_vls);
10133         /* determine bits for qpn */
10134         m = ilog2(qpns_per_vl);
10135         if ((m + n) > 7)
10136                 goto bail;
10137         if (num_vls * qpns_per_vl > dd->chip_rcv_contexts)
10138                 goto bail;
10139         rsmmap = kmalloc_array(NUM_MAP_REGS, sizeof(u64), GFP_KERNEL);
              if (!rsmmap)
                      goto bail;
10140         memset(rsmmap, rxcontext, NUM_MAP_REGS * sizeof(u64));
10141         /* init the local copy of the table */
10142         for (i = 0, ctxt = first_ctxt; i < num_vls; i++) {
10143                 unsigned tctxt;
10144
10145                 for (qpn = 0, tctxt = ctxt;
10146                      krcvqs[i] && qpn < qpns_per_vl; qpn++) {
10147                         unsigned idx, regoff, regidx;
10148
10149                         /* generate index <= 128 */
10150                         idx = (qpn << n) ^ i;
10151                         regoff = (idx % 8) * 8;
10152                         regidx = idx / 8;
10153                         reg = rsmmap[regidx];
10154                         /* replace 0xff with context number */
10155                         reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK
10156                                 << regoff);
10157                         reg |= (u64)(tctxt++) << regoff;
10158                         rsmmap[regidx] = reg;
10159                         if (tctxt == ctxt + krcvqs[i])
10160                                 tctxt = ctxt;
10161                 }
10162                 ctxt += krcvqs[i];
10163         }
10164         /* flush cached copies to chip */
10165         for (i = 0; i < NUM_MAP_REGS; i++)
10166                 write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
10167         /* add rule0 */
10168         write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
10169                 RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK
10170                         << RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
10171                 2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
10172         write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
10173                 LRH_BTH_MATCH_OFFSET
10174                         << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
10175                 LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
10176                 LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
10177                 ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
10178                 QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
10179                 ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
10180         write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
10181                 LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
10182                 LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
10183                 LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
10184                 LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
10185         /* Enable RSM */
10186         add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
10187         kfree(rsmmap);
10188         /* map everything else to first context */
10189         init_qpmap_table(dd, FIRST_KERNEL_KCTXT, MIN_KERNEL_KCTXTS - 1);
10190         dd->qos_shift = n + 1;
10191         return;
10192 bail:
10193         dd->qos_shift = 1;
10194         init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1);
10195 }
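
/*
 * Illustrative sketch, not driver code: the RSM map index computed in
 * init_qos().  With n = ilog2(num_vls) and m = ilog2(qpns_per_vl), each
 * (low QPN bits, VL) pair selects a map entry via (qpn << n) ^ vl, and
 * the entries for one VL round-robin over that VL's krcvqs[] contexts.
 */
unsigned int sketch_rsm_index(unsigned int qpn_low_bits, unsigned int vl,
			      unsigned int n)
{
	/* stays below 2^(m+n) <= 128 thanks to the (m + n) > 7 check */
	return (qpn_low_bits << n) ^ vl;
}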
10196
10197 static void init_rxe(struct hfi1_devdata *dd)
10198 {
10199         /* enable all receive errors */
10200         write_csr(dd, RCV_ERR_MASK, ~0ull);
10201         /* setup QPN map table - start where VL15 context leaves off */
10202         init_qos(
10203                 dd,
10204                 dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0);
10205         /*
10206          * make sure RcvCtrl.RcvWcb <= PCIe Device Control
10207          * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
10208          * space, PciCfgCap2.MaxPayloadSize in HFI).  There is only one
10209          * invalid configuration: RcvCtrl.RcvWcb set to its max of 256 and
10210          * Max_PayLoad_Size set to its minimum of 128.
10211          *
10212          * Presently, RcvCtrl.RcvWcb is not modified from its default of 0
10213          * (64 bytes).  Max_Payload_Size is possibly modified upward in
10214          * tune_pcie_caps() which is called after this routine.
10215          */
10216 }
10217
10218 static void init_other(struct hfi1_devdata *dd)
10219 {
10220         /* enable all CCE errors */
10221         write_csr(dd, CCE_ERR_MASK, ~0ull);
10222         /* enable *some* Misc errors */
10223         write_csr(dd, MISC_ERR_MASK, DRIVER_MISC_MASK);
10224         /* enable all DC errors, except LCB */
10225         write_csr(dd, DCC_ERR_FLG_EN, ~0ull);
10226         write_csr(dd, DC_DC8051_ERR_EN, ~0ull);
10227 }
10228
10229 /*
10230  * Fill out the given AU table using the given CU.  A CU is defined in terms
10231  * of AUs.  The table is an encoding: given the index, how many AUs does that
10232  * represent?
10233  *
10234  * NOTE: Assumes that the register layout is the same for the
10235  * local and remote tables.
10236  */
10237 static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu,
10238                                u32 csr0to3, u32 csr4to7)
10239 {
10240         write_csr(dd, csr0to3,
10241                    0ull <<
10242                         SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT
10243                 |  1ull <<
10244                         SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT
10245                 |  2ull * cu <<
10246                         SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT
10247                 |  4ull * cu <<
10248                         SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
10249         write_csr(dd, csr4to7,
10250                    8ull * cu <<
10251                         SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT
10252                 | 16ull * cu <<
10253                         SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT
10254                 | 32ull * cu <<
10255                         SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT
10256                 | 64ull * cu <<
10257                         SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
10258
10259 }
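
/*
 * Illustrative sketch, not driver code: the AU table encoding written
 * by assign_cm_au_table().  Entry 0 is 0 AUs, entry 1 is 1 AU, and
 * entries 2-7 are 2, 4, 8, 16, 32 and 64 times the CU, the CU itself
 * being expressed in AUs.
 */
void sketch_fill_au_table(unsigned long long table[8], unsigned long long cu)
{
	int i;

	table[0] = 0;
	table[1] = 1;
	for (i = 2; i < 8; i++)
		table[i] = (1ull << (i - 1)) * cu;	/* 2*cu ... 64*cu */
}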
10260
10261 static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10262 {
10263         assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3,
10264                                         SEND_CM_LOCAL_AU_TABLE4_TO7);
10265 }
10266
10267 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10268 {
10269         assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3,
10270                                         SEND_CM_REMOTE_AU_TABLE4_TO7);
10271 }
10272
10273 static void init_txe(struct hfi1_devdata *dd)
10274 {
10275         int i;
10276
10277         /* enable all PIO, SDMA, general, and Egress errors */
10278         write_csr(dd, SEND_PIO_ERR_MASK, ~0ull);
10279         write_csr(dd, SEND_DMA_ERR_MASK, ~0ull);
10280         write_csr(dd, SEND_ERR_MASK, ~0ull);
10281         write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull);
10282
10283         /* enable all per-context and per-SDMA engine errors */
10284         for (i = 0; i < dd->chip_send_contexts; i++)
10285                 write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull);
10286         for (i = 0; i < dd->chip_sdma_engines; i++)
10287                 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull);
10288
10289         /* set the local CU to AU mapping */
10290         assign_local_cm_au_table(dd, dd->vcu);
10291
10292         /*
10293          * Set reasonable default for Credit Return Timer
10294          * Don't set on Simulator - causes it to choke.
10295          */
10296         if (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)
10297                 write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE);
10298 }
10299
10300 int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey)
10301 {
10302         struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10303         unsigned sctxt;
10304         int ret = 0;
10305         u64 reg;
10306
10307         if (!rcd || !rcd->sc) {
10308                 ret = -EINVAL;
10309                 goto done;
10310         }
10311         sctxt = rcd->sc->hw_context;
10312         reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */
10313                 ((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) <<
10314                  SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT);
10315         /* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */
10316         if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY))
10317                 reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK;
10318         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
10319         /*
10320          * Enable send-side J_KEY integrity check, unless this is A0 h/w
10321          * (due to A0 erratum).
10322          */
10323         if (!is_ax(dd)) {
10324                 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10325                 reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10326                 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10327         }
10328
10329         /* Enable J_KEY check on receive context. */
10330         reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK |
10331                 ((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) <<
10332                  RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT);
10333         write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg);
10334 done:
10335         return ret;
10336 }
10337
10338 int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt)
10339 {
10340         struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10341         unsigned sctxt;
10342         int ret = 0;
10343         u64 reg;
10344
10345         if (!rcd || !rcd->sc) {
10346                 ret = -EINVAL;
10347                 goto done;
10348         }
10349         sctxt = rcd->sc->hw_context;
10350         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
10351         /*
10352          * Disable send-side J_KEY integrity check, unless this is A0 h/w.
10353          * This check would not have been enabled for A0 h/w, see
10354          * set_ctxt_jkey().
10355          */
10356         if (!is_ax(dd)) {
10357                 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10358                 reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10359                 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10360         }
10361         /* Turn off the J_KEY on the receive side */
10362         write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0);
10363 done:
10364         return ret;
10365 }
10366
10367 int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
10368 {
10369         struct hfi1_ctxtdata *rcd;
10370         unsigned sctxt;
10371         int ret = 0;
10372         u64 reg;
10373
10374         if (ctxt < dd->num_rcv_contexts)
10375                 rcd = dd->rcd[ctxt];
10376         else {
10377                 ret = -EINVAL;
10378                 goto done;
10379         }
10380         if (!rcd || !rcd->sc) {
10381                 ret = -EINVAL;
10382                 goto done;
10383         }
10384         sctxt = rcd->sc->hw_context;
10385         reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) <<
10386                 SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT;
10387         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
10388         reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10389         reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10390         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10391 done:
10392         return ret;
10393 }
10394
10395 int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
10396 {
10397         struct hfi1_ctxtdata *rcd;
10398         unsigned sctxt;
10399         int ret = 0;
10400         u64 reg;
10401
10402         if (ctxt < dd->num_rcv_contexts)
10403                 rcd = dd->rcd[ctxt];
10404         else {
10405                 ret = -EINVAL;
10406                 goto done;
10407         }
10408         if (!rcd || !rcd->sc) {
10409                 ret = -EINVAL;
10410                 goto done;
10411         }
10412         sctxt = rcd->sc->hw_context;
10413         reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10414         reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10415         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10416         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
10417 done:
10418         return ret;
10419 }
10420
10421 /*
10422  * Start doing the clean up of the chip. Our clean up happens in multiple
10423  * stages and this is just the first.
10424  */
10425 void hfi1_start_cleanup(struct hfi1_devdata *dd)
10426 {
10427         free_cntrs(dd);
10428         free_rcverr(dd);
10429         clean_up_interrupts(dd);
10430 }
10431
10432 #define HFI_BASE_GUID(dev) \
10433         ((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT))
10434
10435 /*
10436  * Certain chip functions need to be initialized only once per asic
10437  * instead of per-device. This function finds the peer device and
10438  * checks whether that chip initialization needs to be done by this
10439  * device.
10440  */
10441 static void asic_should_init(struct hfi1_devdata *dd)
10442 {
10443         unsigned long flags;
10444         struct hfi1_devdata *tmp, *peer = NULL;
10445
10446         spin_lock_irqsave(&hfi1_devs_lock, flags);
10447         /* Find our peer device */
10448         list_for_each_entry(tmp, &hfi1_dev_list, list) {
10449                 if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) &&
10450                     dd->unit != tmp->unit) {
10451                         peer = tmp;
10452                         break;
10453                 }
10454         }
10455
10456         /*
10457          * "Claim" the ASIC for initialization if it hasn't been
10458          * "claimed" yet.
10459          */
10460         if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC))
10461                 dd->flags |= HFI1_DO_INIT_ASIC;
10462         spin_unlock_irqrestore(&hfi1_devs_lock, flags);
10463 }
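
/*
 * Illustrative note: the two HFIs that share an ASIC are expected to
 * differ only in the GUID bit selected by GUID_HFI_INDEX_SHIFT, so
 * masking that bit off with HFI_BASE_GUID() makes peers compare equal
 * while their unit numbers keep them distinct.
 */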
10464
10465 /*
10466  * Set dd->boardname.  Use a generic name if a name is not returned from
10467  * EFI variable space.
10468  *
10469  * Return 0 on success, -ENOMEM if space could not be allocated.
10470  */
10471 static int obtain_boardname(struct hfi1_devdata *dd)
10472 {
10473         /* generic board description */
10474         const char generic[] =
10475                 "Intel Omni-Path Host Fabric Interface Adapter 100 Series";
10476         unsigned long size;
10477         int ret;
10478
10479         ret = read_hfi1_efi_var(dd, "description", &size,
10480                                 (void **)&dd->boardname);
10481         if (ret) {
10482                 dd_dev_err(dd, "Board description not found\n");
10483                 /* use generic description */
10484                 dd->boardname = kstrdup(generic, GFP_KERNEL);
10485                 if (!dd->boardname)
10486                         return -ENOMEM;
10487         }
10488         return 0;
10489 }
10490
10491 /**
10492  * Allocate and initialize the device structure for the hfi.
10493  * @pdev: the pci_dev for the hfi1_ib device
10494  * @ent: pci_device_id struct for this dev
10495  *
10496  * Also allocates, initializes, and returns the devdata struct for this
10497  * device instance
10498  *
10499  * This is global, and is called directly at init to set up the
10500  * chip-specific function pointers for later use.
10501  */
10502 struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
10503                                   const struct pci_device_id *ent)
10504 {
10505         struct hfi1_devdata *dd;
10506         struct hfi1_pportdata *ppd;
10507         u64 reg;
10508         int i, ret;
10509         static const char * const inames[] = { /* implementation names */
10510                 "RTL silicon",
10511                 "RTL VCS simulation",
10512                 "RTL FPGA emulation",
10513                 "Functional simulator"
10514         };
10515
10516         dd = hfi1_alloc_devdata(pdev,
10517                 NUM_IB_PORTS * sizeof(struct hfi1_pportdata));
10518         if (IS_ERR(dd))
10519                 goto bail;
10520         ppd = dd->pport;
10521         for (i = 0; i < dd->num_pports; i++, ppd++) {
10522                 int vl;
10523                 /* init common fields */
10524                 hfi1_init_pportdata(pdev, ppd, dd, 0, 1);
10525                 /* DC supports 4 link widths */
10526                 ppd->link_width_supported =
10527                         OPA_LINK_WIDTH_1X | OPA_LINK_WIDTH_2X |
10528                         OPA_LINK_WIDTH_3X | OPA_LINK_WIDTH_4X;
10529                 ppd->link_width_downgrade_supported =
10530                         ppd->link_width_supported;
10531                 /* start out enabling only 4X */
10532                 ppd->link_width_enabled = OPA_LINK_WIDTH_4X;
10533                 ppd->link_width_downgrade_enabled =
10534                                         ppd->link_width_downgrade_supported;
10535                 /* link width active is 0 when link is down */
10536                 /* link width downgrade active is 0 when link is down */
10537
10538                 if (num_vls < HFI1_MIN_VLS_SUPPORTED
10539                         || num_vls > HFI1_MAX_VLS_SUPPORTED) {
10540                         hfi1_early_err(&pdev->dev,
10541                                        "Invalid num_vls %u, using %u VLs\n",
10542                                     num_vls, HFI1_MAX_VLS_SUPPORTED);
10543                         num_vls = HFI1_MAX_VLS_SUPPORTED;
10544                 }
10545                 ppd->vls_supported = num_vls;
10546                 ppd->vls_operational = ppd->vls_supported;
10547                 /* Set the default MTU. */
10548                 for (vl = 0; vl < num_vls; vl++)
10549                         dd->vld[vl].mtu = hfi1_max_mtu;
10550                 dd->vld[15].mtu = MAX_MAD_PACKET;
10551                 /*
10552                  * Set the initial values to reasonable default, will be set
10553                  * for real when link is up.
10554                  */
10555                 ppd->lstate = IB_PORT_DOWN;
10556                 ppd->overrun_threshold = 0x4;
10557                 ppd->phy_error_threshold = 0xf;
10558                 ppd->port_crc_mode_enabled = link_crc_mask;
10559                 /* initialize supported LTP CRC mode */
10560                 ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
10561                 /* initialize enabled LTP CRC mode */
10562                 ppd->port_ltp_crc_mode |= cap_to_port_ltp(link_crc_mask) << 4;
10563                 /* start in offline */
10564                 ppd->host_link_state = HLS_DN_OFFLINE;
10565                 init_vl_arb_caches(ppd);
10566         }
10567
10568         dd->link_default = HLS_DN_POLL;
10569
10570         /*
10571          * Do remaining PCIe setup and save PCIe values in dd.
10572          * Any error printing is already done by the init code.
10573          * On return, we have the chip mapped.
10574          */
10575         ret = hfi1_pcie_ddinit(dd, pdev, ent);
10576         if (ret < 0)
10577                 goto bail_free;
10578
10579         /* verify that reads actually work, save revision for reset check */
10580         dd->revision = read_csr(dd, CCE_REVISION);
10581         if (dd->revision == ~(u64)0) {
10582                 dd_dev_err(dd, "cannot read chip CSRs\n");
10583                 ret = -EINVAL;
10584                 goto bail_cleanup;
10585         }
10586         dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT)
10587                         & CCE_REVISION_CHIP_REV_MAJOR_MASK;
10588         dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
10589                         & CCE_REVISION_CHIP_REV_MINOR_MASK;
10590
10591         /* obtain the hardware ID - NOT related to unit, which is a
10592            software enumeration */
10593         reg = read_csr(dd, CCE_REVISION2);
10594         dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT)
10595                                         & CCE_REVISION2_HFI_ID_MASK;
10596         /* the variable size will remove unwanted bits */
10597         dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT;
10598         dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT;
10599         dd_dev_info(dd, "Implementation: %s, revision 0x%x\n",
10600                 dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown",
10601                 (int)dd->irev);
10602
10603         /* speeds the hardware can support */
10604         dd->pport->link_speed_supported = OPA_LINK_SPEED_25G;
10605         /* speeds allowed to run at */
10606         dd->pport->link_speed_enabled = dd->pport->link_speed_supported;
10607         /* give a reasonable active value, will be set on link up */
10608         dd->pport->link_speed_active = OPA_LINK_SPEED_25G;
10609
10610         dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS);
10611         dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS);
10612         dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES);
10613         dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE);
10614         dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE);
10615         /* fix up link widths for emulation _p */
10616         ppd = dd->pport;
10617         if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) {
10618                 ppd->link_width_supported =
10619                         ppd->link_width_enabled =
10620                         ppd->link_width_downgrade_supported =
10621                         ppd->link_width_downgrade_enabled =
10622                                 OPA_LINK_WIDTH_1X;
10623         }
10624         /* ensure num_vls isn't larger than the number of sdma engines */
10625         if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) {
10626                 dd_dev_err(dd, "num_vls %u too large, using %u VLs\n",
10627                                 num_vls, HFI1_MAX_VLS_SUPPORTED);
10628                 ppd->vls_supported = num_vls = HFI1_MAX_VLS_SUPPORTED;
10629                 ppd->vls_operational = ppd->vls_supported;
10630         }
10631
10632         /*
10633          * Convert the ns parameter to the 64 * cclocks used in the CSR.
10634          * Limit the max if larger than the field holds.  If timeout is
10635          * non-zero, then the calculated field will be at least 1.
10636          *
10637          * Must be after icode is set up - the cclock rate depends
10638          * on knowing the hardware being used.
10639          */
10640         dd->rcv_intr_timeout_csr = ns_to_cclock(dd, rcv_intr_timeout) / 64;
10641         if (dd->rcv_intr_timeout_csr >
10642                         RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK)
10643                 dd->rcv_intr_timeout_csr =
10644                         RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK;
10645         else if (dd->rcv_intr_timeout_csr == 0 && rcv_intr_timeout)
10646                 dd->rcv_intr_timeout_csr = 1;
10647
10648         /* needs to be done before we look for the peer device */
10649         read_guid(dd);
10650
10651         /* should this device init the ASIC block? (recorded in dd->flags) */
10652         asic_should_init(dd);
10653
10654         /* obtain chip sizes, reset chip CSRs */
10655         init_chip(dd);
10656
10657         /* read in the PCIe link speed information */
10658         ret = pcie_speeds(dd);
10659         if (ret)
10660                 goto bail_cleanup;
10661
10662         /* read in firmware */
10663         ret = hfi1_firmware_init(dd);
10664         if (ret)
10665                 goto bail_cleanup;
10666
10667         /*
10668          * In general, the PCIe Gen3 transition must occur after the
10669          * chip has been idled (so it won't initiate any PCIe transactions
10670          * e.g. an interrupt) and before the driver changes any registers
10671          * (the transition will reset the registers).
10672          *
10673          * In particular, place this call after:
10674          * - init_chip()     - the chip will not initiate any PCIe transactions
10675          * - pcie_speeds()   - reads the current link speed
10676          * - hfi1_firmware_init() - the needed firmware is ready to be
10677          *                          downloaded
10678          */
10679         ret = do_pcie_gen3_transition(dd);
10680         if (ret)
10681                 goto bail_cleanup;
10682
10683         /* start setting dd values and adjusting CSRs */
10684         init_early_variables(dd);
10685
10686         parse_platform_config(dd);
10687
10688         ret = obtain_boardname(dd);
10689         if (ret)
10690                 goto bail_cleanup;
10691
10692         snprintf(dd->boardversion, BOARD_VERS_MAX,
10693                  "ChipABI %u.%u, ChipRev %u.%u, SW Compat %llu\n",
10694                  HFI1_CHIP_VERS_MAJ, HFI1_CHIP_VERS_MIN,
10695                  (u32)dd->majrev,
10696                  (u32)dd->minrev,
10697                  (dd->revision >> CCE_REVISION_SW_SHIFT)
10698                     & CCE_REVISION_SW_MASK);
10699
10700         ret = set_up_context_variables(dd);
10701         if (ret)
10702                 goto bail_cleanup;
10703
10704         /* set initial RXE CSRs */
10705         init_rxe(dd);
10706         /* set initial TXE CSRs */
10707         init_txe(dd);
10708         /* set initial non-RXE, non-TXE CSRs */
10709         init_other(dd);
10710         /* set up KDETH QP prefix in both RX and TX CSRs */
10711         init_kdeth_qp(dd);
10712
10713         /* send contexts must be set up before receive contexts */
10714         ret = init_send_contexts(dd);
10715         if (ret)
10716                 goto bail_cleanup;
10717
10718         ret = hfi1_create_ctxts(dd);
10719         if (ret)
10720                 goto bail_cleanup;
10721
10722         dd->rcvhdrsize = DEFAULT_RCVHDRSIZE;
10723         /*
10724          * rcd[0] is guaranteed to be valid by this point. Also, all
10725          * contexts use the same value, per the module parameter.
10726          */
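              /*
               * Note: division binds tighter than subtraction, so this is
               * rcvhdrqentsize - 2, i.e. the entry size in dwords less the
               * two dwords (8 bytes) taken by the RHF.
               */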
10727         dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
10728
10729         ret = init_pervl_scs(dd);
10730         if (ret)
10731                 goto bail_cleanup;
10732
10733         /* sdma init */
10734         for (i = 0; i < dd->num_pports; ++i) {
10735                 ret = sdma_init(dd, i);
10736                 if (ret)
10737                         goto bail_cleanup;
10738         }
10739
10740         /* use contexts created by hfi1_create_ctxts */
10741         ret = set_up_interrupts(dd);
10742         if (ret)
10743                 goto bail_cleanup;
10744
10745         /* set up LCB access - must be after set_up_interrupts() */
10746         init_lcb_access(dd);
10747
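              /* serial is the low 24 bits of the base GUID */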
10748         snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n",
10749                  dd->base_guid & 0xFFFFFF);
10750
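              /* the OUI is the top three bytes of the base GUID */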
10751         dd->oui1 = dd->base_guid >> 56 & 0xFF;
10752         dd->oui2 = dd->base_guid >> 48 & 0xFF;
10753         dd->oui3 = dd->base_guid >> 40 & 0xFF;
10754
10755         ret = load_firmware(dd); /* asymmetric with dispose_firmware() */
10756         if (ret)
10757                 goto bail_clear_intr;
10758         check_fabric_firmware_versions(dd);
10759
10760         thermal_init(dd);
10761
10762         ret = init_cntrs(dd);
10763         if (ret)
10764                 goto bail_clear_intr;
10765
10766         ret = init_rcverr(dd);
10767         if (ret)
10768                 goto bail_free_cntrs;
10769
10770         ret = eprom_init(dd);
10771         if (ret)
10772                 goto bail_free_rcverr;
10773
10774         goto bail;
10775
10776 bail_free_rcverr:
10777         free_rcverr(dd);
10778 bail_free_cntrs:
10779         free_cntrs(dd);
10780 bail_clear_intr:
10781         clean_up_interrupts(dd);
10782 bail_cleanup:
10783         hfi1_pcie_ddcleanup(dd);
10784 bail_free:
10785         hfi1_free_devdata(dd);
10786         dd = ERR_PTR(ret);
10787 bail:
10788         return dd;
10789 }
10790
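      /*
       * Return the number of extra egress cycles needed to pace a packet of
       * dw_len dwords down to desired_egress_rate (Mb/s) when the current
       * link egress rate is faster; 0 means no pacing is needed or possible.
       */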
10791 static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
10792                         u32 dw_len)
10793 {
10794         u32 delta_cycles;
10795         u32 current_egress_rate = ppd->current_egress_rate;
10796         /* rates here are in units of 10^6 bits/sec */
10797
10798         if (desired_egress_rate == -1)
10799                 return 0; /* shouldn't happen */
10800
10801         if (desired_egress_rate >= current_egress_rate)
10802                 return 0; /* we can't help it go faster, only slower */
10803
10804         delta_cycles = egress_cycles(dw_len * 4, desired_egress_rate) -
10805                         egress_cycles(dw_len * 4, current_egress_rate);
10806
10807         return (u16)delta_cycles;
10808 }
10809
10810
10811 /**
10812  * create_pbc - build a PBC for transmission
10813  * @flags: special case flags OR'd into the built PBC
10814  * @srate_mbs: static rate in Mb/s (0 means no static rate pacing)
10815  * @vl: virtual lane
10816  * @dw_len: dword length (header words + data words + pbc words)
10817  *
10818  * Create a PBC with the given flags, rate, VL, and length.
10819  *
10820  * NOTE: The PBC created will not insert any HCRC - all callers but one are
10821  * for verbs, which does not use this PSM feature.  The lone other caller
10822  * is for the diagnostic interface which calls this if the user does not
10823  * supply their own PBC.
10824  */
10825 u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
10826                u32 dw_len)
10827 {
10828         u64 pbc, delay = 0;
10829
10830         if (unlikely(srate_mbs))
10831                 delay = delay_cycles(ppd, srate_mbs, dw_len);
10832
10833         pbc = flags
10834                 | (delay << PBC_STATIC_RATE_CONTROL_COUNT_SHIFT)
10835                 | ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
10836                 | (vl & PBC_VL_MASK) << PBC_VL_SHIFT
10837                 | (dw_len & PBC_LENGTH_DWS_MASK)
10838                         << PBC_LENGTH_DWS_SHIFT;
10839
10840         return pbc;
10841 }
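      /*
       * Illustrative use: a caller that needs no static rate pacing passes
       * srate_mbs == 0, so no delay cycles are added and the PBC carries only
       * the flags, VL, length, and the "no HCRC insertion" code.
       */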
10842
10843 #define SBUS_THERMAL    0x4f
10844 #define SBUS_THERM_MONITOR_MODE 0x1
10845
10846 #define THERM_FAILURE(dev, ret, reason) \
10847         dd_dev_err((dev),                                               \
10848                    "Thermal sensor initialization failed: %s (%d)\n",   \
10849                    (reason), (ret))
10850
10851 /*
10852  * Initialize the Avago Thermal sensor.
10853  *
10854  * After initialization, polling of the thermal sensor is enabled
10855  * through the SBus interface. For this to work, the SBus Master
10856  * firmware must be loaded, because the HW polling logic uses SBus
10857  * interrupts, which the default firmware does not support.
10858  * Otherwise, no data will be returned through the ASIC_STS_THERM
10859  * CSR.
10860  */
10861 static int thermal_init(struct hfi1_devdata *dd)
10862 {
10863         int ret = 0;
10864
10865         if (dd->icode != ICODE_RTL_SILICON ||
10866             !(dd->flags & HFI1_DO_INIT_ASIC))
10867                 return ret;
10868
10869         acquire_hw_mutex(dd);
10870         dd_dev_info(dd, "Initializing thermal sensor\n");
10871         /* Disable polling of thermal readings */
10872         write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x0);
10873         msleep(100);
10874         /* Thermal Sensor Initialization */
10875         /*    Step 1: Reset the Thermal SBus Receiver */
10876         ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10877                                 RESET_SBUS_RECEIVER, 0);
10878         if (ret) {
10879                 THERM_FAILURE(dd, ret, "Bus Reset");
10880                 goto done;
10881         }
10882         /*    Step 2: Set Reset bit in Thermal block */
10883         ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10884                                 WRITE_SBUS_RECEIVER, 0x1);
10885         if (ret) {
10886                 THERM_FAILURE(dd, ret, "Therm Block Reset");
10887                 goto done;
10888         }
10889         /*    Step 3: Write clock divider value (0x32 = 50: 100MHz -> 2MHz) */
10890         ret = sbus_request_slow(dd, SBUS_THERMAL, 0x1,
10891                                 WRITE_SBUS_RECEIVER, 0x32);
10892         if (ret) {
10893                 THERM_FAILURE(dd, ret, "Write Clock Div");
10894                 goto done;
10895         }
10896         /*    Step 4: Select temperature mode */
10897         ret = sbus_request_slow(dd, SBUS_THERMAL, 0x3,
10898                                 WRITE_SBUS_RECEIVER,
10899                                 SBUS_THERM_MONITOR_MODE);
10900         if (ret) {
10901                 THERM_FAILURE(dd, ret, "Write Mode Sel");
10902                 goto done;
10903         }
10904         /*    Step 5: De-assert block reset and start conversion */
10905         ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10906                                 WRITE_SBUS_RECEIVER, 0x2);
10907         if (ret) {
10908                 THERM_FAILURE(dd, ret, "Write Reset Deassert");
10909                 goto done;
10910         }
10911         /*    Step 5.1: Wait for first conversion (21.5ms per spec) */
10912         msleep(22);
10913
10914         /* Enable polling of thermal readings */
10915         write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
10916 done:
10917         release_hw_mutex(dd);
10918         return ret;
10919 }
10920
10921 static void handle_temp_err(struct hfi1_devdata *dd)
10922 {
10923         struct hfi1_pportdata *ppd = &dd->pport[0];
10924         /*
10925          * Thermal Critical Interrupt
10926          * Put the device into forced freeze mode, take link down to
10927          * offline, and put DC into reset.
10928          */
10929         dd_dev_emerg(dd,
10930                      "Critical temperature reached! Forcing device into freeze mode!\n");
10931         dd->flags |= HFI1_FORCED_FREEZE;
10932         start_freeze_handling(ppd, FREEZE_SELF | FREEZE_ABORT);
10933         /*
10934          * Shut DC down as much and as quickly as possible.
10935          *
10936          * Step 1: Take the link down to OFFLINE. This will cause the
10937          *         8051 to put the Serdes in reset. However, we don't want to
10938          *         go through the entire link state machine since we want to
10939          *         shutdown ASAP. Furthermore, this is not a graceful shutdown
10940          *         but rather an attempt to save the chip.
10941          *         Code below is almost the same as quiet_serdes() but avoids
10942          *         all the extra work and the sleeps.
10943          */
10944         ppd->driver_link_ready = 0;
10945         ppd->link_enabled = 0;
10946         set_physical_link_state(dd, PLS_OFFLINE |
10947                                 (OPA_LINKDOWN_REASON_SMA_DISABLED << 8));
10948         /*
10949          * Step 2: Shutdown LCB and 8051
10950          *         After shutdown, do not restore DC_CFG_RESET value.
10951          */
10952         dc_shutdown(dd);
10953 }