2 * This file is provided under a dual BSD/GPLv2 license. When using or
3 * redistributing this file, you may do so under either license.
7 * Copyright(c) 2012 Intel Corporation. All rights reserved.
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
15 * Copyright(c) 2012 Intel Corporation. All rights reserved.
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
21 * * Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * * Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in
25 * the documentation and/or other materials provided with the
27 * * Neither the name of Intel Corporation nor the names of its
28 * contributors may be used to endorse or promote products derived
29 * from this software without specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
32 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
33 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
34 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
35 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
37 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
38 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
39 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
41 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * Intel PCIe NTB Linux driver
45 * Contact Information:
46 * Jon Mason <jon.mason@intel.com>
48 #include <linux/debugfs.h>
49 #include <linux/delay.h>
50 #include <linux/init.h>
51 #include <linux/interrupt.h>
52 #include <linux/module.h>
53 #include <linux/pci.h>
54 #include <linux/random.h>
55 #include <linux/slab.h>
56 #include "ntb_hw_intel.h"
58 #define NTB_NAME "Intel(R) PCI-E Non-Transparent Bridge Driver"
61 MODULE_DESCRIPTION(NTB_NAME);
62 MODULE_VERSION(NTB_VER);
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_AUTHOR("Intel Corporation");
67 NTB_CONN_TRANSPARENT = 0,
82 static struct dentry *debugfs_dir;
84 #define BWD_LINK_RECOVERY_TIME 500
86 /* Translate memory window 0,1,2 to BAR 2,4,5 */
87 #define MW_TO_BAR(mw) (mw == 0 ? 2 : (mw == 1 ? 4 : 5))
89 static const struct pci_device_id ntb_pci_tbl[] = {
90 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BWD)},
91 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_JSF)},
92 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)},
93 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)},
94 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX)},
95 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_JSF)},
96 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_SNB)},
97 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_IVT)},
98 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_HSX)},
99 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_JSF)},
100 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_SNB)},
101 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)},
102 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)},
105 MODULE_DEVICE_TABLE(pci, ntb_pci_tbl);
107 static int is_ntb_xeon(struct ntb_device *ndev)
109 switch (ndev->pdev->device) {
110 case PCI_DEVICE_ID_INTEL_NTB_SS_JSF:
111 case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
112 case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
113 case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
114 case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
115 case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
116 case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
117 case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
118 case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
119 case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
120 case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
121 case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
130 static int is_ntb_atom(struct ntb_device *ndev)
132 switch (ndev->pdev->device) {
133 case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD:
142 static void ntb_set_errata_flags(struct ntb_device *ndev)
144 switch (ndev->pdev->device) {
146 * this workaround applies to all platform up to IvyBridge
147 * Haswell has splitbar support and use a different workaround
149 case PCI_DEVICE_ID_INTEL_NTB_SS_JSF:
150 case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
151 case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
152 case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
153 case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
154 case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
155 case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
156 case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
157 case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
158 case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
159 case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
160 case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
161 ndev->wa_flags |= WA_SNB_ERR;
167 * ntb_register_event_callback() - register event callback
168 * @ndev: pointer to ntb_device instance
169 * @func: callback function to register
171 * This function registers a callback for any HW driver events such as link
172 * up/down, power management notices and etc.
174 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
176 int ntb_register_event_callback(struct ntb_device *ndev,
177 void (*func)(void *handle,
178 enum ntb_hw_event event))
183 ndev->event_cb = func;
189 * ntb_unregister_event_callback() - unregisters the event callback
190 * @ndev: pointer to ntb_device instance
192 * This function unregisters the existing callback from transport
194 void ntb_unregister_event_callback(struct ntb_device *ndev)
196 ndev->event_cb = NULL;
199 static void ntb_irq_work(unsigned long data)
201 struct ntb_db_cb *db_cb = (struct ntb_db_cb *)data;
204 rc = db_cb->callback(db_cb->data, db_cb->db_num);
206 tasklet_schedule(&db_cb->irq_work);
208 struct ntb_device *ndev = db_cb->ndev;
211 mask = readw(ndev->reg_ofs.ldb_mask);
212 clear_bit(db_cb->db_num * ndev->bits_per_vector, &mask);
213 writew(mask, ndev->reg_ofs.ldb_mask);
218 * ntb_register_db_callback() - register a callback for doorbell interrupt
219 * @ndev: pointer to ntb_device instance
220 * @idx: doorbell index to register callback, zero based
221 * @data: pointer to be returned to caller with every callback
222 * @func: callback function to register
224 * This function registers a callback function for the doorbell interrupt
225 * on the primary side. The function will unmask the doorbell as well to
228 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
230 int ntb_register_db_callback(struct ntb_device *ndev, unsigned int idx,
231 void *data, int (*func)(void *data, int db_num))
235 if (idx >= ndev->max_cbs || ndev->db_cb[idx].callback) {
236 dev_warn(&ndev->pdev->dev, "Invalid Index.\n");
240 ndev->db_cb[idx].callback = func;
241 ndev->db_cb[idx].data = data;
242 ndev->db_cb[idx].ndev = ndev;
244 tasklet_init(&ndev->db_cb[idx].irq_work, ntb_irq_work,
245 (unsigned long) &ndev->db_cb[idx]);
247 /* unmask interrupt */
248 mask = readw(ndev->reg_ofs.ldb_mask);
249 clear_bit(idx * ndev->bits_per_vector, &mask);
250 writew(mask, ndev->reg_ofs.ldb_mask);
256 * ntb_unregister_db_callback() - unregister a callback for doorbell interrupt
257 * @ndev: pointer to ntb_device instance
258 * @idx: doorbell index to register callback, zero based
260 * This function unregisters a callback function for the doorbell interrupt
261 * on the primary side. The function will also mask the said doorbell.
263 void ntb_unregister_db_callback(struct ntb_device *ndev, unsigned int idx)
267 if (idx >= ndev->max_cbs || !ndev->db_cb[idx].callback)
270 mask = readw(ndev->reg_ofs.ldb_mask);
271 set_bit(idx * ndev->bits_per_vector, &mask);
272 writew(mask, ndev->reg_ofs.ldb_mask);
274 tasklet_disable(&ndev->db_cb[idx].irq_work);
276 ndev->db_cb[idx].callback = NULL;
280 * ntb_find_transport() - find the transport pointer
281 * @transport: pointer to pci device
283 * Given the pci device pointer, return the transport pointer passed in when
284 * the transport attached when it was inited.
286 * RETURNS: pointer to transport.
288 void *ntb_find_transport(struct pci_dev *pdev)
290 struct ntb_device *ndev = pci_get_drvdata(pdev);
291 return ndev->ntb_transport;
295 * ntb_register_transport() - Register NTB transport with NTB HW driver
296 * @transport: transport identifier
298 * This function allows a transport to reserve the hardware driver for
301 * RETURNS: pointer to ntb_device, NULL on error.
303 struct ntb_device *ntb_register_transport(struct pci_dev *pdev, void *transport)
305 struct ntb_device *ndev = pci_get_drvdata(pdev);
307 if (ndev->ntb_transport)
310 ndev->ntb_transport = transport;
315 * ntb_unregister_transport() - Unregister the transport with the NTB HW driver
316 * @ndev - ntb_device of the transport to be freed
318 * This function unregisters the transport from the HW driver and performs any
319 * necessary cleanups.
321 void ntb_unregister_transport(struct ntb_device *ndev)
325 if (!ndev->ntb_transport)
328 for (i = 0; i < ndev->max_cbs; i++)
329 ntb_unregister_db_callback(ndev, i);
331 ntb_unregister_event_callback(ndev);
332 ndev->ntb_transport = NULL;
336 * ntb_write_local_spad() - write to the secondary scratchpad register
337 * @ndev: pointer to ntb_device instance
338 * @idx: index to the scratchpad register, 0 based
339 * @val: the data value to put into the register
341 * This function allows writing of a 32bit value to the indexed scratchpad
342 * register. This writes over the data mirrored to the local scratchpad register
343 * by the remote system.
345 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
347 int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val)
349 if (idx >= ndev->limits.max_spads)
352 dev_dbg(&ndev->pdev->dev, "Writing %x to local scratch pad index %d\n",
354 writel(val, ndev->reg_ofs.spad_read + idx * 4);
360 * ntb_read_local_spad() - read from the primary scratchpad register
361 * @ndev: pointer to ntb_device instance
362 * @idx: index to scratchpad register, 0 based
363 * @val: pointer to 32bit integer for storing the register value
365 * This function allows reading of the 32bit scratchpad register on
366 * the primary (internal) side. This allows the local system to read data
367 * written and mirrored to the scratchpad register by the remote system.
369 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
371 int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val)
373 if (idx >= ndev->limits.max_spads)
376 *val = readl(ndev->reg_ofs.spad_write + idx * 4);
377 dev_dbg(&ndev->pdev->dev,
378 "Reading %x from local scratch pad index %d\n", *val, idx);
384 * ntb_write_remote_spad() - write to the secondary scratchpad register
385 * @ndev: pointer to ntb_device instance
386 * @idx: index to the scratchpad register, 0 based
387 * @val: the data value to put into the register
389 * This function allows writing of a 32bit value to the indexed scratchpad
390 * register. The register resides on the secondary (external) side. This allows
391 * the local system to write data to be mirrored to the remote systems
392 * scratchpad register.
394 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
396 int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val)
398 if (idx >= ndev->limits.max_spads)
401 dev_dbg(&ndev->pdev->dev, "Writing %x to remote scratch pad index %d\n",
403 writel(val, ndev->reg_ofs.spad_write + idx * 4);
409 * ntb_read_remote_spad() - read from the primary scratchpad register
410 * @ndev: pointer to ntb_device instance
411 * @idx: index to scratchpad register, 0 based
412 * @val: pointer to 32bit integer for storing the register value
414 * This function allows reading of the 32bit scratchpad register on
415 * the primary (internal) side. This alloows the local system to read the data
416 * it wrote to be mirrored on the remote system.
418 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
420 int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val)
422 if (idx >= ndev->limits.max_spads)
425 *val = readl(ndev->reg_ofs.spad_read + idx * 4);
426 dev_dbg(&ndev->pdev->dev,
427 "Reading %x from remote scratch pad index %d\n", *val, idx);
433 * ntb_get_mw_base() - get addr for the NTB memory window
434 * @ndev: pointer to ntb_device instance
435 * @mw: memory window number
437 * This function provides the base address of the memory window specified.
439 * RETURNS: address, or NULL on error.
441 resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw)
443 if (mw >= ntb_max_mw(ndev))
446 return pci_resource_start(ndev->pdev, MW_TO_BAR(mw));
450 * ntb_get_mw_vbase() - get virtual addr for the NTB memory window
451 * @ndev: pointer to ntb_device instance
452 * @mw: memory window number
454 * This function provides the base virtual address of the memory window
457 * RETURNS: pointer to virtual address, or NULL on error.
459 void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw)
461 if (mw >= ntb_max_mw(ndev))
464 return ndev->mw[mw].vbase;
468 * ntb_get_mw_size() - return size of NTB memory window
469 * @ndev: pointer to ntb_device instance
470 * @mw: memory window number
472 * This function provides the physical size of the memory window specified
474 * RETURNS: the size of the memory window or zero on error
476 u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw)
478 if (mw >= ntb_max_mw(ndev))
481 return ndev->mw[mw].bar_sz;
485 * ntb_set_mw_addr - set the memory window address
486 * @ndev: pointer to ntb_device instance
487 * @mw: memory window number
488 * @addr: base address for data
490 * This function sets the base physical address of the memory window. This
491 * memory address is where data from the remote system will be transfered into
492 * or out of depending on how the transport is configured.
494 void ntb_set_mw_addr(struct ntb_device *ndev, unsigned int mw, u64 addr)
496 if (mw >= ntb_max_mw(ndev))
499 dev_dbg(&ndev->pdev->dev, "Writing addr %Lx to BAR %d\n", addr,
502 ndev->mw[mw].phys_addr = addr;
504 switch (MW_TO_BAR(mw)) {
506 writeq(addr, ndev->reg_ofs.bar2_xlat);
510 writel(addr, ndev->reg_ofs.bar4_xlat);
512 writeq(addr, ndev->reg_ofs.bar4_xlat);
515 writel(addr, ndev->reg_ofs.bar5_xlat);
521 * ntb_ring_doorbell() - Set the doorbell on the secondary/external side
522 * @ndev: pointer to ntb_device instance
523 * @db: doorbell to ring
525 * This function allows triggering of a doorbell on the secondary/external
526 * side that will initiate an interrupt on the remote host
528 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
530 void ntb_ring_doorbell(struct ntb_device *ndev, unsigned int db)
532 dev_dbg(&ndev->pdev->dev, "%s: ringing doorbell %d\n", __func__, db);
534 if (ndev->hw_type == BWD_HW)
535 writeq((u64) 1 << db, ndev->reg_ofs.rdb);
537 writew(((1 << ndev->bits_per_vector) - 1) <<
538 (db * ndev->bits_per_vector), ndev->reg_ofs.rdb);
541 static void bwd_recover_link(struct ntb_device *ndev)
545 /* Driver resets the NTB ModPhy lanes - magic! */
546 writeb(0xe0, ndev->reg_base + BWD_MODPHY_PCSREG6);
547 writeb(0x40, ndev->reg_base + BWD_MODPHY_PCSREG4);
548 writeb(0x60, ndev->reg_base + BWD_MODPHY_PCSREG4);
549 writeb(0x60, ndev->reg_base + BWD_MODPHY_PCSREG6);
551 /* Driver waits 100ms to allow the NTB ModPhy to settle */
554 /* Clear AER Errors, write to clear */
555 status = readl(ndev->reg_base + BWD_ERRCORSTS_OFFSET);
556 dev_dbg(&ndev->pdev->dev, "ERRCORSTS = %x\n", status);
557 status &= PCI_ERR_COR_REP_ROLL;
558 writel(status, ndev->reg_base + BWD_ERRCORSTS_OFFSET);
560 /* Clear unexpected electrical idle event in LTSSM, write to clear */
561 status = readl(ndev->reg_base + BWD_LTSSMERRSTS0_OFFSET);
562 dev_dbg(&ndev->pdev->dev, "LTSSMERRSTS0 = %x\n", status);
563 status |= BWD_LTSSMERRSTS0_UNEXPECTEDEI;
564 writel(status, ndev->reg_base + BWD_LTSSMERRSTS0_OFFSET);
566 /* Clear DeSkew Buffer error, write to clear */
567 status = readl(ndev->reg_base + BWD_DESKEWSTS_OFFSET);
568 dev_dbg(&ndev->pdev->dev, "DESKEWSTS = %x\n", status);
569 status |= BWD_DESKEWSTS_DBERR;
570 writel(status, ndev->reg_base + BWD_DESKEWSTS_OFFSET);
572 status = readl(ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);
573 dev_dbg(&ndev->pdev->dev, "IBSTERRRCRVSTS0 = %x\n", status);
574 status &= BWD_IBIST_ERR_OFLOW;
575 writel(status, ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);
577 /* Releases the NTB state machine to allow the link to retrain */
578 status = readl(ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
579 dev_dbg(&ndev->pdev->dev, "LTSSMSTATEJMP = %x\n", status);
580 status &= ~BWD_LTSSMSTATEJMP_FORCEDETECT;
581 writel(status, ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
584 static void ntb_link_event(struct ntb_device *ndev, int link_state)
588 if (ndev->link_status == link_state)
591 if (link_state == NTB_LINK_UP) {
594 dev_info(&ndev->pdev->dev, "Link Up\n");
595 ndev->link_status = NTB_LINK_UP;
596 event = NTB_EVENT_HW_LINK_UP;
598 if (is_ntb_atom(ndev) ||
599 ndev->conn_type == NTB_CONN_TRANSPARENT)
600 status = readw(ndev->reg_ofs.lnk_stat);
602 int rc = pci_read_config_word(ndev->pdev,
603 SNB_LINK_STATUS_OFFSET,
609 ndev->link_width = (status & NTB_LINK_WIDTH_MASK) >> 4;
610 ndev->link_speed = (status & NTB_LINK_SPEED_MASK);
611 dev_info(&ndev->pdev->dev, "Link Width %d, Link Speed %d\n",
612 ndev->link_width, ndev->link_speed);
614 dev_info(&ndev->pdev->dev, "Link Down\n");
615 ndev->link_status = NTB_LINK_DOWN;
616 event = NTB_EVENT_HW_LINK_DOWN;
617 /* Don't modify link width/speed, we need it in link recovery */
620 /* notify the upper layer if we have an event change */
622 ndev->event_cb(ndev->ntb_transport, event);
625 static int ntb_link_status(struct ntb_device *ndev)
629 if (is_ntb_atom(ndev)) {
632 ntb_cntl = readl(ndev->reg_ofs.lnk_cntl);
633 if (ntb_cntl & BWD_CNTL_LINK_DOWN)
634 link_state = NTB_LINK_DOWN;
636 link_state = NTB_LINK_UP;
641 rc = pci_read_config_word(ndev->pdev, SNB_LINK_STATUS_OFFSET,
646 if (status & NTB_LINK_STATUS_ACTIVE)
647 link_state = NTB_LINK_UP;
649 link_state = NTB_LINK_DOWN;
652 ntb_link_event(ndev, link_state);
657 static void bwd_link_recovery(struct work_struct *work)
659 struct ntb_device *ndev = container_of(work, struct ntb_device,
663 bwd_recover_link(ndev);
664 /* There is a potential race between the 2 NTB devices recovering at the
665 * same time. If the times are the same, the link will not recover and
666 * the driver will be stuck in this loop forever. Add a random interval
667 * to the recovery time to prevent this race.
669 msleep(BWD_LINK_RECOVERY_TIME + prandom_u32() % BWD_LINK_RECOVERY_TIME);
671 status32 = readl(ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
672 if (status32 & BWD_LTSSMSTATEJMP_FORCEDETECT)
675 status32 = readl(ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);
676 if (status32 & BWD_IBIST_ERR_OFLOW)
679 status32 = readl(ndev->reg_ofs.lnk_cntl);
680 if (!(status32 & BWD_CNTL_LINK_DOWN)) {
681 unsigned char speed, width;
684 status16 = readw(ndev->reg_ofs.lnk_stat);
685 width = (status16 & NTB_LINK_WIDTH_MASK) >> 4;
686 speed = (status16 & NTB_LINK_SPEED_MASK);
687 if (ndev->link_width != width || ndev->link_speed != speed)
691 schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
695 schedule_delayed_work(&ndev->lr_timer, NTB_HB_TIMEOUT);
698 /* BWD doesn't have link status interrupt, poll on that platform */
699 static void bwd_link_poll(struct work_struct *work)
701 struct ntb_device *ndev = container_of(work, struct ntb_device,
703 unsigned long ts = jiffies;
705 /* If we haven't gotten an interrupt in a while, check the BWD link
708 if (ts > ndev->last_ts + NTB_HB_TIMEOUT) {
709 int rc = ntb_link_status(ndev);
711 dev_err(&ndev->pdev->dev,
712 "Error determining link status\n");
714 /* Check to see if a link error is the cause of the link down */
715 if (ndev->link_status == NTB_LINK_DOWN) {
716 u32 status32 = readl(ndev->reg_base +
717 BWD_LTSSMSTATEJMP_OFFSET);
718 if (status32 & BWD_LTSSMSTATEJMP_FORCEDETECT) {
719 schedule_delayed_work(&ndev->lr_timer, 0);
725 schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
728 static int ntb_xeon_setup(struct ntb_device *ndev)
730 switch (ndev->conn_type) {
732 ndev->reg_ofs.ldb = ndev->reg_base + SNB_PDOORBELL_OFFSET;
733 ndev->reg_ofs.ldb_mask = ndev->reg_base + SNB_PDBMSK_OFFSET;
734 ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET;
735 ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_SBAR2XLAT_OFFSET;
736 ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_SBAR4XLAT_OFFSET;
738 ndev->reg_ofs.bar5_xlat =
739 ndev->reg_base + SNB_SBAR5XLAT_OFFSET;
740 ndev->limits.max_spads = SNB_MAX_B2B_SPADS;
742 /* There is a Xeon hardware errata related to writes to
743 * SDOORBELL or B2BDOORBELL in conjunction with inbound access
744 * to NTB MMIO Space, which may hang the system. To workaround
745 * this use the second memory window to access the interrupt and
746 * scratch pad registers on the remote system.
748 if (ndev->wa_flags & WA_SNB_ERR) {
749 if (!ndev->mw[ndev->limits.max_mw - 1].bar_sz)
752 ndev->limits.max_db_bits = SNB_MAX_DB_BITS;
753 ndev->reg_ofs.spad_write =
754 ndev->mw[ndev->limits.max_mw - 1].vbase +
757 ndev->mw[ndev->limits.max_mw - 1].vbase +
758 SNB_PDOORBELL_OFFSET;
760 /* Set the Limit register to 4k, the minimum size, to
761 * prevent an illegal access
763 writeq(ndev->mw[1].bar_sz + 0x1000, ndev->reg_base +
764 SNB_PBAR4LMT_OFFSET);
765 /* HW errata on the Limit registers. They can only be
766 * written when the base register is 4GB aligned and
767 * < 32bit. This should already be the case based on
768 * the driver defaults, but write the Limit registers
769 * first just in case.
772 ndev->limits.max_mw = SNB_ERRATA_MAX_MW;
774 /* HW Errata on bit 14 of b2bdoorbell register. Writes
775 * will not be mirrored to the remote system. Shrink
776 * the number of bits by one, since bit 14 is the last
779 ndev->limits.max_db_bits = SNB_MAX_DB_BITS - 1;
780 ndev->reg_ofs.spad_write = ndev->reg_base +
782 ndev->reg_ofs.rdb = ndev->reg_base +
783 SNB_B2B_DOORBELL_OFFSET;
785 /* Disable the Limit register, just incase it is set to
786 * something silly. A 64bit write should handle it
787 * regardless of whether it has a split BAR or not.
789 writeq(0, ndev->reg_base + SNB_PBAR4LMT_OFFSET);
790 /* HW errata on the Limit registers. They can only be
791 * written when the base register is 4GB aligned and
792 * < 32bit. This should already be the case based on
793 * the driver defaults, but write the Limit registers
794 * first just in case.
797 ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
799 ndev->limits.max_mw = SNB_MAX_MW;
802 /* The Xeon errata workaround requires setting SBAR Base
803 * addresses to known values, so that the PBAR XLAT can be
804 * pointed at SBAR0 of the remote system.
806 if (ndev->dev_type == NTB_DEV_USD) {
807 writeq(SNB_MBAR23_DSD_ADDR, ndev->reg_base +
808 SNB_PBAR2XLAT_OFFSET);
809 if (ndev->wa_flags & WA_SNB_ERR)
810 writeq(SNB_MBAR01_DSD_ADDR, ndev->reg_base +
811 SNB_PBAR4XLAT_OFFSET);
813 if (ndev->split_bar) {
814 writel(SNB_MBAR4_DSD_ADDR,
816 SNB_PBAR4XLAT_OFFSET);
817 writel(SNB_MBAR5_DSD_ADDR,
819 SNB_PBAR5XLAT_OFFSET);
821 writeq(SNB_MBAR4_DSD_ADDR,
823 SNB_PBAR4XLAT_OFFSET);
825 /* B2B_XLAT_OFFSET is a 64bit register, but can
826 * only take 32bit writes
828 writel(SNB_MBAR01_DSD_ADDR & 0xffffffff,
829 ndev->reg_base + SNB_B2B_XLAT_OFFSETL);
830 writel(SNB_MBAR01_DSD_ADDR >> 32,
831 ndev->reg_base + SNB_B2B_XLAT_OFFSETU);
834 writeq(SNB_MBAR01_USD_ADDR, ndev->reg_base +
835 SNB_SBAR0BASE_OFFSET);
836 writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base +
837 SNB_SBAR2BASE_OFFSET);
838 if (ndev->split_bar) {
839 writel(SNB_MBAR4_USD_ADDR, ndev->reg_base +
840 SNB_SBAR4BASE_OFFSET);
841 writel(SNB_MBAR5_USD_ADDR, ndev->reg_base +
842 SNB_SBAR5BASE_OFFSET);
844 writeq(SNB_MBAR4_USD_ADDR, ndev->reg_base +
845 SNB_SBAR4BASE_OFFSET);
847 writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base +
848 SNB_PBAR2XLAT_OFFSET);
849 if (ndev->wa_flags & WA_SNB_ERR)
850 writeq(SNB_MBAR01_USD_ADDR, ndev->reg_base +
851 SNB_PBAR4XLAT_OFFSET);
853 if (ndev->split_bar) {
854 writel(SNB_MBAR4_USD_ADDR,
856 SNB_PBAR4XLAT_OFFSET);
857 writel(SNB_MBAR5_USD_ADDR,
859 SNB_PBAR5XLAT_OFFSET);
861 writeq(SNB_MBAR4_USD_ADDR,
863 SNB_PBAR4XLAT_OFFSET);
866 * B2B_XLAT_OFFSET is a 64bit register, but can
867 * only take 32bit writes
869 writel(SNB_MBAR01_USD_ADDR & 0xffffffff,
870 ndev->reg_base + SNB_B2B_XLAT_OFFSETL);
871 writel(SNB_MBAR01_USD_ADDR >> 32,
872 ndev->reg_base + SNB_B2B_XLAT_OFFSETU);
874 writeq(SNB_MBAR01_DSD_ADDR, ndev->reg_base +
875 SNB_SBAR0BASE_OFFSET);
876 writeq(SNB_MBAR23_DSD_ADDR, ndev->reg_base +
877 SNB_SBAR2BASE_OFFSET);
878 if (ndev->split_bar) {
879 writel(SNB_MBAR4_DSD_ADDR, ndev->reg_base +
880 SNB_SBAR4BASE_OFFSET);
881 writel(SNB_MBAR5_DSD_ADDR, ndev->reg_base +
882 SNB_SBAR5BASE_OFFSET);
884 writeq(SNB_MBAR4_DSD_ADDR, ndev->reg_base +
885 SNB_SBAR4BASE_OFFSET);
890 if (ndev->wa_flags & WA_SNB_ERR) {
891 dev_err(&ndev->pdev->dev,
892 "NTB-RP disabled due to hardware errata.\n");
896 /* Scratch pads need to have exclusive access from the primary
897 * or secondary side. Halve the num spads so that each side can
898 * have an equal amount.
900 ndev->limits.max_spads = SNB_MAX_COMPAT_SPADS / 2;
901 ndev->limits.max_db_bits = SNB_MAX_DB_BITS;
902 /* Note: The SDOORBELL is the cause of the errata. You REALLY
903 * don't want to touch it.
905 ndev->reg_ofs.rdb = ndev->reg_base + SNB_SDOORBELL_OFFSET;
906 ndev->reg_ofs.ldb = ndev->reg_base + SNB_PDOORBELL_OFFSET;
907 ndev->reg_ofs.ldb_mask = ndev->reg_base + SNB_PDBMSK_OFFSET;
908 /* Offset the start of the spads to correspond to whether it is
909 * primary or secondary
911 ndev->reg_ofs.spad_write = ndev->reg_base + SNB_SPAD_OFFSET +
912 ndev->limits.max_spads * 4;
913 ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET;
914 ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_SBAR2XLAT_OFFSET;
915 ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_SBAR4XLAT_OFFSET;
916 if (ndev->split_bar) {
917 ndev->reg_ofs.bar5_xlat =
918 ndev->reg_base + SNB_SBAR5XLAT_OFFSET;
919 ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
921 ndev->limits.max_mw = SNB_MAX_MW;
923 case NTB_CONN_TRANSPARENT:
924 if (ndev->wa_flags & WA_SNB_ERR) {
925 dev_err(&ndev->pdev->dev,
926 "NTB-TRANSPARENT disabled due to hardware errata.\n");
930 /* Scratch pads need to have exclusive access from the primary
931 * or secondary side. Halve the num spads so that each side can
932 * have an equal amount.
934 ndev->limits.max_spads = SNB_MAX_COMPAT_SPADS / 2;
935 ndev->limits.max_db_bits = SNB_MAX_DB_BITS;
936 ndev->reg_ofs.rdb = ndev->reg_base + SNB_PDOORBELL_OFFSET;
937 ndev->reg_ofs.ldb = ndev->reg_base + SNB_SDOORBELL_OFFSET;
938 ndev->reg_ofs.ldb_mask = ndev->reg_base + SNB_SDBMSK_OFFSET;
939 ndev->reg_ofs.spad_write = ndev->reg_base + SNB_SPAD_OFFSET;
940 /* Offset the start of the spads to correspond to whether it is
941 * primary or secondary
943 ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET +
944 ndev->limits.max_spads * 4;
945 ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_PBAR2XLAT_OFFSET;
946 ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_PBAR4XLAT_OFFSET;
948 if (ndev->split_bar) {
949 ndev->reg_ofs.bar5_xlat =
950 ndev->reg_base + SNB_PBAR5XLAT_OFFSET;
951 ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
953 ndev->limits.max_mw = SNB_MAX_MW;
957 * we should never hit this. the detect function should've
958 * take cared of everything.
963 ndev->reg_ofs.lnk_cntl = ndev->reg_base + SNB_NTBCNTL_OFFSET;
964 ndev->reg_ofs.lnk_stat = ndev->reg_base + SNB_SLINK_STATUS_OFFSET;
965 ndev->reg_ofs.spci_cmd = ndev->reg_base + SNB_PCICMD_OFFSET;
967 ndev->limits.msix_cnt = SNB_MSIX_CNT;
968 ndev->bits_per_vector = SNB_DB_BITS_PER_VEC;
973 static int ntb_bwd_setup(struct ntb_device *ndev)
978 ndev->hw_type = BWD_HW;
980 rc = pci_read_config_dword(ndev->pdev, NTB_PPD_OFFSET, &val);
984 switch ((val & BWD_PPD_CONN_TYPE) >> 8) {
986 ndev->conn_type = NTB_CONN_B2B;
990 dev_err(&ndev->pdev->dev, "Unsupported NTB configuration\n");
994 if (val & BWD_PPD_DEV_TYPE)
995 ndev->dev_type = NTB_DEV_DSD;
997 ndev->dev_type = NTB_DEV_USD;
999 /* Initiate PCI-E link training */
1000 rc = pci_write_config_dword(ndev->pdev, NTB_PPD_OFFSET,
1001 val | BWD_PPD_INIT_LINK);
1005 ndev->reg_ofs.ldb = ndev->reg_base + BWD_PDOORBELL_OFFSET;
1006 ndev->reg_ofs.ldb_mask = ndev->reg_base + BWD_PDBMSK_OFFSET;
1007 ndev->reg_ofs.rdb = ndev->reg_base + BWD_B2B_DOORBELL_OFFSET;
1008 ndev->reg_ofs.bar2_xlat = ndev->reg_base + BWD_SBAR2XLAT_OFFSET;
1009 ndev->reg_ofs.bar4_xlat = ndev->reg_base + BWD_SBAR4XLAT_OFFSET;
1010 ndev->reg_ofs.lnk_cntl = ndev->reg_base + BWD_NTBCNTL_OFFSET;
1011 ndev->reg_ofs.lnk_stat = ndev->reg_base + BWD_LINK_STATUS_OFFSET;
1012 ndev->reg_ofs.spad_read = ndev->reg_base + BWD_SPAD_OFFSET;
1013 ndev->reg_ofs.spad_write = ndev->reg_base + BWD_B2B_SPAD_OFFSET;
1014 ndev->reg_ofs.spci_cmd = ndev->reg_base + BWD_PCICMD_OFFSET;
1015 ndev->limits.max_mw = BWD_MAX_MW;
1016 ndev->limits.max_spads = BWD_MAX_SPADS;
1017 ndev->limits.max_db_bits = BWD_MAX_DB_BITS;
1018 ndev->limits.msix_cnt = BWD_MSIX_CNT;
1019 ndev->bits_per_vector = BWD_DB_BITS_PER_VEC;
1021 /* Since bwd doesn't have a link interrupt, setup a poll timer */
1022 INIT_DELAYED_WORK(&ndev->hb_timer, bwd_link_poll);
1023 INIT_DELAYED_WORK(&ndev->lr_timer, bwd_link_recovery);
1024 schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
1029 static int ntb_device_setup(struct ntb_device *ndev)
1033 if (is_ntb_xeon(ndev))
1034 rc = ntb_xeon_setup(ndev);
1035 else if (is_ntb_atom(ndev))
1036 rc = ntb_bwd_setup(ndev);
1043 if (ndev->conn_type == NTB_CONN_B2B)
1044 /* Enable Bus Master and Memory Space on the secondary side */
1045 writew(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER,
1046 ndev->reg_ofs.spci_cmd);
1051 static void ntb_device_free(struct ntb_device *ndev)
1053 if (is_ntb_atom(ndev)) {
1054 cancel_delayed_work_sync(&ndev->hb_timer);
1055 cancel_delayed_work_sync(&ndev->lr_timer);
1059 static irqreturn_t bwd_callback_msix_irq(int irq, void *data)
1061 struct ntb_db_cb *db_cb = data;
1062 struct ntb_device *ndev = db_cb->ndev;
1065 dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq,
1068 mask = readw(ndev->reg_ofs.ldb_mask);
1069 set_bit(db_cb->db_num * ndev->bits_per_vector, &mask);
1070 writew(mask, ndev->reg_ofs.ldb_mask);
1072 tasklet_schedule(&db_cb->irq_work);
1074 /* No need to check for the specific HB irq, any interrupt means
1077 ndev->last_ts = jiffies;
1079 writeq((u64) 1 << db_cb->db_num, ndev->reg_ofs.ldb);
1084 static irqreturn_t xeon_callback_msix_irq(int irq, void *data)
1086 struct ntb_db_cb *db_cb = data;
1087 struct ntb_device *ndev = db_cb->ndev;
1090 dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq,
1093 mask = readw(ndev->reg_ofs.ldb_mask);
1094 set_bit(db_cb->db_num * ndev->bits_per_vector, &mask);
1095 writew(mask, ndev->reg_ofs.ldb_mask);
1097 tasklet_schedule(&db_cb->irq_work);
1099 /* On Sandybridge, there are 16 bits in the interrupt register
1100 * but only 4 vectors. So, 5 bits are assigned to the first 3
1101 * vectors, with the 4th having a single bit for link
1104 writew(((1 << ndev->bits_per_vector) - 1) <<
1105 (db_cb->db_num * ndev->bits_per_vector), ndev->reg_ofs.ldb);
1110 /* Since we do not have a HW doorbell in BWD, this is only used in JF/JT */
1111 static irqreturn_t xeon_event_msix_irq(int irq, void *dev)
1113 struct ntb_device *ndev = dev;
1116 dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for Events\n", irq);
1118 rc = ntb_link_status(ndev);
1120 dev_err(&ndev->pdev->dev, "Error determining link status\n");
1122 /* bit 15 is always the link bit */
1123 writew(1 << SNB_LINK_DB, ndev->reg_ofs.ldb);
/* Shared handler for MSI / legacy INTx, where one interrupt covers all
 * doorbells.  Reads the local doorbell register and demultiplexes each
 * set bit to the per-vector MSI-X handlers above.  @dev is the
 * ntb_device passed to request_irq().
 */
1128 static irqreturn_t ntb_interrupt(int irq, void *dev)
1130 struct ntb_device *ndev = dev;
1133 if (is_ntb_atom(ndev)) {
/* BWD: 64-bit doorbell register; dispatch each set bit */
1134 u64 ldb = readq(ndev->reg_ofs.ldb);
1136 dev_dbg(&ndev->pdev->dev, "irq %d - ldb = %Lx\n", irq, ldb);
1141 bwd_callback_msix_irq(irq, &ndev->db_cb[i]);
/* SNB: 16-bit doorbell register; link bit handled separately */
1144 u16 ldb = readw(ndev->reg_ofs.ldb);
1146 dev_dbg(&ndev->pdev->dev, "irq %d - ldb = %x\n", irq, ldb);
1148 if (ldb & SNB_DB_HW_LINK) {
1149 xeon_event_msix_irq(irq, dev);
1150 ldb &= ~SNB_DB_HW_LINK;
/* Remaining bits are data doorbells; fan out per callback slot */
1156 xeon_callback_msix_irq(irq, &ndev->db_cb[i]);
/* Allocate and wire up MSI-X for Xeon (SNB).
 * SNB needs the full complement of vectors (limits.msix_cnt) because
 * the doorbell bits are statically partitioned across vectors; the
 * last vector is reserved for link events.  On failure the already
 * requested callback IRQs are freed and MSI-X is disabled.
 * Returns 0 on success or a negative errno.
 */
1163 static int ntb_setup_snb_msix(struct ntb_device *ndev, int msix_entries)
1165 struct pci_dev *pdev = ndev->pdev;
1166 struct msix_entry *msix;
/* Partial vector allocation is not usable on SNB — all or nothing */
1169 if (msix_entries < ndev->limits.msix_cnt)
1172 rc = pci_enable_msix_exact(pdev, ndev->msix_entries, msix_entries);
1176 for (i = 0; i < msix_entries; i++) {
1177 msix = &ndev->msix_entries[i];
1178 WARN_ON(!msix->vector);
/* Last vector: dedicated link-event handler (ndev as cookie) */
1180 if (i == msix_entries - 1) {
1181 rc = request_irq(msix->vector,
1182 xeon_event_msix_irq, 0,
1183 "ntb-event-msix", ndev);
/* Other vectors: per-doorbell callback handlers (db_cb as cookie) */
1187 rc = request_irq(msix->vector,
1188 xeon_callback_msix_irq, 0,
1189 "ntb-callback-msix",
/* One callback slot per vector, minus the link-event vector */
1196 ndev->num_msix = msix_entries;
1197 ndev->max_cbs = msix_entries - 1;
/* Error unwind: free the callback IRQs requested so far */
1203 /* Code never reaches here for entry nr 'ndev->num_msix - 1' */
1204 msix = &ndev->msix_entries[i];
1205 free_irq(msix->vector, &ndev->db_cb[i]);
1208 pci_disable_msix(pdev);
/* Allocate and wire up MSI-X for Atom (BWD).
 * Unlike SNB, BWD tolerates a reduced vector count, so
 * pci_enable_msix_range() is used and every granted vector becomes a
 * doorbell callback (no dedicated link-event vector).  On failure the
 * IRQs requested so far are freed and MSI-X is disabled.
 * Returns 0 on success or a negative errno.
 */
1214 static int ntb_setup_bwd_msix(struct ntb_device *ndev, int msix_entries)
1216 struct pci_dev *pdev = ndev->pdev;
1217 struct msix_entry *msix;
/* May grant fewer vectors than requested; negative means failure */
1220 msix_entries = pci_enable_msix_range(pdev, ndev->msix_entries,
1222 if (msix_entries < 0)
1223 return msix_entries;
1225 for (i = 0; i < msix_entries; i++) {
1226 msix = &ndev->msix_entries[i];
1227 WARN_ON(!msix->vector);
1229 rc = request_irq(msix->vector, bwd_callback_msix_irq, 0,
1230 "ntb-callback-msix", &ndev->db_cb[i]);
/* All vectors are usable as callbacks on BWD */
1235 ndev->num_msix = msix_entries;
1236 ndev->max_cbs = msix_entries;
/* Error unwind: release already-requested vectors */
1242 free_irq(msix->vector, &ndev->db_cb[i]);
1244 pci_disable_msix(pdev);
/* Common MSI-X setup: query the vector count, allocate the
 * msix_entry table, then hand off to the HW-specific helper
 * (BWD or SNB).  Frees the table on failure.
 * Returns 0 on success or a negative errno.
 */
1250 static int ntb_setup_msix(struct ntb_device *ndev)
1252 struct pci_dev *pdev = ndev->pdev;
1256 msix_entries = pci_msix_vec_count(pdev);
1257 if (msix_entries < 0) {
/* Clamp to what this hardware variant can actually use */
1260 } else if (msix_entries > ndev->limits.msix_cnt) {
1265 ndev->msix_entries = kmalloc(sizeof(struct msix_entry) * msix_entries,
1267 if (!ndev->msix_entries) {
/* Identity-map table slots to vector indices before enabling */
1272 for (i = 0; i < msix_entries; i++)
1273 ndev->msix_entries[i].entry = i;
1275 if (is_ntb_atom(ndev))
1276 rc = ntb_setup_bwd_msix(ndev, msix_entries);
1278 rc = ntb_setup_snb_msix(ndev, msix_entries);
/* Error unwind: drop the entry table so callers can fall back to MSI */
1285 kfree(ndev->msix_entries);
1287 dev_err(&pdev->dev, "Error allocating MSI-X interrupt\n");
/* Fallback interrupt setup using a single MSI vector routed to the
 * shared ntb_interrupt() demultiplexer.  Disables MSI again if the
 * IRQ cannot be requested.  Returns 0 on success or a negative errno.
 */
1291 static int ntb_setup_msi(struct ntb_device *ndev)
1293 struct pci_dev *pdev = ndev->pdev;
1296 rc = pci_enable_msi(pdev);
1300 rc = request_irq(pdev->irq, ntb_interrupt, 0, "ntb-msi", ndev);
1302 pci_disable_msi(pdev);
1303 dev_err(&pdev->dev, "Error allocating MSI interrupt\n");
/* Last-resort interrupt setup: legacy shared INTx line routed to the
 * common ntb_interrupt() handler (IRQF_SHARED since the line may be
 * shared with other devices).
 */
1310 static int ntb_setup_intx(struct ntb_device *ndev)
1312 struct pci_dev *pdev = ndev->pdev;
1315 /* Verify intx is enabled */
1318 rc = request_irq(pdev->irq, ntb_interrupt, IRQF_SHARED, "ntb-intx",
/* Top-level interrupt bring-up: mask doorbells first, then try the
 * interrupt mechanisms in descending preference — MSI-X, MSI, INTx.
 * For non-MSI-X modes there is one vector, so bits_per_vector is 1
 * and every doorbell bit becomes a callback slot.
 * Returns 0 on success or a negative errno if nothing works.
 */
1326 static int ntb_setup_interrupts(struct ntb_device *ndev)
1330 /* On BWD, disable all interrupts. On SNB, disable all but Link
1331 * Interrupt. The rest will be unmasked as callbacks are registered.
 */
1333 if (is_ntb_atom(ndev))
1334 writeq(~0, ndev->reg_ofs.ldb_mask);
/* SNB: leave only the link doorbell (bit SNB_LINK_DB) unmasked */
1336 u16 var = 1 << SNB_LINK_DB;
1337 writew(~var, ndev->reg_ofs.ldb_mask);
1340 rc = ntb_setup_msix(ndev);
/* MSI-X failed: single-vector mode, all doorbell bits usable */
1344 ndev->bits_per_vector = 1;
1345 ndev->max_cbs = ndev->limits.max_db_bits;
1347 rc = ntb_setup_msi(ndev);
1351 rc = ntb_setup_intx(ndev);
1353 dev_err(&ndev->pdev->dev, "no usable interrupts\n");
/* Undo ntb_setup_interrupts(): mask everything, then free whichever
 * mechanism was active — per-vector MSI-X IRQs (remembering that on
 * Xeon the last vector was requested with ndev, not a db_cb, as its
 * cookie), or the single MSI/INTx line.
 */
1361 static void ntb_free_interrupts(struct ntb_device *ndev)
1363 struct pci_dev *pdev = ndev->pdev;
1365 /* mask interrupts */
1366 if (is_ntb_atom(ndev))
1367 writeq(~0, ndev->reg_ofs.ldb_mask);
1369 writew(~0, ndev->reg_ofs.ldb_mask);
1371 if (ndev->num_msix) {
1372 struct msix_entry *msix;
1375 for (i = 0; i < ndev->num_msix; i++) {
1376 msix = &ndev->msix_entries[i];
/* Cookie must match what request_irq() was given in setup */
1377 if (is_ntb_xeon(ndev) && i == ndev->num_msix - 1)
1378 free_irq(msix->vector, ndev);
1380 free_irq(msix->vector, &ndev->db_cb[i]);
1382 pci_disable_msix(pdev);
1383 kfree(ndev->msix_entries);
/* MSI or INTx path: one shared line with ndev as cookie */
1385 free_irq(pdev->irq, ndev);
1387 if (pci_dev_msi_enabled(pdev))
1388 pci_disable_msi(pdev);
/* Allocate the doorbell-callback slot array sized to the hardware
 * maximum and initialise each slot's doorbell number and back-pointer.
 * Returns 0 on success or a negative errno (visible paths suggest
 * -ENOMEM on allocation failure — TODO confirm against full source).
 */
1392 static int ntb_create_callbacks(struct ntb_device *ndev)
1396 /* Chicken-egg issue. We won't know how many callbacks are necessary
1397 * until we see how many MSI-X vectors we get, but these pointers need
1398 * to be passed into the MSI-X register function. So, we allocate the
1399 * max, knowing that they might not all be used, to work around this.
 */
1401 ndev->db_cb = kcalloc(ndev->limits.max_db_bits,
1402 sizeof(struct ntb_db_cb),
1407 for (i = 0; i < ndev->limits.max_db_bits; i++) {
1408 ndev->db_cb[i].db_num = i;
1409 ndev->db_cb[i].ndev = ndev;
/* Unregister every doorbell callback slot; counterpart of
 * ntb_create_callbacks().
 */
1415 static void ntb_free_callbacks(struct ntb_device *ndev)
1419 for (i = 0; i < ndev->limits.max_db_bits; i++)
1420 ntb_unregister_db_callback(ndev, i);
/* debugfs "info" read handler: format device identity, link state and
 * (on Xeon) hardware error/statistic registers into a kmalloc'd text
 * buffer, then copy the requested window to userspace via
 * simple_read_from_buffer().  Returns bytes copied or negative errno.
 */
1425 static ssize_t ntb_debugfs_read(struct file *filp, char __user *ubuf,
1426 size_t count, loff_t *offp)
1428 struct ntb_device *ndev;
1430 ssize_t ret, offset, out_count;
1434 buf = kmalloc(out_count, GFP_KERNEL);
1438 ndev = filp->private_data;
/* Static device description: connection type, device type, limits */
1440 offset += snprintf(buf + offset, out_count - offset,
1441 "NTB Device Information:\n");
1442 offset += snprintf(buf + offset, out_count - offset,
1443 "Connection Type - \t\t%s\n",
1444 ndev->conn_type == NTB_CONN_TRANSPARENT ?
1445 "Transparent" : (ndev->conn_type == NTB_CONN_B2B) ?
1446 "Back to back" : "Root Port");
1447 offset += snprintf(buf + offset, out_count - offset,
1448 "Device Type - \t\t\t%s\n",
1449 ndev->dev_type == NTB_DEV_USD ?
1450 "DSD/USP" : "USD/DSP");
1451 offset += snprintf(buf + offset, out_count - offset,
1452 "Max Number of Callbacks - \t%u\n",
/* Link state, plus speed/width only when the link is up */
1454 offset += snprintf(buf + offset, out_count - offset,
1455 "Link Status - \t\t\t%s\n",
1456 ntb_hw_link_status(ndev) ? "Up" : "Down");
1457 if (ntb_hw_link_status(ndev)) {
1458 offset += snprintf(buf + offset, out_count - offset,
1459 "Link Speed - \t\t\tPCI-E Gen %u\n",
1461 offset += snprintf(buf + offset, out_count - offset,
1462 "Link Width - \t\t\tx%u\n",
/* Xeon-only diagnostics: MMIO statistic counter and PCI config-space
 * error/status registers */
1466 if (is_ntb_xeon(ndev)) {
1471 offset += snprintf(buf + offset, out_count - offset,
1472 "\nNTB Device Statistics:\n");
1473 offset += snprintf(buf + offset, out_count - offset,
1474 "Upstream Memory Miss - \t%u\n",
1475 readw(ndev->reg_base +
1476 SNB_USMEMMISS_OFFSET));
1478 offset += snprintf(buf + offset, out_count - offset,
1479 "\nNTB Hardware Errors:\n");
1481 rc = pci_read_config_word(ndev->pdev, SNB_DEVSTS_OFFSET,
1484 offset += snprintf(buf + offset, out_count - offset,
1485 "DEVSTS - \t%#06x\n", status16);
1487 rc = pci_read_config_word(ndev->pdev, SNB_LINK_STATUS_OFFSET,
1490 offset += snprintf(buf + offset, out_count - offset,
1491 "LNKSTS - \t%#06x\n", status16);
1493 rc = pci_read_config_dword(ndev->pdev, SNB_UNCERRSTS_OFFSET,
1496 offset += snprintf(buf + offset, out_count - offset,
1497 "UNCERRSTS - \t%#010x\n", status32);
1499 rc = pci_read_config_dword(ndev->pdev, SNB_CORERRSTS_OFFSET,
1502 offset += snprintf(buf + offset, out_count - offset,
1503 "CORERRSTS - \t%#010x\n", status32);
/* Clamp in case snprintf truncated against out_count */
1506 if (offset > out_count)
1509 ret = simple_read_from_buffer(ubuf, count, offp, buf, offset);
/* fops for the per-device debugfs "info" file; simple_open() stashes
 * the inode's private data (the ntb_device) into filp->private_data.
 */
1514 static const struct file_operations ntb_debugfs_info = {
1515 .owner = THIS_MODULE,
1516 .open = simple_open,
1517 .read = ntb_debugfs_read,
/* Create the module-wide debugfs directory (once) and a per-device
 * subdirectory named after the PCI device, containing the read-only
 * "info" file.  Silently does nothing if debugfs is unavailable.
 */
1520 static void ntb_setup_debugfs(struct ntb_device *ndev)
1522 if (!debugfs_initialized())
/* Shared top-level directory, created on first device */
1526 debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
1528 ndev->debugfs_dir = debugfs_create_dir(pci_name(ndev->pdev),
1530 if (ndev->debugfs_dir)
1531 ndev->debugfs_info = debugfs_create_file("info", S_IRUSR,
/* Remove this device's debugfs subtree, and drop the shared top-level
 * directory once the last device's entries are gone.
 */
1537 static void ntb_free_debugfs(struct ntb_device *ndev)
1539 debugfs_remove_recursive(ndev->debugfs_dir);
1541 if (debugfs_dir && simple_empty(debugfs_dir)) {
1542 debugfs_remove_recursive(debugfs_dir);
/* Bring the NTB link up.  In transparent mode there is no NTB control
 * register to program, so just report link-up to the upper layer;
 * otherwise clear the disable/lock bits and enable snooping on the
 * BAR translation windows (BAR5 only in split-BAR mode).
 */
1547 static void ntb_hw_link_up(struct ntb_device *ndev)
1549 if (ndev->conn_type == NTB_CONN_TRANSPARENT)
1550 ntb_link_event(ndev, NTB_LINK_UP);
1554 /* Let's bring the NTB link up */
1555 ntb_cntl = readl(ndev->reg_ofs.lnk_cntl);
1556 ntb_cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
1557 ntb_cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
1558 ntb_cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
1559 if (ndev->split_bar)
1560 ntb_cntl |= NTB_CNTL_P2S_BAR5_SNOOP |
1561 NTB_CNTL_S2P_BAR5_SNOOP;
1563 writel(ntb_cntl, ndev->reg_ofs.lnk_cntl);
/* Take the NTB link down — exact inverse of ntb_hw_link_up():
 * transparent mode only signals the event; otherwise snooping is
 * cleared on all BAR windows and the disable/lock bits are set.
 */
1567 static void ntb_hw_link_down(struct ntb_device *ndev)
1571 if (ndev->conn_type == NTB_CONN_TRANSPARENT) {
1572 ntb_link_event(ndev, NTB_LINK_DOWN);
1576 /* Bring NTB link down */
1577 ntb_cntl = readl(ndev->reg_ofs.lnk_cntl);
1578 ntb_cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
1579 ntb_cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
1580 if (ndev->split_bar)
1581 ntb_cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP |
1582 NTB_CNTL_S2P_BAR5_SNOOP);
1583 ntb_cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
1584 writel(ntb_cntl, ndev->reg_ofs.lnk_cntl);
/* Set the memory-window limit for Xeon hardware: split-BAR parts
 * expose one more window (HSX_SPLITBAR_MAX_MW) than standard SNB
 * parts (SNB_MAX_MW).  Also used by remove() to restore the limit
 * before unmapping.
 */
1587 static void ntb_max_mw_detect(struct ntb_device *ndev)
1589 if (ndev->split_bar)
1590 ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
1592 ndev->limits.max_mw = SNB_MAX_MW;
/* Probe-time identification for Xeon (SNB) hardware.  Reads the PPD
 * config register to determine device type (USD/DSD), split-BAR mode
 * and connection topology (B2B / RP / Transparent), then derives the
 * memory-window limit.  Returns 0 on success or a negative errno.
 */
1595 static int ntb_xeon_detect(struct ntb_device *ndev)
1601 ndev->hw_type = SNB_HW;
1603 rc = pci_read_config_byte(ndev->pdev, NTB_PPD_OFFSET, &ppd);
1607 if (ppd & SNB_PPD_DEV_TYPE)
1608 ndev->dev_type = NTB_DEV_USD;
1610 ndev->dev_type = NTB_DEV_DSD;
1612 ndev->split_bar = (ppd & SNB_PPD_SPLIT_BAR) ? 1 : 0;
1614 switch (ppd & SNB_PPD_CONN_TYPE) {
1616 dev_info(&ndev->pdev->dev, "Conn Type = B2B\n");
1617 ndev->conn_type = NTB_CONN_B2B;
1620 dev_info(&ndev->pdev->dev, "Conn Type = RP\n");
1621 ndev->conn_type = NTB_CONN_RP;
1623 case NTB_CONN_TRANSPARENT:
1624 dev_info(&ndev->pdev->dev, "Conn Type = TRANSPARENT\n");
1625 ndev->conn_type = NTB_CONN_TRANSPARENT;
1627 * This mode is default to USD/DSP. HW does not report
1628 * properly in transparent mode as it has no knowledge of
1629 * NTB. We will just force correct here.
 */
1631 ndev->dev_type = NTB_DEV_USD;
1634 * This is a way for transparent BAR to figure out if we
1635 * are doing split BAR or not. There is no way for the hw
1636 * on the transparent side to know and set the PPD.
 */
1638 bars_mask = pci_select_bars(ndev->pdev, IORESOURCE_MEM);
1639 bars = hweight32(bars_mask);
/* Extra memory BAR present => part is running in split-BAR mode */
1640 if (bars == (HSX_SPLITBAR_MAX_MW + 1))
1641 ndev->split_bar = 1;
1645 dev_err(&ndev->pdev->dev, "Unknown PPD %x\n", ppd);
1649 ntb_max_mw_detect(ndev);
/* Probe-time identification for Atom (BWD) hardware.  Only B2B
 * topology is supported; the PPD register (dword on BWD) gives the
 * connection type in bits 8+ and the device type (USD/DSD) as a flag.
 * Returns 0 on success or a negative errno.
 */
1654 static int ntb_atom_detect(struct ntb_device *ndev)
1659 ndev->hw_type = BWD_HW;
1660 ndev->limits.max_mw = BWD_MAX_MW;
1662 rc = pci_read_config_dword(ndev->pdev, NTB_PPD_OFFSET, &ppd);
1666 switch ((ppd & BWD_PPD_CONN_TYPE) >> 8) {
1668 dev_info(&ndev->pdev->dev, "Conn Type = B2B\n");
1669 ndev->conn_type = NTB_CONN_B2B;
/* Anything other than B2B is rejected on Atom */
1673 dev_err(&ndev->pdev->dev, "Unsupported NTB configuration\n");
1677 if (ppd & BWD_PPD_DEV_TYPE)
1678 ndev->dev_type = NTB_DEV_DSD;
1680 ndev->dev_type = NTB_DEV_USD;
/* Dispatch hardware detection to the Xeon or Atom helper based on the
 * PCI device ID, then log the resulting device type.
 * Returns 0 on success or a negative errno.
 */
1685 static int ntb_device_detect(struct ntb_device *ndev)
1689 if (is_ntb_xeon(ndev))
1690 rc = ntb_xeon_detect(ndev);
1691 else if (is_ntb_atom(ndev))
1692 rc = ntb_atom_detect(ndev);
1696 dev_info(&ndev->pdev->dev, "Device Type = %s\n",
1697 ndev->dev_type == NTB_DEV_USD ? "USD/DSP" : "DSD/USP");
/* PCI probe: full device bring-up in order — allocate ndev, detect HW,
 * map BARs/memory windows, set DMA masks, program device registers,
 * set up callbacks and interrupts, zero scratchpads, start transport,
 * and finally raise the link.  Errors unwind in reverse order via the
 * goto ladder at the bottom.  Returns 0 or a negative errno.
 */
1702 static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1704 struct ntb_device *ndev;
1707 ndev = kzalloc(sizeof(struct ntb_device), GFP_KERNEL);
1713 ntb_set_errata_flags(ndev);
1715 ndev->link_status = NTB_LINK_DOWN;
1716 pci_set_drvdata(pdev, ndev);
1717 ntb_setup_debugfs(ndev);
1719 rc = pci_enable_device(pdev);
1723 pci_set_master(ndev->pdev);
1725 rc = ntb_device_detect(ndev);
/* max_mw is known only after detection, so allocate mw[] here */
1729 ndev->mw = kcalloc(ndev->limits.max_mw, sizeof(struct ntb_mw),
/* Split-BAR parts claim a different BAR set than standard parts */
1736 if (ndev->split_bar)
1737 rc = pci_request_selected_regions(pdev, NTB_SPLITBAR_MASK,
1740 rc = pci_request_selected_regions(pdev, NTB_BAR_MASK,
1746 ndev->reg_base = pci_ioremap_bar(pdev, NTB_BAR_MMIO);
1747 if (!ndev->reg_base) {
1748 dev_warn(&pdev->dev, "Cannot remap BAR 0\n");
/* Map each memory-window BAR; errata windows hold MMIO registers and
 * must be mapped uncached, normal windows are write-combined */
1753 for (i = 0; i < ndev->limits.max_mw; i++) {
1754 ndev->mw[i].bar_sz = pci_resource_len(pdev, MW_TO_BAR(i));
1757 * with the errata we need to steal last of the memory
1758 * windows for workarounds and they point to MMIO registers.
 */
1760 if ((ndev->wa_flags & WA_SNB_ERR) &&
1761 (i == (ndev->limits.max_mw - 1))) {
1763 ioremap_nocache(pci_resource_start(pdev,
1765 ndev->mw[i].bar_sz);
1768 ioremap_wc(pci_resource_start(pdev,
1770 ndev->mw[i].bar_sz);
1773 dev_info(&pdev->dev, "MW %d size %llu\n", i,
1774 (unsigned long long) ndev->mw[i].bar_sz);
1775 if (!ndev->mw[i].vbase) {
1776 dev_warn(&pdev->dev, "Cannot remap BAR %d\n",
/* Prefer 64-bit DMA, fall back to 32-bit for streaming then
 * coherent allocations */
1783 rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1785 rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1789 dev_warn(&pdev->dev, "Cannot DMA highmem\n");
1792 rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1794 rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1798 dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n");
1801 rc = ntb_device_setup(ndev);
1805 rc = ntb_create_callbacks(ndev);
1809 rc = ntb_setup_interrupts(ndev);
1813 /* The scratchpad registers keep the values between rmmod/insmod,
 * so clear both local and remote scratchpads before use. */
1816 for (i = 0; i < ndev->limits.max_spads; i++) {
1817 ntb_write_local_spad(ndev, i, 0);
1818 ntb_write_remote_spad(ndev, i, 0);
1821 rc = ntb_transport_init(pdev);
1825 ntb_hw_link_up(ndev);
/* ---- error unwind: reverse order of the setup steps above ---- */
1830 ntb_free_interrupts(ndev);
1832 ntb_free_callbacks(ndev);
1834 ntb_device_free(ndev);
/* i holds the count of successfully mapped windows at failure time */
1836 for (i--; i >= 0; i--)
1837 iounmap(ndev->mw[i].vbase);
1838 iounmap(ndev->reg_base);
1840 if (ndev->split_bar)
1841 pci_release_selected_regions(pdev, NTB_SPLITBAR_MASK);
1843 pci_release_selected_regions(pdev, NTB_BAR_MASK);
1847 pci_disable_device(pdev);
1849 ntb_free_debugfs(ndev);
1852 dev_err(&pdev->dev, "Error loading %s module\n", KBUILD_MODNAME);
/* PCI remove: tear everything down in reverse probe order — link
 * down, transport, interrupts, callbacks, device work, memory-window
 * unmaps, BAR regions, PCI device, debugfs.
 */
1856 static void ntb_pci_remove(struct pci_dev *pdev)
1858 struct ntb_device *ndev = pci_get_drvdata(pdev);
1861 ntb_hw_link_down(ndev);
1863 ntb_transport_free(ndev->ntb_transport);
1865 ntb_free_interrupts(ndev);
1866 ntb_free_callbacks(ndev);
1867 ntb_device_free(ndev);
1869 /* need to reset max_mw limits so we can unmap properly */
1870 if (ndev->hw_type == SNB_HW)
1871 ntb_max_mw_detect(ndev);
1873 for (i = 0; i < ndev->limits.max_mw; i++)
1874 iounmap(ndev->mw[i].vbase);
1877 iounmap(ndev->reg_base);
1878 if (ndev->split_bar)
1879 pci_release_selected_regions(pdev, NTB_SPLITBAR_MASK);
1881 pci_release_selected_regions(pdev, NTB_BAR_MASK);
1882 pci_disable_device(pdev);
1883 ntb_free_debugfs(ndev);
/* PCI driver registration; module_pci_driver() generates the module
 * init/exit boilerplate that registers/unregisters this driver.
 */
1887 static struct pci_driver ntb_pci_driver = {
1888 .name = KBUILD_MODNAME,
1889 .id_table = ntb_pci_tbl,
1890 .probe = ntb_pci_probe,
1891 .remove = ntb_pci_remove,
1894 module_pci_driver(ntb_pci_driver);