powerpc/powernv: Rename PE# fields in struct pnv_phb
[cascardo/linux.git] / arch / powerpc / platforms / powernv / pci.c
1 /*
2  * Support PCI/PCIe on PowerNV platforms
3  *
4  * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #include <linux/kernel.h>
13 #include <linux/pci.h>
14 #include <linux/delay.h>
15 #include <linux/string.h>
16 #include <linux/init.h>
17 #include <linux/irq.h>
18 #include <linux/io.h>
19 #include <linux/msi.h>
20 #include <linux/iommu.h>
21
22 #include <asm/sections.h>
23 #include <asm/io.h>
24 #include <asm/prom.h>
25 #include <asm/pci-bridge.h>
26 #include <asm/machdep.h>
27 #include <asm/msi_bitmap.h>
28 #include <asm/ppc-pci.h>
29 #include <asm/opal.h>
30 #include <asm/iommu.h>
31 #include <asm/tce.h>
32 #include <asm/firmware.h>
33 #include <asm/eeh_event.h>
34 #include <asm/eeh.h>
35
36 #include "powernv.h"
37 #include "pci.h"
38
39 /* Delay in usec */
40 #define PCI_RESET_DELAY_US      3000000
41
42 #ifdef CONFIG_PCI_MSI
43 int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
44 {
45         struct pci_controller *hose = pci_bus_to_host(pdev->bus);
46         struct pnv_phb *phb = hose->private_data;
47         struct msi_desc *entry;
48         struct msi_msg msg;
49         int hwirq;
50         unsigned int virq;
51         int rc;
52
53         if (WARN_ON(!phb) || !phb->msi_bmp.bitmap)
54                 return -ENODEV;
55
56         if (pdev->no_64bit_msi && !phb->msi32_support)
57                 return -ENODEV;
58
59         for_each_pci_msi_entry(entry, pdev) {
60                 if (!entry->msi_attrib.is_64 && !phb->msi32_support) {
61                         pr_warn("%s: Supports only 64-bit MSIs\n",
62                                 pci_name(pdev));
63                         return -ENXIO;
64                 }
65                 hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, 1);
66                 if (hwirq < 0) {
67                         pr_warn("%s: Failed to find a free MSI\n",
68                                 pci_name(pdev));
69                         return -ENOSPC;
70                 }
71                 virq = irq_create_mapping(NULL, phb->msi_base + hwirq);
72                 if (virq == NO_IRQ) {
73                         pr_warn("%s: Failed to map MSI to linux irq\n",
74                                 pci_name(pdev));
75                         msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1);
76                         return -ENOMEM;
77                 }
78                 rc = phb->msi_setup(phb, pdev, phb->msi_base + hwirq,
79                                     virq, entry->msi_attrib.is_64, &msg);
80                 if (rc) {
81                         pr_warn("%s: Failed to setup MSI\n", pci_name(pdev));
82                         irq_dispose_mapping(virq);
83                         msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1);
84                         return rc;
85                 }
86                 irq_set_msi_desc(virq, entry);
87                 pci_write_msi_msg(virq, &msg);
88         }
89         return 0;
90 }
91
92 void pnv_teardown_msi_irqs(struct pci_dev *pdev)
93 {
94         struct pci_controller *hose = pci_bus_to_host(pdev->bus);
95         struct pnv_phb *phb = hose->private_data;
96         struct msi_desc *entry;
97         irq_hw_number_t hwirq;
98
99         if (WARN_ON(!phb))
100                 return;
101
102         for_each_pci_msi_entry(entry, pdev) {
103                 if (entry->irq == NO_IRQ)
104                         continue;
105                 hwirq = virq_to_hw(entry->irq);
106                 irq_set_msi_desc(entry->irq, NULL);
107                 irq_dispose_mapping(entry->irq);
108                 msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1);
109         }
110 }
111 #endif /* CONFIG_PCI_MSI */
112
113 static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
114                                          struct OpalIoPhbErrorCommon *common)
115 {
116         struct OpalIoP7IOCPhbErrorData *data;
117         int i;
118
119         data = (struct OpalIoP7IOCPhbErrorData *)common;
120         pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n",
121                 hose->global_number, be32_to_cpu(common->version));
122
123         if (data->brdgCtl)
124                 pr_info("brdgCtl:     %08x\n",
125                         be32_to_cpu(data->brdgCtl));
126         if (data->portStatusReg || data->rootCmplxStatus ||
127             data->busAgentStatus)
128                 pr_info("UtlSts:      %08x %08x %08x\n",
129                         be32_to_cpu(data->portStatusReg),
130                         be32_to_cpu(data->rootCmplxStatus),
131                         be32_to_cpu(data->busAgentStatus));
132         if (data->deviceStatus || data->slotStatus   ||
133             data->linkStatus   || data->devCmdStatus ||
134             data->devSecStatus)
135                 pr_info("RootSts:     %08x %08x %08x %08x %08x\n",
136                         be32_to_cpu(data->deviceStatus),
137                         be32_to_cpu(data->slotStatus),
138                         be32_to_cpu(data->linkStatus),
139                         be32_to_cpu(data->devCmdStatus),
140                         be32_to_cpu(data->devSecStatus));
141         if (data->rootErrorStatus   || data->uncorrErrorStatus ||
142             data->corrErrorStatus)
143                 pr_info("RootErrSts:  %08x %08x %08x\n",
144                         be32_to_cpu(data->rootErrorStatus),
145                         be32_to_cpu(data->uncorrErrorStatus),
146                         be32_to_cpu(data->corrErrorStatus));
147         if (data->tlpHdr1 || data->tlpHdr2 ||
148             data->tlpHdr3 || data->tlpHdr4)
149                 pr_info("RootErrLog:  %08x %08x %08x %08x\n",
150                         be32_to_cpu(data->tlpHdr1),
151                         be32_to_cpu(data->tlpHdr2),
152                         be32_to_cpu(data->tlpHdr3),
153                         be32_to_cpu(data->tlpHdr4));
154         if (data->sourceId || data->errorClass ||
155             data->correlator)
156                 pr_info("RootErrLog1: %08x %016llx %016llx\n",
157                         be32_to_cpu(data->sourceId),
158                         be64_to_cpu(data->errorClass),
159                         be64_to_cpu(data->correlator));
160         if (data->p7iocPlssr || data->p7iocCsr)
161                 pr_info("PhbSts:      %016llx %016llx\n",
162                         be64_to_cpu(data->p7iocPlssr),
163                         be64_to_cpu(data->p7iocCsr));
164         if (data->lemFir)
165                 pr_info("Lem:         %016llx %016llx %016llx\n",
166                         be64_to_cpu(data->lemFir),
167                         be64_to_cpu(data->lemErrorMask),
168                         be64_to_cpu(data->lemWOF));
169         if (data->phbErrorStatus)
170                 pr_info("PhbErr:      %016llx %016llx %016llx %016llx\n",
171                         be64_to_cpu(data->phbErrorStatus),
172                         be64_to_cpu(data->phbFirstErrorStatus),
173                         be64_to_cpu(data->phbErrorLog0),
174                         be64_to_cpu(data->phbErrorLog1));
175         if (data->mmioErrorStatus)
176                 pr_info("OutErr:      %016llx %016llx %016llx %016llx\n",
177                         be64_to_cpu(data->mmioErrorStatus),
178                         be64_to_cpu(data->mmioFirstErrorStatus),
179                         be64_to_cpu(data->mmioErrorLog0),
180                         be64_to_cpu(data->mmioErrorLog1));
181         if (data->dma0ErrorStatus)
182                 pr_info("InAErr:      %016llx %016llx %016llx %016llx\n",
183                         be64_to_cpu(data->dma0ErrorStatus),
184                         be64_to_cpu(data->dma0FirstErrorStatus),
185                         be64_to_cpu(data->dma0ErrorLog0),
186                         be64_to_cpu(data->dma0ErrorLog1));
187         if (data->dma1ErrorStatus)
188                 pr_info("InBErr:      %016llx %016llx %016llx %016llx\n",
189                         be64_to_cpu(data->dma1ErrorStatus),
190                         be64_to_cpu(data->dma1FirstErrorStatus),
191                         be64_to_cpu(data->dma1ErrorLog0),
192                         be64_to_cpu(data->dma1ErrorLog1));
193
194         for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
195                 if ((data->pestA[i] >> 63) == 0 &&
196                     (data->pestB[i] >> 63) == 0)
197                         continue;
198
199                 pr_info("PE[%3d] A/B: %016llx %016llx\n",
200                         i, be64_to_cpu(data->pestA[i]),
201                         be64_to_cpu(data->pestB[i]));
202         }
203 }
204
205 static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
206                                         struct OpalIoPhbErrorCommon *common)
207 {
208         struct OpalIoPhb3ErrorData *data;
209         int i;
210
211         data = (struct OpalIoPhb3ErrorData*)common;
212         pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n",
213                 hose->global_number, be32_to_cpu(common->version));
214         if (data->brdgCtl)
215                 pr_info("brdgCtl:     %08x\n",
216                         be32_to_cpu(data->brdgCtl));
217         if (data->portStatusReg || data->rootCmplxStatus ||
218             data->busAgentStatus)
219                 pr_info("UtlSts:      %08x %08x %08x\n",
220                         be32_to_cpu(data->portStatusReg),
221                         be32_to_cpu(data->rootCmplxStatus),
222                         be32_to_cpu(data->busAgentStatus));
223         if (data->deviceStatus || data->slotStatus   ||
224             data->linkStatus   || data->devCmdStatus ||
225             data->devSecStatus)
226                 pr_info("RootSts:     %08x %08x %08x %08x %08x\n",
227                         be32_to_cpu(data->deviceStatus),
228                         be32_to_cpu(data->slotStatus),
229                         be32_to_cpu(data->linkStatus),
230                         be32_to_cpu(data->devCmdStatus),
231                         be32_to_cpu(data->devSecStatus));
232         if (data->rootErrorStatus || data->uncorrErrorStatus ||
233             data->corrErrorStatus)
234                 pr_info("RootErrSts:  %08x %08x %08x\n",
235                         be32_to_cpu(data->rootErrorStatus),
236                         be32_to_cpu(data->uncorrErrorStatus),
237                         be32_to_cpu(data->corrErrorStatus));
238         if (data->tlpHdr1 || data->tlpHdr2 ||
239             data->tlpHdr3 || data->tlpHdr4)
240                 pr_info("RootErrLog:  %08x %08x %08x %08x\n",
241                         be32_to_cpu(data->tlpHdr1),
242                         be32_to_cpu(data->tlpHdr2),
243                         be32_to_cpu(data->tlpHdr3),
244                         be32_to_cpu(data->tlpHdr4));
245         if (data->sourceId || data->errorClass ||
246             data->correlator)
247                 pr_info("RootErrLog1: %08x %016llx %016llx\n",
248                         be32_to_cpu(data->sourceId),
249                         be64_to_cpu(data->errorClass),
250                         be64_to_cpu(data->correlator));
251         if (data->nFir)
252                 pr_info("nFir:        %016llx %016llx %016llx\n",
253                         be64_to_cpu(data->nFir),
254                         be64_to_cpu(data->nFirMask),
255                         be64_to_cpu(data->nFirWOF));
256         if (data->phbPlssr || data->phbCsr)
257                 pr_info("PhbSts:      %016llx %016llx\n",
258                         be64_to_cpu(data->phbPlssr),
259                         be64_to_cpu(data->phbCsr));
260         if (data->lemFir)
261                 pr_info("Lem:         %016llx %016llx %016llx\n",
262                         be64_to_cpu(data->lemFir),
263                         be64_to_cpu(data->lemErrorMask),
264                         be64_to_cpu(data->lemWOF));
265         if (data->phbErrorStatus)
266                 pr_info("PhbErr:      %016llx %016llx %016llx %016llx\n",
267                         be64_to_cpu(data->phbErrorStatus),
268                         be64_to_cpu(data->phbFirstErrorStatus),
269                         be64_to_cpu(data->phbErrorLog0),
270                         be64_to_cpu(data->phbErrorLog1));
271         if (data->mmioErrorStatus)
272                 pr_info("OutErr:      %016llx %016llx %016llx %016llx\n",
273                         be64_to_cpu(data->mmioErrorStatus),
274                         be64_to_cpu(data->mmioFirstErrorStatus),
275                         be64_to_cpu(data->mmioErrorLog0),
276                         be64_to_cpu(data->mmioErrorLog1));
277         if (data->dma0ErrorStatus)
278                 pr_info("InAErr:      %016llx %016llx %016llx %016llx\n",
279                         be64_to_cpu(data->dma0ErrorStatus),
280                         be64_to_cpu(data->dma0FirstErrorStatus),
281                         be64_to_cpu(data->dma0ErrorLog0),
282                         be64_to_cpu(data->dma0ErrorLog1));
283         if (data->dma1ErrorStatus)
284                 pr_info("InBErr:      %016llx %016llx %016llx %016llx\n",
285                         be64_to_cpu(data->dma1ErrorStatus),
286                         be64_to_cpu(data->dma1FirstErrorStatus),
287                         be64_to_cpu(data->dma1ErrorLog0),
288                         be64_to_cpu(data->dma1ErrorLog1));
289
290         for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
291                 if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 &&
292                     (be64_to_cpu(data->pestB[i]) >> 63) == 0)
293                         continue;
294
295                 pr_info("PE[%3d] A/B: %016llx %016llx\n",
296                                 i, be64_to_cpu(data->pestA[i]),
297                                 be64_to_cpu(data->pestB[i]));
298         }
299 }
300
301 void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
302                                 unsigned char *log_buff)
303 {
304         struct OpalIoPhbErrorCommon *common;
305
306         if (!hose || !log_buff)
307                 return;
308
309         common = (struct OpalIoPhbErrorCommon *)log_buff;
310         switch (be32_to_cpu(common->ioType)) {
311         case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
312                 pnv_pci_dump_p7ioc_diag_data(hose, common);
313                 break;
314         case OPAL_PHB_ERROR_DATA_TYPE_PHB3:
315                 pnv_pci_dump_phb3_diag_data(hose, common);
316                 break;
317         default:
318                 pr_warn("%s: Unrecognized ioType %d\n",
319                         __func__, be32_to_cpu(common->ioType));
320         }
321 }
322
323 static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
324 {
325         unsigned long flags, rc;
326         int has_diag, ret = 0;
327
328         spin_lock_irqsave(&phb->lock, flags);
329
330         /* Fetch PHB diag-data */
331         rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
332                                          PNV_PCI_DIAG_BUF_SIZE);
333         has_diag = (rc == OPAL_SUCCESS);
334
335         /* If PHB supports compound PE, to handle it */
336         if (phb->unfreeze_pe) {
337                 ret = phb->unfreeze_pe(phb,
338                                        pe_no,
339                                        OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
340         } else {
341                 rc = opal_pci_eeh_freeze_clear(phb->opal_id,
342                                              pe_no,
343                                              OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
344                 if (rc) {
345                         pr_warn("%s: Failure %ld clearing frozen "
346                                 "PHB#%x-PE#%x\n",
347                                 __func__, rc, phb->hose->global_number,
348                                 pe_no);
349                         ret = -EIO;
350                 }
351         }
352
353         /*
354          * For now, let's only display the diag buffer when we fail to clear
355          * the EEH status. We'll do more sensible things later when we have
356          * proper EEH support. We need to make sure we don't pollute ourselves
357          * with the normal errors generated when probing empty slots
358          */
359         if (has_diag && ret)
360                 pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
361
362         spin_unlock_irqrestore(&phb->lock, flags);
363 }
364
365 static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
366 {
367         struct pnv_phb *phb = pdn->phb->private_data;
368         u8      fstate;
369         __be16  pcierr;
370         int     pe_no;
371         s64     rc;
372
373         /*
374          * Get the PE#. During the PCI probe stage, we might not
375          * setup that yet. So all ER errors should be mapped to
376          * reserved PE.
377          */
378         pe_no = pdn->pe_number;
379         if (pe_no == IODA_INVALID_PE) {
380                 pe_no = phb->ioda.reserved_pe_idx;
381         }
382
383         /*
384          * Fetch frozen state. If the PHB support compound PE,
385          * we need handle that case.
386          */
387         if (phb->get_pe_state) {
388                 fstate = phb->get_pe_state(phb, pe_no);
389         } else {
390                 rc = opal_pci_eeh_freeze_status(phb->opal_id,
391                                                 pe_no,
392                                                 &fstate,
393                                                 &pcierr,
394                                                 NULL);
395                 if (rc) {
396                         pr_warn("%s: Failure %lld getting PHB#%x-PE#%x state\n",
397                                 __func__, rc, phb->hose->global_number, pe_no);
398                         return;
399                 }
400         }
401
402         pr_devel(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
403                  (pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
404
405         /* Clear the frozen state if applicable */
406         if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE ||
407             fstate == OPAL_EEH_STOPPED_DMA_FREEZE  ||
408             fstate == OPAL_EEH_STOPPED_MMIO_DMA_FREEZE) {
409                 /*
410                  * If PHB supports compound PE, freeze it for
411                  * consistency.
412                  */
413                 if (phb->freeze_pe)
414                         phb->freeze_pe(phb, pe_no);
415
416                 pnv_pci_handle_eeh_config(phb, pe_no);
417         }
418 }
419
420 int pnv_pci_cfg_read(struct pci_dn *pdn,
421                      int where, int size, u32 *val)
422 {
423         struct pnv_phb *phb = pdn->phb->private_data;
424         u32 bdfn = (pdn->busno << 8) | pdn->devfn;
425         s64 rc;
426
427         switch (size) {
428         case 1: {
429                 u8 v8;
430                 rc = opal_pci_config_read_byte(phb->opal_id, bdfn, where, &v8);
431                 *val = (rc == OPAL_SUCCESS) ? v8 : 0xff;
432                 break;
433         }
434         case 2: {
435                 __be16 v16;
436                 rc = opal_pci_config_read_half_word(phb->opal_id, bdfn, where,
437                                                    &v16);
438                 *val = (rc == OPAL_SUCCESS) ? be16_to_cpu(v16) : 0xffff;
439                 break;
440         }
441         case 4: {
442                 __be32 v32;
443                 rc = opal_pci_config_read_word(phb->opal_id, bdfn, where, &v32);
444                 *val = (rc == OPAL_SUCCESS) ? be32_to_cpu(v32) : 0xffffffff;
445                 break;
446         }
447         default:
448                 return PCIBIOS_FUNC_NOT_SUPPORTED;
449         }
450
451         pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
452                  __func__, pdn->busno, pdn->devfn, where, size, *val);
453         return PCIBIOS_SUCCESSFUL;
454 }
455
456 int pnv_pci_cfg_write(struct pci_dn *pdn,
457                       int where, int size, u32 val)
458 {
459         struct pnv_phb *phb = pdn->phb->private_data;
460         u32 bdfn = (pdn->busno << 8) | pdn->devfn;
461
462         pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
463                  __func__, pdn->busno, pdn->devfn, where, size, val);
464         switch (size) {
465         case 1:
466                 opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
467                 break;
468         case 2:
469                 opal_pci_config_write_half_word(phb->opal_id, bdfn, where, val);
470                 break;
471         case 4:
472                 opal_pci_config_write_word(phb->opal_id, bdfn, where, val);
473                 break;
474         default:
475                 return PCIBIOS_FUNC_NOT_SUPPORTED;
476         }
477
478         return PCIBIOS_SUCCESSFUL;
479 }
480
481 #if CONFIG_EEH
482 static bool pnv_pci_cfg_check(struct pci_dn *pdn)
483 {
484         struct eeh_dev *edev = NULL;
485         struct pnv_phb *phb = pdn->phb->private_data;
486
487         /* EEH not enabled ? */
488         if (!(phb->flags & PNV_PHB_FLAG_EEH))
489                 return true;
490
491         /* PE reset or device removed ? */
492         edev = pdn->edev;
493         if (edev) {
494                 if (edev->pe &&
495                     (edev->pe->state & EEH_PE_CFG_BLOCKED))
496                         return false;
497
498                 if (edev->mode & EEH_DEV_REMOVED)
499                         return false;
500         }
501
502         return true;
503 }
504 #else
505 static inline pnv_pci_cfg_check(struct pci_dn *pdn)
506 {
507         return true;
508 }
509 #endif /* CONFIG_EEH */
510
511 static int pnv_pci_read_config(struct pci_bus *bus,
512                                unsigned int devfn,
513                                int where, int size, u32 *val)
514 {
515         struct pci_dn *pdn;
516         struct pnv_phb *phb;
517         int ret;
518
519         *val = 0xFFFFFFFF;
520         pdn = pci_get_pdn_by_devfn(bus, devfn);
521         if (!pdn)
522                 return PCIBIOS_DEVICE_NOT_FOUND;
523
524         if (!pnv_pci_cfg_check(pdn))
525                 return PCIBIOS_DEVICE_NOT_FOUND;
526
527         ret = pnv_pci_cfg_read(pdn, where, size, val);
528         phb = pdn->phb->private_data;
529         if (phb->flags & PNV_PHB_FLAG_EEH && pdn->edev) {
530                 if (*val == EEH_IO_ERROR_VALUE(size) &&
531                     eeh_dev_check_failure(pdn->edev))
532                         return PCIBIOS_DEVICE_NOT_FOUND;
533         } else {
534                 pnv_pci_config_check_eeh(pdn);
535         }
536
537         return ret;
538 }
539
540 static int pnv_pci_write_config(struct pci_bus *bus,
541                                 unsigned int devfn,
542                                 int where, int size, u32 val)
543 {
544         struct pci_dn *pdn;
545         struct pnv_phb *phb;
546         int ret;
547
548         pdn = pci_get_pdn_by_devfn(bus, devfn);
549         if (!pdn)
550                 return PCIBIOS_DEVICE_NOT_FOUND;
551
552         if (!pnv_pci_cfg_check(pdn))
553                 return PCIBIOS_DEVICE_NOT_FOUND;
554
555         ret = pnv_pci_cfg_write(pdn, where, size, val);
556         phb = pdn->phb->private_data;
557         if (!(phb->flags & PNV_PHB_FLAG_EEH))
558                 pnv_pci_config_check_eeh(pdn);
559
560         return ret;
561 }
562
563 struct pci_ops pnv_pci_ops = {
564         .read  = pnv_pci_read_config,
565         .write = pnv_pci_write_config,
566 };
567
568 static __be64 *pnv_tce(struct iommu_table *tbl, long idx)
569 {
570         __be64 *tmp = ((__be64 *)tbl->it_base);
571         int  level = tbl->it_indirect_levels;
572         const long shift = ilog2(tbl->it_level_size);
573         unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
574
575         while (level) {
576                 int n = (idx & mask) >> (level * shift);
577                 unsigned long tce = be64_to_cpu(tmp[n]);
578
579                 tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
580                 idx &= ~mask;
581                 mask >>= shift;
582                 --level;
583         }
584
585         return tmp + idx;
586 }
587
588 int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
589                 unsigned long uaddr, enum dma_data_direction direction,
590                 struct dma_attrs *attrs)
591 {
592         u64 proto_tce = iommu_direction_to_tce_perm(direction);
593         u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
594         long i;
595
596         if (proto_tce & TCE_PCI_WRITE)
597                 proto_tce |= TCE_PCI_READ;
598
599         for (i = 0; i < npages; i++) {
600                 unsigned long newtce = proto_tce |
601                         ((rpn + i) << tbl->it_page_shift);
602                 unsigned long idx = index - tbl->it_offset + i;
603
604                 *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce);
605         }
606
607         return 0;
608 }
609
610 #ifdef CONFIG_IOMMU_API
611 int pnv_tce_xchg(struct iommu_table *tbl, long index,
612                 unsigned long *hpa, enum dma_data_direction *direction)
613 {
614         u64 proto_tce = iommu_direction_to_tce_perm(*direction);
615         unsigned long newtce = *hpa | proto_tce, oldtce;
616         unsigned long idx = index - tbl->it_offset;
617
618         BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
619
620         if (newtce & TCE_PCI_WRITE)
621                 newtce |= TCE_PCI_READ;
622
623         oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce));
624         *hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE);
625         *direction = iommu_tce_direction(oldtce);
626
627         return 0;
628 }
629 #endif
630
631 void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
632 {
633         long i;
634
635         for (i = 0; i < npages; i++) {
636                 unsigned long idx = index - tbl->it_offset + i;
637
638                 *(pnv_tce(tbl, idx)) = cpu_to_be64(0);
639         }
640 }
641
642 unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
643 {
644         return *(pnv_tce(tbl, index - tbl->it_offset));
645 }
646
647 struct iommu_table *pnv_pci_table_alloc(int nid)
648 {
649         struct iommu_table *tbl;
650
651         tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid);
652         INIT_LIST_HEAD_RCU(&tbl->it_group_list);
653
654         return tbl;
655 }
656
657 long pnv_pci_link_table_and_group(int node, int num,
658                 struct iommu_table *tbl,
659                 struct iommu_table_group *table_group)
660 {
661         struct iommu_table_group_link *tgl = NULL;
662
663         if (WARN_ON(!tbl || !table_group))
664                 return -EINVAL;
665
666         tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
667                         node);
668         if (!tgl)
669                 return -ENOMEM;
670
671         tgl->table_group = table_group;
672         list_add_rcu(&tgl->next, &tbl->it_group_list);
673
674         table_group->tables[num] = tbl;
675
676         return 0;
677 }
678
679 static void pnv_iommu_table_group_link_free(struct rcu_head *head)
680 {
681         struct iommu_table_group_link *tgl = container_of(head,
682                         struct iommu_table_group_link, rcu);
683
684         kfree(tgl);
685 }
686
687 void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
688                 struct iommu_table_group *table_group)
689 {
690         long i;
691         bool found;
692         struct iommu_table_group_link *tgl;
693
694         if (!tbl || !table_group)
695                 return;
696
697         /* Remove link to a group from table's list of attached groups */
698         found = false;
699         list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
700                 if (tgl->table_group == table_group) {
701                         list_del_rcu(&tgl->next);
702                         call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
703                         found = true;
704                         break;
705                 }
706         }
707         if (WARN_ON(!found))
708                 return;
709
710         /* Clean a pointer to iommu_table in iommu_table_group::tables[] */
711         found = false;
712         for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
713                 if (table_group->tables[i] == tbl) {
714                         table_group->tables[i] = NULL;
715                         found = true;
716                         break;
717                 }
718         }
719         WARN_ON(!found);
720 }
721
722 void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
723                                void *tce_mem, u64 tce_size,
724                                u64 dma_offset, unsigned page_shift)
725 {
726         tbl->it_blocksize = 16;
727         tbl->it_base = (unsigned long)tce_mem;
728         tbl->it_page_shift = page_shift;
729         tbl->it_offset = dma_offset >> tbl->it_page_shift;
730         tbl->it_index = 0;
731         tbl->it_size = tce_size >> 3;
732         tbl->it_busno = 0;
733         tbl->it_type = TCE_PCI;
734 }
735
736 void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
737 {
738         struct pci_controller *hose = pci_bus_to_host(pdev->bus);
739         struct pnv_phb *phb = hose->private_data;
740 #ifdef CONFIG_PCI_IOV
741         struct pnv_ioda_pe *pe;
742         struct pci_dn *pdn;
743
744         /* Fix the VF pdn PE number */
745         if (pdev->is_virtfn) {
746                 pdn = pci_get_pdn(pdev);
747                 WARN_ON(pdn->pe_number != IODA_INVALID_PE);
748                 list_for_each_entry(pe, &phb->ioda.pe_list, list) {
749                         if (pe->rid == ((pdev->bus->number << 8) |
750                             (pdev->devfn & 0xff))) {
751                                 pdn->pe_number = pe->pe_number;
752                                 pe->pdev = pdev;
753                                 break;
754                         }
755                 }
756         }
757 #endif /* CONFIG_PCI_IOV */
758
759         if (phb && phb->dma_dev_setup)
760                 phb->dma_dev_setup(phb, pdev);
761 }
762
763 void pnv_pci_dma_bus_setup(struct pci_bus *bus)
764 {
765         struct pci_controller *hose = bus->sysdata;
766         struct pnv_phb *phb = hose->private_data;
767         struct pnv_ioda_pe *pe;
768
769         list_for_each_entry(pe, &phb->ioda.pe_list, list) {
770                 if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)))
771                         continue;
772
773                 if (!pe->pbus)
774                         continue;
775
776                 if (bus->number == ((pe->rid >> 8) & 0xFF)) {
777                         pe->pbus = bus;
778                         break;
779                 }
780         }
781 }
782
783 void pnv_pci_shutdown(void)
784 {
785         struct pci_controller *hose;
786
787         list_for_each_entry(hose, &hose_list, list_node)
788                 if (hose->controller_ops.shutdown)
789                         hose->controller_ops.shutdown(hose);
790 }
791
792 /* Fixup wrong class code in p7ioc and p8 root complex */
793 static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
794 {
795         dev->class = PCI_CLASS_BRIDGE_PCI << 8;
796 }
797 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
798
799 void __init pnv_pci_init(void)
800 {
801         struct device_node *np;
802
803         pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN);
804
805         /* If we don't have OPAL, eg. in sim, just skip PCI probe */
806         if (!firmware_has_feature(FW_FEATURE_OPAL))
807                 return;
808
809         /* Look for IODA IO-Hubs. */
810         for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
811                 pnv_pci_init_ioda_hub(np);
812         }
813
814         /* Look for ioda2 built-in PHB3's */
815         for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
816                 pnv_pci_init_ioda2_phb(np);
817
818         /* Look for NPU PHBs */
819         for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb")
820                 pnv_pci_init_npu_phb(np);
821
822         /* Setup the linkage between OF nodes and PHBs */
823         pci_devs_phb_init();
824
825         /* Configure IOMMU DMA hooks */
826         set_pci_dma_ops(&dma_iommu_ops);
827 }
828
/*
 * Hook tce_iommu_bus_notifier_init() up through
 * machine_subsys_initcall_sync() for the powernv machine.
 * NOTE(review): the callee is defined outside this file; presumably it
 * registers the TCE IOMMU bus notifier - confirm.
 */
machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init);