powerpc/powernv: Print correct PHB type names
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3a5ea82..2115ed7 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -55,6 +55,7 @@
 #define POWERNV_IOMMU_DEFAULT_LEVELS   1
 #define POWERNV_IOMMU_MAX_LEVELS       5
 
+static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU" };
 static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
 
 void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
@@ -141,16 +142,14 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
 
 static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
-       unsigned long pe;
+       long pe;
 
-       do {
-               pe = find_next_zero_bit(phb->ioda.pe_alloc,
-                                       phb->ioda.total_pe_num, 0);
-               if (pe >= phb->ioda.total_pe_num)
-                       return NULL;
-       } while(test_and_set_bit(pe, phb->ioda.pe_alloc));
+       for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
+               if (!test_and_set_bit(pe, phb->ioda.pe_alloc))
+                       return pnv_ioda_init_pe(phb, pe);
+       }
 
-       return pnv_ioda_init_pe(phb, pe);
+       return NULL;
 }
 
 static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
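For reference, here is a minimal userspace sketch of the new top-down scan; the bitmap, its size, and the helpers are illustrative stand-ins for the kernel's pe_alloc bitmap and test_and_set_bit(), not the real API. Note the signed index: "pe >= 0" can never fail for an unsigned type, so pe must be a plain long.

#include <stdio.h>

#define TOTAL_PE 256
#define BITS_PER_WORD (8 * sizeof(unsigned long))

static unsigned long pe_alloc[TOTAL_PE / BITS_PER_WORD];

static int test_and_set_pe(long pe)
{
        unsigned long mask = 1UL << (pe % BITS_PER_WORD);
        unsigned long *word = &pe_alloc[pe / BITS_PER_WORD];
        int was_set = !!(*word & mask);

        *word |= mask;
        return was_set;
}

static long alloc_pe(void)
{
        long pe;        /* signed: "pe >= 0" always holds for unsigned */

        for (pe = TOTAL_PE - 1; pe >= 0; pe--) {
                if (!test_and_set_pe(pe))
                        return pe;
        }

        return -1;      /* all PEs in use */
}

int main(void)
{
        printf("first: PE#%ld\n", alloc_pe());  /* PE#255 */
        printf("next:  PE#%ld\n", alloc_pe());  /* PE#254 */
        return 0;
}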
@@ -196,14 +195,14 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb)
        set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc);
 
        /*
-        * Strip off the segment used by the reserved PE, which is
-        * expected to be 0 or last one of PE capabicity.
+        * Exclude the segments used by the reserved PE and the root
+        * bus PE, which are the first two or the last two PEs.
         */
        r = &phb->hose->mem_resources[1];
        if (phb->ioda.reserved_pe_idx == 0)
-               r->start += phb->ioda.m64_segsize;
+               r->start += (2 * phb->ioda.m64_segsize);
        else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
-               r->end -= phb->ioda.m64_segsize;
+               r->end -= (2 * phb->ioda.m64_segsize);
        else
                pr_warn("  Cannot strip M64 segment for reserved PE#%d\n",
                        phb->ioda.reserved_pe_idx);
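To make the arithmetic concrete, here is a small sketch of the trimming, assuming a 64GB M64 window split into 256 segments of 256MB; the struct and all values are illustrative, not the kernel's.

#include <stdio.h>

struct window { unsigned long start, end; };

/*
 * Two segments (reserved PE plus root-bus PE) are stripped from
 * whichever end of the M64 window holds the reserved PE.
 */
static void strip_m64(struct window *w, unsigned long segsize,
                      unsigned int reserved_idx, unsigned int total_pe)
{
        if (reserved_idx == 0)
                w->start += 2 * segsize;        /* drop PE#0 and PE#1 */
        else if (reserved_idx == total_pe - 1)
                w->end -= 2 * segsize;          /* drop the last two PEs */
}

int main(void)
{
        /* 64GB window carved into 256 x 256MB segments */
        struct window w = { 0x1000000000UL, 0x1fffffffffUL };

        strip_m64(&w, 0x10000000UL, 255, 256);
        printf("usable M64: %#lx..%#lx\n", w.start, w.end);
        return 0;
}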
@@ -283,14 +282,14 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb)
        }
 
        /*
-        * Exclude the segment used by the reserved PE, which
-        * is expected to be 0 or last supported PE#.
+        * Exclude the segments used by the reserved PE and the root
+        * bus PE, which are the first two or the last two PEs.
         */
        r = &phb->hose->mem_resources[1];
        if (phb->ioda.reserved_pe_idx == 0)
-               r->start += phb->ioda.m64_segsize;
+               r->start += (2 * phb->ioda.m64_segsize);
        else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
-               r->end -= phb->ioda.m64_segsize;
+               r->end -= (2 * phb->ioda.m64_segsize);
        else
                WARN(1, "Wrong reserved PE#%d on PHB#%d\n",
                     phb->ioda.reserved_pe_idx, phb->hose->global_number);
@@ -714,7 +713,6 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
        return 0;
 }
 
-#ifdef CONFIG_PCI_IOV
 static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 {
        struct pci_dev *parent;
@@ -749,9 +747,11 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
                }
                rid_end = pe->rid + (count << 8);
        } else {
+#ifdef CONFIG_PCI_IOV
                if (pe->flags & PNV_IODA_PE_VF)
                        parent = pe->parent_dev;
                else
+#endif
                        parent = pe->pdev->bus->self;
                bcomp = OpalPciBusAll;
                dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
@@ -761,7 +761,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 
        /* Clear the reverse map */
        for (rid = pe->rid; rid < rid_end; rid++)
-               phb->ioda.pe_rmap[rid] = 0;
+               phb->ioda.pe_rmap[rid] = IODA_INVALID_PE;
 
        /* Release from all parents PELT-V */
        while (parent) {
@@ -789,11 +789,12 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 
        pe->pbus = NULL;
        pe->pdev = NULL;
+#ifdef CONFIG_PCI_IOV
        pe->parent_dev = NULL;
+#endif
 
        return 0;
 }
-#endif /* CONFIG_PCI_IOV */
 
 static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 {
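The switch from clearing pe_rmap[] entries to 0 over to IODA_INVALID_PE matters because PE#0 is a valid PE number. A userspace sketch with a hypothetical sentinel and map size:

#include <stdio.h>

#define INVALID_PE      -1
static int pe_rmap[256 * 256];  /* RID -> PE#, illustrative only */

/* A RID is (bus << 8) | devfn, so a bus-level PE owns 256 RIDs. */
static void unmap_bus(int bus)
{
        int rid;

        for (rid = bus << 8; rid < (bus << 8) + 256; rid++)
                pe_rmap[rid] = INVALID_PE;      /* "= 0" would alias PE#0 */
}

int main(void)
{
        pe_rmap[(3 << 8) | 0x10] = 7;           /* bus 3, devfn 0x10 -> PE#7 */
        unmap_bus(3);
        printf("rmap: %d\n", pe_rmap[(3 << 8) | 0x10]);  /* -1 */
        return 0;
}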
@@ -1024,6 +1025,16 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
                                pci_name(dev));
                        continue;
                }
+
+               /*
+                * In the partial hotplug case, the PCI device might
+                * still be associated with the PE and needn't be
+                * attached to it again.
+                */
+               if (pdn->pe_number != IODA_INVALID_PE)
+                       continue;
+
+               pe->device_count++;
                pdn->pcidev = dev;
                pdn->pe_number = pe->pe_number;
                if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
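The added guard makes the attach idempotent across partial hotplug rescans, and device_count now tracks how many devices sit in the PE; it is decremented in pnv_pci_release_device() later in this patch. A userspace sketch with hypothetical types:

#include <stdio.h>

#define INVALID_PE      -1

struct pe  { int number; int device_count; };
struct dev { int pe_number; };

static void attach(struct dev *d, struct pe *pe)
{
        if (d->pe_number != INVALID_PE)  /* already attached: partial hotplug */
                return;

        pe->device_count++;
        d->pe_number = pe->number;
}

int main(void)
{
        struct pe  pe = { .number = 4, .device_count = 0 };
        struct dev d  = { .pe_number = INVALID_PE };

        attach(&d, &pe);
        attach(&d, &pe);                        /* no-op on re-scan */
        printf("count: %d\n", pe.device_count); /* 1, not 2 */
        return 0;
}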
@@ -1042,9 +1053,26 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
        struct pci_controller *hose = pci_bus_to_host(bus);
        struct pnv_phb *phb = hose->private_data;
        struct pnv_ioda_pe *pe = NULL;
+       unsigned int pe_num;
+
+       /*
+        * In the partial hotplug case, the PE instance might still be
+        * alive. If so, reuse it instead of allocating a new one.
+        */
+       pe_num = phb->ioda.pe_rmap[bus->number << 8];
+       if (pe_num != IODA_INVALID_PE) {
+               pe = &phb->ioda.pe_array[pe_num];
+               pnv_ioda_setup_same_PE(bus, pe);
+               return NULL;
+       }
+
+       /* PE number for root bus should have been reserved */
+       if (pci_is_root_bus(bus) &&
+           phb->ioda.root_pe_idx != IODA_INVALID_PE)
+               pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx];
 
        /* Check if PE is determined by M64 */
-       if (phb->pick_m64_pe)
+       if (!pe && phb->pick_m64_pe)
                pe = phb->pick_m64_pe(bus, all);
 
        /* The PE number isn't pinned by M64 */
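The reuse path is a plain reverse-map lookup keyed by the bus's RID base. A self-contained sketch, using the same hypothetical rmap shape as above:

#include <stdio.h>

#define INVALID_PE      -1
static int pe_rmap[256 * 256];  /* RID -> PE#, illustrative only */

/* Reuse the PE that already serves a re-plugged bus, if any. */
static int find_bus_pe(int bus)
{
        return pe_rmap[bus << 8];
}

int main(void)
{
        for (int rid = 0; rid < 256 * 256; rid++)
                pe_rmap[rid] = INVALID_PE;

        pe_rmap[5 << 8] = 9;                            /* bus 5 -> PE#9 */
        printf("bus 5: PE#%d\n", find_bus_pe(5));       /* reuse PE#9 */
        printf("bus 6: PE#%d\n", find_bus_pe(6));       /* -1: allocate anew */
        return 0;
}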
@@ -1156,30 +1184,6 @@ static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
                pnv_ioda_setup_npu_PE(pdev);
 }
 
-static void pnv_ioda_setup_PEs(struct pci_bus *bus)
-{
-       struct pci_dev *dev;
-
-       pnv_ioda_setup_bus_PE(bus, false);
-
-       list_for_each_entry(dev, &bus->devices, bus_list) {
-               if (dev->subordinate) {
-                       if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
-                               pnv_ioda_setup_bus_PE(dev->subordinate, true);
-                       else
-                               pnv_ioda_setup_PEs(dev->subordinate);
-               }
-       }
-}
-
-/*
- * Configure PEs so that the downstream PCI buses and devices
- * could have their associated PE#. Unfortunately, we didn't
- * figure out the way to identify the PLX bridge yet. So we
- * simply put the PCI bus and the subordinate behind the root
- * port to PE# here. The game rule here is expected to be changed
- * as soon as we can detected PLX bridge correctly.
- */
 static void pnv_pci_ioda_setup_PEs(void)
 {
        struct pci_controller *hose, *tmp;
@@ -1187,22 +1191,11 @@ static void pnv_pci_ioda_setup_PEs(void)
 
        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                phb = hose->private_data;
-
-               /* M64 layout might affect PE allocation */
-               if (phb->reserve_m64_pe)
-                       phb->reserve_m64_pe(hose->bus, NULL, true);
-
-               /*
-                * On NPU PHB, we expect separate PEs for individual PCI
-                * functions. PCI bus dependent PEs are required for the
-                * remaining types of PHBs.
-                */
                if (phb->type == PNV_PHB_NPU) {
                        /* PE#0 is needed for error reporting */
                        pnv_ioda_reserve_pe(phb, 0);
                        pnv_ioda_setup_npu_PEs(hose->bus);
-               } else
-                       pnv_ioda_setup_PEs(hose->bus);
+               }
        }
 }
 
@@ -2657,6 +2650,9 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 {
        int64_t rc;
 
+       if (!pnv_pci_ioda_pe_dma_weight(pe))
+               return;
+
        /* TVE #1 is selected by PCI address bit 59 */
        pe->tce_bypass_base = 1ull << 59;
 
@@ -2688,49 +2684,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
                pnv_ioda_setup_bus_dma(pe, pe->pbus);
 }
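As the comment above notes, PCI address bit 59 selects TVE#1 on IODA2, so setting tce_bypass_base to 1ull << 59 gives the device an untranslated DMA window. A small illustration (the addresses are made up):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t tce_bypass_base = 1ULL << 59;
        uint64_t ram_addr = 0x2000000000ULL;    /* host address to DMA to */

        /* addresses at or above the base select TVE#1 and skip the TCEs */
        printf("translated DMA: %#llx\n", (unsigned long long)ram_addr);
        printf("bypass DMA:     %#llx\n",
               (unsigned long long)(tce_bypass_base + ram_addr));
        return 0;
}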
 
-static void pnv_ioda_setup_dma(struct pnv_phb *phb)
-{
-       struct pci_controller *hose = phb->hose;
-       struct pnv_ioda_pe *pe;
-       unsigned int weight;
-
-       /* If we have more PE# than segments available, hand out one
-        * per PE until we run out and let the rest fail. If not,
-        * then we assign at least one segment per PE, plus more based
-        * on the amount of devices under that PE
-        */
-       pr_info("PCI: Domain %04x has %d available 32-bit DMA segments\n",
-               hose->global_number, phb->ioda.dma32_count);
-
-       pnv_pci_ioda_setup_opal_tce_kill(phb);
-
-       /* Walk our PE list and configure their DMA segments */
-       list_for_each_entry(pe, &phb->ioda.pe_list, list) {
-               weight = pnv_pci_ioda_pe_dma_weight(pe);
-               if (!weight)
-                       continue;
-
-               /*
-                * For IODA2 compliant PHB3, we needn't care about the weight.
-                * The all available 32-bits DMA space will be assigned to
-                * the specific PE.
-                */
-               if (phb->type == PNV_PHB_IODA1) {
-                       pnv_pci_ioda1_setup_dma_pe(phb, pe);
-               } else if (phb->type == PNV_PHB_IODA2) {
-                       pe_info(pe, "Assign DMA32 space\n");
-                       pnv_pci_ioda2_setup_dma_pe(phb, pe);
-               } else if (phb->type == PNV_PHB_NPU) {
-                       /*
-                        * We initialise the DMA space for an NPU PHB
-                        * after setup of the PHB is complete as we
-                        * point the NPU TVT to the the same location
-                        * as the PHB3 TVT.
-                        */
-               }
-       }
-}
-
 #ifdef CONFIG_PCI_MSI
 static void pnv_ioda2_msi_eoi(struct irq_data *d)
 {
@@ -2793,7 +2746,9 @@ int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode)
        pe_info(pe, "Switching PHB to CXL\n");
 
        rc = opal_pci_set_phb_cxl_mode(phb->opal_id, mode, pe->pe_number);
-       if (rc)
+       if (rc == OPAL_UNSUPPORTED)
+               dev_err(&dev->dev, "Required cxl mode not supported by firmware - update skiboot\n");
+       else if (rc)
                dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc);
 
        return rc;
@@ -3197,41 +3152,6 @@ static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
        }
 }
 
-static void pnv_pci_ioda_setup_seg(void)
-{
-       struct pci_controller *tmp, *hose;
-       struct pnv_phb *phb;
-       struct pnv_ioda_pe *pe;
-
-       list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
-               phb = hose->private_data;
-
-               /* NPU PHB does not support IO or MMIO segmentation */
-               if (phb->type == PNV_PHB_NPU)
-                       continue;
-
-               list_for_each_entry(pe, &phb->ioda.pe_list, list) {
-                       pnv_ioda_setup_pe_seg(pe);
-               }
-       }
-}
-
-static void pnv_pci_ioda_setup_DMA(void)
-{
-       struct pci_controller *hose, *tmp;
-       struct pnv_phb *phb;
-
-       list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
-               pnv_ioda_setup_dma(hose->private_data);
-
-               /* Mark the PHB initialization done */
-               phb = hose->private_data;
-               phb->initialized = 1;
-       }
-
-       pnv_pci_ioda_setup_iommu_api();
-}
-
 static void pnv_pci_ioda_create_dbgfs(void)
 {
 #ifdef CONFIG_DEBUG_FS
@@ -3242,6 +3162,9 @@ static void pnv_pci_ioda_create_dbgfs(void)
        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                phb = hose->private_data;
 
+               /* Mark the PHB initialization as done */
+               phb->initialized = 1;
+
                sprintf(name, "PCI%04x", hose->global_number);
                phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
                if (!phb->dbgfs)
@@ -3254,9 +3177,7 @@ static void pnv_pci_ioda_create_dbgfs(void)
 static void pnv_pci_ioda_fixup(void)
 {
        pnv_pci_ioda_setup_PEs();
-       pnv_pci_ioda_setup_seg();
-       pnv_pci_ioda_setup_DMA();
-
+       pnv_pci_ioda_setup_iommu_api();
        pnv_pci_ioda_create_dbgfs();
 
 #ifdef CONFIG_EEH
@@ -3306,6 +3227,115 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
        return phb->ioda.io_segsize;
 }
 
+/*
+ * Update the root port, or the upstream port of the bridge behind
+ * the root port, with the PHB's windows in order to accommodate
+ * resource changes during PCI (slot) hotplug. The hotplug slot is
+ * connected either to the root port or to one of the downstream
+ * ports of the PCIe switch behind the root port.
+ */
+static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
+                                          unsigned long type)
+{
+       struct pci_controller *hose = pci_bus_to_host(bus);
+       struct pnv_phb *phb = hose->private_data;
+       struct pci_dev *bridge = bus->self;
+       struct resource *r, *w;
+       bool msi_region = false;
+       int i;
+
+       /* Check if we need to apply the fixup to the bridge's windows */
+       if (!pci_is_root_bus(bridge->bus) &&
+           !pci_is_root_bus(bridge->bus->self->bus))
+               return;
+
+       /* Fixup the resources */
+       for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
+               r = &bridge->resource[PCI_BRIDGE_RESOURCES + i];
+               if (!r->flags || !r->parent)
+                       continue;
+
+               w = NULL;
+               if (r->flags & type & IORESOURCE_IO)
+                       w = &hose->io_resource;
+               else if (pnv_pci_is_mem_pref_64(r->flags) &&
+                        (type & IORESOURCE_PREFETCH) &&
+                        phb->ioda.m64_segsize)
+                       w = &hose->mem_resources[1];
+               else if (r->flags & type & IORESOURCE_MEM) {
+                       w = &hose->mem_resources[0];
+                       msi_region = true;
+               }
+
+               r->start = w->start;
+               r->end = w->end;
+
+               /*
+                * The 64KB 32-bit MSI region shouldn't be included in
+                * the 32-bit bridge window; otherwise we can see strange
+                * issues, one of them being an EEH error observed on
+                * Garrison.
+                *
+                * Exclude the top 1MB region, which is the minimal
+                * alignment of the 32-bit bridge window.
+                */
+               if (msi_region) {
+                       r->end += 0x10000;
+                       r->end -= 0x100000;
+               }
+       }
+}
+
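A worked example of the msi_region adjustment, assuming the hose M32 window already ends 64KB early to carve out the 32-bit MSI region (the exact layout is platform-specific, so treat the numbers as illustrative):

#include <stdio.h>

int main(void)
{
        unsigned long raw_top = 0x40000000UL;           /* end of M32 space */
        unsigned long end = raw_top - 0x10000 - 1;      /* hose window end  */

        end += 0x10000;         /* put the 64KB MSI region back */
        end -= 0x100000;        /* then drop the top, aligned, 1MB */

        /* net effect: the bridge window stops 1MB below the raw top */
        printf("bridge window end: %#lx\n", end);       /* 0x3fefffff */
        return 0;
}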
+static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type)
+{
+       struct pci_controller *hose = pci_bus_to_host(bus);
+       struct pnv_phb *phb = hose->private_data;
+       struct pci_dev *bridge = bus->self;
+       struct pnv_ioda_pe *pe;
+       bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
+
+       /* Extend bridge's windows if necessary */
+       pnv_pci_fixup_bridge_resources(bus, type);
+
+       /* The PE for the root bus should be realized before any others */
+       if (!phb->ioda.root_pe_populated) {
+               pe = pnv_ioda_setup_bus_PE(phb->hose->bus, false);
+               if (pe) {
+                       phb->ioda.root_pe_idx = pe->pe_number;
+                       phb->ioda.root_pe_populated = true;
+               }
+       }
+
+       /* Don't assign a PE to a PCI bus that has no subordinate devices */
+       if (list_empty(&bus->devices))
+               return;
+
+       /* Reserve PEs according to used M64 resources */
+       if (phb->reserve_m64_pe)
+               phb->reserve_m64_pe(bus, NULL, all);
+
+       /*
+        * Assign a PE. We might get here because of partial hotplug;
+        * in that case, just pick up the existing PE instead of
+        * allocating resources again.
+        */
+       pe = pnv_ioda_setup_bus_PE(bus, all);
+       if (!pe)
+               return;
+
+       pnv_ioda_setup_pe_seg(pe);
+       switch (phb->type) {
+       case PNV_PHB_IODA1:
+               pnv_pci_ioda1_setup_dma_pe(phb, pe);
+               break;
+       case PNV_PHB_IODA2:
+               pnv_pci_ioda2_setup_dma_pe(phb, pe);
+               break;
+       default:
+               pr_warn("%s: No DMA for PHB#%d (type %d)\n",
+                       __func__, phb->hose->global_number, phb->type);
+       }
+}
+
 #ifdef CONFIG_PCI_IOV
 static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
                                                      int resno)
@@ -3366,6 +3396,178 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
        return true;
 }
 
+static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group,
+                                      int num)
+{
+       struct pnv_ioda_pe *pe = container_of(table_group,
+                                             struct pnv_ioda_pe, table_group);
+       struct pnv_phb *phb = pe->phb;
+       unsigned int idx;
+       long rc;
+
+       pe_info(pe, "Removing DMA window #%d\n", num);
+       for (idx = 0; idx < phb->ioda.dma32_count; idx++) {
+               if (phb->ioda.dma32_segmap[idx] != pe->pe_number)
+                       continue;
+
+               rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
+                                               idx, 0, 0ul, 0ul, 0ul);
+               if (rc != OPAL_SUCCESS) {
+                       pe_warn(pe, "Failure %ld unmapping DMA32 segment#%d\n",
+                               rc, idx);
+                       return rc;
+               }
+
+               phb->ioda.dma32_segmap[idx] = IODA_INVALID_PE;
+       }
+
+       pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
+       return OPAL_SUCCESS;
+}
+
+static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
+{
+       unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
+       struct iommu_table *tbl = pe->table_group.tables[0];
+       int64_t rc;
+
+       if (!weight)
+               return;
+
+       rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0);
+       if (rc != OPAL_SUCCESS)
+               return;
+
+       pnv_pci_ioda1_tce_invalidate(tbl, tbl->it_offset, tbl->it_size, false);
+       if (pe->table_group.group) {
+               iommu_group_put(pe->table_group.group);
+               WARN_ON(pe->table_group.group);
+       }
+
+       free_pages(tbl->it_base, get_order(tbl->it_size << 3));
+       iommu_free_table(tbl, "pnv");
+}
+
+static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
+{
+       struct iommu_table *tbl = pe->table_group.tables[0];
+       unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
+#ifdef CONFIG_IOMMU_API
+       int64_t rc;
+#endif
+
+       if (!weight)
+               return;
+
+#ifdef CONFIG_IOMMU_API
+       rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
+       if (rc)
+               pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+#endif
+
+       pnv_pci_ioda2_set_bypass(pe, false);
+       if (pe->table_group.group) {
+               iommu_group_put(pe->table_group.group);
+               WARN_ON(pe->table_group.group);
+       }
+
+       pnv_pci_ioda2_table_free_pages(tbl);
+       iommu_free_table(tbl, "pnv");
+}
+
+static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe,
+                                unsigned short win,
+                                unsigned int *map)
+{
+       struct pnv_phb *phb = pe->phb;
+       int idx;
+       int64_t rc;
+
+       for (idx = 0; idx < phb->ioda.total_pe_num; idx++) {
+               if (map[idx] != pe->pe_number)
+                       continue;
+
+               if (win == OPAL_M64_WINDOW_TYPE)
+                       rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+                                       phb->ioda.reserved_pe_idx, win,
+                                       idx / PNV_IODA1_M64_SEGS,
+                                       idx % PNV_IODA1_M64_SEGS);
+               else
+                       rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+                                       phb->ioda.reserved_pe_idx, win, 0, idx);
+
+               if (rc != OPAL_SUCCESS)
+                       pe_warn(pe, "Error %ld unmapping (%d) segment#%d\n",
+                               rc, win, idx);
+
+               map[idx] = IODA_INVALID_PE;
+       }
+}
+
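In the M64 branch above, the flat segment index is split into an M64 BAR (window) number and a segment within that BAR. A trivial sketch of that split, assuming eight segments per BAR as PNV_IODA1_M64_SEGS suggests for IODA1:

#include <stdio.h>

#define M64_SEGS_PER_BAR 8      /* assumed value of PNV_IODA1_M64_SEGS */

int main(void)
{
        int idx = 21;

        printf("segment %d -> window %d, segment %d\n",
               idx, idx / M64_SEGS_PER_BAR, idx % M64_SEGS_PER_BAR);
        return 0;
}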
+static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe)
+{
+       struct pnv_phb *phb = pe->phb;
+
+       if (phb->type == PNV_PHB_IODA1) {
+               pnv_ioda_free_pe_seg(pe, OPAL_IO_WINDOW_TYPE,
+                                    phb->ioda.io_segmap);
+               pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE,
+                                    phb->ioda.m32_segmap);
+               pnv_ioda_free_pe_seg(pe, OPAL_M64_WINDOW_TYPE,
+                                    phb->ioda.m64_segmap);
+       } else if (phb->type == PNV_PHB_IODA2) {
+               pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE,
+                                    phb->ioda.m32_segmap);
+       }
+}
+
+static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
+{
+       struct pnv_phb *phb = pe->phb;
+       struct pnv_ioda_pe *slave, *tmp;
+
+       /* Release slave PEs in compound PE */
+       if (pe->flags & PNV_IODA_PE_MASTER) {
+               list_for_each_entry_safe(slave, tmp, &pe->slaves, list)
+                       pnv_ioda_release_pe(slave);
+       }
+
+       list_del(&pe->list);
+       switch (phb->type) {
+       case PNV_PHB_IODA1:
+               pnv_pci_ioda1_release_pe_dma(pe);
+               break;
+       case PNV_PHB_IODA2:
+               pnv_pci_ioda2_release_pe_dma(pe);
+               break;
+       default:
+               WARN_ON(1);
+       }
+
+       pnv_ioda_release_pe_seg(pe);
+       pnv_ioda_deconfigure_pe(pe->phb, pe);
+       pnv_ioda_free_pe(pe);
+}
+
+static void pnv_pci_release_device(struct pci_dev *pdev)
+{
+       struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+       struct pnv_phb *phb = hose->private_data;
+       struct pci_dn *pdn = pci_get_pdn(pdev);
+       struct pnv_ioda_pe *pe;
+
+       if (pdev->is_virtfn)
+               return;
+
+       if (!pdn || pdn->pe_number == IODA_INVALID_PE)
+               return;
+
+       pe = &phb->ioda.pe_array[pdn->pe_number];
+       WARN_ON(--pe->device_count < 0);
+       if (pe->device_count == 0)
+               pnv_ioda_release_pe(pe);
+}
+
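Release is the mirror image of the attach-time counting earlier in the patch: the last device leaving a PE triggers a full PE teardown. A userspace sketch of the refcount logic, with hypothetical types:

#include <stdio.h>

struct pe { int device_count; };

static void release_device(struct pe *pe)
{
        if (--pe->device_count < 0)
                fprintf(stderr, "WARN: unbalanced release\n");
        if (pe->device_count == 0)
                printf("last device gone: release the PE\n");
}

int main(void)
{
        struct pe pe = { .device_count = 2 };

        release_device(&pe);    /* 1 left: PE stays */
        release_device(&pe);    /* 0 left: PE released */
        return 0;
}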
 static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
 {
        struct pnv_phb *phb = hose->private_data;
@@ -3382,7 +3584,9 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
        .teardown_msi_irqs      = pnv_teardown_msi_irqs,
 #endif
        .enable_device_hook     = pnv_pci_enable_device_hook,
+       .release_device         = pnv_pci_release_device,
        .window_alignment       = pnv_pci_window_alignment,
+       .setup_bridge           = pnv_pci_setup_bridge,
        .reset_secondary_bus    = pnv_pci_reset_secondary_bus,
        .dma_set_mask           = pnv_pci_ioda_dma_set_mask,
        .dma_get_required_mask  = pnv_pci_ioda_dma_get_required_mask,
@@ -3425,7 +3629,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
        void *aux;
        long rc;
 
-       pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);
+       pr_info("Initializing %s PHB (%s)\n",
+               pnv_phb_names[ioda_type], of_node_full_name(np));
 
        prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
        if (!prop64) {
@@ -3480,6 +3685,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
        if (phb->regs == NULL)
                pr_err("  Failed to map registers !\n");
 
+       /* Initialize TCE kill register */
+       pnv_pci_ioda_setup_opal_tce_kill(phb);
+
        /* Initialize more IODA stuff */
        phb->ioda.total_pe_num = 1;
        prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
@@ -3489,6 +3697,10 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
        if (prop32)
                phb->ioda.reserved_pe_idx = be32_to_cpup(prop32);
 
+       /* Invalidate RID to PE# mapping */
+       for (segno = 0; segno < ARRAY_SIZE(phb->ioda.pe_rmap); segno++)
+               phb->ioda.pe_rmap[segno] = IODA_INVALID_PE;
+
        /* Parse 64-bit MMIO range */
        pnv_ioda_parse_m64_window(phb);
 
@@ -3540,7 +3752,22 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
                        phb->ioda.dma32_segmap[segno] = IODA_INVALID_PE;
        }
        phb->ioda.pe_array = aux + pemap_off;
-       set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc);
+
+       /*
+        * Choose a PE number for the root bus, which shouldn't have
+        * M64 resources consumed by its child devices. Pick the PE
+        * number adjacent to the reserved one if possible.
+        */
+       pnv_ioda_reserve_pe(phb, phb->ioda.reserved_pe_idx);
+       if (phb->ioda.reserved_pe_idx == 0) {
+               phb->ioda.root_pe_idx = 1;
+               pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx);
+       } else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) {
+               phb->ioda.root_pe_idx = phb->ioda.reserved_pe_idx - 1;
+               pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx);
+       } else {
+               phb->ioda.root_pe_idx = IODA_INVALID_PE;
+       }
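This keeps the root-bus PE adjacent to the reserved PE, so both land at one end of the M64 window and can be stripped together (see the two-segment trimming earlier in the patch). A sketch of the selection:

#include <stdio.h>

#define INVALID_PE      -1

static int pick_root_pe(int reserved_idx, int total_pe)
{
        if (reserved_idx == 0)
                return 1;
        if (reserved_idx == total_pe - 1)
                return reserved_idx - 1;
        return INVALID_PE;      /* no guaranteed-free neighbour */
}

int main(void)
{
        printf("reserved 0   -> root PE#%d\n", pick_root_pe(0, 256));
        printf("reserved 255 -> root PE#%d\n", pick_root_pe(255, 256));
        return 0;
}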
 
        INIT_LIST_HEAD(&phb->ioda.pe_list);
        mutex_init(&phb->ioda.pe_list_mutex);