powerpc/powernv: Use PE instead of number during setup and release

[cascardo/linux.git] / arch / powerpc / platforms / powernv / pci-ioda.c
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c

index f90dc04..c762f38 100644 (file)
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -48,8 +48,9 @@
  #include "powernv.h"
  #include "pci.h"
  
-/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
-#define TCE32_TABLE_SIZE       ((0x10000000 / 0x1000) * 8)
+#define PNV_IODA1_M64_NUM      16      /* Number of M64 BARs   */
+#define PNV_IODA1_M64_SEGS     8       /* Segments per M64 BAR */
+#define PNV_IODA1_DMA32_SEGSIZE        0x10000000
  
  #define POWERNV_IOMMU_DEFAULT_LEVELS   1
  #define POWERNV_IOMMU_MAX_LEVELS       5
@@ -122,9 +123,17 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
                 (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
  }
  
+static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
+{
+       phb->ioda.pe_array[pe_no].phb = phb;
+       phb->ioda.pe_array[pe_no].pe_number = pe_no;
+
+       return &phb->ioda.pe_array[pe_no];
+}
+
  static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
  {
-       if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe)) {
+       if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe_num)) {
                 pr_warn("%s: Invalid PE %d on PHB#%x\n",
                         __func__, pe_no, phb->hose->global_number);
                 return;
@@ -134,32 +143,31 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
                 pr_debug("%s: PE %d was reserved on PHB#%x\n",
                          __func__, pe_no, phb->hose->global_number);
  
-       phb->ioda.pe_array[pe_no].phb = phb;
-       phb->ioda.pe_array[pe_no].pe_number = pe_no;
+       pnv_ioda_init_pe(phb, pe_no);
  }
  
-static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
+static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
  {
         unsigned long pe;
  
         do {
                 pe = find_next_zero_bit(phb->ioda.pe_alloc,
-                                       phb->ioda.total_pe, 0);
-               if (pe >= phb->ioda.total_pe)
-                       return IODA_INVALID_PE;
+                                       phb->ioda.total_pe_num, 0);
+               if (pe >= phb->ioda.total_pe_num)
+                       return NULL;
         } while(test_and_set_bit(pe, phb->ioda.pe_alloc));
  
-       phb->ioda.pe_array[pe].phb = phb;
-       phb->ioda.pe_array[pe].pe_number = pe;
-       return pe;
+       return pnv_ioda_init_pe(phb, pe);
  }
  
-static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
+static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
  {
-       WARN_ON(phb->ioda.pe_array[pe].pdev);
+       struct pnv_phb *phb = pe->phb;
+       unsigned int pe_num = pe->pe_number; /* memset() below zeroes it */
  
-       memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
-       clear_bit(pe, phb->ioda.pe_alloc);
+       WARN_ON(pe->pdev);
+       memset(pe, 0, sizeof(struct pnv_ioda_pe));
+       clear_bit(pe_num, phb->ioda.pe_alloc);
  }
  
  /* The default M64 BAR is shared by all PEs */
@@ -199,13 +207,13 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb)
          * expected to be 0 or last one of PE capabicity.
          */
         r = &phb->hose->mem_resources[1];
-       if (phb->ioda.reserved_pe == 0)
+       if (phb->ioda.reserved_pe_idx == 0)
                 r->start += phb->ioda.m64_segsize;
-       else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1))
+       else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
                 r->end -= phb->ioda.m64_segsize;
         else
                 pr_warn("  Cannot strip M64 segment for reserved PE#%d\n",
-                       phb->ioda.reserved_pe);
+                       phb->ioda.reserved_pe_idx);
  
         return 0;
  
@@ -219,7 +227,7 @@ fail:
         return -EIO;
  }
  
-static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev,
+static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
                                          unsigned long *pe_bitmap)
  {
         struct pci_controller *hose = pci_bus_to_host(pdev->bus);
@@ -246,22 +254,80 @@ static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev,
         }
  }
  
-static void pnv_ioda2_reserve_m64_pe(struct pci_bus *bus,
-                                    unsigned long *pe_bitmap,
-                                    bool all)
+static int pnv_ioda1_init_m64(struct pnv_phb *phb)
+{
+       struct resource *r;
+       int index;
+
+       /*
+        * There are 16 M64 BARs, each of which has 8 segments. So
+        * there are as many M64 segments as the maximum number of
+        * PEs, which is 128.
+        */
+       for (index = 0; index < PNV_IODA1_M64_NUM; index++) {
+               unsigned long base, segsz = phb->ioda.m64_segsize;
+               int64_t rc;
+
+               base = phb->ioda.m64_base +
+                      index * PNV_IODA1_M64_SEGS * segsz;
+               rc = opal_pci_set_phb_mem_window(phb->opal_id,
+                               OPAL_M64_WINDOW_TYPE, index, base, 0,
+                               PNV_IODA1_M64_SEGS * segsz);
+               if (rc != OPAL_SUCCESS) {
+                       pr_warn("  Error %lld setting M64 PHB#%d-BAR#%d\n",
+                               rc, phb->hose->global_number, index);
+                       goto fail;
+               }
+
+               rc = opal_pci_phb_mmio_enable(phb->opal_id,
+                               OPAL_M64_WINDOW_TYPE, index,
+                               OPAL_ENABLE_M64_SPLIT);
+               if (rc != OPAL_SUCCESS) {
+                       pr_warn("  Error %lld enabling M64 PHB#%d-BAR#%d\n",
+                               rc, phb->hose->global_number, index);
+                       goto fail;
+               }
+       }
+
+       /*
+        * Exclude the segment used by the reserved PE, which
+        * is expected to be 0 or last supported PE#.
+        */
+       r = &phb->hose->mem_resources[1];
+       if (phb->ioda.reserved_pe_idx == 0)
+               r->start += phb->ioda.m64_segsize;
+       else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
+               r->end -= phb->ioda.m64_segsize;
+       else
+               WARN(1, "Wrong reserved PE#%d on PHB#%d\n",
+                    phb->ioda.reserved_pe_idx, phb->hose->global_number);
+
+       return 0;
+
+fail:
+       for ( ; index >= 0; index--)
+               opal_pci_phb_mmio_enable(phb->opal_id,
+                       OPAL_M64_WINDOW_TYPE, index, OPAL_DISABLE_M64);
+
+       return -EIO;
+}
+
+static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
+                                   unsigned long *pe_bitmap,
+                                   bool all)
  {
         struct pci_dev *pdev;
  
         list_for_each_entry(pdev, &bus->devices, bus_list) {
-               pnv_ioda2_reserve_dev_m64_pe(pdev, pe_bitmap);
+               pnv_ioda_reserve_dev_m64_pe(pdev, pe_bitmap);
  
                 if (all && pdev->subordinate)
-                       pnv_ioda2_reserve_m64_pe(pdev->subordinate,
-                                                pe_bitmap, all);
+                       pnv_ioda_reserve_m64_pe(pdev->subordinate,
+                                               pe_bitmap, all);
         }
  }
  
-static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
+static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
  {
         struct pci_controller *hose = pci_bus_to_host(bus);
         struct pnv_phb *phb = hose->private_data;
@@ -271,28 +337,28 @@ static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
  
         /* Root bus shouldn't use M64 */
         if (pci_is_root_bus(bus))
-               return IODA_INVALID_PE;
+               return NULL;
  
         /* Allocate bitmap */
-       size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
+       size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long));
         pe_alloc = kzalloc(size, GFP_KERNEL);
         if (!pe_alloc) {
                 pr_warn("%s: Out of memory !\n",
                         __func__);
-               return IODA_INVALID_PE;
+               return NULL;
         }
  
         /* Figure out reserved PE numbers by the PE */
-       pnv_ioda2_reserve_m64_pe(bus, pe_alloc, all);
+       pnv_ioda_reserve_m64_pe(bus, pe_alloc, all);
  
         /*
          * the current bus might not own M64 window and that's all
          * contributed by its child buses. For the case, we needn't
          * pick M64 dependent PE#.
          */
-       if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) {
+       if (bitmap_empty(pe_alloc, phb->ioda.total_pe_num)) {
                 kfree(pe_alloc);
-               return IODA_INVALID_PE;
+               return NULL;
         }
  
         /*
@@ -301,10 +367,11 @@ static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
          */
         master_pe = NULL;
         i = -1;
-       while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
-               phb->ioda.total_pe) {
+       while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe_num, i + 1)) <
+               phb->ioda.total_pe_num) {
                 pe = &phb->ioda.pe_array[i];
  
+               phb->ioda.m64_segmap[pe->pe_number] = pe->pe_number;
                 if (!master_pe) {
                         pe->flags |= PNV_IODA_PE_MASTER;
                         INIT_LIST_HEAD(&pe->slaves);
@@ -314,10 +381,30 @@ static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
                         pe->master = master_pe;
                         list_add_tail(&pe->list, &master_pe->slaves);
                 }
+
+               /*
+                * P7IOC supports M64DT, which helps mapping M64 segment
+                * to one particular PE#. However, PHB3 has fixed mapping
+                * between M64 segment and PE#. In order to have same logic
+                * for P7IOC and PHB3, we enforce fixed mapping between M64
+                * segment and PE# on P7IOC.
+                */
+               if (phb->type == PNV_PHB_IODA1) {
+                       int64_t rc;
+
+                       rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+                                       pe->pe_number, OPAL_M64_WINDOW_TYPE,
+                                       pe->pe_number / PNV_IODA1_M64_SEGS,
+                                       pe->pe_number % PNV_IODA1_M64_SEGS);
+                       if (rc != OPAL_SUCCESS)
+                               pr_warn("%s: Error %lld mapping M64 for PHB#%d-PE#%d\n",
+                                       __func__, rc, phb->hose->global_number,
+                                       pe->pe_number);
+               }
         }
  
         kfree(pe_alloc);
-       return master_pe->pe_number;
+       return master_pe;
  }
  
  static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
@@ -328,8 +415,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
         const u32 *r;
         u64 pci_addr;
  
-       /* FIXME: Support M64 for P7IOC */
-       if (phb->type != PNV_PHB_IODA2) {
+       if (phb->type != PNV_PHB_IODA1 && phb->type != PNV_PHB_IODA2) {
                 pr_info("  Not support M64 window\n");
                 return;
         }
@@ -355,7 +441,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
         hose->mem_offset[1] = res->start - pci_addr;
  
         phb->ioda.m64_size = resource_size(res);
-       phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe;
+       phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe_num;
         phb->ioda.m64_base = pci_addr;
  
         pr_info(" MEM64 0x%016llx..0x%016llx -> 0x%016llx\n",
@@ -363,9 +449,12 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
  
         /* Use last M64 BAR to cover M64 window */
         phb->ioda.m64_bar_idx = 15;
-       phb->init_m64 = pnv_ioda2_init_m64;
-       phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe;
-       phb->pick_m64_pe = pnv_ioda2_pick_m64_pe;
+       if (phb->type == PNV_PHB_IODA1)
+               phb->init_m64 = pnv_ioda1_init_m64;
+       else
+               phb->init_m64 = pnv_ioda2_init_m64;
+       phb->reserve_m64_pe = pnv_ioda_reserve_m64_pe;
+       phb->pick_m64_pe = pnv_ioda_pick_m64_pe;
  }
  
  static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
@@ -456,7 +545,7 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
         s64 rc;
  
         /* Sanity check on PE number */
-       if (pe_no < 0 || pe_no >= phb->ioda.total_pe)
+       if (pe_no < 0 || pe_no >= phb->ioda.total_pe_num)
                 return OPAL_EEH_STOPPED_PERM_UNAVAIL;
  
         /*
@@ -808,44 +897,6 @@ out:
         return 0;
  }
  
-static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
-                                      struct pnv_ioda_pe *pe)
-{
-       struct pnv_ioda_pe *lpe;
-
-       list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
-               if (lpe->dma_weight < pe->dma_weight) {
-                       list_add_tail(&pe->dma_link, &lpe->dma_link);
-                       return;
-               }
-       }
-       list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
-}
-
-static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
-{
-       /* This is quite simplistic. The "base" weight of a device
-        * is 10. 0 means no DMA is to be accounted for it.
-        */
-
-       /* If it's a bridge, no DMA */
-       if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
-               return 0;
-
-       /* Reduce the weight of slow USB controllers */
-       if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
-           dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
-           dev->class == PCI_CLASS_SERIAL_USB_EHCI)
-               return 3;
-
-       /* Increase the weight of RAID (includes Obsidian) */
-       if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
-               return 15;
-
-       /* Default */
-       return 10;
-}
-
  #ifdef CONFIG_PCI_IOV
  static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
  {
@@ -872,9 +923,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
                 if (!res->flags || !res->parent)
                         continue;
  
-               if (!pnv_pci_is_mem_pref_64(res->flags))
-                       continue;
-
                 /*
                  * The actual IOV BAR range is determined by the start address
                  * and the actual size for num_vfs VFs BAR.  This check is to
@@ -903,9 +951,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
                 if (!res->flags || !res->parent)
                         continue;
  
-               if (!pnv_pci_is_mem_pref_64(res->flags))
-                       continue;
-
                 size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
                 res2 = *res;
                 res->start += size * offset;
@@ -925,7 +970,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
         struct pnv_phb *phb = hose->private_data;
         struct pci_dn *pdn = pci_get_pdn(dev);
         struct pnv_ioda_pe *pe;
-       int pe_num;
  
         if (!pdn) {
                 pr_err("%s: Device tree node not associated properly\n",
@@ -935,8 +979,8 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
         if (pdn->pe_number != IODA_INVALID_PE)
                 return NULL;
  
-       pe_num = pnv_ioda_alloc_pe(phb);
-       if (pe_num == IODA_INVALID_PE) {
+       pe = pnv_ioda_alloc_pe(phb);
+       if (!pe) {
                 pr_warning("%s: Not enough PE# available, disabling device\n",
                            pci_name(dev));
                 return NULL;
@@ -949,14 +993,12 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
          *
          * At some point we want to remove the PDN completely anyways
          */
-       pe = &phb->ioda.pe_array[pe_num];
         pci_dev_get(dev);
         pdn->pcidev = dev;
-       pdn->pe_number = pe_num;
+       pdn->pe_number = pe->pe_number;
         pe->flags = PNV_IODA_PE_DEV;
         pe->pdev = dev;
         pe->pbus = NULL;
-       pe->tce32_seg = -1;
         pe->mve_number = -1;
         pe->rid = dev->bus->number << 8 | pdn->devfn;
  
@@ -964,24 +1006,13 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
  
         if (pnv_ioda_configure_pe(phb, pe)) {
                 /* XXX What do we do here ? */
-               if (pe_num)
-                       pnv_ioda_free_pe(phb, pe_num);
+               pnv_ioda_free_pe(pe);
                 pdn->pe_number = IODA_INVALID_PE;
                 pe->pdev = NULL;
                 pci_dev_put(dev);
                 return NULL;
         }
  
-       /* Assign a DMA weight to the device */
-       pe->dma_weight = pnv_ioda_dma_weight(dev);
-       if (pe->dma_weight != 0) {
-               phb->ioda.dma_weight += pe->dma_weight;
-               phb->ioda.dma_pe_count++;
-       }
-
-       /* Link the PE */
-       pnv_ioda_link_pe_by_weight(phb, pe);
-
         return pe;
  }
  
@@ -999,7 +1030,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
                 }
                 pdn->pcidev = dev;
                 pdn->pe_number = pe->pe_number;
-               pe->dma_weight += pnv_ioda_dma_weight(dev);
                 if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
                         pnv_ioda_setup_same_PE(dev->subordinate, pe);
         }
@@ -1011,49 +1041,44 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
   * subordinate PCI devices and buses. The second type of PE is normally
   * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports.
   */
-static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
+static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
  {
         struct pci_controller *hose = pci_bus_to_host(bus);
         struct pnv_phb *phb = hose->private_data;
-       struct pnv_ioda_pe *pe;
-       int pe_num = IODA_INVALID_PE;
+       struct pnv_ioda_pe *pe = NULL;
  
         /* Check if PE is determined by M64 */
         if (phb->pick_m64_pe)
-               pe_num = phb->pick_m64_pe(bus, all);
+               pe = phb->pick_m64_pe(bus, all);
  
         /* The PE number isn't pinned by M64 */
-       if (pe_num == IODA_INVALID_PE)
-               pe_num = pnv_ioda_alloc_pe(phb);
+       if (!pe)
+               pe = pnv_ioda_alloc_pe(phb);
  
-       if (pe_num == IODA_INVALID_PE) {
+       if (!pe) {
                 pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
                         __func__, pci_domain_nr(bus), bus->number);
-               return;
+               return NULL;
         }
  
-       pe = &phb->ioda.pe_array[pe_num];
         pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
         pe->pbus = bus;
         pe->pdev = NULL;
-       pe->tce32_seg = -1;
         pe->mve_number = -1;
         pe->rid = bus->busn_res.start << 8;
-       pe->dma_weight = 0;
  
         if (all)
                 pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
-                       bus->busn_res.start, bus->busn_res.end, pe_num);
+                       bus->busn_res.start, bus->busn_res.end, pe->pe_number);
         else
                 pe_info(pe, "Secondary bus %d associated with PE#%d\n",
-                       bus->busn_res.start, pe_num);
+                       bus->busn_res.start, pe->pe_number);
  
         if (pnv_ioda_configure_pe(phb, pe)) {
                 /* XXX What do we do here ? */
-               if (pe_num)
-                       pnv_ioda_free_pe(phb, pe_num);
+               pnv_ioda_free_pe(pe);
                 pe->pbus = NULL;
-               return;
+               return NULL;
         }
  
         /* Associate it with all child devices */
@@ -1062,16 +1087,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
         /* Put PE to the list */
         list_add_tail(&pe->list, &phb->ioda.pe_list);
  
-       /* Account for one DMA PE if at least one DMA capable device exist
-        * below the bridge
-        */
-       if (pe->dma_weight != 0) {
-               phb->ioda.dma_weight += pe->dma_weight;
-               phb->ioda.dma_pe_count++;
-       }
-
-       /* Link the PE */
-       pnv_ioda_link_pe_by_weight(phb, pe);
+       return pe;
  }
  
  static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
@@ -1094,7 +1110,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
          * same GPU get assigned the same PE.
          */
         gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
-       for (pe_num = 0; pe_num < phb->ioda.total_pe; pe_num++) {
+       for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
                 pe = &phb->ioda.pe_array[pe_num];
                 if (!pe->pdev)
                         continue;
@@ -1112,7 +1128,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
                         rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
                         npu_pdn->pcidev = npu_pdev;
                         npu_pdn->pe_number = pe_num;
-                       pe->dma_weight += pnv_ioda_dma_weight(npu_pdev);
                         phb->ioda.pe_rmap[rid] = pe->pe_number;
  
                         /* Map the PE to this link */
@@ -1196,29 +1211,36 @@ static void pnv_pci_ioda_setup_PEs(void)
  }
  
  #ifdef CONFIG_PCI_IOV
-static int pnv_pci_vf_release_m64(struct pci_dev *pdev)
+static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
  {
         struct pci_bus        *bus;
         struct pci_controller *hose;
         struct pnv_phb        *phb;
         struct pci_dn         *pdn;
         int                    i, j;
+       int                    m64_bars;
  
         bus = pdev->bus;
         hose = pci_bus_to_host(bus);
         phb = hose->private_data;
         pdn = pci_get_pdn(pdev);
  
+       if (pdn->m64_single_mode)
+               m64_bars = num_vfs;
+       else
+               m64_bars = 1;
+
         for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
-               for (j = 0; j < M64_PER_IOV; j++) {
-                       if (pdn->m64_wins[i][j] == IODA_INVALID_M64)
+               for (j = 0; j < m64_bars; j++) {
+                       if (pdn->m64_map[j][i] == IODA_INVALID_M64)
                                 continue;
                         opal_pci_phb_mmio_enable(phb->opal_id,
-                               OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 0);
-                       clear_bit(pdn->m64_wins[i][j], &phb->ioda.m64_bar_alloc);
-                       pdn->m64_wins[i][j] = IODA_INVALID_M64;
+                               OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0);
+                       clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc);
+                       pdn->m64_map[j][i] = IODA_INVALID_M64;
                 }
  
+       kfree(pdn->m64_map);
         return 0;
  }
  
@@ -1235,8 +1257,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
         int                    total_vfs;
         resource_size_t        size, start;
         int                    pe_num;
-       int                    vf_groups;
-       int                    vf_per_group;
+       int                    m64_bars;
  
         bus = pdev->bus;
         hose = pci_bus_to_host(bus);
@@ -1244,29 +1265,26 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
         pdn = pci_get_pdn(pdev);
         total_vfs = pci_sriov_get_totalvfs(pdev);
  
-       /* Initialize the m64_wins to IODA_INVALID_M64 */
-       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
-               for (j = 0; j < M64_PER_IOV; j++)
-                       pdn->m64_wins[i][j] = IODA_INVALID_M64;
+       if (pdn->m64_single_mode)
+               m64_bars = num_vfs;
+       else
+               m64_bars = 1;
+
+       pdn->m64_map = kmalloc(sizeof(*pdn->m64_map) * m64_bars, GFP_KERNEL);
+       if (!pdn->m64_map)
+               return -ENOMEM;
+       /* Initialize the m64_map to IODA_INVALID_M64 */
+       for (i = 0; i < m64_bars ; i++)
+               for (j = 0; j < PCI_SRIOV_NUM_BARS; j++)
+                       pdn->m64_map[i][j] = IODA_INVALID_M64;
  
-       if (pdn->m64_per_iov == M64_PER_IOV) {
-               vf_groups = (num_vfs <= M64_PER_IOV) ? num_vfs: M64_PER_IOV;
-               vf_per_group = (num_vfs <= M64_PER_IOV)? 1:
-                       roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
-       } else {
-               vf_groups = 1;
-               vf_per_group = 1;
-       }
  
         for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
                 res = &pdev->resource[i + PCI_IOV_RESOURCES];
                 if (!res->flags || !res->parent)
                         continue;
  
-               if (!pnv_pci_is_mem_pref_64(res->flags))
-                       continue;
-
-               for (j = 0; j < vf_groups; j++) {
+               for (j = 0; j < m64_bars; j++) {
                         do {
                                 win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
                                                 phb->ioda.m64_bar_idx + 1, 0);
@@ -1275,12 +1293,11 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
                                         goto m64_failed;
                         } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
  
-                       pdn->m64_wins[i][j] = win;
+                       pdn->m64_map[j][i] = win;
  
-                       if (pdn->m64_per_iov == M64_PER_IOV) {
+                       if (pdn->m64_single_mode) {
                                 size = pci_iov_resource_size(pdev,
                                                         PCI_IOV_RESOURCES + i);
-                               size = size * vf_per_group;
                                 start = res->start + size * j;
                         } else {
                                 size = resource_size(res);
@@ -1288,16 +1305,16 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
                         }
  
                         /* Map the M64 here */
-                       if (pdn->m64_per_iov == M64_PER_IOV) {
-                               pe_num = pdn->offset + j;
+                       if (pdn->m64_single_mode) {
+                               pe_num = pdn->pe_num_map[j];
                                 rc = opal_pci_map_pe_mmio_window(phb->opal_id,
                                                 pe_num, OPAL_M64_WINDOW_TYPE,
-                                               pdn->m64_wins[i][j], 0);
+                                               pdn->m64_map[j][i], 0);
                         }
  
                         rc = opal_pci_set_phb_mem_window(phb->opal_id,
                                                  OPAL_M64_WINDOW_TYPE,
-                                                pdn->m64_wins[i][j],
+                                                pdn->m64_map[j][i],
                                                  start,
                                                  0, /* unused */
                                                  size);
@@ -1309,12 +1326,12 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
                                 goto m64_failed;
                         }
  
-                       if (pdn->m64_per_iov == M64_PER_IOV)
+                       if (pdn->m64_single_mode)
                                 rc = opal_pci_phb_mmio_enable(phb->opal_id,
-                                    OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 2);
+                                    OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2);
                         else
                                 rc = opal_pci_phb_mmio_enable(phb->opal_id,
-                                    OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 1);
+                                    OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1);
  
                         if (rc != OPAL_SUCCESS) {
                                 dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
@@ -1326,7 +1343,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
         return 0;
  
  m64_failed:
-       pnv_pci_vf_release_m64(pdev);
+       pnv_pci_vf_release_m64(pdev, num_vfs);
         return -EBUSY;
  }
  
@@ -1353,15 +1370,13 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
         iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
  }
  
-static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
+static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
  {
         struct pci_bus        *bus;
         struct pci_controller *hose;
         struct pnv_phb        *phb;
         struct pnv_ioda_pe    *pe, *pe_n;
         struct pci_dn         *pdn;
-       u16                    vf_index;
-       int64_t                rc;
  
         bus = pdev->bus;
         hose = pci_bus_to_host(bus);
@@ -1371,35 +1386,6 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
         if (!pdev->is_physfn)
                 return;
  
-       if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) {
-               int   vf_group;
-               int   vf_per_group;
-               int   vf_index1;
-
-               vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
-
-               for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++)
-                       for (vf_index = vf_group * vf_per_group;
-                               vf_index < (vf_group + 1) * vf_per_group &&
-                               vf_index < num_vfs;
-                               vf_index++)
-                               for (vf_index1 = vf_group * vf_per_group;
-                                       vf_index1 < (vf_group + 1) * vf_per_group &&
-                                       vf_index1 < num_vfs;
-                                       vf_index1++){
-
-                                       rc = opal_pci_set_peltv(phb->opal_id,
-                                               pdn->offset + vf_index,
-                                               pdn->offset + vf_index1,
-                                               OPAL_REMOVE_PE_FROM_DOMAIN);
-
-                                       if (rc)
-                                           dev_warn(&pdev->dev, "%s: Failed to unlink same group PE#%d(%lld)\n",
-                                               __func__,
-                                               pdn->offset + vf_index1, rc);
-                               }
-       }
-
         list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
                 if (pe->parent_dev != pdev)
                         continue;
@@ -1413,7 +1399,7 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
  
                 pnv_ioda_deconfigure_pe(phb, pe);
  
-               pnv_ioda_free_pe(phb, pe->pe_number);
+               pnv_ioda_free_pe(pe);
         }
  }
  
@@ -1422,9 +1408,10 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
         struct pci_bus        *bus;
         struct pci_controller *hose;
         struct pnv_phb        *phb;
+       struct pnv_ioda_pe    *pe;
         struct pci_dn         *pdn;
         struct pci_sriov      *iov;
-       u16 num_vfs;
+       u16                    num_vfs, i;
  
         bus = pdev->bus;
         hose = pci_bus_to_host(bus);
@@ -1434,18 +1421,28 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
         num_vfs = pdn->num_vfs;
  
         /* Release VF PEs */
-       pnv_ioda_release_vf_PE(pdev, num_vfs);
+       pnv_ioda_release_vf_PE(pdev);
  
         if (phb->type == PNV_PHB_IODA2) {
-               if (pdn->m64_per_iov == 1)
-                       pnv_pci_vf_resource_shift(pdev, -pdn->offset);
+               if (!pdn->m64_single_mode)
+                       pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map);
  
                 /* Release M64 windows */
-               pnv_pci_vf_release_m64(pdev);
+               pnv_pci_vf_release_m64(pdev, num_vfs);
  
                 /* Release PE numbers */
-               bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
-               pdn->offset = 0;
+               if (pdn->m64_single_mode) {
+                       for (i = 0; i < num_vfs; i++) {
+                               if (pdn->pe_num_map[i] == IODA_INVALID_PE)
+                                       continue;
+
+                               pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
+                               pnv_ioda_free_pe(pe);
+                       }
+               } else
+                       bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
+               /* Releasing pe_num_map */
+               kfree(pdn->pe_num_map);
         }
  }
  
@@ -1460,7 +1457,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
         int                    pe_num;
         u16                    vf_index;
         struct pci_dn         *pdn;
-       int64_t                rc;
  
         bus = pdev->bus;
         hose = pci_bus_to_host(bus);
@@ -1472,7 +1468,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
  
         /* Reserve PE for each VF */
         for (vf_index = 0; vf_index < num_vfs; vf_index++) {
-               pe_num = pdn->offset + vf_index;
+               if (pdn->m64_single_mode)
+                       pe_num = pdn->pe_num_map[vf_index];
+               else
+                       pe_num = *pdn->pe_num_map + vf_index;
  
                 pe = &phb->ioda.pe_array[pe_num];
                 pe->pe_number = pe_num;
@@ -1480,7 +1479,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
                 pe->flags = PNV_IODA_PE_VF;
                 pe->pbus = NULL;
                 pe->parent_dev = pdev;
-               pe->tce32_seg = -1;
                 pe->mve_number = -1;
                 pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
                            pci_iov_virtfn_devfn(pdev, vf_index);
@@ -1492,8 +1490,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
  
                 if (pnv_ioda_configure_pe(phb, pe)) {
                         /* XXX What do we do here ? */
-                       if (pe_num)
-                               pnv_ioda_free_pe(phb, pe_num);
+                       pnv_ioda_free_pe(pe);
                         pe->pdev = NULL;
                         continue;
                 }
@@ -1505,37 +1502,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
  
                 pnv_pci_ioda2_setup_dma_pe(phb, pe);
         }
-
-       if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) {
-               int   vf_group;
-               int   vf_per_group;
-               int   vf_index1;
-
-               vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
-
-               for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) {
-                       for (vf_index = vf_group * vf_per_group;
-                            vf_index < (vf_group + 1) * vf_per_group &&
-                            vf_index < num_vfs;
-                            vf_index++) {
-                               for (vf_index1 = vf_group * vf_per_group;
-                                    vf_index1 < (vf_group + 1) * vf_per_group &&
-                                    vf_index1 < num_vfs;
-                                    vf_index1++) {
-
-                                       rc = opal_pci_set_peltv(phb->opal_id,
-                                               pdn->offset + vf_index,
-                                               pdn->offset + vf_index1,
-                                               OPAL_ADD_PE_TO_DOMAIN);
-
-                                       if (rc)
-                                           dev_warn(&pdev->dev, "%s: Failed to link same group PE#%d(%lld)\n",
-                                               __func__,
-                                               pdn->offset + vf_index1, rc);
-                               }
-                       }
-               }
-       }
  }
  
  int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
@@ -1543,8 +1509,10 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
         struct pci_bus        *bus;
         struct pci_controller *hose;
         struct pnv_phb        *phb;
+       struct pnv_ioda_pe    *pe;
         struct pci_dn         *pdn;
         int                    ret;
+       u16                    i;
  
         bus = pdev->bus;
         hose = pci_bus_to_host(bus);
@@ -1552,20 +1520,61 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
         pdn = pci_get_pdn(pdev);
  
         if (phb->type == PNV_PHB_IODA2) {
+               if (!pdn->vfs_expanded) {
+                       dev_info(&pdev->dev, "don't support this SRIOV device"
+                               " with non 64bit-prefetchable IOV BAR\n");
+                       return -ENOSPC;
+               }
+
+               /*
+                * When M64 BARs function in Single PE mode, the number of VFs
+                * that can be enabled must be less than the number of M64 BARs.
+                */
+               if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) {
+                       dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n");
+                       return -EBUSY;
+               }
+
+               /* Allocating pe_num_map */
+               if (pdn->m64_single_mode)
+                       pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map) * num_vfs,
+                                       GFP_KERNEL);
+               else
+                       pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL);
+
+               if (!pdn->pe_num_map)
+                       return -ENOMEM;
+
+               if (pdn->m64_single_mode)
+                       for (i = 0; i < num_vfs; i++)
+                               pdn->pe_num_map[i] = IODA_INVALID_PE;
+
                 /* Calculate available PE for required VFs */
-               mutex_lock(&phb->ioda.pe_alloc_mutex);
-               pdn->offset = bitmap_find_next_zero_area(
-                       phb->ioda.pe_alloc, phb->ioda.total_pe,
-                       0, num_vfs, 0);
-               if (pdn->offset >= phb->ioda.total_pe) {
+               if (pdn->m64_single_mode) {
+                       for (i = 0; i < num_vfs; i++) {
+                               pe = pnv_ioda_alloc_pe(phb);
+                               if (!pe) {
+                                       ret = -EBUSY;
+                                       goto m64_failed;
+                               }
+
+                               pdn->pe_num_map[i] = pe->pe_number;
+                       }
+               } else {
+                       mutex_lock(&phb->ioda.pe_alloc_mutex);
+                       *pdn->pe_num_map = bitmap_find_next_zero_area(
+                               phb->ioda.pe_alloc, phb->ioda.total_pe_num,
+                               0, num_vfs, 0);
+                       if (*pdn->pe_num_map >= phb->ioda.total_pe_num) {
+                               mutex_unlock(&phb->ioda.pe_alloc_mutex);
+                               dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
+                               kfree(pdn->pe_num_map);
+                               return -EBUSY;
+                       }
+                       bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
                         mutex_unlock(&phb->ioda.pe_alloc_mutex);
-                       dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
-                       pdn->offset = 0;
-                       return -EBUSY;
                 }
-               bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs);
                 pdn->num_vfs = num_vfs;
-               mutex_unlock(&phb->ioda.pe_alloc_mutex);
  
                 /* Assign M64 window accordingly */
                 ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
@@ -1579,8 +1588,8 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
                  * the IOV BAR according to the PE# allocated to the VFs.
                  * Otherwise, the PE# for the VF will conflict with others.
                  */
-               if (pdn->m64_per_iov == 1) {
-                       ret = pnv_pci_vf_resource_shift(pdev, pdn->offset);
+               if (!pdn->m64_single_mode) {
+                       ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map);
                         if (ret)
                                 goto m64_failed;
                 }
@@ -1592,8 +1601,19 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
         return 0;
  
  m64_failed:
-       bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
-       pdn->offset = 0;
+       if (pdn->m64_single_mode) {
+               for (i = 0; i < num_vfs; i++) {
+                       if (pdn->pe_num_map[i] == IODA_INVALID_PE)
+                               continue;
+
+                       pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
+                       pnv_ioda_free_pe(pe);
+               }
+       } else
+               bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
+
+       /* Releasing pe_num_map */
+       kfree(pdn->pe_num_map);
  
         return ret;
  }
@@ -1612,8 +1632,7 @@ int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
         /* Allocate PCI data */
         add_dev_pci_data(pdev);
  
-       pnv_pci_sriov_enable(pdev, num_vfs);
-       return 0;
+       return pnv_pci_sriov_enable(pdev, num_vfs);
  }
  #endif /* CONFIG_PCI_IOV */
  
@@ -1955,56 +1974,140 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = {
         .free = pnv_ioda2_table_free,
  };
  
-static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
-                                     struct pnv_ioda_pe *pe, unsigned int base,
-                                     unsigned int segs)
+static int pnv_pci_ioda_dev_dma_weight(struct pci_dev *dev, void *data)
+{
+       unsigned int *weight = (unsigned int *)data;
+
+       /* This is quite simplistic. The "base" weight of a device
+        * is 10. 0 means no DMA is to be accounted for it.
+        */
+       if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
+               return 0;
+
+       if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
+           dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
+           dev->class == PCI_CLASS_SERIAL_USB_EHCI)
+               *weight += 3;
+       else if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
+               *weight += 15;
+       else
+               *weight += 10;
+
+       return 0;
+}
+
+static unsigned int pnv_pci_ioda_pe_dma_weight(struct pnv_ioda_pe *pe)
+{
+       unsigned int weight = 0;
+
+       /* SRIOV VF has same DMA32 weight as its PF */
+#ifdef CONFIG_PCI_IOV
+       if ((pe->flags & PNV_IODA_PE_VF) && pe->parent_dev) {
+               pnv_pci_ioda_dev_dma_weight(pe->parent_dev, &weight);
+               return weight;
+       }
+#endif
+
+       if ((pe->flags & PNV_IODA_PE_DEV) && pe->pdev) {
+               pnv_pci_ioda_dev_dma_weight(pe->pdev, &weight);
+       } else if ((pe->flags & PNV_IODA_PE_BUS) && pe->pbus) {
+               struct pci_dev *pdev;
+
+               list_for_each_entry(pdev, &pe->pbus->devices, bus_list)
+                       pnv_pci_ioda_dev_dma_weight(pdev, &weight);
+       } else if ((pe->flags & PNV_IODA_PE_BUS_ALL) && pe->pbus) {
+               pci_walk_bus(pe->pbus, pnv_pci_ioda_dev_dma_weight, &weight);
+       }
+
+       return weight;
+}
+
+static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
+                                      struct pnv_ioda_pe *pe)
  {
  
         struct page *tce_mem = NULL;
         struct iommu_table *tbl;
-       unsigned int i;
+       unsigned int weight, total_weight = 0;
+       unsigned int tce32_segsz, base, segs, avail, i;
         int64_t rc;
         void *addr;
  
         /* XXX FIXME: Handle 64-bit only DMA devices */
         /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
         /* XXX FIXME: Allocate multi-level tables on PHB3 */
+       weight = pnv_pci_ioda_pe_dma_weight(pe);
+       if (!weight)
+               return;
+
+       pci_walk_bus(phb->hose->bus, pnv_pci_ioda_dev_dma_weight,
+                    &total_weight);
+       segs = (weight * phb->ioda.dma32_count) / total_weight;
+       if (!segs)
+               segs = 1;
  
-       /* We shouldn't already have a 32-bit DMA associated */
-       if (WARN_ON(pe->tce32_seg >= 0))
+       /*
+        * Allocate contiguous DMA32 segments. We begin with the expected
+        * number of segments. On each failed attempt, the number of DMA32
+        * segments requested is decreased by one, until a free contiguous
+        * range is found or no segments are left to try.
+        */
+       do {
+               for (base = 0; base <= phb->ioda.dma32_count - segs; base++) {
+                       for (avail = 0, i = base; i < base + segs; i++) {
+                               if (phb->ioda.dma32_segmap[i] ==
+                                   IODA_INVALID_PE)
+                                       avail++;
+                       }
+
+                       if (avail == segs)
+                               goto found;
+               }
+       } while (--segs);
+
+       if (!segs) {
+               pe_warn(pe, "No available DMA32 segments\n");
                 return;
+       }
  
+found:
         tbl = pnv_pci_table_alloc(phb->hose->node);
         iommu_register_group(&pe->table_group, phb->hose->global_number,
                         pe->pe_number);
         pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
  
         /* Grab a 32-bit TCE table */
-       pe->tce32_seg = base;
+       pe_info(pe, "DMA weight %d (%d), assigned (%d) %d DMA32 segments\n",
+               weight, total_weight, base, segs);
         pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
-               (base << 28), ((base + segs) << 28) - 1);
+               base * PNV_IODA1_DMA32_SEGSIZE,
+               (base + segs) * PNV_IODA1_DMA32_SEGSIZE - 1);
  
         /* XXX Currently, we allocate one big contiguous table for the
          * TCEs. We only really need one chunk per 256M of TCE space
          * (ie per segment) but that's an optimization for later, it
          * requires some added smarts with our get/put_tce implementation
+        *
+        * Each TCE page is 4KB in size and each TCE entry occupies 8
+        * bytes
          */
+       tce32_segsz = PNV_IODA1_DMA32_SEGSIZE >> (IOMMU_PAGE_SHIFT_4K - 3);
         tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
-                                  get_order(TCE32_TABLE_SIZE * segs));
+                                  get_order(tce32_segsz * segs));
         if (!tce_mem) {
                 pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
                 goto fail;
         }
         addr = page_address(tce_mem);
-       memset(addr, 0, TCE32_TABLE_SIZE * segs);
+       memset(addr, 0, tce32_segsz * segs);
  
         /* Configure HW */
         for (i = 0; i < segs; i++) {
                 rc = opal_pci_map_pe_dma_window(phb->opal_id,
                                               pe->pe_number,
                                               base + i, 1,
-                                             __pa(addr) + TCE32_TABLE_SIZE * i,
-                                             TCE32_TABLE_SIZE, 0x1000);
+                                             __pa(addr) + tce32_segsz * i,
+                                             tce32_segsz, IOMMU_PAGE_SIZE_4K);
                 if (rc) {
                         pe_err(pe, " Failed to configure 32-bit TCE table,"
                                " err %ld\n", rc);
@@ -2012,9 +2115,14 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
                 }
         }
  
+       /* Setup DMA32 segment mapping */
+       for (i = base; i < base + segs; i++)
+               phb->ioda.dma32_segmap[i] = pe->pe_number;
+
         /* Setup linux iommu table */
-       pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
-                                 base << 28, IOMMU_PAGE_SHIFT_4K);
+       pnv_pci_setup_iommu_table(tbl, addr, tce32_segsz * segs,
+                                 base * PNV_IODA1_DMA32_SEGSIZE,
+                                 IOMMU_PAGE_SHIFT_4K);
  
         /* OPAL variant of P7IOC SW invalidated TCEs */
         if (phb->ioda.tce_inval_reg)
@@ -2041,10 +2149,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
         return;
   fail:
         /* XXX Failure: Try to fallback to 64-bit only ? */
-       if (pe->tce32_seg >= 0)
-               pe->tce32_seg = -1;
         if (tce_mem)
-               __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
+               __free_pages(tce_mem, get_order(tce32_segsz * segs));
         if (tbl) {
                 pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
                 iommu_free_table(tbl, "pnv");
@@ -2453,10 +2559,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
  {
         int64_t rc;
  
-       /* We shouldn't already have a 32-bit DMA associated */
-       if (WARN_ON(pe->tce32_seg >= 0))
-               return;
-
         /* TVE #1 is selected by PCI address bit 59 */
         pe->tce_bypass_base = 1ull << 59;
  
@@ -2464,7 +2566,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
                         pe->pe_number);
  
         /* The PE will reserve all possible 32-bits space */
-       pe->tce32_seg = 0;
         pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
                 phb->ioda.m32_pci_base);
  
@@ -2480,11 +2581,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
  #endif
  
         rc = pnv_pci_ioda2_setup_default_config(pe);
-       if (rc) {
-               if (pe->tce32_seg >= 0)
-                       pe->tce32_seg = -1;
+       if (rc)
                 return;
-       }
  
         if (pe->flags & PNV_IODA_PE_DEV)
                 iommu_add_device(&pe->pdev->dev);
@@ -2495,47 +2593,24 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
  static void pnv_ioda_setup_dma(struct pnv_phb *phb)
  {
         struct pci_controller *hose = phb->hose;
-       unsigned int residual, remaining, segs, tw, base;
         struct pnv_ioda_pe *pe;
+       unsigned int weight;
  
         /* If we have more PE# than segments available, hand out one
          * per PE until we run out and let the rest fail. If not,
          * then we assign at least one segment per PE, plus more based
          * on the amount of devices under that PE
          */
-       if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
-               residual = 0;
-       else
-               residual = phb->ioda.tce32_count -
-                       phb->ioda.dma_pe_count;
-
-       pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
-               hose->global_number, phb->ioda.tce32_count);
-       pr_info("PCI: %d PE# for a total weight of %d\n",
-               phb->ioda.dma_pe_count, phb->ioda.dma_weight);
+       pr_info("PCI: Domain %04x has %d available 32-bit DMA segments\n",
+               hose->global_number, phb->ioda.dma32_count);
  
         pnv_pci_ioda_setup_opal_tce_kill(phb);
  
-       /* Walk our PE list and configure their DMA segments, hand them
-        * out one base segment plus any residual segments based on
-        * weight
-        */
-       remaining = phb->ioda.tce32_count;
-       tw = phb->ioda.dma_weight;
-       base = 0;
-       list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
-               if (!pe->dma_weight)
+       /* Walk our PE list and configure their DMA segments */
+       list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+               weight = pnv_pci_ioda_pe_dma_weight(pe);
+               if (!weight)
                         continue;
-               if (!remaining) {
-                       pe_warn(pe, "No DMA32 resources available\n");
-                       continue;
-               }
-               segs = 1;
-               if (residual) {
-                       segs += ((pe->dma_weight * residual)  + (tw / 2)) / tw;
-                       if (segs > remaining)
-                               segs = remaining;
-               }
  
                 /*
                  * For IODA2 compliant PHB3, we needn't care about the weight.
@@ -2543,12 +2618,9 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
                  * the specific PE.
                  */
                 if (phb->type == PNV_PHB_IODA1) {
-                       pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
-                               pe->dma_weight, segs);
-                       pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
+                       pnv_pci_ioda1_setup_dma_pe(phb, pe);
                 } else if (phb->type == PNV_PHB_IODA2) {
                         pe_info(pe, "Assign DMA32 space\n");
-                       segs = 0;
                         pnv_pci_ioda2_setup_dma_pe(phb, pe);
                 } else if (phb->type == PNV_PHB_NPU) {
                         /*
@@ -2558,9 +2630,6 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
                          * as the PHB3 TVT.
                          */
                 }
-
-               remaining -= segs;
-               base += segs;
         }
  }
  
@@ -2851,45 +2920,58 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
  #ifdef CONFIG_PCI_IOV
  static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
  {
-       struct pci_controller *hose;
-       struct pnv_phb *phb;
+       struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+       struct pnv_phb *phb = hose->private_data;
+       const resource_size_t gate = phb->ioda.m64_segsize >> 2;
         struct resource *res;
         int i;
-       resource_size_t size;
+       resource_size_t size, total_vf_bar_sz;
         struct pci_dn *pdn;
         int mul, total_vfs;
  
         if (!pdev->is_physfn || pdev->is_added)
                 return;
  
-       hose = pci_bus_to_host(pdev->bus);
-       phb = hose->private_data;
-
         pdn = pci_get_pdn(pdev);
         pdn->vfs_expanded = 0;
+       pdn->m64_single_mode = false;
  
         total_vfs = pci_sriov_get_totalvfs(pdev);
-       pdn->m64_per_iov = 1;
-       mul = phb->ioda.total_pe;
+       mul = phb->ioda.total_pe_num;
+       total_vf_bar_sz = 0;
  
         for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
                 res = &pdev->resource[i + PCI_IOV_RESOURCES];
                 if (!res->flags || res->parent)
                         continue;
                 if (!pnv_pci_is_mem_pref_64(res->flags)) {
-                       dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
+                       dev_warn(&pdev->dev, "Don't support SR-IOV with"
+                                       " non M64 VF BAR%d: %pR. \n",
                                  i, res);
-                       continue;
+                       goto truncate_iov;
                 }
  
-               size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+               total_vf_bar_sz += pci_iov_resource_size(pdev,
+                               i + PCI_IOV_RESOURCES);
  
-               /* bigger than 64M */
-               if (size > (1 << 26)) {
-                       dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n",
-                                i, res);
-                       pdn->m64_per_iov = M64_PER_IOV;
+               /*
+                * If the total VF BAR size exceeds a quarter of the M64 segment
+                * size, round the number of VFs up to a power of two.
+                *
+                * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
+                * with other devices, IOV BAR size is expanded to be
+                * (total_pe_num * VF_BAR_size). When VF_BAR_size is half of the
+                * M64 segment size, the expanded size would be equal to half of
+                * the whole M64 space size, which would exhaust the M64 space
+                * and limit system flexibility. This is a design decision to
+                * set the boundary to a quarter of the M64 segment size.
+                */
+               if (total_vf_bar_sz > gate) {
                         mul = roundup_pow_of_two(total_vfs);
+                       dev_info(&pdev->dev,
+                               "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
+                               total_vf_bar_sz, gate, mul);
+                       pdn->m64_single_mode = true;
                         break;
                 }
         }
@@ -2898,36 +2980,100 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
                 res = &pdev->resource[i + PCI_IOV_RESOURCES];
                 if (!res->flags || res->parent)
                         continue;
-               if (!pnv_pci_is_mem_pref_64(res->flags)) {
-                       dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
-                                i, res);
-                       continue;
-               }
  
-               dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
                 size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+               /*
+                * On PHB3, the minimum size alignment of M64 BAR in single
+                * mode is 32MB.
+                */
+               if (pdn->m64_single_mode && (size < SZ_32M))
+                       goto truncate_iov;
+               dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
                 res->end = res->start + size * mul - 1;
                 dev_dbg(&pdev->dev, "                       %pR\n", res);
                 dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
                          i, res, mul);
         }
         pdn->vfs_expanded = mul;
+
+       return;
+
+truncate_iov:
+       /* To save MMIO space, IOV BAR is truncated. */
+       for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+               res = &pdev->resource[i + PCI_IOV_RESOURCES];
+               res->flags = 0;
+               res->end = res->start - 1;
+       }
  }
  #endif /* CONFIG_PCI_IOV */
  
+static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
+                                 struct resource *res)
+{
+       struct pnv_phb *phb = pe->phb;
+       struct pci_bus_region region;
+       int index;
+       int64_t rc;
+
+       if (!res || !res->flags || res->start > res->end)
+               return;
+
+       if (res->flags & IORESOURCE_IO) {
+               region.start = res->start - phb->ioda.io_pci_base;
+               region.end   = res->end - phb->ioda.io_pci_base;
+               index = region.start / phb->ioda.io_segsize;
+
+               while (index < phb->ioda.total_pe_num &&
+                      region.start <= region.end) {
+                       phb->ioda.io_segmap[index] = pe->pe_number;
+                       rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+                               pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
+                       if (rc != OPAL_SUCCESS) {
+                               pr_err("%s: Error %lld mapping IO segment#%d to PE#%d\n",
+                                      __func__, rc, index, pe->pe_number);
+                               break;
+                       }
+
+                       region.start += phb->ioda.io_segsize;
+                       index++;
+               }
+       } else if ((res->flags & IORESOURCE_MEM) &&
+                  !pnv_pci_is_mem_pref_64(res->flags)) {
+               region.start = res->start -
+                              phb->hose->mem_offset[0] -
+                              phb->ioda.m32_pci_base;
+               region.end   = res->end -
+                              phb->hose->mem_offset[0] -
+                              phb->ioda.m32_pci_base;
+               index = region.start / phb->ioda.m32_segsize;
+
+               while (index < phb->ioda.total_pe_num &&
+                      region.start <= region.end) {
+                       phb->ioda.m32_segmap[index] = pe->pe_number;
+                       rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+                               pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
+                       if (rc != OPAL_SUCCESS) {
+                               pr_err("%s: Error %lld mapping M32 segment#%d to PE#%d",
+                                      __func__, rc, index, pe->pe_number);
+                               break;
+                       }
+
+                       region.start += phb->ioda.m32_segsize;
+                       index++;
+               }
+       }
+}
+
  /*
   * This function is supposed to be called on basis of PE from top
   * to bottom style. So the the I/O or MMIO segment assigned to
   * parent PE could be overrided by its child PEs if necessary.
   */
-static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
-                                 struct pnv_ioda_pe *pe)
+static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
  {
-       struct pnv_phb *phb = hose->private_data;
-       struct pci_bus_region region;
-       struct resource *res;
-       int i, index;
-       int rc;
+       struct pci_dev *pdev;
+       int i;
  
         /*
          * NOTE: We only care PCI bus based PE for now. For PCI
@@ -2936,57 +3082,20 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
          */
         BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
  
-       pci_bus_for_each_resource(pe->pbus, res, i) {
-               if (!res || !res->flags ||
-                   res->start > res->end)
-                       continue;
-
-               if (res->flags & IORESOURCE_IO) {
-                       region.start = res->start - phb->ioda.io_pci_base;
-                       region.end   = res->end - phb->ioda.io_pci_base;
-                       index = region.start / phb->ioda.io_segsize;
-
-                       while (index < phb->ioda.total_pe &&
-                              region.start <= region.end) {
-                               phb->ioda.io_segmap[index] = pe->pe_number;
-                               rc = opal_pci_map_pe_mmio_window(phb->opal_id,
-                                       pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
-                               if (rc != OPAL_SUCCESS) {
-                                       pr_err("%s: OPAL error %d when mapping IO "
-                                              "segment #%d to PE#%d\n",
-                                              __func__, rc, index, pe->pe_number);
-                                       break;
-                               }
-
-                               region.start += phb->ioda.io_segsize;
-                               index++;
-                       }
-               } else if ((res->flags & IORESOURCE_MEM) &&
-                          !pnv_pci_is_mem_pref_64(res->flags)) {
-                       region.start = res->start -
-                                      hose->mem_offset[0] -
-                                      phb->ioda.m32_pci_base;
-                       region.end   = res->end -
-                                      hose->mem_offset[0] -
-                                      phb->ioda.m32_pci_base;
-                       index = region.start / phb->ioda.m32_segsize;
-
-                       while (index < phb->ioda.total_pe &&
-                              region.start <= region.end) {
-                               phb->ioda.m32_segmap[index] = pe->pe_number;
-                               rc = opal_pci_map_pe_mmio_window(phb->opal_id,
-                                       pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
-                               if (rc != OPAL_SUCCESS) {
-                                       pr_err("%s: OPAL error %d when mapping M32 "
-                                              "segment#%d to PE#%d",
-                                              __func__, rc, index, pe->pe_number);
-                                       break;
-                               }
+       list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
+               for (i = 0; i <= PCI_ROM_RESOURCE; i++)
+                       pnv_ioda_setup_pe_res(pe, &pdev->resource[i]);
  
-                               region.start += phb->ioda.m32_segsize;
-                               index++;
-                       }
-               }
+               /*
+                * If the PE contains all subordinate PCI buses, the
+                * windows of the child bridges should be mapped to
+                * the PE as well.
+                */
+               if (!(pe->flags & PNV_IODA_PE_BUS_ALL) || !pci_is_bridge(pdev))
+                       continue;
+               for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
+                       pnv_ioda_setup_pe_res(pe,
+                               &pdev->resource[PCI_BRIDGE_RESOURCES + i]);
         }
  }
  
@@ -3004,7 +3113,7 @@ static void pnv_pci_ioda_setup_seg(void)
                         continue;
  
                 list_for_each_entry(pe, &phb->ioda.pe_list, list) {
-                       pnv_ioda_setup_pe_seg(hose, pe);
+                       pnv_ioda_setup_pe_seg(pe);
                 }
         }
  }
@@ -3048,13 +3157,18 @@ static void pnv_npu_ioda_fixup(void)
         struct pci_controller *hose, *tmp;
         struct pnv_phb *phb;
         struct pnv_ioda_pe *pe;
+       unsigned int weight;
  
         list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                 phb = hose->private_data;
                 if (phb->type != PNV_PHB_NPU)
                         continue;
  
-               list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
+               list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+                       weight = pnv_pci_ioda_pe_dma_weight(pe);
+                       if (WARN_ON(!weight))
+                               continue;
+
                         enable_bypass = dma_get_mask(&pe->pdev->dev) ==
                                 DMA_BIT_MASK(64);
                         pnv_npu_init_dma_pe(pe);
@@ -3125,18 +3239,35 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
  static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
                                                       int resno)
  {
+       struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+       struct pnv_phb *phb = hose->private_data;
         struct pci_dn *pdn = pci_get_pdn(pdev);
-       resource_size_t align, iov_align;
-
-       iov_align = resource_size(&pdev->resource[resno]);
-       if (iov_align)
-               return iov_align;
+       resource_size_t align;
  
+       /*
+        * On the PowerNV platform, an IOV BAR is mapped by an M64 BAR to
+        * enable SR-IOV. From the hardware's perspective, the range mapped
+        * by an M64 BAR must be size aligned.
+        *
+        * When the IOV BAR is mapped by M64 BARs in Single PE mode, that
+        * powernv-specific hardware restriction is gone. But if we just use
+        * the VF BAR size as the alignment, a PF BAR and a VF BAR may be
+        * allocated within one segment of M64 #15, which introduces a PE
+        * conflict between the PF and the VF. Based on this, the minimum
+        * alignment of an IOV BAR is m64_segsize.
+        *
+        * This function returns the VF BAR size if the IOV BAR has not been
+        * expanded. If the M64 BAR is in Single PE mode, it returns the
+        * larger of the VF BAR size and the M64 segment size. Otherwise
+        * (Shared PE mode), it returns the total expanded IOV BAR size.
+        */
         align = pci_iov_resource_size(pdev, resno);
-       if (pdn->vfs_expanded)
-               return pdn->vfs_expanded * align;
+       if (!pdn->vfs_expanded)
+               return align;
+       if (pdn->m64_single_mode)
+               return max(align, (resource_size_t)phb->ioda.m64_segsize);
  
-       return align;
+       return pdn->vfs_expanded * align;
  }
  #endif /* CONFIG_PCI_IOV */
  
@@ -3164,12 +3295,6 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
         return true;
  }
  
-static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
-                              u32 devfn)
-{
-       return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
-}
-
  static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
  {
         struct pnv_phb *phb = hose->private_data;
@@ -3179,31 +3304,31 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
  }
  
  static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
-       .dma_dev_setup = pnv_pci_dma_dev_setup,
-       .dma_bus_setup = pnv_pci_dma_bus_setup,
+       .dma_dev_setup          = pnv_pci_dma_dev_setup,
+       .dma_bus_setup          = pnv_pci_dma_bus_setup,
  #ifdef CONFIG_PCI_MSI
-       .setup_msi_irqs = pnv_setup_msi_irqs,
-       .teardown_msi_irqs = pnv_teardown_msi_irqs,
+       .setup_msi_irqs         = pnv_setup_msi_irqs,
+       .teardown_msi_irqs      = pnv_teardown_msi_irqs,
  #endif
-       .enable_device_hook = pnv_pci_enable_device_hook,
-       .window_alignment = pnv_pci_window_alignment,
-       .reset_secondary_bus = pnv_pci_reset_secondary_bus,
-       .dma_set_mask = pnv_pci_ioda_dma_set_mask,
-       .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask,
-       .shutdown = pnv_pci_ioda_shutdown,
+       .enable_device_hook     = pnv_pci_enable_device_hook,
+       .window_alignment       = pnv_pci_window_alignment,
+       .reset_secondary_bus    = pnv_pci_reset_secondary_bus,
+       .dma_set_mask           = pnv_pci_ioda_dma_set_mask,
+       .dma_get_required_mask  = pnv_pci_ioda_dma_get_required_mask,
+       .shutdown               = pnv_pci_ioda_shutdown,
  };
  
  static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
-       .dma_dev_setup = pnv_pci_dma_dev_setup,
+       .dma_dev_setup          = pnv_pci_dma_dev_setup,
  #ifdef CONFIG_PCI_MSI
-       .setup_msi_irqs = pnv_setup_msi_irqs,
-       .teardown_msi_irqs = pnv_teardown_msi_irqs,
+       .setup_msi_irqs         = pnv_setup_msi_irqs,
+       .teardown_msi_irqs      = pnv_teardown_msi_irqs,
  #endif
-       .enable_device_hook = pnv_pci_enable_device_hook,
-       .window_alignment = pnv_pci_window_alignment,
-       .reset_secondary_bus = pnv_pci_reset_secondary_bus,
-       .dma_set_mask = pnv_npu_dma_set_mask,
-       .shutdown = pnv_pci_ioda_shutdown,
+       .enable_device_hook     = pnv_pci_enable_device_hook,
+       .window_alignment       = pnv_pci_window_alignment,
+       .reset_secondary_bus    = pnv_pci_reset_secondary_bus,
+       .dma_set_mask           = pnv_npu_dma_set_mask,
+       .shutdown               = pnv_pci_ioda_shutdown,
  };
  
  static void __init pnv_pci_init_ioda_phb(struct device_node *np,
@@ -3211,10 +3336,12 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
  {
         struct pci_controller *hose;
         struct pnv_phb *phb;
-       unsigned long size, m32map_off, pemap_off, iomap_off = 0;
+       unsigned long size, m64map_off, m32map_off, pemap_off;
+       unsigned long iomap_off = 0, dma32map_off = 0;
         const __be64 *prop64;
         const __be32 *prop32;
         int len;
+       unsigned int segno;
         u64 phb_id;
         void *aux;
         long rc;
@@ -3275,13 +3402,13 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
                 pr_err("  Failed to map registers !\n");
  
         /* Initialize more IODA stuff */
-       phb->ioda.total_pe = 1;
+       phb->ioda.total_pe_num = 1;
         prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
         if (prop32)
-               phb->ioda.total_pe = be32_to_cpup(prop32);
+               phb->ioda.total_pe_num = be32_to_cpup(prop32);
         prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
         if (prop32)
-               phb->ioda.reserved_pe = be32_to_cpup(prop32);
+               phb->ioda.reserved_pe_idx = be32_to_cpup(prop32);
  
         /* Parse 64-bit MMIO range */
         pnv_ioda_parse_m64_window(phb);
@@ -3290,36 +3417,57 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
         /* FW Has already off top 64k of M32 space (MSI space) */
         phb->ioda.m32_size += 0x10000;
  
-       phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
+       phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe_num;
         phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
         phb->ioda.io_size = hose->pci_io_size;
-       phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
+       phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe_num;
         phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
  
+       /* Calculate how many 32-bit TCE segments we have */
+       phb->ioda.dma32_count = phb->ioda.m32_pci_base /
+                               PNV_IODA1_DMA32_SEGSIZE;
+
         /* Allocate aux data & arrays. We don't have IO ports on PHB3 */
-       size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
+       size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long));
+       m64map_off = size;
+       size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]);
         m32map_off = size;
-       size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
+       size += phb->ioda.total_pe_num * sizeof(phb->ioda.m32_segmap[0]);
         if (phb->type == PNV_PHB_IODA1) {
                 iomap_off = size;
-               size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
+               size += phb->ioda.total_pe_num * sizeof(phb->ioda.io_segmap[0]);
+               dma32map_off = size;
+               size += phb->ioda.dma32_count *
+                       sizeof(phb->ioda.dma32_segmap[0]);
         }
         pemap_off = size;
-       size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
+       size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe);
         aux = memblock_virt_alloc(size, 0);
         phb->ioda.pe_alloc = aux;
+       phb->ioda.m64_segmap = aux + m64map_off;
         phb->ioda.m32_segmap = aux + m32map_off;
-       if (phb->type == PNV_PHB_IODA1)
+       for (segno = 0; segno < phb->ioda.total_pe_num; segno++) {
+               phb->ioda.m64_segmap[segno] = IODA_INVALID_PE;
+               phb->ioda.m32_segmap[segno] = IODA_INVALID_PE;
+       }
+       if (phb->type == PNV_PHB_IODA1) {
                 phb->ioda.io_segmap = aux + iomap_off;
+               for (segno = 0; segno < phb->ioda.total_pe_num; segno++)
+                       phb->ioda.io_segmap[segno] = IODA_INVALID_PE;
+
+               phb->ioda.dma32_segmap = aux + dma32map_off;
+               for (segno = 0; segno < phb->ioda.dma32_count; segno++)
+                       phb->ioda.dma32_segmap[segno] = IODA_INVALID_PE;
+       }
         phb->ioda.pe_array = aux + pemap_off;
-       set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc);
+       set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc);
  
-       INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
         INIT_LIST_HEAD(&phb->ioda.pe_list);
         mutex_init(&phb->ioda.pe_list_mutex);
  
         /* Calculate how many 32-bit TCE segments we have */
-       phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
+       phb->ioda.dma32_count = phb->ioda.m32_pci_base /
+                               PNV_IODA1_DMA32_SEGSIZE;
  
  #if 0 /* We should really do that ... */
         rc = opal_pci_set_phb_mem_window(opal->phb_id,
@@ -3331,7 +3479,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
  #endif
  
         pr_info("  %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
-               phb->ioda.total_pe, phb->ioda.reserved_pe,
+               phb->ioda.total_pe_num, phb->ioda.reserved_pe_idx,
                 phb->ioda.m32_size, phb->ioda.m32_segsize);
         if (phb->ioda.m64_size)
                 pr_info("                 M64: 0x%lx [segment=0x%lx]\n",
@@ -3346,9 +3494,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
         phb->freeze_pe = pnv_ioda_freeze_pe;
         phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
  
-       /* Setup RID -> PE mapping function */
-       phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
-
         /* Setup TCEs */
         phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;