Merge branch 'devel/for-linus-3.19' into stable/for-linus-3.19
authorDavid Vrabel <david.vrabel@citrix.com>
Mon, 15 Dec 2014 16:41:00 +0000 (16:41 +0000)
committerDavid Vrabel <david.vrabel@citrix.com>
Mon, 15 Dec 2014 16:41:00 +0000 (16:41 +0000)
arch/x86/include/asm/pgtable_types.h
arch/x86/include/asm/xen/page.h
arch/x86/mm/pageattr.c
arch/x86/xen/mmu.c
arch/x86/xen/p2m.c
arch/x86/xen/setup.c
arch/x86/xen/xen-ops.h

index 0778964..d83f5e7 100644 (file)
@@ -396,6 +396,7 @@ static inline void update_page_count(int level, unsigned long pages) { }
 extern pte_t *lookup_address(unsigned long address, unsigned int *level);
 extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
                                    unsigned int *level);
+extern pmd_t *lookup_pmd_address(unsigned long address);
 extern phys_addr_t slow_virt_to_phys(void *__address);
 extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
                                   unsigned numpages, unsigned long page_flags);
index f58ef6c..5eea099 100644 (file)
@@ -41,10 +41,12 @@ typedef struct xpaddr {
 
 extern unsigned long *machine_to_phys_mapping;
 extern unsigned long  machine_to_phys_nr;
+extern unsigned long *xen_p2m_addr;
+extern unsigned long  xen_p2m_size;
+extern unsigned long  xen_max_p2m_pfn;
 
 extern unsigned long get_phys_to_machine(unsigned long pfn);
 extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
-extern bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 extern unsigned long set_phys_range_identity(unsigned long pfn_s,
                                             unsigned long pfn_e);
@@ -52,17 +54,52 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
 extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
                                   struct gnttab_map_grant_ref *kmap_ops,
                                   struct page **pages, unsigned int count);
-extern int m2p_add_override(unsigned long mfn, struct page *page,
-                           struct gnttab_map_grant_ref *kmap_op);
 extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
                                     struct gnttab_map_grant_ref *kmap_ops,
                                     struct page **pages, unsigned int count);
-extern int m2p_remove_override(struct page *page,
-                              struct gnttab_map_grant_ref *kmap_op,
-                              unsigned long mfn);
-extern struct page *m2p_find_override(unsigned long mfn);
 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
+/*
+ * Helper functions to write or read unsigned long values to/from
+ * memory, when the access may fault.
+ */
+static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val)
+{
+       return __put_user(val, (unsigned long __user *)addr);
+}
+
+static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val)
+{
+       return __get_user(*val, (unsigned long __user *)addr);
+}
+
+/*
+ * When to use pfn_to_mfn(), __pfn_to_mfn() or get_phys_to_machine():
+ * - pfn_to_mfn() returns either INVALID_P2M_ENTRY or the mfn. No indicator
+ *   bits (identity or foreign) are set.
+ * - __pfn_to_mfn() returns the found entry of the p2m table. A possibly set
+ *   identity or foreign indicator will be still set. __pfn_to_mfn() is
+ *   encapsulating get_phys_to_machine() which is called in special cases only.
+ * - get_phys_to_machine() is to be called by __pfn_to_mfn() only in special
+ *   cases needing an extended handling.
+ */
+static inline unsigned long __pfn_to_mfn(unsigned long pfn)
+{
+       unsigned long mfn;
+
+       if (pfn < xen_p2m_size)
+               mfn = xen_p2m_addr[pfn];
+       else if (unlikely(pfn < xen_max_p2m_pfn))
+               return get_phys_to_machine(pfn);
+       else
+               return IDENTITY_FRAME(pfn);
+
+       if (unlikely(mfn == INVALID_P2M_ENTRY))
+               return get_phys_to_machine(pfn);
+
+       return mfn;
+}
+
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
        unsigned long mfn;
@@ -70,7 +107,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return pfn;
 
-       mfn = get_phys_to_machine(pfn);
+       mfn = __pfn_to_mfn(pfn);
 
        if (mfn != INVALID_P2M_ENTRY)
                mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
@@ -83,7 +120,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 1;
 
-       return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
+       return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY;
 }
 
 static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
@@ -102,7 +139,7 @@ static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
         * In such cases it doesn't matter what we return (we return garbage),
         * but we must handle the fault without crashing!
         */
-       ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+       ret = xen_safe_read_ulong(&machine_to_phys_mapping[mfn], &pfn);
        if (ret < 0)
                return ~0;
 
@@ -117,7 +154,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
                return mfn;
 
        pfn = mfn_to_pfn_no_overrides(mfn);
-       if (get_phys_to_machine(pfn) != mfn) {
+       if (__pfn_to_mfn(pfn) != mfn) {
                /*
                 * If this appears to be a foreign mfn (because the pfn
                 * doesn't map back to the mfn), then check the local override
@@ -133,8 +170,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
         * entry doesn't map back to the mfn and m2p_override doesn't have a
         * valid entry for it.
         */
-       if (pfn == ~0 &&
-                       get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
+       if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn))
                pfn = mfn;
 
        return pfn;
@@ -180,7 +216,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
                return mfn;
 
        pfn = mfn_to_pfn(mfn);
-       if (get_phys_to_machine(pfn) != mfn)
+       if (__pfn_to_mfn(pfn) != mfn)
                return -1; /* force !pfn_valid() */
        return pfn;
 }
index 36de293..1298108 100644 (file)
@@ -383,6 +383,26 @@ static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
         return lookup_address(address, level);
 }
 
+/*
+ * Lookup the PMD entry for a virtual address. Return a pointer to the entry
+ * or NULL if not present.
+ */
+pmd_t *lookup_pmd_address(unsigned long address)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+
+       pgd = pgd_offset_k(address);
+       if (pgd_none(*pgd))
+               return NULL;
+
+       pud = pud_offset(pgd, address);
+       if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
+               return NULL;
+
+       return pmd_offset(pud, address);
+}
+
 /*
  * This is necessary because __pa() does not work on some
  * kinds of memory, like vmalloc() or the alloc_remap()
index a8a1a3d..a1a429a 100644 (file)
@@ -387,7 +387,7 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
                unsigned long mfn;
 
                if (!xen_feature(XENFEAT_auto_translated_physmap))
-                       mfn = get_phys_to_machine(pfn);
+                       mfn = __pfn_to_mfn(pfn);
                else
                        mfn = pfn;
                /*
@@ -1158,20 +1158,16 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
         * instead of somewhere later and be confusing. */
        xen_mc_flush();
 }
-static void __init xen_pagetable_p2m_copy(void)
+
+static void __init xen_pagetable_p2m_free(void)
 {
        unsigned long size;
        unsigned long addr;
-       unsigned long new_mfn_list;
-
-       if (xen_feature(XENFEAT_auto_translated_physmap))
-               return;
 
        size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
 
-       new_mfn_list = xen_revector_p2m_tree();
        /* No memory or already called. */
-       if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list)
+       if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list)
                return;
 
        /* using __ka address and sticking INVALID_P2M_ENTRY! */
@@ -1189,8 +1185,6 @@ static void __init xen_pagetable_p2m_copy(void)
 
        size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
        memblock_free(__pa(xen_start_info->mfn_list), size);
-       /* And revector! Bye bye old array */
-       xen_start_info->mfn_list = new_mfn_list;
 
        /* At this stage, cleanup_highmap has already cleaned __ka space
         * from _brk_limit way up to the max_pfn_mapped (which is the end of
@@ -1214,17 +1208,35 @@ static void __init xen_pagetable_p2m_copy(void)
 }
 #endif
 
-static void __init xen_pagetable_init(void)
+static void __init xen_pagetable_p2m_setup(void)
 {
-       paging_init();
+       if (xen_feature(XENFEAT_auto_translated_physmap))
+               return;
+
+       xen_vmalloc_p2m_tree();
+
 #ifdef CONFIG_X86_64
-       xen_pagetable_p2m_copy();
+       xen_pagetable_p2m_free();
 #endif
+       /* And revector! Bye bye old array */
+       xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
+}
+
+static void __init xen_pagetable_init(void)
+{
+       paging_init();
+       xen_post_allocator_init();
+
+       xen_pagetable_p2m_setup();
+
        /* Allocate and initialize top and mid mfn levels for p2m structure */
        xen_build_mfn_list_list();
 
+       /* Remap memory freed due to conflicts with E820 map */
+       if (!xen_feature(XENFEAT_auto_translated_physmap))
+               xen_remap_memory();
+
        xen_setup_shared_info();
-       xen_post_allocator_init();
 }
 static void xen_write_cr2(unsigned long cr2)
 {
index b456b04..edbc7a6 100644 (file)
@@ -3,21 +3,22 @@
  * guests themselves, but it must also access and update the p2m array
  * during suspend/resume when all the pages are reallocated.
  *
- * The p2m table is logically a flat array, but we implement it as a
- * three-level tree to allow the address space to be sparse.
+ * The logical flat p2m table is mapped to a linear kernel memory area.
+ * For accesses by Xen a three-level tree linked via mfns only is set up to
+ * allow the address space to be sparse.
  *
- *                               Xen
- *                                |
- *     p2m_top              p2m_top_mfn
- *       /  \                   /   \
- * p2m_mid p2m_mid     p2m_mid_mfn p2m_mid_mfn
- *    / \      / \         /           /
- *  p2m p2m p2m p2m p2m p2m p2m ...
+ *               Xen
+ *                |
+ *          p2m_top_mfn
+ *              /   \
+ * p2m_mid_mfn p2m_mid_mfn
+ *         /           /
+ *  p2m p2m p2m ...
  *
  * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
  *
- * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
- * maximum representable pseudo-physical address space is:
+ * The p2m_top_mfn level is limited to 1 page, so the maximum representable
+ * pseudo-physical address space is:
  *  P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
  *
  * P2M_PER_PAGE depends on the architecture, as a mfn is always
@@ -30,6 +31,9 @@
  * leaf entries, or for the top  root, or middle one, for which there is a void
  * entry, we assume it is  "missing". So (for example)
  *  pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
+ * We have a dedicated page p2m_missing with all entries being
+ * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m
+ * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns.
  *
  * We also have the possibility of setting 1-1 mappings on certain regions, so
  * that:
  * PCI BARs, or ACPI spaces), we can create mappings easily because we
  * get the PFN value to match the MFN.
  *
- * For this to work efficiently we have one new page p2m_identity and
- * allocate (via reserved_brk) any other pages we need to cover the sides
- * (1GB or 4MB boundary violations). All entries in p2m_identity are set to
- * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
- * no other fancy value).
+ * For this to work efficiently we have one new page p2m_identity. All entries
+ * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only
+ * recognizes that and MFNs, no other fancy value).
  *
  * On lookup we spot that the entry points to p2m_identity and return the
  * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
  * If the entry points to an allocated page, we just proceed as before and
- * return the PFN.  If the PFN has IDENTITY_FRAME_BIT set we unmask that in
+ * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
  * appropriate functions (pfn_to_mfn).
  *
  * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
  * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
  * non-identity pfn. To protect ourselves against we elect to set (and get) the
  * IDENTITY_FRAME_BIT on all identity mapped PFNs.
- *
- * This simplistic diagram is used to explain the more subtle piece of code.
- * There is also a digram of the P2M at the end that can help.
- * Imagine your E820 looking as so:
- *
- *                    1GB                                           2GB    4GB
- * /-------------------+---------\/----\         /----------\    /---+-----\
- * | System RAM        | Sys RAM ||ACPI|         | reserved |    | Sys RAM |
- * \-------------------+---------/\----/         \----------/    \---+-----/
- *                               ^- 1029MB                       ^- 2001MB
- *
- * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
- *  2048MB = 524288 (0x80000)]
- *
- * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
- * is actually not present (would have to kick the balloon driver to put it in).
- *
- * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
- * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
- * of the PFN and the end PFN (263424 and 512256 respectively). The first step
- * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
- * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
- * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as
- * required to split any existing p2m_mid_missing middle pages.
- *
- * With the E820 example above, 263424 is not 1GB aligned so we allocate a
- * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
- * Each entry in the allocate page is "missing" (points to p2m_missing).
- *
- * Next stage is to determine if we need to do a more granular boundary check
- * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
- * We check if the start pfn and end pfn violate that boundary check, and if
- * so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer
- * granularity of setting which PFNs are missing and which ones are identity.
- * In our example 263424 and 512256 both fail the check so we reserve_brk two
- * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
- * values) and assign them to p2m[1][2] and p2m[1][488] respectively.
- *
- * At this point we would at minimum reserve_brk one page, but could be up to
- * three. Each call to set_phys_range_identity has at maximum a three page
- * cost. If we were to query the P2M at this stage, all those entries from
- * start PFN through end PFN (so 1029MB -> 2001MB) would return
- * INVALID_P2M_ENTRY ("missing").
- *
- * The next step is to walk from the start pfn to the end pfn setting
- * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
- * If we find that the middle entry is pointing to p2m_missing we can swap it
- * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and
- * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions).
- * At this point we do not need to worry about boundary aligment (so no need to
- * reserve_brk a middle page, figure out which PFNs are "missing" and which
- * ones are identity), as that has been done earlier.  If we find that the
- * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
- * that page (which covers 512 PFNs) and set the appropriate PFN with
- * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
- * set from p2m[1][2][256->511] and p2m[1][488][0->256] with
- * IDENTITY_FRAME_BIT set.
- *
- * All other regions that are void (or not filled) either point to p2m_missing
- * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
- * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
- * contain the INVALID_P2M_ENTRY value and are considered "missing."
- *
- * Finally, the region beyond the end of of the E820 (4 GB in this example)
- * is set to be identity (in case there are MMIO regions placed here).
- *
- * This is what the p2m ends up looking (for the E820 above) with this
- * fabulous drawing:
- *
- *    p2m         /--------------\
- *  /-----\       | &mfn_list[0],|                           /-----------------\
- *  |  0  |------>| &mfn_list[1],|    /---------------\      | ~0, ~0, ..      |
- *  |-----|       |  ..., ~0, ~0 |    | ~0, ~0, [x]---+----->| IDENTITY [@256] |
- *  |  1  |---\   \--------------/    | [p2m_identity]+\     | IDENTITY [@257] |
- *  |-----|    \                      | [p2m_identity]+\\    | ....            |
- *  |  2  |--\  \-------------------->|  ...          | \\   \----------------/
- *  |-----|   \                       \---------------/  \\
- *  |  3  |-\  \                                          \\  p2m_identity [1]
- *  |-----|  \  \-------------------->/---------------\   /-----------------\
- *  | ..  |\  |                       | [p2m_identity]+-->| ~0, ~0, ~0, ... |
- *  \-----/ | |                       | [p2m_identity]+-->| ..., ~0         |
- *          | |                       | ....          |   \-----------------/
- *          | |                       +-[x], ~0, ~0.. +\
- *          | |                       \---------------/ \
- *          | |                                          \-> /---------------\
- *          | V  p2m_mid_missing       p2m_missing           | IDENTITY[@0]  |
- *          | /-----------------\     /------------\         | IDENTITY[@256]|
- *          | | [p2m_missing]   +---->| ~0, ~0, ...|         | ~0, ~0, ....  |
- *          | | [p2m_missing]   +---->| ..., ~0    |         \---------------/
- *          | | ...             |     \------------/
- *          | \-----------------/
- *          |
- *          |     p2m_mid_identity
- *          |   /-----------------\
- *          \-->| [p2m_identity]  +---->[1]
- *              | [p2m_identity]  +---->[1]
- *              | ...             |
- *              \-----------------/
- *
- * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
  */
 
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/bootmem.h>
+#include <linux/slab.h>
 
 #include <asm/cache.h>
 #include <asm/setup.h>
+#include <asm/uaccess.h>
 
 #include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
 #include "multicalls.h"
 #include "xen-ops.h"
 
+#define PMDS_PER_MID_PAGE      (P2M_MID_PER_PAGE / PTRS_PER_PTE)
+
 static void __init m2p_override_init(void);
 
+unsigned long *xen_p2m_addr __read_mostly;
+EXPORT_SYMBOL_GPL(xen_p2m_addr);
+unsigned long xen_p2m_size __read_mostly;
+EXPORT_SYMBOL_GPL(xen_p2m_size);
 unsigned long xen_max_p2m_pfn __read_mostly;
+EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);
+
+static DEFINE_SPINLOCK(p2m_update_lock);
 
 static unsigned long *p2m_mid_missing_mfn;
 static unsigned long *p2m_top_mfn;
 static unsigned long **p2m_top_mfn_p;
-
-/* Placeholders for holes in the address space */
-static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
-
-static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
-
-static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE);
-
-RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
-
-/* For each I/O range remapped we may lose up to two leaf pages for the boundary
- * violations and three mid pages to cover up to 3GB. With
- * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the
- * remapped region.
- */
-RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES);
+static unsigned long *p2m_missing;
+static unsigned long *p2m_identity;
+static pte_t *p2m_missing_pte;
+static pte_t *p2m_identity_pte;
 
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
@@ -220,14 +119,6 @@ static inline unsigned p2m_index(unsigned long pfn)
        return pfn % P2M_PER_PAGE;
 }
 
-static void p2m_top_init(unsigned long ***top)
-{
-       unsigned i;
-
-       for (i = 0; i < P2M_TOP_PER_PAGE; i++)
-               top[i] = p2m_mid_missing;
-}
-
 static void p2m_top_mfn_init(unsigned long *top)
 {
        unsigned i;
@@ -244,28 +135,43 @@ static void p2m_top_mfn_p_init(unsigned long **top)
                top[i] = p2m_mid_missing_mfn;
 }
 
-static void p2m_mid_init(unsigned long **mid, unsigned long *leaf)
+static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
 {
        unsigned i;
 
        for (i = 0; i < P2M_MID_PER_PAGE; i++)
-               mid[i] = leaf;
+               mid[i] = virt_to_mfn(leaf);
 }
 
-static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
+static void p2m_init(unsigned long *p2m)
 {
        unsigned i;
 
-       for (i = 0; i < P2M_MID_PER_PAGE; i++)
-               mid[i] = virt_to_mfn(leaf);
+       for (i = 0; i < P2M_PER_PAGE; i++)
+               p2m[i] = INVALID_P2M_ENTRY;
 }
 
-static void p2m_init(unsigned long *p2m)
+static void p2m_init_identity(unsigned long *p2m, unsigned long pfn)
 {
        unsigned i;
 
-       for (i = 0; i < P2M_MID_PER_PAGE; i++)
-               p2m[i] = INVALID_P2M_ENTRY;
+       for (i = 0; i < P2M_PER_PAGE; i++)
+               p2m[i] = IDENTITY_FRAME(pfn + i);
+}
+
+static void * __ref alloc_p2m_page(void)
+{
+       if (unlikely(!slab_is_available()))
+               return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
+
+       return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
+}
+
+/* Only to be called in case of a race for a page just allocated! */
+static void free_p2m_page(void *p)
+{
+       BUG_ON(!slab_is_available());
+       free_page((unsigned long)p);
 }
 
 /*
@@ -280,40 +186,46 @@ static void p2m_init(unsigned long *p2m)
  */
 void __ref xen_build_mfn_list_list(void)
 {
-       unsigned long pfn;
+       unsigned long pfn, mfn;
+       pte_t *ptep;
+       unsigned int level, topidx, mididx;
+       unsigned long *mid_mfn_p;
 
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return;
 
        /* Pre-initialize p2m_top_mfn to be completely missing */
        if (p2m_top_mfn == NULL) {
-               p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
+               p2m_mid_missing_mfn = alloc_p2m_page();
                p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
 
-               p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
+               p2m_top_mfn_p = alloc_p2m_page();
                p2m_top_mfn_p_init(p2m_top_mfn_p);
 
-               p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
+               p2m_top_mfn = alloc_p2m_page();
                p2m_top_mfn_init(p2m_top_mfn);
        } else {
                /* Reinitialise, mfn's all change after migration */
                p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
        }
 
-       for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
-               unsigned topidx = p2m_top_index(pfn);
-               unsigned mididx = p2m_mid_index(pfn);
-               unsigned long **mid;
-               unsigned long *mid_mfn_p;
+       for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN;
+            pfn += P2M_PER_PAGE) {
+               topidx = p2m_top_index(pfn);
+               mididx = p2m_mid_index(pfn);
 
-               mid = p2m_top[topidx];
                mid_mfn_p = p2m_top_mfn_p[topidx];
+               ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn),
+                                     &level);
+               BUG_ON(!ptep || level != PG_LEVEL_4K);
+               mfn = pte_mfn(*ptep);
+               ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
 
                /* Don't bother allocating any mfn mid levels if
                 * they're just missing, just update the stored mfn,
                 * since all could have changed over a migrate.
                 */
-               if (mid == p2m_mid_missing) {
+               if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) {
                        BUG_ON(mididx);
                        BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
                        p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
@@ -322,19 +234,14 @@ void __ref xen_build_mfn_list_list(void)
                }
 
                if (mid_mfn_p == p2m_mid_missing_mfn) {
-                       /*
-                        * XXX boot-time only!  We should never find
-                        * missing parts of the mfn tree after
-                        * runtime.
-                        */
-                       mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
+                       mid_mfn_p = alloc_p2m_page();
                        p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
 
                        p2m_top_mfn_p[topidx] = mid_mfn_p;
                }
 
                p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
-               mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
+               mid_mfn_p[mididx] = mfn;
        }
 }
 
@@ -353,171 +260,235 @@ void xen_setup_mfn_list_list(void)
 /* Set up p2m_top to point to the domain-builder provided p2m pages */
 void __init xen_build_dynamic_phys_to_machine(void)
 {
-       unsigned long *mfn_list;
-       unsigned long max_pfn;
        unsigned long pfn;
 
         if (xen_feature(XENFEAT_auto_translated_physmap))
                return;
 
-       mfn_list = (unsigned long *)xen_start_info->mfn_list;
-       max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
-       xen_max_p2m_pfn = max_pfn;
+       xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list;
+       xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE);
 
-       p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
-       p2m_init(p2m_missing);
-       p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
-       p2m_init(p2m_identity);
+       for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++)
+               xen_p2m_addr[pfn] = INVALID_P2M_ENTRY;
 
-       p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
-       p2m_mid_init(p2m_mid_missing, p2m_missing);
-       p2m_mid_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
-       p2m_mid_init(p2m_mid_identity, p2m_identity);
+       xen_max_p2m_pfn = xen_p2m_size;
+}
 
-       p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
-       p2m_top_init(p2m_top);
+#define P2M_TYPE_IDENTITY      0
+#define P2M_TYPE_MISSING       1
+#define P2M_TYPE_PFN           2
+#define P2M_TYPE_UNKNOWN       3
 
-       /*
-        * The domain builder gives us a pre-constructed p2m array in
-        * mfn_list for all the pages initially given to us, so we just
-        * need to graft that into our tree structure.
-        */
-       for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
-               unsigned topidx = p2m_top_index(pfn);
-               unsigned mididx = p2m_mid_index(pfn);
+static int xen_p2m_elem_type(unsigned long pfn)
+{
+       unsigned long mfn;
 
-               if (p2m_top[topidx] == p2m_mid_missing) {
-                       unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
-                       p2m_mid_init(mid, p2m_missing);
+       if (pfn >= xen_p2m_size)
+               return P2M_TYPE_IDENTITY;
 
-                       p2m_top[topidx] = mid;
-               }
+       mfn = xen_p2m_addr[pfn];
 
-               /*
-                * As long as the mfn_list has enough entries to completely
-                * fill a p2m page, pointing into the array is ok. But if
-                * not the entries beyond the last pfn will be undefined.
-                */
-               if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) {
-                       unsigned long p2midx;
+       if (mfn == INVALID_P2M_ENTRY)
+               return P2M_TYPE_MISSING;
 
-                       p2midx = max_pfn % P2M_PER_PAGE;
-                       for ( ; p2midx < P2M_PER_PAGE; p2midx++)
-                               mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY;
-               }
-               p2m_top[topidx][mididx] = &mfn_list[pfn];
-       }
+       if (mfn & IDENTITY_FRAME_BIT)
+               return P2M_TYPE_IDENTITY;
 
-       m2p_override_init();
+       return P2M_TYPE_PFN;
 }
-#ifdef CONFIG_X86_64
-unsigned long __init xen_revector_p2m_tree(void)
+
+static void __init xen_rebuild_p2m_list(unsigned long *p2m)
 {
-       unsigned long va_start;
-       unsigned long va_end;
+       unsigned int i, chunk;
        unsigned long pfn;
-       unsigned long pfn_free = 0;
-       unsigned long *mfn_list = NULL;
-       unsigned long size;
-
-       va_start = xen_start_info->mfn_list;
-       /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
-        * so make sure it is rounded up to that */
-       size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
-       va_end = va_start + size;
-
-       /* If we were revectored already, don't do it again. */
-       if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
-               return 0;
+       unsigned long *mfns;
+       pte_t *ptep;
+       pmd_t *pmdp;
+       int type;
 
-       mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
-       if (!mfn_list) {
-               pr_warn("Could not allocate space for a new P2M tree!\n");
-               return xen_start_info->mfn_list;
-       }
-       /* Fill it out with INVALID_P2M_ENTRY value */
-       memset(mfn_list, 0xFF, size);
+       p2m_missing = alloc_p2m_page();
+       p2m_init(p2m_missing);
+       p2m_identity = alloc_p2m_page();
+       p2m_init(p2m_identity);
 
-       for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) {
-               unsigned topidx = p2m_top_index(pfn);
-               unsigned mididx;
-               unsigned long *mid_p;
+       p2m_missing_pte = alloc_p2m_page();
+       paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT);
+       p2m_identity_pte = alloc_p2m_page();
+       paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT);
+       for (i = 0; i < PTRS_PER_PTE; i++) {
+               set_pte(p2m_missing_pte + i,
+                       pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO));
+               set_pte(p2m_identity_pte + i,
+                       pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO));
+       }
 
-               if (!p2m_top[topidx])
+       for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) {
+               /*
+                * Try to map missing/identity PMDs or p2m-pages if possible.
+                * We have to respect the structure of the mfn_list_list
+                * which will be built just afterwards.
+                * Chunk size to test is one p2m page if we are in the middle
+                * of a mfn_list_list mid page and the complete mid page area
+                * if we are at index 0 of the mid page. Please note that a
+                * mid page might cover more than one PMD, e.g. on 32 bit PAE
+                * kernels.
+                */
+               chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ?
+                       P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE;
+
+               type = xen_p2m_elem_type(pfn);
+               i = 0;
+               if (type != P2M_TYPE_PFN)
+                       for (i = 1; i < chunk; i++)
+                               if (xen_p2m_elem_type(pfn + i) != type)
+                                       break;
+               if (i < chunk)
+                       /* Reset to minimal chunk size. */
+                       chunk = P2M_PER_PAGE;
+
+               if (type == P2M_TYPE_PFN || i < chunk) {
+                       /* Use initial p2m page contents. */
+#ifdef CONFIG_X86_64
+                       mfns = alloc_p2m_page();
+                       copy_page(mfns, xen_p2m_addr + pfn);
+#else
+                       mfns = xen_p2m_addr + pfn;
+#endif
+                       ptep = populate_extra_pte((unsigned long)(p2m + pfn));
+                       set_pte(ptep,
+                               pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
                        continue;
+               }
 
-               if (p2m_top[topidx] == p2m_mid_missing)
+               if (chunk == P2M_PER_PAGE) {
+                       /* Map complete missing or identity p2m-page. */
+                       mfns = (type == P2M_TYPE_MISSING) ?
+                               p2m_missing : p2m_identity;
+                       ptep = populate_extra_pte((unsigned long)(p2m + pfn));
+                       set_pte(ptep,
+                               pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO));
                        continue;
+               }
 
-               mididx = p2m_mid_index(pfn);
-               mid_p = p2m_top[topidx][mididx];
-               if (!mid_p)
-                       continue;
-               if ((mid_p == p2m_missing) || (mid_p == p2m_identity))
-                       continue;
+               /* Complete missing or identity PMD(s) can be mapped. */
+               ptep = (type == P2M_TYPE_MISSING) ?
+                       p2m_missing_pte : p2m_identity_pte;
+               for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
+                       pmdp = populate_extra_pmd(
+                               (unsigned long)(p2m + pfn + i * PTRS_PER_PTE));
+                       set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE));
+               }
+       }
+}
 
-               if ((unsigned long)mid_p == INVALID_P2M_ENTRY)
-                       continue;
+void __init xen_vmalloc_p2m_tree(void)
+{
+       static struct vm_struct vm;
 
-               /* The old va. Rebase it on mfn_list */
-               if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) {
-                       unsigned long *new;
+       vm.flags = VM_ALLOC;
+       vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn,
+                       PMD_SIZE * PMDS_PER_MID_PAGE);
+       vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE);
+       pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size);
 
-                       if (pfn_free  > (size / sizeof(unsigned long))) {
-                               WARN(1, "Only allocated for %ld pages, but we want %ld!\n",
-                                    size / sizeof(unsigned long), pfn_free);
-                               return 0;
-                       }
-                       new = &mfn_list[pfn_free];
+       xen_max_p2m_pfn = vm.size / sizeof(unsigned long);
 
-                       copy_page(new, mid_p);
-                       p2m_top[topidx][mididx] = &mfn_list[pfn_free];
+       xen_rebuild_p2m_list(vm.addr);
 
-                       pfn_free += P2M_PER_PAGE;
+       xen_p2m_addr = vm.addr;
+       xen_p2m_size = xen_max_p2m_pfn;
 
-               }
-               /* This should be the leafs allocated for identity from _brk. */
-       }
-       return (unsigned long)mfn_list;
+       xen_inv_extra_mem();
 
+       m2p_override_init();
 }
-#else
-unsigned long __init xen_revector_p2m_tree(void)
-{
-       return 0;
-}
-#endif
+
 unsigned long get_phys_to_machine(unsigned long pfn)
 {
-       unsigned topidx, mididx, idx;
+       pte_t *ptep;
+       unsigned int level;
+
+       if (unlikely(pfn >= xen_p2m_size)) {
+               if (pfn < xen_max_p2m_pfn)
+                       return xen_chk_extra_mem(pfn);
 
-       if (unlikely(pfn >= MAX_P2M_PFN))
                return IDENTITY_FRAME(pfn);
+       }
 
-       topidx = p2m_top_index(pfn);
-       mididx = p2m_mid_index(pfn);
-       idx = p2m_index(pfn);
+       ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
+       BUG_ON(!ptep || level != PG_LEVEL_4K);
 
        /*
         * The INVALID_P2M_ENTRY is filled in both p2m_*identity
         * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
         * would be wrong.
         */
-       if (p2m_top[topidx][mididx] == p2m_identity)
+       if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
                return IDENTITY_FRAME(pfn);
 
-       return p2m_top[topidx][mididx][idx];
+       return xen_p2m_addr[pfn];
 }
 EXPORT_SYMBOL_GPL(get_phys_to_machine);
 
-static void *alloc_p2m_page(void)
+/*
+ * Allocate new pmd(s). It is checked whether the old pmd is still in place.
+ * If not, nothing is changed. This is okay as the only reason for allocating
+ * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual
+ * pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
+ */
+static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg)
 {
-       return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
-}
+       pte_t *ptechk;
+       pte_t *pteret = ptep;
+       pte_t *pte_newpg[PMDS_PER_MID_PAGE];
+       pmd_t *pmdp;
+       unsigned int level;
+       unsigned long flags;
+       unsigned long vaddr;
+       int i;
 
-static void free_p2m_page(void *p)
-{
-       free_page((unsigned long)p);
+       /* Do all allocations first to bail out in error case. */
+       for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
+               pte_newpg[i] = alloc_p2m_page();
+               if (!pte_newpg[i]) {
+                       for (i--; i >= 0; i--)
+                               free_p2m_page(pte_newpg[i]);
+
+                       return NULL;
+               }
+       }
+
+       vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1);
+
+       for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
+               copy_page(pte_newpg[i], pte_pg);
+               paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT);
+
+               pmdp = lookup_pmd_address(vaddr);
+               BUG_ON(!pmdp);
+
+               spin_lock_irqsave(&p2m_update_lock, flags);
+
+               ptechk = lookup_address(vaddr, &level);
+               if (ptechk == pte_pg) {
+                       set_pmd(pmdp,
+                               __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
+                       if (vaddr == (addr & ~(PMD_SIZE - 1)))
+                               pteret = pte_offset_kernel(pmdp, addr);
+                       pte_newpg[i] = NULL;
+               }
+
+               spin_unlock_irqrestore(&p2m_update_lock, flags);
+
+               if (pte_newpg[i]) {
+                       paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT);
+                       free_p2m_page(pte_newpg[i]);
+               }
+
+               vaddr += PMD_SIZE;
+       }
+
+       return pteret;
 }
 
 /*
@@ -530,58 +501,62 @@ static void free_p2m_page(void *p)
 static bool alloc_p2m(unsigned long pfn)
 {
        unsigned topidx, mididx;
-       unsigned long ***top_p, **mid;
        unsigned long *top_mfn_p, *mid_mfn;
-       unsigned long *p2m_orig;
+       pte_t *ptep, *pte_pg;
+       unsigned int level;
+       unsigned long flags;
+       unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
+       unsigned long p2m_pfn;
 
        topidx = p2m_top_index(pfn);
        mididx = p2m_mid_index(pfn);
 
-       top_p = &p2m_top[topidx];
-       mid = ACCESS_ONCE(*top_p);
+       ptep = lookup_address(addr, &level);
+       BUG_ON(!ptep || level != PG_LEVEL_4K);
+       pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
 
-       if (mid == p2m_mid_missing) {
-               /* Mid level is missing, allocate a new one */
-               mid = alloc_p2m_page();
-               if (!mid)
+       if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) {
+               /* PMD level is missing, allocate a new one */
+               ptep = alloc_p2m_pmd(addr, ptep, pte_pg);
+               if (!ptep)
                        return false;
-
-               p2m_mid_init(mid, p2m_missing);
-
-               if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
-                       free_p2m_page(mid);
        }
 
-       top_mfn_p = &p2m_top_mfn[topidx];
-       mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
+       if (p2m_top_mfn) {
+               top_mfn_p = &p2m_top_mfn[topidx];
+               mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
 
-       BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
+               BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
 
-       if (mid_mfn == p2m_mid_missing_mfn) {
-               /* Separately check the mid mfn level */
-               unsigned long missing_mfn;
-               unsigned long mid_mfn_mfn;
-               unsigned long old_mfn;
+               if (mid_mfn == p2m_mid_missing_mfn) {
+                       /* Separately check the mid mfn level */
+                       unsigned long missing_mfn;
+                       unsigned long mid_mfn_mfn;
+                       unsigned long old_mfn;
 
-               mid_mfn = alloc_p2m_page();
-               if (!mid_mfn)
-                       return false;
+                       mid_mfn = alloc_p2m_page();
+                       if (!mid_mfn)
+                               return false;
 
-               p2m_mid_mfn_init(mid_mfn, p2m_missing);
+                       p2m_mid_mfn_init(mid_mfn, p2m_missing);
 
-               missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
-               mid_mfn_mfn = virt_to_mfn(mid_mfn);
-               old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
-               if (old_mfn != missing_mfn) {
-                       free_p2m_page(mid_mfn);
-                       mid_mfn = mfn_to_virt(old_mfn);
-               } else {
-                       p2m_top_mfn_p[topidx] = mid_mfn;
+                       missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
+                       mid_mfn_mfn = virt_to_mfn(mid_mfn);
+                       old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
+                       if (old_mfn != missing_mfn) {
+                               free_p2m_page(mid_mfn);
+                               mid_mfn = mfn_to_virt(old_mfn);
+                       } else {
+                               p2m_top_mfn_p[topidx] = mid_mfn;
+                       }
                }
+       } else {
+               mid_mfn = NULL;
        }
 
-       p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]);
-       if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) {
+       p2m_pfn = pte_pfn(ACCESS_ONCE(*ptep));
+       if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) ||
+           p2m_pfn == PFN_DOWN(__pa(p2m_missing))) {
                /* p2m leaf page is missing */
                unsigned long *p2m;
 
@@ -589,183 +564,36 @@ static bool alloc_p2m(unsigned long pfn)
                if (!p2m)
                        return false;
 
-               p2m_init(p2m);
-
-               if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
-                       free_p2m_page(p2m);
+               if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
+                       p2m_init(p2m);
                else
-                       mid_mfn[mididx] = virt_to_mfn(p2m);
-       }
-
-       return true;
-}
-
-static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
-{
-       unsigned topidx, mididx, idx;
-       unsigned long *p2m;
-
-       topidx = p2m_top_index(pfn);
-       mididx = p2m_mid_index(pfn);
-       idx = p2m_index(pfn);
-
-       /* Pfff.. No boundary cross-over, lets get out. */
-       if (!idx && check_boundary)
-               return false;
-
-       WARN(p2m_top[topidx][mididx] == p2m_identity,
-               "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
-               topidx, mididx);
-
-       /*
-        * Could be done by xen_build_dynamic_phys_to_machine..
-        */
-       if (p2m_top[topidx][mididx] != p2m_missing)
-               return false;
-
-       /* Boundary cross-over for the edges: */
-       p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
-
-       p2m_init(p2m);
+                       p2m_init_identity(p2m, pfn);
 
-       p2m_top[topidx][mididx] = p2m;
+               spin_lock_irqsave(&p2m_update_lock, flags);
 
-       return true;
-}
-
-static bool __init early_alloc_p2m_middle(unsigned long pfn)
-{
-       unsigned topidx = p2m_top_index(pfn);
-       unsigned long **mid;
-
-       mid = p2m_top[topidx];
-       if (mid == p2m_mid_missing) {
-               mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
-
-               p2m_mid_init(mid, p2m_missing);
-
-               p2m_top[topidx] = mid;
-       }
-       return true;
-}
-
-/*
- * Skim over the P2M tree looking at pages that are either filled with
- * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and
- * replace the P2M leaf with a p2m_missing or p2m_identity.
- * Stick the old page in the new P2M tree location.
- */
-static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn)
-{
-       unsigned topidx;
-       unsigned mididx;
-       unsigned ident_pfns;
-       unsigned inv_pfns;
-       unsigned long *p2m;
-       unsigned idx;
-       unsigned long pfn;
-
-       /* We only look when this entails a P2M middle layer */
-       if (p2m_index(set_pfn))
-               return false;
-
-       for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
-               topidx = p2m_top_index(pfn);
-
-               if (!p2m_top[topidx])
-                       continue;
-
-               if (p2m_top[topidx] == p2m_mid_missing)
-                       continue;
-
-               mididx = p2m_mid_index(pfn);
-               p2m = p2m_top[topidx][mididx];
-               if (!p2m)
-                       continue;
-
-               if ((p2m == p2m_missing) || (p2m == p2m_identity))
-                       continue;
-
-               if ((unsigned long)p2m == INVALID_P2M_ENTRY)
-                       continue;
-
-               ident_pfns = 0;
-               inv_pfns = 0;
-               for (idx = 0; idx < P2M_PER_PAGE; idx++) {
-                       /* IDENTITY_PFNs are 1:1 */
-                       if (p2m[idx] == IDENTITY_FRAME(pfn + idx))
-                               ident_pfns++;
-                       else if (p2m[idx] == INVALID_P2M_ENTRY)
-                               inv_pfns++;
-                       else
-                               break;
+               if (pte_pfn(*ptep) == p2m_pfn) {
+                       set_pte(ptep,
+                               pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL));
+                       if (mid_mfn)
+                               mid_mfn[mididx] = virt_to_mfn(p2m);
+                       p2m = NULL;
                }
-               if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE))
-                       goto found;
-       }
-       return false;
-found:
-       /* Found one, replace old with p2m_identity or p2m_missing */
-       p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);
-
-       /* Reset where we want to stick the old page in. */
-       topidx = p2m_top_index(set_pfn);
-       mididx = p2m_mid_index(set_pfn);
-
-       /* This shouldn't happen */
-       if (WARN_ON(p2m_top[topidx] == p2m_mid_missing))
-               early_alloc_p2m_middle(set_pfn);
-
-       if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing))
-               return false;
-
-       p2m_init(p2m);
-       p2m_top[topidx][mididx] = p2m;
 
-       return true;
-}
-bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-       if (unlikely(!__set_phys_to_machine(pfn, mfn)))  {
-               if (!early_alloc_p2m_middle(pfn))
-                       return false;
-
-               if (early_can_reuse_p2m_middle(pfn))
-                       return __set_phys_to_machine(pfn, mfn);
-
-               if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/))
-                       return false;
+               spin_unlock_irqrestore(&p2m_update_lock, flags);
 
-               if (!__set_phys_to_machine(pfn, mfn))
-                       return false;
+               if (p2m)
+                       free_p2m_page(p2m);
        }
 
        return true;
 }
 
-static void __init early_split_p2m(unsigned long pfn)
-{
-       unsigned long mididx, idx;
-
-       mididx = p2m_mid_index(pfn);
-       idx = p2m_index(pfn);
-
-       /*
-        * Allocate new middle and leaf pages if this pfn lies in the
-        * middle of one.
-        */
-       if (mididx || idx)
-               early_alloc_p2m_middle(pfn);
-       if (idx)
-               early_alloc_p2m(pfn, false);
-}
-
 unsigned long __init set_phys_range_identity(unsigned long pfn_s,
                                      unsigned long pfn_e)
 {
        unsigned long pfn;
 
-       if (unlikely(pfn_s >= MAX_P2M_PFN))
+       if (unlikely(pfn_s >= xen_p2m_size))
                return 0;
 
        if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
@@ -774,101 +602,51 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
        if (pfn_s > pfn_e)
                return 0;
 
-       if (pfn_e > MAX_P2M_PFN)
-               pfn_e = MAX_P2M_PFN;
-
-       early_split_p2m(pfn_s);
-       early_split_p2m(pfn_e);
-
-       for (pfn = pfn_s; pfn < pfn_e;) {
-               unsigned topidx = p2m_top_index(pfn);
-               unsigned mididx = p2m_mid_index(pfn);
-
-               if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
-                       break;
-               pfn++;
-
-               /*
-                * If the PFN was set to a middle or leaf identity
-                * page the remainder must also be identity, so skip
-                * ahead to the next middle or leaf entry.
-                */
-               if (p2m_top[topidx] == p2m_mid_identity)
-                       pfn = ALIGN(pfn, P2M_MID_PER_PAGE * P2M_PER_PAGE);
-               else if (p2m_top[topidx][mididx] == p2m_identity)
-                       pfn = ALIGN(pfn, P2M_PER_PAGE);
-       }
+       if (pfn_e > xen_p2m_size)
+               pfn_e = xen_p2m_size;
 
-       WARN((pfn - pfn_s) != (pfn_e - pfn_s),
-               "Identity mapping failed. We are %ld short of 1-1 mappings!\n",
-               (pfn_e - pfn_s) - (pfn - pfn_s));
+       for (pfn = pfn_s; pfn < pfn_e; pfn++)
+               xen_p2m_addr[pfn] = IDENTITY_FRAME(pfn);
 
        return pfn - pfn_s;
 }
 
-/* Try to install p2m mapping; fail if intermediate bits missing */
 bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 {
-       unsigned topidx, mididx, idx;
+       pte_t *ptep;
+       unsigned int level;
 
        /* don't track P2M changes in autotranslate guests */
        if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
                return true;
 
-       if (unlikely(pfn >= MAX_P2M_PFN)) {
+       if (unlikely(pfn >= xen_p2m_size)) {
                BUG_ON(mfn != INVALID_P2M_ENTRY);
                return true;
        }
 
-       topidx = p2m_top_index(pfn);
-       mididx = p2m_mid_index(pfn);
-       idx = p2m_index(pfn);
-
-       /* For sparse holes were the p2m leaf has real PFN along with
-        * PCI holes, stick in the PFN as the MFN value.
-        *
-        * set_phys_range_identity() will have allocated new middle
-        * and leaf pages as required so an existing p2m_mid_missing
-        * or p2m_missing mean that whole range will be identity so
-        * these can be switched to p2m_mid_identity or p2m_identity.
-        */
-       if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
-               if (p2m_top[topidx] == p2m_mid_identity)
-                       return true;
-
-               if (p2m_top[topidx] == p2m_mid_missing) {
-                       WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing,
-                                       p2m_mid_identity) != p2m_mid_missing);
-                       return true;
-               }
-
-               if (p2m_top[topidx][mididx] == p2m_identity)
-                       return true;
+       if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn)))
+               return true;
 
-               /* Swap over from MISSING to IDENTITY if needed. */
-               if (p2m_top[topidx][mididx] == p2m_missing) {
-                       WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
-                               p2m_identity) != p2m_missing);
-                       return true;
-               }
-       }
+       ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
+       BUG_ON(!ptep || level != PG_LEVEL_4K);
 
-       if (p2m_top[topidx][mididx] == p2m_missing)
+       if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing)))
                return mfn == INVALID_P2M_ENTRY;
 
-       p2m_top[topidx][mididx][idx] = mfn;
+       if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
+               return mfn == IDENTITY_FRAME(pfn);
 
-       return true;
+       return false;
 }
 
 bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 {
-       if (unlikely(!__set_phys_to_machine(pfn, mfn)))  {
+       if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
                if (!alloc_p2m(pfn))
                        return false;
 
-               if (!__set_phys_to_machine(pfn, mfn))
-                       return false;
+               return __set_phys_to_machine(pfn, mfn);
        }
 
        return true;
@@ -877,15 +655,16 @@ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 #define M2P_OVERRIDE_HASH_SHIFT        10
 #define M2P_OVERRIDE_HASH      (1 << M2P_OVERRIDE_HASH_SHIFT)
 
-static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH);
+static struct list_head *m2p_overrides;
 static DEFINE_SPINLOCK(m2p_override_lock);
 
 static void __init m2p_override_init(void)
 {
        unsigned i;
 
-       m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
-                                  sizeof(unsigned long));
+       m2p_overrides = alloc_bootmem_align(
+                               sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
+                               sizeof(unsigned long));
 
        for (i = 0; i < M2P_OVERRIDE_HASH; i++)
                INIT_LIST_HEAD(&m2p_overrides[i]);
@@ -896,68 +675,9 @@ static unsigned long mfn_hash(unsigned long mfn)
        return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
 }
 
-int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
-                           struct gnttab_map_grant_ref *kmap_ops,
-                           struct page **pages, unsigned int count)
-{
-       int i, ret = 0;
-       bool lazy = false;
-       pte_t *pte;
-
-       if (xen_feature(XENFEAT_auto_translated_physmap))
-               return 0;
-
-       if (kmap_ops &&
-           !in_interrupt() &&
-           paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
-               arch_enter_lazy_mmu_mode();
-               lazy = true;
-       }
-
-       for (i = 0; i < count; i++) {
-               unsigned long mfn, pfn;
-
-               /* Do not add to override if the map failed. */
-               if (map_ops[i].status)
-                       continue;
-
-               if (map_ops[i].flags & GNTMAP_contains_pte) {
-                       pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
-                               (map_ops[i].host_addr & ~PAGE_MASK));
-                       mfn = pte_mfn(*pte);
-               } else {
-                       mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
-               }
-               pfn = page_to_pfn(pages[i]);
-
-               WARN_ON(PagePrivate(pages[i]));
-               SetPagePrivate(pages[i]);
-               set_page_private(pages[i], mfn);
-               pages[i]->index = pfn_to_mfn(pfn);
-
-               if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
-
-               if (kmap_ops) {
-                       ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
-                       if (ret)
-                               goto out;
-               }
-       }
-
-out:
-       if (lazy)
-               arch_leave_lazy_mmu_mode();
-
-       return ret;
-}
-EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
-
 /* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page,
-               struct gnttab_map_grant_ref *kmap_op)
+static int m2p_add_override(unsigned long mfn, struct page *page,
+                           struct gnttab_map_grant_ref *kmap_op)
 {
        unsigned long flags;
        unsigned long pfn;
@@ -970,7 +690,7 @@ int m2p_add_override(unsigned long mfn, struct page *page,
                address = (unsigned long)__va(pfn << PAGE_SHIFT);
                ptep = lookup_address(address, &level);
                if (WARN(ptep == NULL || level != PG_LEVEL_4K,
-                                       "m2p_add_override: pfn %lx not mapped", pfn))
+                        "m2p_add_override: pfn %lx not mapped", pfn))
                        return -EINVAL;
        }
 
@@ -1004,19 +724,19 @@ int m2p_add_override(unsigned long mfn, struct page *page,
         * because mfn_to_pfn (that ends up being called by GUPF) will
         * return the backend pfn rather than the frontend pfn. */
        pfn = mfn_to_pfn_no_overrides(mfn);
-       if (get_phys_to_machine(pfn) == mfn)
+       if (__pfn_to_mfn(pfn) == mfn)
                set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(m2p_add_override);
 
-int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
-                             struct gnttab_map_grant_ref *kmap_ops,
-                             struct page **pages, unsigned int count)
+int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
+                           struct gnttab_map_grant_ref *kmap_ops,
+                           struct page **pages, unsigned int count)
 {
        int i, ret = 0;
        bool lazy = false;
+       pte_t *pte;
 
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 0;
@@ -1029,35 +749,75 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
        }
 
        for (i = 0; i < count; i++) {
-               unsigned long mfn = get_phys_to_machine(page_to_pfn(pages[i]));
-               unsigned long pfn = page_to_pfn(pages[i]);
+               unsigned long mfn, pfn;
 
-               if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
-                       ret = -EINVAL;
-                       goto out;
+               /* Do not add to override if the map failed. */
+               if (map_ops[i].status)
+                       continue;
+
+               if (map_ops[i].flags & GNTMAP_contains_pte) {
+                       pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
+                               (map_ops[i].host_addr & ~PAGE_MASK));
+                       mfn = pte_mfn(*pte);
+               } else {
+                       mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
                }
+               pfn = page_to_pfn(pages[i]);
 
-               set_page_private(pages[i], INVALID_P2M_ENTRY);
-               WARN_ON(!PagePrivate(pages[i]));
-               ClearPagePrivate(pages[i]);
-               set_phys_to_machine(pfn, pages[i]->index);
+               WARN_ON(PagePrivate(pages[i]));
+               SetPagePrivate(pages[i]);
+               set_page_private(pages[i], mfn);
+               pages[i]->index = pfn_to_mfn(pfn);
 
-               if (kmap_ops)
-                       ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn);
-               if (ret)
+               if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
+                       ret = -ENOMEM;
                        goto out;
+               }
+
+               if (kmap_ops) {
+                       ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
+                       if (ret)
+                               goto out;
+               }
        }
 
 out:
        if (lazy)
                arch_leave_lazy_mmu_mode();
+
        return ret;
 }
-EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
+EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
 
-int m2p_remove_override(struct page *page,
-                       struct gnttab_map_grant_ref *kmap_op,
-                       unsigned long mfn)
+static struct page *m2p_find_override(unsigned long mfn)
+{
+       unsigned long flags;
+       struct list_head *bucket;
+       struct page *p, *ret;
+
+       if (unlikely(!m2p_overrides))
+               return NULL;
+
+       ret = NULL;
+       bucket = &m2p_overrides[mfn_hash(mfn)];
+
+       spin_lock_irqsave(&m2p_override_lock, flags);
+
+       list_for_each_entry(p, bucket, lru) {
+               if (page_private(p) == mfn) {
+                       ret = p;
+                       break;
+               }
+       }
+
+       spin_unlock_irqrestore(&m2p_override_lock, flags);
+
+       return ret;
+}
+
+static int m2p_remove_override(struct page *page,
+                              struct gnttab_map_grant_ref *kmap_op,
+                              unsigned long mfn)
 {
        unsigned long flags;
        unsigned long pfn;
@@ -1072,7 +832,7 @@ int m2p_remove_override(struct page *page,
                ptep = lookup_address(address, &level);
 
                if (WARN(ptep == NULL || level != PG_LEVEL_4K,
-                                       "m2p_remove_override: pfn %lx not mapped", pfn))
+                        "m2p_remove_override: pfn %lx not mapped", pfn))
                        return -EINVAL;
        }
 
@@ -1102,9 +862,8 @@ int m2p_remove_override(struct page *page,
                         * hypercall actually returned an error.
                         */
                        if (kmap_op->handle == GNTST_general_error) {
-                               printk(KERN_WARNING "m2p_remove_override: "
-                                               "pfn %lx mfn %lx, failed to modify kernel mappings",
-                                               pfn, mfn);
+                               pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings",
+                                       pfn, mfn);
                                put_balloon_scratch_page();
                                return -1;
                        }
@@ -1112,14 +871,14 @@ int m2p_remove_override(struct page *page,
                        xen_mc_batch();
 
                        mcs = __xen_mc_entry(
-                                       sizeof(struct gnttab_unmap_and_replace));
+                               sizeof(struct gnttab_unmap_and_replace));
                        unmap_op = mcs.args;
                        unmap_op->host_addr = kmap_op->host_addr;
                        unmap_op->new_addr = scratch_page_address;
                        unmap_op->handle = kmap_op->handle;
 
                        MULTI_grant_table_op(mcs.mc,
-                                       GNTTABOP_unmap_and_replace, unmap_op, 1);
+                               GNTTABOP_unmap_and_replace, unmap_op, 1);
 
                        mcs = __xen_mc_entry(0);
                        MULTI_update_va_mapping(mcs.mc, scratch_page_address,
@@ -1145,35 +904,56 @@ int m2p_remove_override(struct page *page,
         * pfn again. */
        mfn &= ~FOREIGN_FRAME_BIT;
        pfn = mfn_to_pfn_no_overrides(mfn);
-       if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
+       if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) &&
                        m2p_find_override(mfn) == NULL)
                set_phys_to_machine(pfn, mfn);
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(m2p_remove_override);
 
-struct page *m2p_find_override(unsigned long mfn)
+int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
+                             struct gnttab_map_grant_ref *kmap_ops,
+                             struct page **pages, unsigned int count)
 {
-       unsigned long flags;
-       struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)];
-       struct page *p, *ret;
+       int i, ret = 0;
+       bool lazy = false;
 
-       ret = NULL;
+       if (xen_feature(XENFEAT_auto_translated_physmap))
+               return 0;
 
-       spin_lock_irqsave(&m2p_override_lock, flags);
+       if (kmap_ops &&
+           !in_interrupt() &&
+           paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
+               arch_enter_lazy_mmu_mode();
+               lazy = true;
+       }
 
-       list_for_each_entry(p, bucket, lru) {
-               if (page_private(p) == mfn) {
-                       ret = p;
-                       break;
+       for (i = 0; i < count; i++) {
+               unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
+               unsigned long pfn = page_to_pfn(pages[i]);
+
+               if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
+                       ret = -EINVAL;
+                       goto out;
                }
-       }
 
-       spin_unlock_irqrestore(&m2p_override_lock, flags);
+               set_page_private(pages[i], INVALID_P2M_ENTRY);
+               WARN_ON(!PagePrivate(pages[i]));
+               ClearPagePrivate(pages[i]);
+               set_phys_to_machine(pfn, pages[i]->index);
+
+               if (kmap_ops)
+                       ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn);
+               if (ret)
+                       goto out;
+       }
 
+out:
+       if (lazy)
+               arch_leave_lazy_mmu_mode();
        return ret;
 }
+EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
 
 unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
 {
@@ -1192,79 +972,29 @@ EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
 #include "debugfs.h"
 static int p2m_dump_show(struct seq_file *m, void *v)
 {
-       static const char * const level_name[] = { "top", "middle",
-                                               "entry", "abnormal", "error"};
-#define TYPE_IDENTITY 0
-#define TYPE_MISSING 1
-#define TYPE_PFN 2
-#define TYPE_UNKNOWN 3
        static const char * const type_name[] = {
-                               [TYPE_IDENTITY] = "identity",
-                               [TYPE_MISSING] = "missing",
-                               [TYPE_PFN] = "pfn",
-                               [TYPE_UNKNOWN] = "abnormal"};
-       unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
-       unsigned int uninitialized_var(prev_level);
-       unsigned int uninitialized_var(prev_type);
-
-       if (!p2m_top)
-               return 0;
-
-       for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
-               unsigned topidx = p2m_top_index(pfn);
-               unsigned mididx = p2m_mid_index(pfn);
-               unsigned idx = p2m_index(pfn);
-               unsigned lvl, type;
-
-               lvl = 4;
-               type = TYPE_UNKNOWN;
-               if (p2m_top[topidx] == p2m_mid_missing) {
-                       lvl = 0; type = TYPE_MISSING;
-               } else if (p2m_top[topidx] == NULL) {
-                       lvl = 0; type = TYPE_UNKNOWN;
-               } else if (p2m_top[topidx][mididx] == NULL) {
-                       lvl = 1; type = TYPE_UNKNOWN;
-               } else if (p2m_top[topidx][mididx] == p2m_identity) {
-                       lvl = 1; type = TYPE_IDENTITY;
-               } else if (p2m_top[topidx][mididx] == p2m_missing) {
-                       lvl = 1; type = TYPE_MISSING;
-               } else if (p2m_top[topidx][mididx][idx] == 0) {
-                       lvl = 2; type = TYPE_UNKNOWN;
-               } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
-                       lvl = 2; type = TYPE_IDENTITY;
-               } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
-                       lvl = 2; type = TYPE_MISSING;
-               } else if (p2m_top[topidx][mididx][idx] == pfn) {
-                       lvl = 2; type = TYPE_PFN;
-               } else if (p2m_top[topidx][mididx][idx] != pfn) {
-                       lvl = 2; type = TYPE_PFN;
-               }
-               if (pfn == 0) {
-                       prev_level = lvl;
-                       prev_type = type;
-               }
-               if (pfn == MAX_DOMAIN_PAGES-1) {
-                       lvl = 3;
-                       type = TYPE_UNKNOWN;
-               }
-               if (prev_type != type) {
-                       seq_printf(m, " [0x%lx->0x%lx] %s\n",
-                               prev_pfn_type, pfn, type_name[prev_type]);
-                       prev_pfn_type = pfn;
+                               [P2M_TYPE_IDENTITY] = "identity",
+                               [P2M_TYPE_MISSING] = "missing",
+                               [P2M_TYPE_PFN] = "pfn",
+                               [P2M_TYPE_UNKNOWN] = "abnormal"};
+       unsigned long pfn, first_pfn;
+       int type, prev_type;
+
+       prev_type = xen_p2m_elem_type(0);
+       first_pfn = 0;
+
+       for (pfn = 0; pfn < xen_p2m_size; pfn++) {
+               type = xen_p2m_elem_type(pfn);
+               if (type != prev_type) {
+                       seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
+                                  type_name[prev_type]);
                        prev_type = type;
-               }
-               if (prev_level != lvl) {
-                       seq_printf(m, " [0x%lx->0x%lx] level %s\n",
-                               prev_pfn_level, pfn, level_name[prev_level]);
-                       prev_pfn_level = pfn;
-                       prev_level = lvl;
+                       first_pfn = pfn;
                }
        }
+       seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
+                  type_name[prev_type]);
        return 0;
-#undef TYPE_IDENTITY
-#undef TYPE_MISSING
-#undef TYPE_PFN
-#undef TYPE_UNKNOWN
 }
 
 static int p2m_dump_open(struct inode *inode, struct file *filp)
index 29834b3..dfd77de 100644 (file)
@@ -30,6 +30,7 @@
 #include "xen-ops.h"
 #include "vdso.h"
 #include "p2m.h"
+#include "mmu.h"
 
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
@@ -47,8 +48,19 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
 /* Number of pages released from the initial allocation. */
 unsigned long xen_released_pages;
 
-/* Buffer used to remap identity mapped pages */
-unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
+/*
+ * Buffer used to remap identity mapped pages. We only need the virtual space.
+ * The physical page behind this address is remapped as needed to different
+ * buffer pages.
+ */
+#define REMAP_SIZE     (P2M_PER_PAGE - 3)
+static struct {
+       unsigned long   next_area_mfn;
+       unsigned long   target_pfn;
+       unsigned long   size;
+       unsigned long   mfns[REMAP_SIZE];
+} xen_remap_buf __initdata __aligned(PAGE_SIZE);
+static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
 
 /* 
  * The maximum amount of extra memory compared to the base size.  The
@@ -64,7 +76,6 @@ unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
 
 static void __init xen_add_extra_mem(u64 start, u64 size)
 {
-       unsigned long pfn;
        int i;
 
        for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
@@ -84,75 +95,76 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
                printk(KERN_WARNING "Warning: not enough extra memory regions\n");
 
        memblock_reserve(start, size);
+}
 
-       xen_max_p2m_pfn = PFN_DOWN(start + size);
-       for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
-               unsigned long mfn = pfn_to_mfn(pfn);
-
-               if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
-                       continue;
-               WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
-                         pfn, mfn);
+static void __init xen_del_extra_mem(u64 start, u64 size)
+{
+       int i;
+       u64 start_r, size_r;
 
-               __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+       for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+               start_r = xen_extra_mem[i].start;
+               size_r = xen_extra_mem[i].size;
+
+               /* Start of region. */
+               if (start_r == start) {
+                       BUG_ON(size > size_r);
+                       xen_extra_mem[i].start += size;
+                       xen_extra_mem[i].size -= size;
+                       break;
+               }
+               /* End of region. */
+               if (start_r + size_r == start + size) {
+                       BUG_ON(size > size_r);
+                       xen_extra_mem[i].size -= size;
+                       break;
+               }
+               /* Mid of region. */
+               if (start > start_r && start < start_r + size_r) {
+                       BUG_ON(start + size > start_r + size_r);
+                       xen_extra_mem[i].size = start - start_r;
+                       /* Calling memblock_reserve() again is okay. */
+                       xen_add_extra_mem(start + size, start_r + size_r -
+                                         (start + size));
+                       break;
+               }
        }
+       memblock_free(start, size);
 }
 
-static unsigned long __init xen_do_chunk(unsigned long start,
-                                        unsigned long end, bool release)
+/*
+ * Called during boot before the p2m list can take entries beyond the
+ * hypervisor supplied p2m list. Entries in extra mem are to be regarded as
+ * invalid.
+ */
+unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
 {
-       struct xen_memory_reservation reservation = {
-               .address_bits = 0,
-               .extent_order = 0,
-               .domid        = DOMID_SELF
-       };
-       unsigned long len = 0;
-       unsigned long pfn;
-       int ret;
+       int i;
+       unsigned long addr = PFN_PHYS(pfn);
 
-       for (pfn = start; pfn < end; pfn++) {
-               unsigned long frame;
-               unsigned long mfn = pfn_to_mfn(pfn);
+       for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+               if (addr >= xen_extra_mem[i].start &&
+                   addr < xen_extra_mem[i].start + xen_extra_mem[i].size)
+                       return INVALID_P2M_ENTRY;
+       }
 
-               if (release) {
-                       /* Make sure pfn exists to start with */
-                       if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
-                               continue;
-                       frame = mfn;
-               } else {
-                       if (mfn != INVALID_P2M_ENTRY)
-                               continue;
-                       frame = pfn;
-               }
-               set_xen_guest_handle(reservation.extent_start, &frame);
-               reservation.nr_extents = 1;
+       return IDENTITY_FRAME(pfn);
+}
 
-               ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap,
-                                          &reservation);
-               WARN(ret != 1, "Failed to %s pfn %lx err=%d\n",
-                    release ? "release" : "populate", pfn, ret);
+/*
+ * Mark all pfns of extra mem as invalid in p2m list.
+ */
+void __init xen_inv_extra_mem(void)
+{
+       unsigned long pfn, pfn_s, pfn_e;
+       int i;
 
-               if (ret == 1) {
-                       if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) {
-                               if (release)
-                                       break;
-                               set_xen_guest_handle(reservation.extent_start, &frame);
-                               reservation.nr_extents = 1;
-                               ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-                                                          &reservation);
-                               break;
-                       }
-                       len++;
-               } else
-                       break;
+       for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+               pfn_s = PFN_DOWN(xen_extra_mem[i].start);
+               pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size);
+               for (pfn = pfn_s; pfn < pfn_e; pfn++)
+                       set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
        }
-       if (len)
-               printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n",
-                      release ? "Freeing" : "Populating",
-                      start, end, len,
-                      release ? "freed" : "added");
-
-       return len;
 }
 
 /*
@@ -198,26 +210,62 @@ static unsigned long __init xen_find_pfn_range(
        return done;
 }
 
+static int __init xen_free_mfn(unsigned long mfn)
+{
+       struct xen_memory_reservation reservation = {
+               .address_bits = 0,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
+
+       set_xen_guest_handle(reservation.extent_start, &mfn);
+       reservation.nr_extents = 1;
+
+       return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+}
+
 /*
- * This releases a chunk of memory and then does the identity map. It's used as
+ * This releases a chunk of memory and then does the identity map. It's used
  * as a fallback if the remapping fails.
  */
 static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
        unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
        unsigned long *released)
 {
+       unsigned long len = 0;
+       unsigned long pfn, end;
+       int ret;
+
        WARN_ON(start_pfn > end_pfn);
 
+       end = min(end_pfn, nr_pages);
+       for (pfn = start_pfn; pfn < end; pfn++) {
+               unsigned long mfn = pfn_to_mfn(pfn);
+
+               /* Make sure pfn exists to start with */
+               if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
+                       continue;
+
+               ret = xen_free_mfn(mfn);
+               WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
+
+               if (ret == 1) {
+                       if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
+                               break;
+                       len++;
+               } else
+                       break;
+       }
+
        /* Need to release pages first */
-       *released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true);
+       *released += len;
        *identity += set_phys_range_identity(start_pfn, end_pfn);
 }
 
 /*
- * Helper function to update both the p2m and m2p tables.
+ * Helper function to update the p2m and m2p tables and kernel mapping.
  */
-static unsigned long __init xen_update_mem_tables(unsigned long pfn,
-                                                 unsigned long mfn)
+static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
 {
        struct mmu_update update = {
                .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
@@ -225,161 +273,88 @@ static unsigned long __init xen_update_mem_tables(unsigned long pfn,
        };
 
        /* Update p2m */
-       if (!early_set_phys_to_machine(pfn, mfn)) {
+       if (!set_phys_to_machine(pfn, mfn)) {
                WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
                     pfn, mfn);
-               return false;
+               BUG();
        }
 
        /* Update m2p */
        if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
                WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
                     mfn, pfn);
-               return false;
+               BUG();
        }
 
-       return true;
+       /* Update kernel mapping, but not for highmem. */
+       if ((pfn << PAGE_SHIFT) >= __pa(high_memory))
+               return;
+
+       if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
+                                        mfn_pte(mfn, PAGE_KERNEL), 0)) {
+               WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
+                     mfn, pfn);
+               BUG();
+       }
 }
 
 /*
  * This function updates the p2m and m2p tables with an identity map from
- * start_pfn to start_pfn+size and remaps the underlying RAM of the original
- * allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks
- * to not exhaust the reserved brk space. Doing it in properly aligned blocks
- * ensures we only allocate the minimum required leaf pages in the p2m table. It
- * copies the existing mfns from the p2m table under the 1:1 map, overwrites
- * them with the identity map and then updates the p2m and m2p tables with the
- * remapped memory.
+ * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
+ * original allocation at remap_pfn. The information needed for remapping is
+ * saved in the memory itself to avoid the need for allocating buffers. The
+ * complete remap information is contained in a list of MFNs each containing
+ * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
+ * This enables us to preserve the original mfn sequence while doing the
+ * remapping at a time when the memory management is capable of allocating
+ * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
+ * its callers.
  */
-static unsigned long __init xen_do_set_identity_and_remap_chunk(
+static void __init xen_do_set_identity_and_remap_chunk(
         unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
 {
+       unsigned long buf = (unsigned long)&xen_remap_buf;
+       unsigned long mfn_save, mfn;
        unsigned long ident_pfn_iter, remap_pfn_iter;
-       unsigned long ident_start_pfn_align, remap_start_pfn_align;
-       unsigned long ident_end_pfn_align, remap_end_pfn_align;
-       unsigned long ident_boundary_pfn, remap_boundary_pfn;
-       unsigned long ident_cnt = 0;
-       unsigned long remap_cnt = 0;
+       unsigned long ident_end_pfn = start_pfn + size;
        unsigned long left = size;
-       unsigned long mod;
-       int i;
+       unsigned long ident_cnt = 0;
+       unsigned int i, chunk;
 
        WARN_ON(size == 0);
 
        BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
 
-       /*
-        * Determine the proper alignment to remap memory in P2M_PER_PAGE sized
-        * blocks. We need to keep track of both the existing pfn mapping and
-        * the new pfn remapping.
-        */
-       mod = start_pfn % P2M_PER_PAGE;
-       ident_start_pfn_align =
-               mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
-       mod = remap_pfn % P2M_PER_PAGE;
-       remap_start_pfn_align =
-               mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn;
-       mod = (start_pfn + size) % P2M_PER_PAGE;
-       ident_end_pfn_align = start_pfn + size - mod;
-       mod = (remap_pfn + size) % P2M_PER_PAGE;
-       remap_end_pfn_align = remap_pfn + size - mod;
-
-       /* Iterate over each p2m leaf node in each range */
-       for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align;
-            ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align;
-            ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) {
-               /* Check we aren't past the end */
-               BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size);
-               BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size);
-
-               /* Save p2m mappings */
-               for (i = 0; i < P2M_PER_PAGE; i++)
-                       xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i);
-
-               /* Set identity map which will free a p2m leaf */
-               ident_cnt += set_phys_range_identity(ident_pfn_iter,
-                       ident_pfn_iter + P2M_PER_PAGE);
+       mfn_save = virt_to_mfn(buf);
 
-#ifdef DEBUG
-               /* Helps verify a p2m leaf has been freed */
-               for (i = 0; i < P2M_PER_PAGE; i++) {
-                       unsigned int pfn = ident_pfn_iter + i;
-                       BUG_ON(pfn_to_mfn(pfn) != pfn);
-               }
-#endif
-               /* Now remap memory */
-               for (i = 0; i < P2M_PER_PAGE; i++) {
-                       unsigned long mfn = xen_remap_buf[i];
-
-                       /* This will use the p2m leaf freed above */
-                       if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) {
-                               WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
-                                       remap_pfn_iter + i, mfn);
-                               return 0;
-                       }
-
-                       remap_cnt++;
-               }
-
-               left -= P2M_PER_PAGE;
-       }
-
-       /* Max boundary space possible */
-       BUG_ON(left > (P2M_PER_PAGE - 1) * 2);
+       for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
+            ident_pfn_iter < ident_end_pfn;
+            ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
+               chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;
 
-       /* Now handle the boundary conditions */
-       ident_boundary_pfn = start_pfn;
-       remap_boundary_pfn = remap_pfn;
-       for (i = 0; i < left; i++) {
-               unsigned long mfn;
+               /* Map first pfn to xen_remap_buf */
+               mfn = pfn_to_mfn(ident_pfn_iter);
+               set_pte_mfn(buf, mfn, PAGE_KERNEL);
 
-               /* These two checks move from the start to end boundaries */
-               if (ident_boundary_pfn == ident_start_pfn_align)
-                       ident_boundary_pfn = ident_pfn_iter;
-               if (remap_boundary_pfn == remap_start_pfn_align)
-                       remap_boundary_pfn = remap_pfn_iter;
+               /* Save mapping information in page */
+               xen_remap_buf.next_area_mfn = xen_remap_mfn;
+               xen_remap_buf.target_pfn = remap_pfn_iter;
+               xen_remap_buf.size = chunk;
+               for (i = 0; i < chunk; i++)
+                       xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);
 
-               /* Check we aren't past the end */
-               BUG_ON(ident_boundary_pfn >= start_pfn + size);
-               BUG_ON(remap_boundary_pfn >= remap_pfn + size);
-
-               mfn = pfn_to_mfn(ident_boundary_pfn);
-
-               if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) {
-                       WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
-                               remap_pfn_iter + i, mfn);
-                       return 0;
-               }
-               remap_cnt++;
+               /* Put remap buf into list. */
+               xen_remap_mfn = mfn;
 
-               ident_boundary_pfn++;
-               remap_boundary_pfn++;
-       }
+               /* Set identity map */
+               ident_cnt += set_phys_range_identity(ident_pfn_iter,
+                       ident_pfn_iter + chunk);
 
-       /* Finish up the identity map */
-       if (ident_start_pfn_align >= ident_end_pfn_align) {
-               /*
-                 * In this case we have an identity range which does not span an
-                 * aligned block so everything needs to be identity mapped here.
-                 * If we didn't check this we might remap too many pages since
-                 * the align boundaries are not meaningful in this case.
-                */
-               ident_cnt += set_phys_range_identity(start_pfn,
-                       start_pfn + size);
-       } else {
-               /* Remapped above so check each end of the chunk */
-               if (start_pfn < ident_start_pfn_align)
-                       ident_cnt += set_phys_range_identity(start_pfn,
-                               ident_start_pfn_align);
-               if (start_pfn + size > ident_pfn_iter)
-                       ident_cnt += set_phys_range_identity(ident_pfn_iter,
-                               start_pfn + size);
+               left -= chunk;
        }
 
-       BUG_ON(ident_cnt != size);
-       BUG_ON(remap_cnt != size);
-
-       return size;
+       /* Restore old xen_remap_buf mapping */
+       set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
 }
 
 /*
@@ -396,8 +371,7 @@ static unsigned long __init xen_do_set_identity_and_remap_chunk(
 static unsigned long __init xen_set_identity_and_remap_chunk(
         const struct e820entry *list, size_t map_size, unsigned long start_pfn,
        unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
-       unsigned long *identity, unsigned long *remapped,
-       unsigned long *released)
+       unsigned long *identity, unsigned long *released)
 {
        unsigned long pfn;
        unsigned long i = 0;
@@ -431,19 +405,12 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
                if (size > remap_range_size)
                        size = remap_range_size;
 
-               if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) {
-                       WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n",
-                               cur_pfn, size, remap_pfn);
-                       xen_set_identity_and_release_chunk(cur_pfn,
-                               cur_pfn + left, nr_pages, identity, released);
-                       break;
-               }
+               xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);
 
                /* Update variables to reflect new mappings. */
                i += size;
                remap_pfn += size;
                *identity += size;
-               *remapped += size;
        }
 
        /*
@@ -458,13 +425,12 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
        return remap_pfn;
 }
 
-static unsigned long __init xen_set_identity_and_remap(
+static void __init xen_set_identity_and_remap(
        const struct e820entry *list, size_t map_size, unsigned long nr_pages,
        unsigned long *released)
 {
        phys_addr_t start = 0;
        unsigned long identity = 0;
-       unsigned long remapped = 0;
        unsigned long last_pfn = nr_pages;
        const struct e820entry *entry;
        unsigned long num_released = 0;
@@ -494,8 +460,7 @@ static unsigned long __init xen_set_identity_and_remap(
                                last_pfn = xen_set_identity_and_remap_chunk(
                                                list, map_size, start_pfn,
                                                end_pfn, nr_pages, last_pfn,
-                                               &identity, &remapped,
-                                               &num_released);
+                                               &identity, &num_released);
                        start = end;
                }
        }
@@ -503,12 +468,63 @@ static unsigned long __init xen_set_identity_and_remap(
        *released = num_released;
 
        pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
-       pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped,
-               last_pfn);
        pr_info("Released %ld page(s)\n", num_released);
+}
+
+/*
+ * Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
+ * The remap information (which mfn remap to which pfn) is contained in the
+ * to be remapped memory itself in a linked list anchored at xen_remap_mfn.
+ * This scheme allows to remap the different chunks in arbitrary order while
+ * the resulting mapping will be independant from the order.
+ */
+void __init xen_remap_memory(void)
+{
+       unsigned long buf = (unsigned long)&xen_remap_buf;
+       unsigned long mfn_save, mfn, pfn;
+       unsigned long remapped = 0;
+       unsigned int i;
+       unsigned long pfn_s = ~0UL;
+       unsigned long len = 0;
+
+       mfn_save = virt_to_mfn(buf);
+
+       while (xen_remap_mfn != INVALID_P2M_ENTRY) {
+               /* Map the remap information */
+               set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);
 
-       return last_pfn;
+               BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);
+
+               pfn = xen_remap_buf.target_pfn;
+               for (i = 0; i < xen_remap_buf.size; i++) {
+                       mfn = xen_remap_buf.mfns[i];
+                       xen_update_mem_tables(pfn, mfn);
+                       remapped++;
+                       pfn++;
+               }
+               if (pfn_s == ~0UL || pfn == pfn_s) {
+                       pfn_s = xen_remap_buf.target_pfn;
+                       len += xen_remap_buf.size;
+               } else if (pfn_s + len == xen_remap_buf.target_pfn) {
+                       len += xen_remap_buf.size;
+               } else {
+                       xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
+                       pfn_s = xen_remap_buf.target_pfn;
+                       len = xen_remap_buf.size;
+               }
+
+               mfn = xen_remap_mfn;
+               xen_remap_mfn = xen_remap_buf.next_area_mfn;
+       }
+
+       if (pfn_s != ~0UL && len)
+               xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
+
+       set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
+
+       pr_info("Remapped %ld page(s)\n", remapped);
 }
+
 static unsigned long __init xen_get_max_pages(void)
 {
        unsigned long max_pages = MAX_DOMAIN_PAGES;
@@ -569,7 +585,6 @@ char * __init xen_memory_setup(void)
        int rc;
        struct xen_memory_map memmap;
        unsigned long max_pages;
-       unsigned long last_pfn = 0;
        unsigned long extra_pages = 0;
        int i;
        int op;
@@ -616,17 +631,14 @@ char * __init xen_memory_setup(void)
                extra_pages += max_pages - max_pfn;
 
        /*
-        * Set identity map on non-RAM pages and remap the underlying RAM.
+        * Set identity map on non-RAM pages and prepare remapping the
+        * underlying RAM.
         */
-       last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
-                                             &xen_released_pages);
+       xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
+                                  &xen_released_pages);
 
        extra_pages += xen_released_pages;
 
-       if (last_pfn > max_pfn) {
-               max_pfn = min(MAX_DOMAIN_PAGES, last_pfn);
-               mem_end = PFN_PHYS(max_pfn);
-       }
        /*
         * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
         * factor the base size.  On non-highmem systems, the base
@@ -653,6 +665,7 @@ char * __init xen_memory_setup(void)
                                size = min(size, (u64)extra_pages * PAGE_SIZE);
                                extra_pages -= size / PAGE_SIZE;
                                xen_add_extra_mem(addr, size);
+                               xen_max_p2m_pfn = PFN_DOWN(addr + size);
                        } else
                                type = E820_UNUSABLE;
                }
index 28c7e0b..f92921f 100644 (file)
@@ -29,12 +29,14 @@ void xen_build_mfn_list_list(void);
 void xen_setup_machphys_mapping(void);
 void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_reserve_top(void);
-extern unsigned long xen_max_p2m_pfn;
 
 void xen_mm_pin_all(void);
 void xen_mm_unpin_all(void);
 void xen_set_pat(u64);
 
+unsigned long __ref xen_chk_extra_mem(unsigned long pfn);
+void __init xen_inv_extra_mem(void);
+void __init xen_remap_memory(void);
 char * __init xen_memory_setup(void);
 char * xen_auto_xlated_memory_setup(void);
 void __init xen_arch_setup(void);
@@ -47,7 +49,7 @@ void xen_hvm_init_shared_info(void);
 void xen_unplug_emulated_devices(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
-unsigned long __init xen_revector_p2m_tree(void);
+void __init xen_vmalloc_p2m_tree(void);
 
 void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);