Merge tag 'upstream-3.9-rc1' of git://git.infradead.org/linux-ubifs
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d1107ad..0dade3f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -202,11 +202,18 @@ static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+/* Movable memory ranges; also used by the memblock subsystem. */
+struct movablemem_map movablemem_map = {
+       .acpi = false,
+       .nr_map = 0,
+};
+
 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __initdata required_kernelcore;
 static unsigned long __initdata required_movablecore;
 static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
+static unsigned long __meminitdata zone_movable_limit[MAX_NUMNODES];
 
 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
 int movable_zone;
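
For reference, the movablemem_map object initialized above is declared in a shared
header elsewhere in this series; a minimal sketch of the assumed layout (the
MOVABLEMEM_MAP_MAX bound is a placeholder name):

struct movablemem_entry {
	unsigned long start_pfn;	/* start pfn of the memory segment */
	unsigned long end_pfn;		/* end pfn of the segment (exclusive) */
};

struct movablemem_map {
	bool acpi;			/* true: use hotplug info from the BIOS */
	int nr_map;			/* number of valid entries in map[] */
	struct movablemem_entry map[MOVABLEMEM_MAP_MAX];
};
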
@@ -240,15 +247,20 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
        int ret = 0;
        unsigned seq;
        unsigned long pfn = page_to_pfn(page);
+       unsigned long sp, start_pfn;
 
        do {
                seq = zone_span_seqbegin(zone);
-               if (pfn >= zone->zone_start_pfn + zone->spanned_pages)
-                       ret = 1;
-               else if (pfn < zone->zone_start_pfn)
+               start_pfn = zone->zone_start_pfn;
+               sp = zone->spanned_pages;
+               if (!zone_spans_pfn(zone, pfn))
                        ret = 1;
        } while (zone_span_seqretry(zone, seq));
 
+       if (ret)
+               pr_err("page %lu outside zone [ %lu - %lu ]\n",
+                       pfn, start_pfn, start_pfn + sp);
+
        return ret;
 }
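
page_outside_zone_boundaries() now goes through the zone_spans_pfn()/zone_end_pfn()
helpers added to include/linux/mmzone.h by this series; they reduce to roughly:

static inline unsigned long zone_end_pfn(const struct zone *zone)
{
	/* first pfn past the zone's span */
	return zone->zone_start_pfn + zone->spanned_pages;
}

static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
{
	return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
}
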
 
@@ -288,7 +300,7 @@ static void bad_page(struct page *page)
 
        /* Don't complain about poisoned pages */
        if (PageHWPoison(page)) {
-               reset_page_mapcount(page); /* remove PageBuddy */
+               page_mapcount_reset(page); /* remove PageBuddy */
                return;
        }
 
@@ -320,8 +332,8 @@ static void bad_page(struct page *page)
        dump_stack();
 out:
        /* Leave bad fields for debug, except PageBuddy could make trouble */
-       reset_page_mapcount(page); /* remove PageBuddy */
-       add_taint(TAINT_BAD_PAGE);
+       page_mapcount_reset(page); /* remove PageBuddy */
+       add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
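
page_mapcount_reset() is a straight rename of reset_page_mapcount(), roughly:

static inline void page_mapcount_reset(struct page *page)
{
	/* -1 is the "unmapped" value; this also clears a stale PageBuddy marker */
	atomic_set(&page->_mapcount, -1);
}

The extra add_taint() argument says whether lockdep can still be trusted;
LOCKDEP_NOW_UNRELIABLE is used here because a bad page implies memory corruption.
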
 
 /*
@@ -533,6 +545,8 @@ static inline void __free_one_page(struct page *page,
        unsigned long uninitialized_var(buddy_idx);
        struct page *buddy;
 
+       VM_BUG_ON(!zone_is_initialized(zone));
+
        if (unlikely(PageCompound(page)))
                if (unlikely(destroy_compound_page(page, order)))
                        return;
@@ -606,7 +620,7 @@ static inline int free_pages_check(struct page *page)
                bad_page(page);
                return 1;
        }
-       reset_page_last_nid(page);
+       page_nid_reset_last(page);
        if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
                page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
        return 0;
@@ -666,7 +680,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                        /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
                        __free_one_page(page, zone, 0, mt);
                        trace_mm_page_pcpu_drain(page, 0, mt);
-                       if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) {
+                       if (likely(!is_migrate_isolate_page(page))) {
                                __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
                                if (is_migrate_cma(mt))
                                        __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
@@ -684,7 +698,7 @@ static void free_one_page(struct zone *zone, struct page *page, int order,
        zone->pages_scanned = 0;
 
        __free_one_page(page, zone, order, migratetype);
-       if (unlikely(migratetype != MIGRATE_ISOLATE))
+       if (unlikely(!is_migrate_isolate(migratetype)))
                __mod_zone_freepage_state(zone, 1 << order, migratetype);
        spin_unlock(&zone->lock);
 }
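
The hunks above replace open-coded MIGRATE_ISOLATE comparisons with the
is_migrate_isolate()/is_migrate_isolate_page() helpers, which compile to a
constant false when CONFIG_MEMORY_ISOLATION is disabled; roughly:

#ifdef CONFIG_MEMORY_ISOLATION
static inline bool is_migrate_isolate_page(struct page *page)
{
	return get_pageblock_migratetype(page) == MIGRATE_ISOLATE;
}
static inline bool is_migrate_isolate(int migratetype)
{
	return migratetype == MIGRATE_ISOLATE;
}
#else
static inline bool is_migrate_isolate_page(struct page *page)
{
	return false;
}
static inline bool is_migrate_isolate(int migratetype)
{
	return false;
}
#endif
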
@@ -916,7 +930,9 @@ static int fallbacks[MIGRATE_TYPES][4] = {
        [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE,   MIGRATE_RESERVE },
 #endif
        [MIGRATE_RESERVE]     = { MIGRATE_RESERVE }, /* Never used */
+#ifdef CONFIG_MEMORY_ISOLATION
        [MIGRATE_ISOLATE]     = { MIGRATE_RESERVE }, /* Never used */
+#endif
 };
 
 /*
@@ -981,9 +997,9 @@ int move_freepages_block(struct zone *zone, struct page *page,
        end_pfn = start_pfn + pageblock_nr_pages - 1;
 
        /* Do not cross zone boundaries */
-       if (start_pfn < zone->zone_start_pfn)
+       if (!zone_spans_pfn(zone, start_pfn))
                start_page = page;
-       if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages)
+       if (!zone_spans_pfn(zone, end_pfn))
                return 0;
 
        return move_freepages(zone, start_page, end_page, migratetype);
@@ -1142,7 +1158,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
                        list_add_tail(&page->lru, list);
                if (IS_ENABLED(CONFIG_CMA)) {
                        mt = get_pageblock_migratetype(page);
-                       if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE)
+                       if (!is_migrate_cma(mt) && !is_migrate_isolate(mt))
                                mt = migratetype;
                }
                set_freepage_migratetype(page, mt);
@@ -1277,7 +1293,7 @@ void mark_free_pages(struct zone *zone)
 
        spin_lock_irqsave(&zone->lock, flags);
 
-       max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+       max_zone_pfn = zone_end_pfn(zone);
        for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                if (pfn_valid(pfn)) {
                        struct page *page = pfn_to_page(pfn);
@@ -1326,7 +1342,7 @@ void free_hot_cold_page(struct page *page, int cold)
         * excessively into the page allocator
         */
        if (migratetype >= MIGRATE_PCPTYPES) {
-               if (unlikely(migratetype == MIGRATE_ISOLATE)) {
+               if (unlikely(is_migrate_isolate(migratetype))) {
                        free_one_page(zone, page, 0, migratetype);
                        goto out;
                }
@@ -1400,7 +1416,7 @@ static int __isolate_free_page(struct page *page, unsigned int order)
        zone = page_zone(page);
        mt = get_pageblock_migratetype(page);
 
-       if (mt != MIGRATE_ISOLATE) {
+       if (!is_migrate_isolate(mt)) {
                /* Obey watermarks as if the page was being allocated */
                watermark = low_wmark_pages(zone) + (1 << order);
                if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
@@ -1419,7 +1435,7 @@ static int __isolate_free_page(struct page *page, unsigned int order)
                struct page *endpage = page + (1 << order) - 1;
                for (; page < endpage; page += pageblock_nr_pages) {
                        int mt = get_pageblock_migratetype(page);
-                       if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt))
+                       if (!is_migrate_isolate(mt) && !is_migrate_cma(mt))
                                set_pageblock_migratetype(page,
                                                          MIGRATE_MOVABLE);
                }
@@ -2615,10 +2631,17 @@ retry_cpuset:
        page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                        zonelist, high_zoneidx, alloc_flags,
                        preferred_zone, migratetype);
-       if (unlikely(!page))
+       if (unlikely(!page)) {
+               /*
+                * Runtime PM, block IO and its error handling path
+                * can deadlock because I/O on the device might not
+                * complete.
+                */
+               gfp_mask = memalloc_noio_flags(gfp_mask);
                page = __alloc_pages_slowpath(gfp_mask, order,
                                zonelist, high_zoneidx, nodemask,
                                preferred_zone, migratetype);
+       }
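
memalloc_noio_flags() only strips the I/O flags for tasks that have marked
themselves with PF_MEMALLOC_NOIO (e.g. around runtime PM callbacks); a sketch of
the helper assumed here:

static inline gfp_t memalloc_noio_flags(gfp_t flags)
{
	if (unlikely(current->flags & PF_MEMALLOC_NOIO))
		flags &= ~(__GFP_IO | __GFP_FS);
	return flags;
}
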
 
        trace_mm_page_alloc(page, order, gfp_mask, migratetype);
 
@@ -2790,18 +2813,27 @@ void free_pages_exact(void *virt, size_t size)
 }
 EXPORT_SYMBOL(free_pages_exact);
 
-static unsigned int nr_free_zone_pages(int offset)
+/**
+ * nr_free_zone_pages - count number of pages beyond high watermark
+ * @offset: The zone index of the highest zone
+ *
+ * nr_free_zone_pages() counts the number of pages which are beyond the
+ * high watermark within all zones at or below a given zone index.  For each
+ * zone, the number of pages is calculated as:
+ *     managed_pages - high_pages
+ */
+static unsigned long nr_free_zone_pages(int offset)
 {
        struct zoneref *z;
        struct zone *zone;
 
        /* Just pick one node, since fallback list is circular */
-       unsigned int sum = 0;
+       unsigned long sum = 0;
 
        struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
 
        for_each_zone_zonelist(zone, z, zonelist, offset) {
-               unsigned long size = zone->present_pages;
+               unsigned long size = zone->managed_pages;
                unsigned long high = high_wmark_pages(zone);
                if (size > high)
                        sum += size - high;
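
"high_pages" above is the zone's high watermark, read through accessors along
these lines (a sketch; the real macros live in the zone headers):

#define min_wmark_pages(z)	((z)->watermark[WMARK_MIN])
#define low_wmark_pages(z)	((z)->watermark[WMARK_LOW])
#define high_wmark_pages(z)	((z)->watermark[WMARK_HIGH])
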
@@ -2810,19 +2842,25 @@ static unsigned int nr_free_zone_pages(int offset)
        return sum;
 }
 
-/*
- * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
+/**
+ * nr_free_buffer_pages - count number of pages beyond high watermark
+ *
+ * nr_free_buffer_pages() counts the number of pages which are beyond the high
+ * watermark within ZONE_DMA and ZONE_NORMAL.
  */
-unsigned int nr_free_buffer_pages(void)
+unsigned long nr_free_buffer_pages(void)
 {
        return nr_free_zone_pages(gfp_zone(GFP_USER));
 }
 EXPORT_SYMBOL_GPL(nr_free_buffer_pages);
 
-/*
- * Amount of free RAM allocatable within all zones
+/**
+ * nr_free_pagecache_pages - count number of pages beyond high watermark
+ *
+ * nr_free_pagecache_pages() counts the number of pages which are beyond the
+ * high watermark within all zones.
  */
-unsigned int nr_free_pagecache_pages(void)
+unsigned long nr_free_pagecache_pages(void)
 {
        return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE));
 }
@@ -2854,7 +2892,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
        val->totalram = pgdat->node_present_pages;
        val->freeram = node_page_state(nid, NR_FREE_PAGES);
 #ifdef CONFIG_HIGHMEM
-       val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
+       val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
        val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
                        NR_FREE_PAGES);
 #else
@@ -2897,7 +2935,9 @@ static void show_migration_types(unsigned char type)
 #ifdef CONFIG_CMA
                [MIGRATE_CMA]           = 'C',
 #endif
+#ifdef CONFIG_MEMORY_ISOLATION
                [MIGRATE_ISOLATE]       = 'I',
+#endif
        };
        char tmp[MIGRATE_TYPES + 1];
        char *p = tmp;
@@ -3236,7 +3276,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
 {
        int n, val;
        int min_val = INT_MAX;
-       int best_node = -1;
+       int best_node = NUMA_NO_NODE;
        const struct cpumask *tmp = cpumask_of_node(0);
 
        /* Use the local node if we haven't already */
@@ -3780,7 +3820,7 @@ static void setup_zone_migrate_reserve(struct zone *zone)
         * the block.
         */
        start_pfn = zone->zone_start_pfn;
-       end_pfn = start_pfn + zone->spanned_pages;
+       end_pfn = zone_end_pfn(zone);
        start_pfn = roundup(start_pfn, pageblock_nr_pages);
        reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
                                                        pageblock_order;
@@ -3876,8 +3916,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                set_page_links(page, zone, nid, pfn);
                mminit_verify_page_links(page, zone, nid, pfn);
                init_page_count(page);
-               reset_page_mapcount(page);
-               reset_page_last_nid(page);
+               page_mapcount_reset(page);
+               page_nid_reset_last(page);
                SetPageReserved(page);
                /*
                 * Mark the block movable so that blocks are reserved for
@@ -3894,7 +3934,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                 * pfn out of zone.
                 */
                if ((z->zone_start_pfn <= pfn)
-                   && (pfn < z->zone_start_pfn + z->spanned_pages)
+                   && (pfn < zone_end_pfn(z))
                    && !(pfn & (pageblock_nr_pages - 1)))
                        set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 
@@ -3932,7 +3972,7 @@ static int __meminit zone_batchsize(struct zone *zone)
         *
         * OK, so we don't know how big the cache is.  So guess.
         */
-       batch = zone->present_pages / 1024;
+       batch = zone->managed_pages / 1024;
        if (batch * PAGE_SIZE > 512 * 1024)
                batch = (512 * 1024) / PAGE_SIZE;
        batch /= 4;             /* We effectively *= 4 below */
@@ -4016,7 +4056,7 @@ static void __meminit setup_zone_pageset(struct zone *zone)
 
                if (percpu_pagelist_fraction)
                        setup_pagelist_highmark(pcp,
-                               (zone->present_pages /
+                               (zone->managed_pages /
                                        percpu_pagelist_fraction));
        }
 }
@@ -4372,6 +4412,77 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
        return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
 }
 
+/**
+ * sanitize_zone_movable_limit - Sanitize the zone_movable_limit array.
+ *
+ * zone_movable_limit is initialized to 0. This function gets the first
+ * ZONE_MOVABLE pfn of each node from movablemem_map and assigns it to
+ * zone_movable_limit[nid].
+ * zone_movable_limit[nid] == 0 means no limit for the node.
+ *
+ * Note: Each range is represented as [start_pfn, end_pfn)
+ */
+static void __meminit sanitize_zone_movable_limit(void)
+{
+       int map_pos = 0, i, nid;
+       unsigned long start_pfn, end_pfn;
+
+       if (!movablemem_map.nr_map)
+               return;
+
+       /* Iterate all ranges from minimum to maximum */
+       for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
+               /*
+                * If we have found lowest pfn of ZONE_MOVABLE of the node
+                * specified by user, just go on to check next range.
+                */
+               if (zone_movable_limit[nid])
+                       continue;
+
+#ifdef CONFIG_ZONE_DMA
+               /* Skip DMA memory. */
+               if (start_pfn < arch_zone_highest_possible_pfn[ZONE_DMA])
+                       start_pfn = arch_zone_highest_possible_pfn[ZONE_DMA];
+#endif
+
+#ifdef CONFIG_ZONE_DMA32
+               /* Skip DMA32 memory. */
+               if (start_pfn < arch_zone_highest_possible_pfn[ZONE_DMA32])
+                       start_pfn = arch_zone_highest_possible_pfn[ZONE_DMA32];
+#endif
+
+#ifdef CONFIG_HIGHMEM
+               /* Skip lowmem if ZONE_MOVABLE is highmem. */
+               if (zone_movable_is_highmem() &&
+                   start_pfn < arch_zone_lowest_possible_pfn[ZONE_HIGHMEM])
+                       start_pfn = arch_zone_lowest_possible_pfn[ZONE_HIGHMEM];
+#endif
+
+               if (start_pfn >= end_pfn)
+                       continue;
+
+               while (map_pos < movablemem_map.nr_map) {
+                       if (end_pfn <= movablemem_map.map[map_pos].start_pfn)
+                               break;
+
+                       if (start_pfn >= movablemem_map.map[map_pos].end_pfn) {
+                               map_pos++;
+                               continue;
+                       }
+
+                       /*
+                        * The start_pfn of ZONE_MOVABLE is either the minimum
+                        * pfn specified by movablemem_map, or 0, which means
+                        * the node has no ZONE_MOVABLE.
+                        */
+                       zone_movable_limit[nid] = max(start_pfn,
+                                       movablemem_map.map[map_pos].start_pfn);
+
+                       break;
+               }
+       }
+}
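
zone_movable_is_highmem() above reports whether ZONE_MOVABLE is carved out of
highmem; it boils down to something like:

static inline int zone_movable_is_highmem(void)
{
#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
	/* movable_zone records the zone ZONE_MOVABLE pages are taken from */
	return movable_zone == ZONE_HIGHMEM;
#else
	return 0;
#endif
}
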
+
 #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
                                        unsigned long zone_type,
@@ -4389,7 +4500,6 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
 
        return zholes_size[zone_type];
 }
-
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
@@ -4573,7 +4683,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                nr_all_pages += freesize;
 
                zone->spanned_pages = size;
-               zone->present_pages = freesize;
+               zone->present_pages = realsize;
                /*
                 * Set an approximate value for lowmem here, it will be adjusted
                 * when the bootmem allocator frees pages into the buddy system.
@@ -4625,7 +4735,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
                 * for the buddy allocator to function correctly.
                 */
                start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
-               end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
+               end = pgdat_end_pfn(pgdat);
                end = ALIGN(end, MAX_ORDER_NR_PAGES);
                size =  (end - start) * sizeof(struct page);
                map = alloc_remap(pgdat->node_id, size);
@@ -4831,12 +4941,19 @@ static void __init find_zone_movable_pfns_for_nodes(void)
                required_kernelcore = max(required_kernelcore, corepages);
        }
 
-       /* If kernelcore was not specified, there is no ZONE_MOVABLE */
-       if (!required_kernelcore)
+       /*
+        * If neither kernelcore/movablecore nor movablemem_map is specified,
+        * there is no ZONE_MOVABLE. But if movablemem_map is specified, the
+        * start pfn of ZONE_MOVABLE has been stored in zone_movable_limit[].
+        */
+       if (!required_kernelcore) {
+               if (movablemem_map.nr_map)
+                       memcpy(zone_movable_pfn, zone_movable_limit,
+                               sizeof(zone_movable_pfn));
                goto out;
+       }
 
        /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
-       find_usable_zone_for_movable();
        usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
 
 restart:
@@ -4864,10 +4981,24 @@ restart:
                for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
                        unsigned long size_pages;
 
+                       /*
+                        * Find more memory for kernelcore in
+                        * [zone_movable_pfn[nid], zone_movable_limit[nid]).
+                        */
                        start_pfn = max(start_pfn, zone_movable_pfn[nid]);
                        if (start_pfn >= end_pfn)
                                continue;
 
+                       if (zone_movable_limit[nid]) {
+                               end_pfn = min(end_pfn, zone_movable_limit[nid]);
+                               /* No range left for kernelcore in this node */
+                               if (start_pfn >= end_pfn) {
+                                       zone_movable_pfn[nid] =
+                                                       zone_movable_limit[nid];
+                                       break;
+                               }
+                       }
+
                        /* Account for what is only usable for kernelcore */
                        if (start_pfn < usable_startpfn) {
                                unsigned long kernel_pages;
@@ -4927,12 +5058,12 @@ restart:
        if (usable_nodes && required_kernelcore > usable_nodes)
                goto restart;
 
+out:
        /* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
        for (nid = 0; nid < MAX_NUMNODES; nid++)
                zone_movable_pfn[nid] =
                        roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
 
-out:
        /* restore the node_state */
        node_states[N_MEMORY] = saved_node_state;
 }
@@ -4995,6 +5126,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
        /* Find the PFNs that ZONE_MOVABLE begins at in each node */
        memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
+       find_usable_zone_for_movable();
+       sanitize_zone_movable_limit();
        find_zone_movable_pfns_for_nodes();
 
        /* Print out the zone ranges */
@@ -5078,6 +5211,181 @@ static int __init cmdline_parse_movablecore(char *p)
 early_param("kernelcore", cmdline_parse_kernelcore);
 early_param("movablecore", cmdline_parse_movablecore);
 
+/**
+ * movablemem_map_overlap() - Check if a range overlaps movablemem_map.map[].
+ * @start_pfn: start pfn of the range to be checked
+ * @end_pfn:   end pfn of the range to be checked (exclusive)
+ *
+ * This function checks if a given memory range [start_pfn, end_pfn) overlaps
+ * the movablemem_map.map[] array.
+ *
+ * Return: index of the first overlapping element in movablemem_map.map[],
+ *         or -1 if the range does not overlap any element.
+ */
+int __init movablemem_map_overlap(unsigned long start_pfn,
+                                  unsigned long end_pfn)
+{
+       int overlap;
+
+       if (!movablemem_map.nr_map)
+               return -1;
+
+       for (overlap = 0; overlap < movablemem_map.nr_map; overlap++)
+               if (start_pfn < movablemem_map.map[overlap].end_pfn)
+                       break;
+
+       if (overlap == movablemem_map.nr_map ||
+           end_pfn <= movablemem_map.map[overlap].start_pfn)
+               return -1;
+
+       return overlap;
+}
+
+/**
+ * insert_movablemem_map - Insert a memory range into movablemem_map.map.
+ * @start_pfn: start pfn of the range
+ * @end_pfn:   end pfn of the range
+ *
+ * This function also merges overlapping ranges and keeps the array sorted
+ * by start_pfn in increasing order.
+ */
+void __init insert_movablemem_map(unsigned long start_pfn,
+                                 unsigned long end_pfn)
+{
+       int pos, overlap;
+
+       /*
+        * pos will be at the first overlapping range, or at the position
+        * where the element should be inserted.
+        */
+       for (pos = 0; pos < movablemem_map.nr_map; pos++)
+               if (start_pfn <= movablemem_map.map[pos].end_pfn)
+                       break;
+
+       /* If there is no overlapped range, just insert the element. */
+       if (pos == movablemem_map.nr_map ||
+           end_pfn < movablemem_map.map[pos].start_pfn) {
+               /*
+                * If pos is not at the end of the array, shift the remaining
+                * elements one slot towards the end.
+                */
+               if (pos < movablemem_map.nr_map)
+                       memmove(&movablemem_map.map[pos+1],
+                               &movablemem_map.map[pos],
+                               sizeof(struct movablemem_entry) *
+                               (movablemem_map.nr_map - pos));
+               movablemem_map.map[pos].start_pfn = start_pfn;
+               movablemem_map.map[pos].end_pfn = end_pfn;
+               movablemem_map.nr_map++;
+               return;
+       }
+
+       /* overlap will be at the last overlapping range */
+       for (overlap = pos + 1; overlap < movablemem_map.nr_map; overlap++)
+               if (end_pfn < movablemem_map.map[overlap].start_pfn)
+                       break;
+
+       /*
+        * If several ranges overlap, merge them into map[pos] and shift the
+        * remaining elements towards the front.
+        */
+       overlap--;
+       movablemem_map.map[pos].start_pfn = min(start_pfn,
+                                       movablemem_map.map[pos].start_pfn);
+       movablemem_map.map[pos].end_pfn = max(end_pfn,
+                                       movablemem_map.map[overlap].end_pfn);
+
+       if (pos != overlap && overlap + 1 != movablemem_map.nr_map)
+               memmove(&movablemem_map.map[pos+1],
+                       &movablemem_map.map[overlap+1],
+                       sizeof(struct movablemem_entry) *
+                       (movablemem_map.nr_map - overlap - 1));
+
+       movablemem_map.nr_map -= overlap - pos;
+}
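
A worked example of the merge path, with hypothetical pfn values:

/*
 * With map[] = { [10, 20), [30, 40) } and nr_map = 2, a call to
 * insert_movablemem_map(15, 35) finds pos = 0 and overlap = 1, collapses both
 * entries into [10, 40), and leaves nr_map = 1.  A non-overlapping range such
 * as [50, 60) would instead be inserted as a new entry, keeping map[] sorted.
 */
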
+
+/**
+ * movablemem_map_add_region - Add a memory range into movablemem_map.
+ * @start:     physical start address of range
+ * @size:      size of the range
+ *
+ * This function transforms the physical addresses into pfns, and then adds the
+ * range into movablemem_map by calling insert_movablemem_map().
+ */
+static void __init movablemem_map_add_region(u64 start, u64 size)
+{
+       unsigned long start_pfn, end_pfn;
+
+       /* In case size == 0 or start + size overflows */
+       if (start + size <= start)
+               return;
+
+       if (movablemem_map.nr_map >= ARRAY_SIZE(movablemem_map.map)) {
+               pr_err("movablemem_map: too many entries; ignoring [mem %#010llx-%#010llx]\n",
+                       (unsigned long long) start,
+                       (unsigned long long) (start + size - 1));
+               return;
+       }
+
+       start_pfn = PFN_DOWN(start);
+       end_pfn = PFN_UP(start + size);
+       insert_movablemem_map(start_pfn, end_pfn);
+}
+
+/*
+ * cmdline_parse_movablemem_map - Parse boot option movablemem_map.
+ * @p: The boot option of the following format:
+ *     movablemem_map=acpi
+ *     or
+ *     movablemem_map=nn[KMG]@ss[KMG]
+ *
+ * movablemem_map=acpi means use the hotplug info reported by the BIOS to
+ * decide which memory is movable; any user specified ranges are discarded.
+ * movablemem_map=nn[KMG]@ss[KMG] sets the memory range [ss, ss+nn) to be
+ * used as movable memory.
+ *
+ * Return: 0 on success or -EINVAL on failure.
+ */
+static int __init cmdline_parse_movablemem_map(char *p)
+{
+       char *oldp;
+       u64 start_at, mem_size;
+
+       if (!p)
+               goto err;
+
+       if (!strcmp(p, "acpi"))
+               movablemem_map.acpi = true;
+
+       /*
+        * If the user decides to use the info from the BIOS, all other user
+        * specified ranges are ignored.
+        */
+       if (movablemem_map.acpi) {
+               if (movablemem_map.nr_map) {
+                       memset(movablemem_map.map, 0,
+                               sizeof(struct movablemem_entry)
+                               * movablemem_map.nr_map);
+                       movablemem_map.nr_map = 0;
+               }
+               return 0;
+       }
+
+       oldp = p;
+       mem_size = memparse(p, &p);
+       if (p == oldp)
+               goto err;
+
+       if (*p == '@') {
+               oldp = ++p;
+               start_at = memparse(p, &p);
+               if (p == oldp || *p != '\0')
+                       goto err;
+
+               movablemem_map_add_region(start_at, mem_size);
+               return 0;
+       }
+err:
+       return -EINVAL;
+}
+early_param("movablemem_map", cmdline_parse_movablemem_map);
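
For illustration, with hypothetical values and 4K pages, booting with
movablemem_map=4G@16G parses mem_size = 4G and start_at = 16G, so:

/*
 * start_at  = 0x400000000, mem_size = 0x100000000
 * start_pfn = PFN_DOWN(0x400000000) = 0x400000
 * end_pfn   = PFN_UP(0x500000000)   = 0x500000
 *
 * i.e. the physical range [16G, 20G) is recorded as movable.
 */
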
+
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 /**
@@ -5160,8 +5468,8 @@ static void calculate_totalreserve_pages(void)
                        /* we treat the high watermark as reserved pages. */
                        max += high_wmark_pages(zone);
 
-                       if (max > zone->present_pages)
-                               max = zone->present_pages;
+                       if (max > zone->managed_pages)
+                               max = zone->managed_pages;
                        reserve_pages += max;
                        /*
                         * Lowmem reserves are not available to
@@ -5193,7 +5501,7 @@ static void setup_per_zone_lowmem_reserve(void)
        for_each_online_pgdat(pgdat) {
                for (j = 0; j < MAX_NR_ZONES; j++) {
                        struct zone *zone = pgdat->node_zones + j;
-                       unsigned long present_pages = zone->present_pages;
+                       unsigned long managed_pages = zone->managed_pages;
 
                        zone->lowmem_reserve[j] = 0;
 
@@ -5207,9 +5515,9 @@ static void setup_per_zone_lowmem_reserve(void)
                                        sysctl_lowmem_reserve_ratio[idx] = 1;
 
                                lower_zone = pgdat->node_zones + idx;
-                               lower_zone->lowmem_reserve[j] = present_pages /
+                               lower_zone->lowmem_reserve[j] = managed_pages /
                                        sysctl_lowmem_reserve_ratio[idx];
-                               present_pages += lower_zone->present_pages;
+                               managed_pages += lower_zone->managed_pages;
                        }
                }
        }
@@ -5228,14 +5536,14 @@ static void __setup_per_zone_wmarks(void)
        /* Calculate total number of !ZONE_HIGHMEM pages */
        for_each_zone(zone) {
                if (!is_highmem(zone))
-                       lowmem_pages += zone->present_pages;
+                       lowmem_pages += zone->managed_pages;
        }
 
        for_each_zone(zone) {
                u64 tmp;
 
                spin_lock_irqsave(&zone->lock, flags);
-               tmp = (u64)pages_min * zone->present_pages;
+               tmp = (u64)pages_min * zone->managed_pages;
                do_div(tmp, lowmem_pages);
                if (is_highmem(zone)) {
                        /*
@@ -5247,13 +5555,10 @@ static void __setup_per_zone_wmarks(void)
                         * deltas controls asynch page reclaim, and so should
                         * not be capped for highmem.
                         */
-                       int min_pages;
+                       unsigned long min_pages;
 
-                       min_pages = zone->present_pages / 1024;
-                       if (min_pages < SWAP_CLUSTER_MAX)
-                               min_pages = SWAP_CLUSTER_MAX;
-                       if (min_pages > 128)
-                               min_pages = 128;
+                       min_pages = zone->managed_pages / 1024;
+                       min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
                        zone->watermark[WMARK_MIN] = min_pages;
                } else {
                        /*
@@ -5314,7 +5619,7 @@ static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
        unsigned int gb, ratio;
 
        /* Zone size in gigabytes */
-       gb = zone->present_pages >> (30 - PAGE_SHIFT);
+       gb = zone->managed_pages >> (30 - PAGE_SHIFT);
        if (gb)
                ratio = int_sqrt(10 * gb);
        else
@@ -5400,7 +5705,7 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
                return rc;
 
        for_each_zone(zone)
-               zone->min_unmapped_pages = (zone->present_pages *
+               zone->min_unmapped_pages = (zone->managed_pages *
                                sysctl_min_unmapped_ratio) / 100;
        return 0;
 }
@@ -5416,7 +5721,7 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
                return rc;
 
        for_each_zone(zone)
-               zone->min_slab_pages = (zone->present_pages *
+               zone->min_slab_pages = (zone->managed_pages *
                                sysctl_min_slab_ratio) / 100;
        return 0;
 }
@@ -5458,7 +5763,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
        for_each_populated_zone(zone) {
                for_each_possible_cpu(cpu) {
                        unsigned long  high;
-                       high = zone->present_pages / percpu_pagelist_fraction;
+                       high = zone->managed_pages / percpu_pagelist_fraction;
                        setup_pagelist_highmark(
                                per_cpu_ptr(zone->pageset, cpu), high);
                }
@@ -5645,8 +5950,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
        pfn = page_to_pfn(page);
        bitmap = get_pageblock_bitmap(zone, pfn);
        bitidx = pfn_to_bitidx(zone, pfn);
-       VM_BUG_ON(pfn < zone->zone_start_pfn);
-       VM_BUG_ON(pfn >= zone->zone_start_pfn + zone->spanned_pages);
+       VM_BUG_ON(!zone_spans_pfn(zone, pfn));
 
        for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
                if (flags & value)
@@ -5744,8 +6048,7 @@ bool is_pageblock_removable_nolock(struct page *page)
 
        zone = page_zone(page);
        pfn = page_to_pfn(page);
-       if (zone->zone_start_pfn > pfn ||
-                       zone->zone_start_pfn + zone->spanned_pages <= pfn)
+       if (!zone_spans_pfn(zone, pfn))
                return false;
 
        return !has_unmovable_pages(zone, page, 0, true);
@@ -5801,14 +6104,14 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
                                                        &cc->migratepages);
                cc->nr_migratepages -= nr_reclaimed;
 
-               ret = migrate_pages(&cc->migratepages,
-                                   alloc_migrate_target,
-                                   0, false, MIGRATE_SYNC,
-                                   MR_CMA);
+               ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
+                                   0, MIGRATE_SYNC, MR_CMA);
        }
-
-       putback_movable_pages(&cc->migratepages);
-       return ret > 0 ? 0 : ret;
+       if (ret < 0) {
+               putback_movable_pages(&cc->migratepages);
+               return ret;
+       }
+       return 0;
 }
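
The call above matches the reworked migrate_pages() prototype in this release,
which drops the old offlining argument; roughly:

/* Returns the number of pages that could not be migrated, or a negative errno. */
int migrate_pages(struct list_head *from, new_page_t get_new_page,
		  unsigned long private, enum migrate_mode mode, int reason);
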
 
 /**