WARN_ON(!mutex_is_locked(&pm_mutex));
WARN_ON(saved_gfp_mask);
saved_gfp_mask = gfp_allowed_mask;
- gfp_allowed_mask &= ~GFP_IOFS;
+ gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);
}
/*
 * pm_suspended_storage - test whether gfp_allowed_mask currently has
 * __GFP_IO or __GFP_FS masked out.
 *
 * Returns false only when both __GFP_IO and __GFP_FS are still set in
 * gfp_allowed_mask; returns true as soon as either bit is clear.
 *
 * NOTE(review): the removed line tested GFP_IOFS, the added line spells the
 * two bits out explicitly; presumably GFP_IOFS was (__GFP_IO | __GFP_FS) so
 * the result is unchanged - confirm against the gfp header.
 */
bool pm_suspended_storage(void)
{
- if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS)
+ if ((gfp_allowed_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
return false;
return true;
}
/*
 * State for page-allocation fault injection. The instance is file-local and
 * is consulted by the allocation-failure check elsewhere in this file.
 */
static struct {
/* Generic fault-injection attributes; handed to should_fail(). */
struct fault_attr attr;
/*
 * ignore_gfp_highmem: when set, requests carrying __GFP_HIGHMEM are never
 * failed.
 * ignore_gfp_wait: when set, requests that allow direct reclaim
 * (__GFP_DIRECT_RECLAIM, previously __GFP_WAIT) are never failed.
 * Both fields are converted from u32 to bool by this change.
 */
- u32 ignore_gfp_highmem;
- u32 ignore_gfp_wait;
+ bool ignore_gfp_highmem;
+ bool ignore_gfp_wait;
/*
 * NOTE(review): presumably the smallest allocation order eligible for
 * injected failure; the check that reads it is not visible here - confirm.
 */
u32 min_order;
} fail_page_alloc = {
.attr = FAULT_ATTR_INITIALIZER,
/* Defaults: both ignore flags enabled, min_order of 1. */
- .ignore_gfp_wait = 1,
- .ignore_gfp_highmem = 1,
+ .ignore_gfp_wait = true,
+ .ignore_gfp_highmem = true,
.min_order = 1,
};
return false;
if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
return false;
- if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_WAIT))
+ if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_DIRECT_RECLAIM))
return false;
return should_fail(&fail_page_alloc.attr, 1 << order);
min -= min / 2;
if (alloc_flags & ALLOC_HARDER)
min -= min / 4;
+
#ifdef CONFIG_CMA
/* If allocation can't use CMA areas don't use free CMA pages */
if (!(alloc_flags & ALLOC_CMA))
}
/*
 * zone_watermark_ok_safe - watermark check that re-reads the free page count
 * when the cheap counter is below the zone's drift mark.
 *
 * @z: zone to test
 * @order: allocation order being checked
 * @mark: watermark value to compare against
 * @classzone_idx: forwarded unchanged to __zone_watermark_ok()
 *
 * This change drops the alloc_flags parameter; 0 is now always passed as the
 * allocation flags to __zone_watermark_ok().
 *
 * Returns whatever __zone_watermark_ok() returns for the chosen free count.
 */
bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
- unsigned long mark, int classzone_idx, int alloc_flags)
+ unsigned long mark, int classzone_idx)
{
long free_pages = zone_page_state(z, NR_FREE_PAGES);
/*
 * Only when a drift mark is configured and the quick reading is below it,
 * replace the reading with zone_page_state_snapshot()
 * (presumably a more precise count - confirm in vmstat).
 */
if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
- return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+ return __zone_watermark_ok(z, order, mark, classzone_idx, 0,
free_pages);
}
nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
int zlc_active = 0; /* set if using zonelist_cache */
int did_zlc_setup = 0; /* just call zlc_setup() one time */
- bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) &&
- (gfp_mask & __GFP_WRITE);
int nr_fair_skipped = 0;
bool zonelist_rescan;
*
* XXX: For now, allow allocations to potentially
* exceed the per-zone dirty limit in the slowpath
- * (ALLOC_WMARK_LOW unset) before going into reclaim,
+ * (spread_dirty_pages unset) before going into reclaim,
* which is important when on a NUMA setup the allowed
* zones are together not big enough to reach the
* global limit. The proper fix for these situations
* will require awareness of zones in the
* dirty-throttling and the flusher threads.
*/
- if (consider_zone_dirty && !zone_dirty_ok(zone))
+ if (ac->spread_dirty_pages && !zone_dirty_ok(zone))
continue;
mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
if (test_thread_flag(TIF_MEMDIE) ||
(current->flags & (PF_MEMALLOC | PF_EXITING)))
filter &= ~SHOW_MEM_FILTER_NODES;
- if (in_interrupt() || !(gfp_mask & __GFP_WAIT))
+ if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
filter &= ~SHOW_MEM_FILTER_NODES;
if (fmt) {
gfp_to_alloc_flags(gfp_t gfp_mask)
{
int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
- const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD));
/* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
* The caller may dip into page reserves a bit more if the caller
* cannot run direct reclaim, or if the caller has realtime scheduling
* policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
- * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH).
+ * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH).
*/
alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
- if (atomic) {
+ if (gfp_mask & __GFP_ATOMIC) {
/*
* Not worth trying to allocate harder for __GFP_NOMEMALLOC even
* if it can't schedule.
return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS);
}
/*
 * is_thp_gfp_mask - helper added by this change to recognise THP-style
 * allocation masks; it replaces the open-coded
 * (gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE tests in the slow path.
 *
 * Returns true when every bit of GFP_TRANSHUGE is set in gfp_mask and,
 * assuming GFP_TRANSHUGE does not itself contain __GFP_KSWAPD_RECLAIM
 * (TODO confirm), __GFP_KSWAPD_RECLAIM is clear.
 */
+static inline bool is_thp_gfp_mask(gfp_t gfp_mask)
+{
+ return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE;
+}
+
static inline struct page *
__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
struct alloc_context *ac)
{
- const gfp_t wait = gfp_mask & __GFP_WAIT;
+ bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
struct page *page = NULL;
int alloc_flags;
unsigned long pages_reclaimed = 0;
return NULL;
}
+ /*
+ * We also sanity check to catch abuse of atomic reserves being used by
+ * callers that are not in atomic context.
+ */
+ if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
+ (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
+ gfp_mask &= ~__GFP_ATOMIC;
+
/*
* If this allocation cannot block and it is for a specific node, then
* fail early. There's no need to wakeup kswapd or retry for a
* speculative node-specific allocation.
*/
- if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait)
+ if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !can_direct_reclaim)
goto nopage;
retry:
- if (!(gfp_mask & __GFP_NO_KSWAPD))
+ if (gfp_mask & __GFP_KSWAPD_RECLAIM)
wake_all_kswapds(order, ac);
/*
}
}
- /* Atomic allocations - we can't balance anything */
- if (!wait) {
+ /* Caller is not willing to reclaim, we can't balance anything */
+ if (!can_direct_reclaim) {
/*
* All existing users of the deprecated __GFP_NOFAIL are
* blockable, so warn of any new users that actually allow this
goto got_pg;
/* Checks for THP-specific high-order allocations */
- if ((gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE) {
+ if (is_thp_gfp_mask(gfp_mask)) {
/*
* If compaction is deferred for high-order allocations, it is
* because sync compaction recently failed. If this is the case
* fault, so use asynchronous memory compaction for THP unless it is
* khugepaged trying to collapse.
*/
- if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE ||
- (current->flags & PF_KTHREAD))
+ if (!is_thp_gfp_mask(gfp_mask) || (current->flags & PF_KTHREAD))
migration_mode = MIGRATE_SYNC_LIGHT;
/* Try direct reclaim and then allocating */
lockdep_trace_alloc(gfp_mask);
- might_sleep_if(gfp_mask & __GFP_WAIT);
+ might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
if (should_fail_alloc_page(gfp_mask, order))
return NULL;
/* We set it here, as __alloc_pages_slowpath might have changed it */
ac.zonelist = zonelist;
+
+ /* Dirty zone balancing only done in the fast path */
+ ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
+
/* The preferred zone is used for statistics later */
preferred_zoneref = first_zones_zonelist(ac.zonelist, ac.high_zoneidx,
ac.nodemask ? : &cpuset_current_mems_allowed,
* complete.
*/
alloc_mask = memalloc_noio_flags(gfp_mask);
+ ac.spread_dirty_pages = false;
page = __alloc_pages_slowpath(alloc_mask, order, &ac);
}
/*
 * alloc_kmem_pages - allocate pages with alloc_pages() and charge them via
 * memcg_kmem_charge().
 *
 * @gfp_mask: allocation flags, passed to both the allocator and the charge
 * @order: allocation order
 *
 * The removed lines charged first (memcg_kmem_newpage_charge) and committed
 * after allocating; the added lines allocate first and charge afterwards.
 *
 * Returns the allocated page, or NULL if the allocation failed or the charge
 * returned non-zero.
 */
struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
{
struct page *page;
- struct mem_cgroup *memcg = NULL;
- if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
- return NULL;
page = alloc_pages(gfp_mask, order);
- memcg_kmem_commit_charge(page, memcg, order);
/* Charge failed: give the pages back and report failure to the caller. */
+ if (page && memcg_kmem_charge(page, gfp_mask, order) != 0) {
+ __free_pages(page, order);
+ page = NULL;
+ }
return page;
}
/*
 * alloc_kmem_pages_node - allocate pages with alloc_pages_node() and charge
 * them via memcg_kmem_charge().
 *
 * @nid: node id forwarded to alloc_pages_node()
 * @gfp_mask: allocation flags, passed to both the allocator and the charge
 * @order: allocation order
 *
 * The removed lines charged first (memcg_kmem_newpage_charge) and committed
 * after allocating; the added lines allocate first and charge afterwards.
 *
 * Returns the allocated page, or NULL if the allocation failed or the charge
 * returned non-zero.
 */
struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
{
struct page *page;
- struct mem_cgroup *memcg = NULL;
- if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
- return NULL;
page = alloc_pages_node(nid, gfp_mask, order);
- memcg_kmem_commit_charge(page, memcg, order);
/* Charge failed: give the pages back and report failure to the caller. */
+ if (page && memcg_kmem_charge(page, gfp_mask, order) != 0) {
+ __free_pages(page, order);
+ page = NULL;
+ }
return page;
}
*/
void __free_kmem_pages(struct page *page, unsigned int order)
{
/*
 * Call the memcg uncharge hook for these pages first, then release them
 * with __free_pages(). This change only renames the hook from
 * memcg_kmem_uncharge_pages() to memcg_kmem_uncharge().
 */
- memcg_kmem_uncharge_pages(page, order);
+ memcg_kmem_uncharge(page, order);
__free_pages(page, order);
}
int __meminit init_currently_empty_zone(struct zone *zone,
unsigned long zone_start_pfn,
- unsigned long size,
- enum memmap_context context)
+ unsigned long size)
{
struct pglist_data *pgdat = zone->zone_pgdat;
int ret;
set_pageblock_order();
setup_usemap(pgdat, zone, zone_start_pfn, size);
- ret = init_currently_empty_zone(zone, zone_start_pfn,
- size, MEMMAP_EARLY);
+ ret = init_currently_empty_zone(zone, zone_start_pfn, size);
BUG_ON(ret);
memmap_init(size, nid, j, zone_start_pfn);
zone_start_pfn += size;
static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
{
+ unsigned long __maybe_unused offset = 0;
+
/* Skip empty nodes */
if (!pgdat->node_spanned_pages)
return;
* for the buddy allocator to function correctly.
*/
start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
+ offset = pgdat->node_start_pfn - start;
end = pgdat_end_pfn(pgdat);
end = ALIGN(end, MAX_ORDER_NR_PAGES);
size = (end - start) * sizeof(struct page);
if (!map)
map = memblock_virt_alloc_node_nopanic(size,
pgdat->node_id);
- pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
+ pgdat->node_mem_map = map + offset;
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
/*
*/
if (pgdat == NODE_DATA(0)) {
mem_map = NODE_DATA(0)->node_mem_map;
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+#if defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) || defined(CONFIG_FLATMEM)
if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
- mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
+ mem_map -= offset;
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
}
#endif
*/
required_movablecore =
roundup(required_movablecore, MAX_ORDER_NR_PAGES);
+ required_movablecore = min(totalpages, required_movablecore);
corepages = totalpages - required_movablecore;
required_kernelcore = max(required_kernelcore, corepages);
}
- /* If kernelcore was not specified, there is no ZONE_MOVABLE */
- if (!required_kernelcore)
+ /*
+ * If kernelcore was not specified or kernelcore size is larger
+ * than totalpages, there is no ZONE_MOVABLE.
+ */
+ if (!required_kernelcore || required_kernelcore >= totalpages)
goto out;
/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */