mm: vmscan: remove reclaim_mode_t

Lumpy reclaim, the synchronous writeback stalls that backed it and the
reclaim_mode_t machinery that drove both are removed.  Whether
reclaim/compaction is in use can be derived directly from the allocation
order and the scan priority, so set_reclaim_mode(), reset_reclaim_mode(),
should_reclaim_stall() and the lumpy block scan in isolate_lru_pages() all
go away, and should_continue_reclaim() now asks the new
in_reclaim_compaction() helper instead.
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 33dc256..68e5819 100644
 #define CREATE_TRACE_POINTS
 #include <trace/events/vmscan.h>
 
-/*
- * reclaim_mode determines how the inactive list is shrunk
- * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages
- * RECLAIM_MODE_ASYNC:  Do not block
- * RECLAIM_MODE_SYNC:   Allow blocking e.g. call wait_on_page_writeback
- * RECLAIM_MODE_LUMPYRECLAIM: For high-order allocations, take a reference
- *                     page from the LRU and reclaim all pages within a
- *                     naturally aligned range
- * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of
- *                     order-0 pages and then compact the zone
- */
-typedef unsigned __bitwise__ reclaim_mode_t;
-#define RECLAIM_MODE_SINGLE            ((__force reclaim_mode_t)0x01u)
-#define RECLAIM_MODE_ASYNC             ((__force reclaim_mode_t)0x02u)
-#define RECLAIM_MODE_SYNC              ((__force reclaim_mode_t)0x04u)
-#define RECLAIM_MODE_LUMPYRECLAIM      ((__force reclaim_mode_t)0x08u)
-#define RECLAIM_MODE_COMPACTION                ((__force reclaim_mode_t)0x10u)
-
 struct scan_control {
        /* Incremented by the number of inactive pages that were scanned */
        unsigned long nr_scanned;
@@ -96,12 +78,6 @@ struct scan_control {
 
        int order;
 
-       /*
-        * Intend to reclaim enough continuous memory rather than reclaim
-        * enough amount of memory. i.e, mode for high order allocation.
-        */
-       reclaim_mode_t reclaim_mode;
-
        /*
         * The memory cgroup that hit its limit and as a result is the
         * primary target of this reclaim invocation.
@@ -364,39 +340,6 @@ out:
        return ret;
 }
 
-static void set_reclaim_mode(int priority, struct scan_control *sc,
-                                  bool sync)
-{
-       reclaim_mode_t syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC;
-
-       /*
-        * Initially assume we are entering either lumpy reclaim or
-        * reclaim/compaction.Depending on the order, we will either set the
-        * sync mode or just reclaim order-0 pages later.
-        */
-       if (COMPACTION_BUILD)
-               sc->reclaim_mode = RECLAIM_MODE_COMPACTION;
-       else
-               sc->reclaim_mode = RECLAIM_MODE_LUMPYRECLAIM;
-
-       /*
-        * Avoid using lumpy reclaim or reclaim/compaction if possible by
-        * restricting when its set to either costly allocations or when
-        * under memory pressure
-        */
-       if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-               sc->reclaim_mode |= syncmode;
-       else if (sc->order && priority < DEF_PRIORITY - 2)
-               sc->reclaim_mode |= syncmode;
-       else
-               sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
-}
-
-static void reset_reclaim_mode(struct scan_control *sc)
-{
-       sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
-}
-
 static inline int is_page_cache_freeable(struct page *page)
 {
        /*
@@ -416,10 +359,6 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
                return 1;
        if (bdi == current->backing_dev_info)
                return 1;
-
-       /* lumpy reclaim for hugepage often need a lot of write */
-       if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-               return 1;
        return 0;
 }
 
@@ -523,8 +462,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
                        /* synchronous write or broken a_ops? */
                        ClearPageReclaim(page);
                }
-               trace_mm_vmscan_writepage(page,
-                       trace_reclaim_flags(page, sc->reclaim_mode));
+               trace_mm_vmscan_writepage(page, trace_reclaim_flags(page));
                inc_zone_page_state(page, NR_VMSCAN_WRITE);
                return PAGE_SUCCESS;
        }
@@ -710,10 +648,6 @@ static enum page_references page_check_references(struct page *page,
        referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags);
        referenced_page = TestClearPageReferenced(page);
 
-       /* Lumpy reclaim - ignore references */
-       if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
-               return PAGEREF_RECLAIM;
-
        /*
         * Mlock lost the isolation race with us.  Let try_to_unmap()
         * move the page to the unevictable list.
@@ -813,19 +747,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
                if (PageWriteback(page)) {
                        nr_writeback++;
-                       /*
-                        * Synchronous reclaim cannot queue pages for
-                        * writeback due to the possibility of stack overflow
-                        * but if it encounters a page under writeback, wait
-                        * for the IO to complete.
-                        */
-                       if ((sc->reclaim_mode & RECLAIM_MODE_SYNC) &&
-                           may_enter_fs)
-                               wait_on_page_writeback(page);
-                       else {
-                               unlock_page(page);
-                               goto keep_lumpy;
-                       }
+                       unlock_page(page);
+                       goto keep;
                }
 
                references = page_check_references(page, mz, sc);
@@ -908,7 +831,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                goto activate_locked;
                        case PAGE_SUCCESS:
                                if (PageWriteback(page))
-                                       goto keep_lumpy;
+                                       goto keep;
                                if (PageDirty(page))
                                        goto keep;
 
@@ -994,7 +917,6 @@ cull_mlocked:
                        try_to_free_swap(page);
                unlock_page(page);
                putback_lru_page(page);
-               reset_reclaim_mode(sc);
                continue;
 
 activate_locked:
@@ -1007,8 +929,6 @@ activate_locked:
 keep_locked:
                unlock_page(page);
 keep:
-               reset_reclaim_mode(sc);
-keep_lumpy:
                list_add(&page->lru, &ret_pages);
                VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
        }
@@ -1064,11 +984,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
        if (!all_lru_mode && !!page_is_file_cache(page) != file)
                return ret;
 
-       /*
-        * When this function is being called for lumpy reclaim, we
-        * initially look into all LRU pages, active, inactive and
-        * unevictable; only give shrink_page_list evictable pages.
-        */
+       /* Do not give back unevictable pages for compaction */
        if (PageUnevictable(page))
                return ret;
 
@@ -1153,9 +1069,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
        struct lruvec *lruvec;
        struct list_head *src;
        unsigned long nr_taken = 0;
-       unsigned long nr_lumpy_taken = 0;
-       unsigned long nr_lumpy_dirty = 0;
-       unsigned long nr_lumpy_failed = 0;
        unsigned long scan;
        int lru = LRU_BASE;
 
@@ -1168,10 +1081,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
        for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
                struct page *page;
-               unsigned long pfn;
-               unsigned long end_pfn;
-               unsigned long page_pfn;
-               int zone_id;
 
                page = lru_to_page(src);
                prefetchw_prev_lru_page(page, src, flags);
@@ -1193,84 +1102,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                default:
                        BUG();
                }
-
-               if (!sc->order || !(sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM))
-                       continue;
-
-               /*
-                * Attempt to take all pages in the order aligned region
-                * surrounding the tag page.  Only take those pages of
-                * the same active state as that tag page.  We may safely
-                * round the target page pfn down to the requested order
-                * as the mem_map is guaranteed valid out to MAX_ORDER,
-                * where that page is in a different zone we will detect
-                * it from its zone id and abort this block scan.
-                */
-               zone_id = page_zone_id(page);
-               page_pfn = page_to_pfn(page);
-               pfn = page_pfn & ~((1 << sc->order) - 1);
-               end_pfn = pfn + (1 << sc->order);
-               for (; pfn < end_pfn; pfn++) {
-                       struct page *cursor_page;
-
-                       /* The target page is in the block, ignore it. */
-                       if (unlikely(pfn == page_pfn))
-                               continue;
-
-                       /* Avoid holes within the zone. */
-                       if (unlikely(!pfn_valid_within(pfn)))
-                               break;
-
-                       cursor_page = pfn_to_page(pfn);
-
-                       /* Check that we have not crossed a zone boundary. */
-                       if (unlikely(page_zone_id(cursor_page) != zone_id))
-                               break;
-
-                       /*
-                        * If we don't have enough swap space, reclaiming of
-                        * anon page which don't already have a swap slot is
-                        * pointless.
-                        */
-                       if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) &&
-                           !PageSwapCache(cursor_page))
-                               break;
-
-                       if (__isolate_lru_page(cursor_page, mode, file) == 0) {
-                               unsigned int isolated_pages;
-
-                               mem_cgroup_lru_del(cursor_page);
-                               list_move(&cursor_page->lru, dst);
-                               isolated_pages = hpage_nr_pages(cursor_page);
-                               nr_taken += isolated_pages;
-                               nr_lumpy_taken += isolated_pages;
-                               if (PageDirty(cursor_page))
-                                       nr_lumpy_dirty += isolated_pages;
-                               scan++;
-                               pfn += isolated_pages - 1;
-                       } else {
-                               /*
-                                * Check if the page is freed already.
-                                *
-                                * We can't use page_count() as that
-                                * requires compound_head and we don't
-                                * have a pin on the page here. If a
-                                * page is tail, we may or may not
-                                * have isolated the head, so assume
-                                * it's not free, it'd be tricky to
-                                * track the head status without a
-                                * page pin.
-                                */
-                               if (!PageTail(cursor_page) &&
-                                   !atomic_read(&cursor_page->_count))
-                                       continue;
-                               break;
-                       }
-               }
-
-               /* If we break out of the loop above, lumpy reclaim failed */
-               if (pfn < end_pfn)
-                       nr_lumpy_failed++;
        }
 
        *nr_scanned = scan;
@@ -1278,7 +1109,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
        trace_mm_vmscan_lru_isolate(sc->order,
                        nr_to_scan, scan,
                        nr_taken,
-                       nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
                        mode, file);
        return nr_taken;
 }
@@ -1453,47 +1283,6 @@ update_isolated_counts(struct mem_cgroup_zone *mz,
        preempt_enable();
 }
 
-/*
- * Returns true if a direct reclaim should wait on pages under writeback.
- *
- * If we are direct reclaiming for contiguous pages and we do not reclaim
- * everything in the list, try again and wait for writeback IO to complete.
- * This will stall high-order allocations noticeably. Only do that when really
- * need to free the pages under high memory pressure.
- */
-static inline bool should_reclaim_stall(unsigned long nr_taken,
-                                       unsigned long nr_freed,
-                                       int priority,
-                                       struct scan_control *sc)
-{
-       int lumpy_stall_priority;
-
-       /* kswapd should not stall on sync IO */
-       if (current_is_kswapd())
-               return false;
-
-       /* Only stall on lumpy reclaim */
-       if (sc->reclaim_mode & RECLAIM_MODE_SINGLE)
-               return false;
-
-       /* If we have reclaimed everything on the isolated list, no stall */
-       if (nr_freed == nr_taken)
-               return false;
-
-       /*
-        * For high-order allocations, there are two stall thresholds.
-        * High-cost allocations stall immediately where as lower
-        * order allocations such as stacks require the scanning
-        * priority to be much higher before stalling.
-        */
-       if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-               lumpy_stall_priority = DEF_PRIORITY;
-       else
-               lumpy_stall_priority = DEF_PRIORITY / 3;
-
-       return priority <= lumpy_stall_priority;
-}
-
 /*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
@@ -1522,10 +1311,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
                        return SWAP_CLUSTER_MAX;
        }
 
-       set_reclaim_mode(priority, sc, false);
-       if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
-               isolate_mode |= ISOLATE_ACTIVE;
-
        lru_add_drain();
 
        if (!sc->may_unmap)
@@ -1556,13 +1341,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
        nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
                                                &nr_dirty, &nr_writeback);
 
-       /* Check if we should syncronously wait for writeback */
-       if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
-               set_reclaim_mode(priority, sc, true);
-               nr_reclaimed += shrink_page_list(&page_list, mz, sc,
-                                       priority, &nr_dirty, &nr_writeback);
-       }
-
        spin_lock_irq(&zone->lru_lock);
 
        reclaim_stat->recent_scanned[0] += nr_anon;
@@ -1616,7 +1394,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
                zone_idx(zone),
                nr_scanned, nr_reclaimed,
                priority,
-               trace_shrink_flags(file, sc->reclaim_mode));
+               trace_shrink_flags(file));
        return nr_reclaimed;
 }
 
@@ -1695,8 +1473,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
        lru_add_drain();
 
-       reset_reclaim_mode(sc);
-
        if (!sc->may_unmap)
                isolate_mode |= ISOLATE_UNMAPPED;
        if (!sc->may_writepage)
@@ -2009,23 +1785,35 @@ out:
        }
 }
 
+/* Use reclaim/compaction for costly allocs or under memory pressure */
+static bool in_reclaim_compaction(int priority, struct scan_control *sc)
+{
+       if (COMPACTION_BUILD && sc->order &&
+                       (sc->order > PAGE_ALLOC_COSTLY_ORDER ||
+                        priority < DEF_PRIORITY - 2))
+               return true;
+
+       return false;
+}
+
 /*
- * Reclaim/compaction depends on a number of pages being freed. To avoid
- * disruption to the system, a small number of order-0 pages continue to be
- * rotated and reclaimed in the normal fashion. However, by the time we get
- * back to the allocator and call try_to_compact_zone(), we ensure that
- * there are enough free pages for it to be likely successful
+ * Reclaim/compaction is used for high-order allocation requests. It reclaims
+ * order-0 pages before compacting the zone. should_continue_reclaim() returns
+ * true if more pages should be reclaimed such that when the page allocator
+ * calls try_to_compact_zone() that it will have enough free pages to succeed.
+ * It will give up earlier than that if there is difficulty reclaiming pages.
  */
 static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
                                        unsigned long nr_reclaimed,
                                        unsigned long nr_scanned,
+                                       int priority,
                                        struct scan_control *sc)
 {
        unsigned long pages_for_compaction;
        unsigned long inactive_lru_pages;
 
        /* If not in reclaim/compaction mode, stop */
-       if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION))
+       if (!in_reclaim_compaction(priority, sc))
                return false;
 
        /* Consider stopping depending on scan and reclaim activity */
@@ -2127,7 +1915,8 @@ restart:
 
        /* reclaim/compaction might need reclaim to continue */
        if (should_continue_reclaim(mz, nr_reclaimed,
-                                       sc->nr_scanned - nr_scanned, sc))
+                                       sc->nr_scanned - nr_scanned,
+                                       priority, sc))
                goto restart;
 
        throttle_vm_writeout(sc->gfp_mask);
@@ -2352,8 +2141,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
        for (priority = DEF_PRIORITY; priority >= 0; priority--) {
                sc->nr_scanned = 0;
-               if (!priority)
-                       disable_swap_token(sc->target_mem_cgroup);
                aborted_reclaim = shrink_zones(priority, zonelist, sc);
 
                /*
@@ -2704,10 +2491,6 @@ loop_again:
                unsigned long lru_pages = 0;
                int has_under_min_watermark_zone = 0;
 
-               /* The swap token gets in the way of swapout... */
-               if (!priority)
-                       disable_swap_token(NULL);
-
                all_zones_ok = 1;
                balanced = 0;
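
A note on the new predicate added above: in_reclaim_compaction() replaces the
RECLAIM_MODE_COMPACTION flag by recomputing the same condition from the scan
control whenever it is needed.  The standalone user-space sketch below mirrors
that decision; it is an illustration only, with COMPACTION_BUILD assumed true,
DEF_PRIORITY (12) and PAGE_ALLOC_COSTLY_ORDER (3) hard-coded to the values
used by kernels of this era, and the struct scan_control argument flattened to
a plain order parameter.

/* Illustrative sketch only -- not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define DEF_PRIORITY		12	/* initial scan priority */
#define PAGE_ALLOC_COSTLY_ORDER	3	/* orders above this are "costly" */

/* Assume a kernel built with CONFIG_COMPACTION for this sketch. */
static const bool compaction_build = true;

static bool in_reclaim_compaction(int priority, int order)
{
	/*
	 * Costly allocations use reclaim/compaction immediately; smaller
	 * high-order allocations do so only once memory pressure has pushed
	 * the scan priority below DEF_PRIORITY - 2.
	 */
	if (compaction_build && order &&
	    (order > PAGE_ALLOC_COSTLY_ORDER || priority < DEF_PRIORITY - 2))
		return true;

	return false;
}

int main(void)
{
	struct { int order, priority; } cases[] = {
		{ 0, DEF_PRIORITY },		/* order-0: never */
		{ 2, DEF_PRIORITY },		/* small order, light pressure */
		{ 2, DEF_PRIORITY - 3 },	/* small order, heavy pressure */
		{ 4, DEF_PRIORITY },		/* costly order: always */
	};
	unsigned int i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		printf("order=%d priority=%2d -> %s\n",
		       cases[i].order, cases[i].priority,
		       in_reclaim_compaction(cases[i].priority, cases[i].order)
		       ? "reclaim/compaction" : "plain order-0 reclaim");

	return 0;
}

Run, this prints which (order, priority) pairs select reclaim/compaction:
costly orders (above 3) always do, smaller high-order requests only once the
scan priority has dropped below DEF_PRIORITY - 2, and order-0 never does --
the same cases set_reclaim_mode() used to encode in sc->reclaim_mode.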