mm: vmscan: remove reclaim_mode_t

[cascardo/linux.git] / mm / vmscan.c
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 1a51868..68e5819 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -53,24 +53,6 @@
  #define CREATE_TRACE_POINTS
  #include <trace/events/vmscan.h>
  
-/*
- * reclaim_mode determines how the inactive list is shrunk
- * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages
- * RECLAIM_MODE_ASYNC:  Do not block
- * RECLAIM_MODE_SYNC:   Allow blocking e.g. call wait_on_page_writeback
- * RECLAIM_MODE_LUMPYRECLAIM: For high-order allocations, take a reference
- *                     page from the LRU and reclaim all pages within a
- *                     naturally aligned range
- * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of
- *                     order-0 pages and then compact the zone
- */
-typedef unsigned __bitwise__ reclaim_mode_t;
-#define RECLAIM_MODE_SINGLE            ((__force reclaim_mode_t)0x01u)
-#define RECLAIM_MODE_ASYNC             ((__force reclaim_mode_t)0x02u)
-#define RECLAIM_MODE_SYNC              ((__force reclaim_mode_t)0x04u)
-#define RECLAIM_MODE_LUMPYRECLAIM      ((__force reclaim_mode_t)0x08u)
-#define RECLAIM_MODE_COMPACTION                ((__force reclaim_mode_t)0x10u)
-
  struct scan_control {
         /* Incremented by the number of inactive pages that were scanned */
         unsigned long nr_scanned;
@@ -96,12 +78,6 @@ struct scan_control {
  
         int order;
  
-       /*
-        * Intend to reclaim enough continuous memory rather than reclaim
-        * enough amount of memory. i.e, mode for high order allocation.
-        */
-       reclaim_mode_t reclaim_mode;
-
         /*
          * The memory cgroup that hit its limit and as a result is the
          * primary target of this reclaim invocation.
@@ -364,39 +340,6 @@ out:
         return ret;
  }
  
-static void set_reclaim_mode(int priority, struct scan_control *sc,
-                                  bool sync)
-{
-       reclaim_mode_t syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC;
-
-       /*
-        * Initially assume we are entering either lumpy reclaim or
-        * reclaim/compaction.Depending on the order, we will either set the
-        * sync mode or just reclaim order-0 pages later.
-        */
-       if (COMPACTION_BUILD)
-               sc->reclaim_mode = RECLAIM_MODE_COMPACTION;
-       else
-               sc->reclaim_mode = RECLAIM_MODE_LUMPYRECLAIM;
-
-       /*
-        * Avoid using lumpy reclaim or reclaim/compaction if possible by
-        * restricting when its set to either costly allocations or when
-        * under memory pressure
-        */
-       if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-               sc->reclaim_mode |= syncmode;
-       else if (sc->order && priority < DEF_PRIORITY - 2)
-               sc->reclaim_mode |= syncmode;
-       else
-               sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
-}
-
-static void reset_reclaim_mode(struct scan_control *sc)
-{
-       sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
-}
-
  static inline int is_page_cache_freeable(struct page *page)
  {
         /*
@@ -416,10 +359,6 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
                 return 1;
         if (bdi == current->backing_dev_info)
                 return 1;
-
-       /* lumpy reclaim for hugepage often need a lot of write */
-       if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-               return 1;
         return 0;
  }
  
@@ -523,8 +462,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
                         /* synchronous write or broken a_ops? */
                         ClearPageReclaim(page);
                 }
-               trace_mm_vmscan_writepage(page,
-                       trace_reclaim_flags(page, sc->reclaim_mode));
+               trace_mm_vmscan_writepage(page, trace_reclaim_flags(page));
                 inc_zone_page_state(page, NR_VMSCAN_WRITE);
                 return PAGE_SUCCESS;
         }
@@ -710,10 +648,6 @@ static enum page_references page_check_references(struct page *page,
         referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags);
         referenced_page = TestClearPageReferenced(page);
  
-       /* Lumpy reclaim - ignore references */
-       if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
-               return PAGEREF_RECLAIM;
-
         /*
          * Mlock lost the isolation race with us.  Let try_to_unmap()
          * move the page to the unevictable list.
@@ -813,19 +747,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
  
                 if (PageWriteback(page)) {
                         nr_writeback++;
-                       /*
-                        * Synchronous reclaim cannot queue pages for
-                        * writeback due to the possibility of stack overflow
-                        * but if it encounters a page under writeback, wait
-                        * for the IO to complete.
-                        */
-                       if ((sc->reclaim_mode & RECLAIM_MODE_SYNC) &&
-                           may_enter_fs)
-                               wait_on_page_writeback(page);
-                       else {
-                               unlock_page(page);
-                               goto keep_lumpy;
-                       }
+                       unlock_page(page);
+                       goto keep;
                 }
  
                 references = page_check_references(page, mz, sc);
@@ -908,7 +831,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                 goto activate_locked;
                         case PAGE_SUCCESS:
                                 if (PageWriteback(page))
-                                       goto keep_lumpy;
+                                       goto keep;
                                 if (PageDirty(page))
                                         goto keep;
  
@@ -994,7 +917,6 @@ cull_mlocked:
                         try_to_free_swap(page);
                 unlock_page(page);
                 putback_lru_page(page);
-               reset_reclaim_mode(sc);
                 continue;
  
  activate_locked:
@@ -1007,8 +929,6 @@ activate_locked:
  keep_locked:
                 unlock_page(page);
  keep:
-               reset_reclaim_mode(sc);
-keep_lumpy:
                 list_add(&page->lru, &ret_pages);
                 VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
         }
@@ -1064,11 +984,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
         if (!all_lru_mode && !!page_is_file_cache(page) != file)
                 return ret;
  
-       /*
-        * When this function is being called for lumpy reclaim, we
-        * initially look into all LRU pages, active, inactive and
-        * unevictable; only give shrink_page_list evictable pages.
-        */
+       /* Do not give back unevictable pages for compaction */
         if (PageUnevictable(page))
                 return ret;
  
@@ -1153,9 +1069,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
         struct lruvec *lruvec;
         struct list_head *src;
         unsigned long nr_taken = 0;
-       unsigned long nr_lumpy_taken = 0;
-       unsigned long nr_lumpy_dirty = 0;
-       unsigned long nr_lumpy_failed = 0;
         unsigned long scan;
         int lru = LRU_BASE;
  
@@ -1168,10 +1081,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
  
         for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
                 struct page *page;
-               unsigned long pfn;
-               unsigned long end_pfn;
-               unsigned long page_pfn;
-               int zone_id;
  
                 page = lru_to_page(src);
                 prefetchw_prev_lru_page(page, src, flags);
@@ -1193,84 +1102,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                 default:
                         BUG();
                 }
-
-               if (!sc->order || !(sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM))
-                       continue;
-
-               /*
-                * Attempt to take all pages in the order aligned region
-                * surrounding the tag page.  Only take those pages of
-                * the same active state as that tag page.  We may safely
-                * round the target page pfn down to the requested order
-                * as the mem_map is guaranteed valid out to MAX_ORDER,
-                * where that page is in a different zone we will detect
-                * it from its zone id and abort this block scan.
-                */
-               zone_id = page_zone_id(page);
-               page_pfn = page_to_pfn(page);
-               pfn = page_pfn & ~((1 << sc->order) - 1);
-               end_pfn = pfn + (1 << sc->order);
-               for (; pfn < end_pfn; pfn++) {
-                       struct page *cursor_page;
-
-                       /* The target page is in the block, ignore it. */
-                       if (unlikely(pfn == page_pfn))
-                               continue;
-
-                       /* Avoid holes within the zone. */
-                       if (unlikely(!pfn_valid_within(pfn)))
-                               break;
-
-                       cursor_page = pfn_to_page(pfn);
-
-                       /* Check that we have not crossed a zone boundary. */
-                       if (unlikely(page_zone_id(cursor_page) != zone_id))
-                               break;
-
-                       /*
-                        * If we don't have enough swap space, reclaiming of
-                        * anon page which don't already have a swap slot is
-                        * pointless.
-                        */
-                       if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) &&
-                           !PageSwapCache(cursor_page))
-                               break;
-
-                       if (__isolate_lru_page(cursor_page, mode, file) == 0) {
-                               unsigned int isolated_pages;
-
-                               mem_cgroup_lru_del(cursor_page);
-                               list_move(&cursor_page->lru, dst);
-                               isolated_pages = hpage_nr_pages(cursor_page);
-                               nr_taken += isolated_pages;
-                               nr_lumpy_taken += isolated_pages;
-                               if (PageDirty(cursor_page))
-                                       nr_lumpy_dirty += isolated_pages;
-                               scan++;
-                               pfn += isolated_pages - 1;
-                       } else {
-                               /*
-                                * Check if the page is freed already.
-                                *
-                                * We can't use page_count() as that
-                                * requires compound_head and we don't
-                                * have a pin on the page here. If a
-                                * page is tail, we may or may not
-                                * have isolated the head, so assume
-                                * it's not free, it'd be tricky to
-                                * track the head status without a
-                                * page pin.
-                                */
-                               if (!PageTail(cursor_page) &&
-                                   !atomic_read(&cursor_page->_count))
-                                       continue;
-                               break;
-                       }
-               }
-
-               /* If we break out of the loop above, lumpy reclaim failed */
-               if (pfn < end_pfn)
-                       nr_lumpy_failed++;
         }
  
         *nr_scanned = scan;
@@ -1278,7 +1109,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
         trace_mm_vmscan_lru_isolate(sc->order,
                         nr_to_scan, scan,
                         nr_taken,
-                       nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
                         mode, file);
         return nr_taken;
  }
@@ -1453,47 +1283,6 @@ update_isolated_counts(struct mem_cgroup_zone *mz,
         preempt_enable();
  }
  
-/*
- * Returns true if a direct reclaim should wait on pages under writeback.
- *
- * If we are direct reclaiming for contiguous pages and we do not reclaim
- * everything in the list, try again and wait for writeback IO to complete.
- * This will stall high-order allocations noticeably. Only do that when really
- * need to free the pages under high memory pressure.
- */
-static inline bool should_reclaim_stall(unsigned long nr_taken,
-                                       unsigned long nr_freed,
-                                       int priority,
-                                       struct scan_control *sc)
-{
-       int lumpy_stall_priority;
-
-       /* kswapd should not stall on sync IO */
-       if (current_is_kswapd())
-               return false;
-
-       /* Only stall on lumpy reclaim */
-       if (sc->reclaim_mode & RECLAIM_MODE_SINGLE)
-               return false;
-
-       /* If we have reclaimed everything on the isolated list, no stall */
-       if (nr_freed == nr_taken)
-               return false;
-
-       /*
-        * For high-order allocations, there are two stall thresholds.
-        * High-cost allocations stall immediately where as lower
-        * order allocations such as stacks require the scanning
-        * priority to be much higher before stalling.
-        */
-       if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-               lumpy_stall_priority = DEF_PRIORITY;
-       else
-               lumpy_stall_priority = DEF_PRIORITY / 3;
-
-       return priority <= lumpy_stall_priority;
-}
-
  /*
   * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
   * of reclaimed pages
@@ -1522,10 +1311,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
                         return SWAP_CLUSTER_MAX;
         }
  
-       set_reclaim_mode(priority, sc, false);
-       if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
-               isolate_mode |= ISOLATE_ACTIVE;
-
         lru_add_drain();
  
         if (!sc->may_unmap)
@@ -1556,21 +1341,19 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
         nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
                                                 &nr_dirty, &nr_writeback);
  
-       /* Check if we should syncronously wait for writeback */
-       if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
-               set_reclaim_mode(priority, sc, true);
-               nr_reclaimed += shrink_page_list(&page_list, mz, sc,
-                                       priority, &nr_dirty, &nr_writeback);
-       }
-
         spin_lock_irq(&zone->lru_lock);
  
         reclaim_stat->recent_scanned[0] += nr_anon;
         reclaim_stat->recent_scanned[1] += nr_file;
  
-       if (current_is_kswapd())
-               __count_vm_events(KSWAPD_STEAL, nr_reclaimed);
-       __count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
+       if (global_reclaim(sc)) {
+               if (current_is_kswapd())
+                       __count_zone_vm_events(PGSTEAL_KSWAPD, zone,
+                                              nr_reclaimed);
+               else
+                       __count_zone_vm_events(PGSTEAL_DIRECT, zone,
+                                              nr_reclaimed);
+       }
  
         putback_inactive_pages(mz, &page_list);
  
@@ -1611,7 +1394,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
                 zone_idx(zone),
                 nr_scanned, nr_reclaimed,
                 priority,
-               trace_shrink_flags(file, sc->reclaim_mode));
+               trace_shrink_flags(file));
         return nr_reclaimed;
  }
  
@@ -1690,8 +1473,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
  
         lru_add_drain();
  
-       reset_reclaim_mode(sc);
-
         if (!sc->may_unmap)
                 isolate_mode |= ISOLATE_UNMAPPED;
         if (!sc->may_writepage)
@@ -2004,23 +1785,35 @@ out:
         }
  }
  
+/*
+ * Reclaim/compaction applies to high-order allocs: costly or under pressure.
+ */
+static bool in_reclaim_compaction(int priority, struct scan_control *sc)
+{
+       if (!COMPACTION_BUILD || !sc->order)
+               return false;
+       return sc->order > PAGE_ALLOC_COSTLY_ORDER ||
+              priority < DEF_PRIORITY - 2;
+}
+
  /*
- * Reclaim/compaction depends on a number of pages being freed. To avoid
- * disruption to the system, a small number of order-0 pages continue to be
- * rotated and reclaimed in the normal fashion. However, by the time we get
- * back to the allocator and call try_to_compact_zone(), we ensure that
- * there are enough free pages for it to be likely successful
+ * Reclaim/compaction is used for high-order allocation requests. It reclaims
+ * order-0 pages before compacting the zone. should_continue_reclaim() returns
+ * true if more pages should be reclaimed so that, when the page allocator
+ * calls try_to_compact_zone(), it will have enough free pages to succeed.
+ * It will give up earlier than that if there is difficulty reclaiming pages.
   */
  static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
                                         unsigned long nr_reclaimed,
                                         unsigned long nr_scanned,
+                                       int priority,
                                         struct scan_control *sc)
  {
         unsigned long pages_for_compaction;
         unsigned long inactive_lru_pages;
  
         /* If not in reclaim/compaction mode, stop */
-       if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION))
+       if (!in_reclaim_compaction(priority, sc))
                 return false;
  
         /* Consider stopping depending on scan and reclaim activity */
@@ -2122,7 +1915,8 @@ restart:
  
         /* reclaim/compaction might need reclaim to continue */
         if (should_continue_reclaim(mz, nr_reclaimed,
-                                       sc->nr_scanned - nr_scanned, sc))
+                                       sc->nr_scanned - nr_scanned,
+                                       priority, sc))
                 goto restart;
  
         throttle_vm_writeout(sc->gfp_mask);
@@ -2347,8 +2141,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
  
         for (priority = DEF_PRIORITY; priority >= 0; priority--) {
                 sc->nr_scanned = 0;
-               if (!priority)
-                       disable_swap_token(sc->target_mem_cgroup);
                 aborted_reclaim = shrink_zones(priority, zonelist, sc);
  
                 /*
@@ -2699,10 +2491,6 @@ loop_again:
                 unsigned long lru_pages = 0;
                 int has_under_min_watermark_zone = 0;
  
-               /* The swap token gets in the way of swapout... */
-               if (!priority)
-                       disable_swap_token(NULL);
-
                 all_zones_ok = 1;
                 balanced = 0;