net/mlx5e: Added self loopback prevention
[cascardo/linux.git] / mm / migrate.c
index 6d7774e..7890d0b 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/mempolicy.h>
 #include <linux/vmalloc.h>
 #include <linux/security.h>
+#include <linux/backing-dev.h>
 #include <linux/syscalls.h>
 #include <linux/hugetlb.h>
 #include <linux/hugetlb_cgroup.h>
@@ -313,6 +314,8 @@ int migrate_page_move_mapping(struct address_space *mapping,
                struct buffer_head *head, enum migrate_mode mode,
                int extra_count)
 {
+       struct zone *oldzone, *newzone;
+       int dirty;
        int expected_count = 1 + extra_count;
        void **pslot;
 
@@ -320,9 +323,20 @@ int migrate_page_move_mapping(struct address_space *mapping,
                /* Anonymous page without mapping */
                if (page_count(page) != expected_count)
                        return -EAGAIN;
+
+               /* No turning back from here */
+               set_page_memcg(newpage, page_memcg(page));
+               newpage->index = page->index;
+               newpage->mapping = page->mapping;
+               if (PageSwapBacked(page))
+                       SetPageSwapBacked(newpage);
+
                return MIGRATEPAGE_SUCCESS;
        }
 
+       oldzone = page_zone(page);
+       newzone = page_zone(newpage);
+
        spin_lock_irq(&mapping->tree_lock);
 
        pslot = radix_tree_lookup_slot(&mapping->page_tree,
@@ -355,14 +369,28 @@ int migrate_page_move_mapping(struct address_space *mapping,
        }
 
        /*
-        * Now we know that no one else is looking at the page.
+        * Now we know that no one else is looking at the page:
+        * no turning back from here.
         */
+       set_page_memcg(newpage, page_memcg(page));
+       newpage->index = page->index;
+       newpage->mapping = page->mapping;
+       if (PageSwapBacked(page))
+               SetPageSwapBacked(newpage);
+
        get_page(newpage);      /* add cache reference */
        if (PageSwapCache(page)) {
                SetPageSwapCache(newpage);
                set_page_private(newpage, page_private(page));
        }
 
+       /* Move dirty while page refs frozen and newpage not yet exposed */
+       dirty = PageDirty(page);
+       if (dirty) {
+               ClearPageDirty(page);
+               SetPageDirty(newpage);
+       }
+
        radix_tree_replace_slot(pslot, newpage);
 
        /*
@@ -372,6 +400,9 @@ int migrate_page_move_mapping(struct address_space *mapping,
         */
        page_unfreeze_refs(page, expected_count - 1);
 
+       spin_unlock(&mapping->tree_lock);
+       /* Leave irq disabled to prevent preemption while updating stats */
+
        /*
         * If moved to a different zone then also account
         * the page for that zone. Other VM counters will be
@@ -382,13 +413,19 @@ int migrate_page_move_mapping(struct address_space *mapping,
         * via NR_FILE_PAGES and NR_ANON_PAGES if they
         * are mapped to swap space.
         */
-       __dec_zone_page_state(page, NR_FILE_PAGES);
-       __inc_zone_page_state(newpage, NR_FILE_PAGES);
-       if (!PageSwapCache(page) && PageSwapBacked(page)) {
-               __dec_zone_page_state(page, NR_SHMEM);
-               __inc_zone_page_state(newpage, NR_SHMEM);
+       if (newzone != oldzone) {
+               __dec_zone_state(oldzone, NR_FILE_PAGES);
+               __inc_zone_state(newzone, NR_FILE_PAGES);
+               if (PageSwapBacked(page) && !PageSwapCache(page)) {
+                       __dec_zone_state(oldzone, NR_SHMEM);
+                       __inc_zone_state(newzone, NR_SHMEM);
+               }
+               if (dirty && mapping_cap_account_dirty(mapping)) {
+                       __dec_zone_state(oldzone, NR_FILE_DIRTY);
+                       __inc_zone_state(newzone, NR_FILE_DIRTY);
+               }
        }
-       spin_unlock_irq(&mapping->tree_lock);
+       local_irq_enable();
 
        return MIGRATEPAGE_SUCCESS;
 }
@@ -403,12 +440,6 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
        int expected_count;
        void **pslot;
 
-       if (!mapping) {
-               if (page_count(page) != 1)
-                       return -EAGAIN;
-               return MIGRATEPAGE_SUCCESS;
-       }
-
        spin_lock_irq(&mapping->tree_lock);
 
        pslot = radix_tree_lookup_slot(&mapping->page_tree,
@@ -426,6 +457,9 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
                return -EAGAIN;
        }
 
+       set_page_memcg(newpage, page_memcg(page));
+       newpage->index = page->index;
+       newpage->mapping = page->mapping;
        get_page(newpage);
 
        radix_tree_replace_slot(pslot, newpage);
@@ -512,20 +546,9 @@ void migrate_page_copy(struct page *newpage, struct page *page)
        if (PageMappedToDisk(page))
                SetPageMappedToDisk(newpage);
 
-       if (PageDirty(page)) {
-               clear_page_dirty_for_io(page);
-               /*
-                * Want to mark the page and the radix tree as dirty, and
-                * redo the accounting that clear_page_dirty_for_io undid,
-                * but we can't use set_page_dirty because that function
-                * is actually a signal that all of the page has become dirty.
-                * Whereas only part of our page may be dirty.
-                */
-               if (PageSwapBacked(page))
-                       SetPageDirty(newpage);
-               else
-                       __set_page_dirty_nobuffers(newpage);
-       }
+       /* Move dirty on pages not done by migrate_page_move_mapping() */
+       if (PageDirty(page))
+               SetPageDirty(newpage);
 
        if (page_is_young(page))
                set_page_young(newpage);
@@ -722,7 +745,7 @@ static int fallback_migrate_page(struct address_space *mapping,
  *  MIGRATEPAGE_SUCCESS - success
  */
 static int move_to_new_page(struct page *newpage, struct page *page,
-                               int page_was_mapped, enum migrate_mode mode)
+                               enum migrate_mode mode)
 {
        struct address_space *mapping;
        int rc;
@@ -730,21 +753,6 @@ static int move_to_new_page(struct page *newpage, struct page *page,
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
 
-       /* Prepare mapping for the new page.*/
-       newpage->index = page->index;
-       newpage->mapping = page->mapping;
-       if (PageSwapBacked(page))
-               SetPageSwapBacked(newpage);
-
-       /*
-        * Indirectly called below, migrate_page_copy() copies PG_dirty and thus
-        * needs newpage's memcg set to transfer memcg dirty page accounting.
-        * So perform memcg migration in two steps:
-        * 1. set newpage->mem_cgroup (here)
-        * 2. clear page->mem_cgroup (below)
-        */
-       set_page_memcg(newpage, page_memcg(page));
-
        mapping = page_mapping(page);
        if (!mapping)
                rc = migrate_page(mapping, newpage, page, mode);
@@ -755,19 +763,18 @@ static int move_to_new_page(struct page *newpage, struct page *page,
                 * space which also has its own migratepage callback. This
                 * is the most common path for page migration.
                 */
-               rc = mapping->a_ops->migratepage(mapping,
-                                               newpage, page, mode);
+               rc = mapping->a_ops->migratepage(mapping, newpage, page, mode);
        else
                rc = fallback_migrate_page(mapping, newpage, page, mode);
 
-       if (rc != MIGRATEPAGE_SUCCESS) {
-               set_page_memcg(newpage, NULL);
-               newpage->mapping = NULL;
-       } else {
+       /*
+        * When successful, old pagecache page->mapping must be cleared before
+        * page is freed; but stats require that PageAnon be left as PageAnon.
+        */
+       if (rc == MIGRATEPAGE_SUCCESS) {
                set_page_memcg(page, NULL);
-               if (page_was_mapped)
-                       remove_migration_ptes(page, newpage);
-               page->mapping = NULL;
+               if (!PageAnon(page))
+                       page->mapping = NULL;
        }
        return rc;
 }
@@ -817,6 +824,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
                        goto out_unlock;
                wait_on_page_writeback(page);
        }
+
        /*
         * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
         * we cannot notice that anon_vma is freed while we migrates a page.
@@ -824,34 +832,15 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
         * of migration. File cache pages are no problem because of page_lock()
         * File Caches may use write_page() or lock_page() in migration, then,
         * just care Anon page here.
+        *
+        * Only page_get_anon_vma() understands the subtleties of
+        * getting a hold on an anon_vma from outside one of its mms.
+        * But if we cannot get anon_vma, then we won't need it anyway,
+        * because that implies that the anon page is no longer mapped
+        * (and cannot be remapped so long as we hold the page lock).
         */
-       if (PageAnon(page) && !PageKsm(page)) {
-               /*
-                * Only page_lock_anon_vma_read() understands the subtleties of
-                * getting a hold on an anon_vma from outside one of its mms.
-                */
+       if (PageAnon(page) && !PageKsm(page))
                anon_vma = page_get_anon_vma(page);
-               if (anon_vma) {
-                       /*
-                        * Anon page
-                        */
-               } else if (PageSwapCache(page)) {
-                       /*
-                        * We cannot be sure that the anon_vma of an unmapped
-                        * swapcache page is safe to use because we don't
-                        * know in advance if the VMA that this page belonged
-                        * to still exists. If the VMA and others sharing the
-                        * data have been freed, then the anon_vma could
-                        * already be invalid.
-                        *
-                        * To avoid this possibility, swapcache pages get
-                        * migrated but are not remapped when migration
-                        * completes
-                        */
-               } else {
-                       goto out_unlock;
-               }
-       }
 
        /*
         * Block others from accessing the new page when we get around to
@@ -896,16 +885,19 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
                }
        } else if (page_mapped(page)) {
                /* Establish migration ptes */
+               VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
+                               page);
                try_to_unmap(page,
                        TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
                page_was_mapped = 1;
        }
 
        if (!page_mapped(page))
-               rc = move_to_new_page(newpage, page, page_was_mapped, mode);
+               rc = move_to_new_page(newpage, page, mode);
 
-       if (rc && page_was_mapped)
-               remove_migration_ptes(page, page);
+       if (page_was_mapped)
+               remove_migration_ptes(page,
+                       rc == MIGRATEPAGE_SUCCESS ? newpage : page);
 
 out_unlock_both:
        unlock_page(newpage);
@@ -984,10 +976,9 @@ out:
         * it.  Otherwise, putback_lru_page() will drop the reference grabbed
         * during isolation.
         */
-       if (put_new_page) {
-               ClearPageSwapBacked(newpage);
+       if (put_new_page)
                put_new_page(newpage, private);
-       } else if (unlikely(__is_movable_balloon_page(newpage))) {
+       else if (unlikely(__is_movable_balloon_page(newpage))) {
                /* drop our reference, page already in the balloon */
                put_page(newpage);
        } else
@@ -1066,10 +1057,11 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
        }
 
        if (!page_mapped(hpage))
-               rc = move_to_new_page(new_hpage, hpage, page_was_mapped, mode);
+               rc = move_to_new_page(new_hpage, hpage, mode);
 
-       if (rc != MIGRATEPAGE_SUCCESS && page_was_mapped)
-               remove_migration_ptes(hpage, hpage);
+       if (page_was_mapped)
+               remove_migration_ptes(hpage,
+                       rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage);
 
        unlock_page(new_hpage);
 
@@ -1586,7 +1578,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
                                         (GFP_HIGHUSER_MOVABLE |
                                          __GFP_THISNODE | __GFP_NOMEMALLOC |
                                          __GFP_NORETRY | __GFP_NOWARN) &
-                                        ~GFP_IOFS, 0);
+                                        ~(__GFP_IO | __GFP_FS), 0);
 
        return newpage;
 }
@@ -1760,7 +1752,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
                goto out_dropref;
 
        new_page = alloc_pages_node(node,
-               (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT,
+               (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
                HPAGE_PMD_ORDER);
        if (!new_page)
                goto out_fail;