rmap: add argument to charge compound page
author Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Sat, 16 Jan 2016 00:52:16 +0000 (16:52 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 16 Jan 2016 01:56:32 +0000 (17:56 -0800)
We're going to allow mapping of individual 4k pages of a THP compound
page.  This means we can no longer rely on a PageTransHuge() check to
decide whether to map/unmap a small page or a THP.

The patch adds a new argument to the rmap functions to indicate whether
we want to operate on the whole compound page or only on a small page.

[n-horiguchi@ah.jp.nec.com: fix mapcount mismatch in hugepage migration]
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Jerome Marchand <jmarchan@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Steve Capper <steve.capper@linaro.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/rmap.h
kernel/events/uprobes.c
mm/huge_memory.c
mm/hugetlb.c
mm/ksm.c
mm/memory.c
mm/migrate.c
mm/rmap.c
mm/swapfile.c
mm/userfaultfd.c

index 29446ae..038b6e7 100644 (file)
@@ -161,16 +161,22 @@ static inline void anon_vma_merge(struct vm_area_struct *vma,
 
 struct anon_vma *page_get_anon_vma(struct page *page);
 
+/* bitflags for do_page_add_anon_rmap() */
+#define RMAP_EXCLUSIVE 0x01
+#define RMAP_COMPOUND 0x02
+
 /*
  * rmap interfaces called when adding or removing pte of page
  */
 void page_move_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
-void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
+void page_add_anon_rmap(struct page *, struct vm_area_struct *,
+               unsigned long, bool);
 void do_page_add_anon_rmap(struct page *, struct vm_area_struct *,
                           unsigned long, int);
-void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
+void page_add_new_anon_rmap(struct page *, struct vm_area_struct *,
+               unsigned long, bool);
 void page_add_file_rmap(struct page *);
-void page_remove_rmap(struct page *);
+void page_remove_rmap(struct page *, bool);
 
 void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
                            unsigned long);
index bb06691..060c7a0 100644 (file)
@@ -175,7 +175,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
                goto unlock;
 
        get_page(kpage);
-       page_add_new_anon_rmap(kpage, vma, addr);
+       page_add_new_anon_rmap(kpage, vma, addr, false);
        mem_cgroup_commit_charge(kpage, memcg, false);
        lru_cache_add_active_or_unevictable(kpage, vma);
 
@@ -188,7 +188,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
        ptep_clear_flush_notify(vma, addr, ptep);
        set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
 
-       page_remove_rmap(page);
+       page_remove_rmap(page, false);
        if (!page_mapped(page))
                try_to_free_swap(page);
        pte_unmap_unlock(ptep, ptl);
index 370d44a..b7669cf 100644 (file)
@@ -797,7 +797,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 
                entry = mk_huge_pmd(page, vma->vm_page_prot);
                entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-               page_add_new_anon_rmap(page, vma, haddr);
+               page_add_new_anon_rmap(page, vma, haddr, true);
                mem_cgroup_commit_charge(page, memcg, false);
                lru_cache_add_active_or_unevictable(page, vma);
                pgtable_trans_huge_deposit(mm, pmd, pgtable);
@@ -1139,7 +1139,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
                memcg = (void *)page_private(pages[i]);
                set_page_private(pages[i], 0);
-               page_add_new_anon_rmap(pages[i], vma, haddr);
+               page_add_new_anon_rmap(pages[i], vma, haddr, false);
                mem_cgroup_commit_charge(pages[i], memcg, false);
                lru_cache_add_active_or_unevictable(pages[i], vma);
                pte = pte_offset_map(&_pmd, haddr);
@@ -1151,7 +1151,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 
        smp_wmb(); /* make pte visible before pmd */
        pmd_populate(mm, pmd, pgtable);
-       page_remove_rmap(page);
+       page_remove_rmap(page, true);
        spin_unlock(ptl);
 
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
@@ -1271,7 +1271,7 @@ alloc:
                entry = mk_huge_pmd(new_page, vma->vm_page_prot);
                entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
                pmdp_huge_clear_flush_notify(vma, haddr, pmd);
-               page_add_new_anon_rmap(new_page, vma, haddr);
+               page_add_new_anon_rmap(new_page, vma, haddr, true);
                mem_cgroup_commit_charge(new_page, memcg, false);
                lru_cache_add_active_or_unevictable(new_page, vma);
                set_pmd_at(mm, haddr, pmd, entry);
@@ -1281,7 +1281,7 @@ alloc:
                        put_huge_zero_page();
                } else {
                        VM_BUG_ON_PAGE(!PageHead(page), page);
-                       page_remove_rmap(page);
+                       page_remove_rmap(page, true);
                        put_page(page);
                }
                ret |= VM_FAULT_WRITE;
@@ -1508,7 +1508,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                put_huge_zero_page();
        } else {
                struct page *page = pmd_page(orig_pmd);
-               page_remove_rmap(page);
+               page_remove_rmap(page, true);
                VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
                add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
                VM_BUG_ON_PAGE(!PageHead(page), page);
@@ -2371,7 +2371,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
                         * superfluous.
                         */
                        pte_clear(vma->vm_mm, address, _pte);
-                       page_remove_rmap(src_page);
+                       page_remove_rmap(src_page, false);
                        spin_unlock(ptl);
                        free_page_and_swap_cache(src_page);
                }
@@ -2682,7 +2682,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 
        spin_lock(pmd_ptl);
        BUG_ON(!pmd_none(*pmd));
-       page_add_new_anon_rmap(new_page, vma, address);
+       page_add_new_anon_rmap(new_page, vma, address, true);
        mem_cgroup_commit_charge(new_page, memcg, false);
        lru_cache_add_active_or_unevictable(new_page, vma);
        pgtable_trans_huge_deposit(mm, pmd, pgtable);
index cdf3825..e924529 100644 (file)
@@ -3186,7 +3186,7 @@ again:
                        set_page_dirty(page);
 
                hugetlb_count_sub(pages_per_huge_page(h), mm);
-               page_remove_rmap(page);
+               page_remove_rmap(page, true);
                force_flush = !__tlb_remove_page(tlb, page);
                if (force_flush) {
                        address += sz;
@@ -3415,7 +3415,7 @@ retry_avoidcopy:
                mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
                set_huge_pte_at(mm, address, ptep,
                                make_huge_pte(vma, new_page, 1));
-               page_remove_rmap(old_page);
+               page_remove_rmap(old_page, true);
                hugepage_add_new_anon_rmap(new_page, vma, address);
                /* Make the old page be freed below */
                new_page = old_page;
index 643abe7..b4f7b69 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -956,13 +956,13 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
        }
 
        get_page(kpage);
-       page_add_anon_rmap(kpage, vma, addr);
+       page_add_anon_rmap(kpage, vma, addr, false);
 
        flush_cache_page(vma, addr, pte_pfn(*ptep));
        ptep_clear_flush_notify(vma, addr, ptep);
        set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
 
-       page_remove_rmap(page);
+       page_remove_rmap(page, false);
        if (!page_mapped(page))
                try_to_free_swap(page);
        put_page(page);
index f9360dd..f964d19 100644 (file)
@@ -1118,7 +1118,7 @@ again:
                                        mark_page_accessed(page);
                        }
                        rss[mm_counter(page)]--;
-                       page_remove_rmap(page);
+                       page_remove_rmap(page, false);
                        if (unlikely(page_mapcount(page) < 0))
                                print_bad_pte(vma, addr, ptent, page);
                        if (unlikely(!__tlb_remove_page(tlb, page))) {
@@ -2118,7 +2118,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
                 * thread doing COW.
                 */
                ptep_clear_flush_notify(vma, address, page_table);
-               page_add_new_anon_rmap(new_page, vma, address);
+               page_add_new_anon_rmap(new_page, vma, address, false);
                mem_cgroup_commit_charge(new_page, memcg, false);
                lru_cache_add_active_or_unevictable(new_page, vma);
                /*
@@ -2151,7 +2151,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
                         * mapcount is visible. So transitively, TLBs to
                         * old page will be flushed before it can be reused.
                         */
-                       page_remove_rmap(old_page);
+                       page_remove_rmap(old_page, false);
                }
 
                /* Free the old page.. */
@@ -2567,7 +2567,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
                flags &= ~FAULT_FLAG_WRITE;
                ret |= VM_FAULT_WRITE;
-               exclusive = 1;
+               exclusive = RMAP_EXCLUSIVE;
        }
        flush_icache_page(vma, page);
        if (pte_swp_soft_dirty(orig_pte))
@@ -2577,7 +2577,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                do_page_add_anon_rmap(page, vma, address, exclusive);
                mem_cgroup_commit_charge(page, memcg, true);
        } else { /* ksm created a completely new copy */
-               page_add_new_anon_rmap(page, vma, address);
+               page_add_new_anon_rmap(page, vma, address, false);
                mem_cgroup_commit_charge(page, memcg, false);
                lru_cache_add_active_or_unevictable(page, vma);
        }
@@ -2735,7 +2735,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
        }
 
        inc_mm_counter_fast(mm, MM_ANONPAGES);
-       page_add_new_anon_rmap(page, vma, address);
+       page_add_new_anon_rmap(page, vma, address, false);
        mem_cgroup_commit_charge(page, memcg, false);
        lru_cache_add_active_or_unevictable(page, vma);
 setpte:
@@ -2824,7 +2824,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
        if (anon) {
                inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
-               page_add_new_anon_rmap(page, vma, address);
+               page_add_new_anon_rmap(page, vma, address, false);
        } else {
                inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
                page_add_file_rmap(page);
index f7f345d..3921f20 100644 (file)
@@ -167,7 +167,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
                else
                        page_dup_rmap(new);
        } else if (PageAnon(new))
-               page_add_anon_rmap(new, vma, addr);
+               page_add_anon_rmap(new, vma, addr, false);
        else
                page_add_file_rmap(new);
 
@@ -1815,7 +1815,7 @@ fail_putback:
         * guarantee the copy is visible before the pagetable update.
         */
        flush_cache_range(vma, mmun_start, mmun_end);
-       page_add_anon_rmap(new_page, vma, mmun_start);
+       page_add_anon_rmap(new_page, vma, mmun_start, true);
        pmdp_huge_clear_flush_notify(vma, mmun_start, pmd);
        set_pmd_at(mm, mmun_start, pmd, entry);
        flush_tlb_range(vma, mmun_start, mmun_end);
@@ -1826,14 +1826,14 @@ fail_putback:
                flush_tlb_range(vma, mmun_start, mmun_end);
                mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
                update_mmu_cache_pmd(vma, address, &entry);
-               page_remove_rmap(new_page);
+               page_remove_rmap(new_page, true);
                goto fail_putback;
        }
 
        mlock_migrate_page(new_page, page);
        set_page_memcg(new_page, page_memcg(page));
        set_page_memcg(page, NULL);
-       page_remove_rmap(page);
+       page_remove_rmap(page, true);
 
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
index 622756c..c330f9a 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1133,6 +1133,7 @@ static void __page_check_anon_rmap(struct page *page,
  * @page:      the page to add the mapping to
  * @vma:       the vm area in which the mapping is added
  * @address:   the user virtual address mapped
+ * @compound:  charge the page as compound or small page
  *
  * The caller needs to hold the pte lock, and the page must be locked in
  * the anon_vma case: to serialize mapping,index checking after setting,
@@ -1140,9 +1141,9 @@ static void __page_check_anon_rmap(struct page *page,
  * (but PageKsm is never downgraded to PageAnon).
  */
 void page_add_anon_rmap(struct page *page,
-       struct vm_area_struct *vma, unsigned long address)
+       struct vm_area_struct *vma, unsigned long address, bool compound)
 {
-       do_page_add_anon_rmap(page, vma, address, 0);
+       do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
 }
 
 /*
@@ -1151,21 +1152,24 @@ void page_add_anon_rmap(struct page *page,
  * Everybody else should continue to use page_add_anon_rmap above.
  */
 void do_page_add_anon_rmap(struct page *page,
-       struct vm_area_struct *vma, unsigned long address, int exclusive)
+       struct vm_area_struct *vma, unsigned long address, int flags)
 {
        int first = atomic_inc_and_test(&page->_mapcount);
        if (first) {
+               bool compound = flags & RMAP_COMPOUND;
+               int nr = compound ? hpage_nr_pages(page) : 1;
                /*
                 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
                 * these counters are not modified in interrupt context, and
                 * pte lock(a spinlock) is held, which implies preemption
                 * disabled.
                 */
-               if (PageTransHuge(page))
+               if (compound) {
+                       VM_BUG_ON_PAGE(!PageTransHuge(page), page);
                        __inc_zone_page_state(page,
                                              NR_ANON_TRANSPARENT_HUGEPAGES);
-               __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-                               hpage_nr_pages(page));
+               }
+               __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
        }
        if (unlikely(PageKsm(page)))
                return;
@@ -1173,7 +1177,8 @@ void do_page_add_anon_rmap(struct page *page,
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        /* address might be in next vma when migration races vma_adjust */
        if (first)
-               __page_set_anon_rmap(page, vma, address, exclusive);
+               __page_set_anon_rmap(page, vma, address,
+                               flags & RMAP_EXCLUSIVE);
        else
                __page_check_anon_rmap(page, vma, address);
 }
@@ -1183,21 +1188,25 @@ void do_page_add_anon_rmap(struct page *page,
  * @page:      the page to add the mapping to
  * @vma:       the vm area in which the mapping is added
  * @address:   the user virtual address mapped
+ * @compound:  charge the page as compound or small page
  *
  * Same as page_add_anon_rmap but must only be called on *new* pages.
  * This means the inc-and-test can be bypassed.
  * Page does not have to be locked.
  */
 void page_add_new_anon_rmap(struct page *page,
-       struct vm_area_struct *vma, unsigned long address)
+       struct vm_area_struct *vma, unsigned long address, bool compound)
 {
+       int nr = compound ? hpage_nr_pages(page) : 1;
+
        VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
        SetPageSwapBacked(page);
        atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
-       if (PageTransHuge(page))
+       if (compound) {
+               VM_BUG_ON_PAGE(!PageTransHuge(page), page);
                __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
-       __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-                       hpage_nr_pages(page));
+       }
+       __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
        __page_set_anon_rmap(page, vma, address, 1);
 }
 
@@ -1249,13 +1258,17 @@ out:
 
 /**
  * page_remove_rmap - take down pte mapping from a page
- * @page: page to remove mapping from
+ * @page:      page to remove mapping from
+ * @compound:  uncharge the page as compound or small page
  *
  * The caller needs to hold the pte lock.
  */
-void page_remove_rmap(struct page *page)
+void page_remove_rmap(struct page *page, bool compound)
 {
+       int nr = compound ? hpage_nr_pages(page) : 1;
+
        if (!PageAnon(page)) {
+               VM_BUG_ON_PAGE(compound && !PageHuge(page), page);
                page_remove_file_rmap(page);
                return;
        }
@@ -1273,11 +1286,12 @@ void page_remove_rmap(struct page *page)
         * these counters are not modified in interrupt context, and
         * pte lock(a spinlock) is held, which implies preemption disabled.
         */
-       if (PageTransHuge(page))
+       if (compound) {
+               VM_BUG_ON_PAGE(!PageTransHuge(page), page);
                __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+       }
 
-       __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-                             -hpage_nr_pages(page));
+       __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, -nr);
 
        if (unlikely(PageMlocked(page)))
                clear_page_mlock(page);
@@ -1416,7 +1430,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
        } else
                dec_mm_counter(mm, mm_counter_file(page));
 
-       page_remove_rmap(page);
+       page_remove_rmap(page, PageHuge(page));
        page_cache_release(page);
 
 out_unmap:
index e6b8591..058e6f0 100644 (file)
@@ -1160,10 +1160,10 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
        set_pte_at(vma->vm_mm, addr, pte,
                   pte_mkold(mk_pte(page, vma->vm_page_prot)));
        if (page == swapcache) {
-               page_add_anon_rmap(page, vma, addr);
+               page_add_anon_rmap(page, vma, addr, false);
                mem_cgroup_commit_charge(page, memcg, true);
        } else { /* ksm created a completely new copy */
-               page_add_new_anon_rmap(page, vma, addr);
+               page_add_new_anon_rmap(page, vma, addr, false);
                mem_cgroup_commit_charge(page, memcg, false);
                lru_cache_add_active_or_unevictable(page, vma);
        }
index 77fee93..ae21a1f 100644 (file)
@@ -76,7 +76,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
                goto out_release_uncharge_unlock;
 
        inc_mm_counter(dst_mm, MM_ANONPAGES);
-       page_add_new_anon_rmap(page, dst_vma, dst_addr);
+       page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
        mem_cgroup_commit_charge(page, memcg, false);
        lru_cache_add_active_or_unevictable(page, dst_vma);