mm: disable fault around on emulated access bit architecture
diff --git a/mm/memory.c b/mm/memory.c
index 93897f2..a1b93d9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -789,6 +789,46 @@ out:
        return pfn_to_page(pfn);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+                               pmd_t pmd)
+{
+       unsigned long pfn = pmd_pfn(pmd);
+
+       /*
+        * There is no pmd_special() but there may be special pmds, e.g.
+        * in a direct-access (dax) mapping, so let's just replicate the
+        * !HAVE_PTE_SPECIAL case from vm_normal_page() here.
+        */
+       if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
+               if (vma->vm_flags & VM_MIXEDMAP) {
+                       if (!pfn_valid(pfn))
+                               return NULL;
+                       goto out;
+               } else {
+                       unsigned long off;
+                       off = (addr - vma->vm_start) >> PAGE_SHIFT;
+                       if (pfn == vma->vm_pgoff + off)
+                               return NULL;
+                       if (!is_cow_mapping(vma->vm_flags))
+                               return NULL;
+               }
+       }
+
+       if (is_zero_pfn(pfn))
+               return NULL;
+       if (unlikely(pfn > highest_memmap_pfn))
+               return NULL;
+
+       /*
+        * NOTE! We still have PageReserved() pages in the page tables.
+        * eg. VDSO mappings can cause them to exist.
+        */
+out:
+       return pfn_to_page(pfn);
+}
+#endif
+
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
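
vm_normal_page_pmd() mirrors the pte-based vm_normal_page(): given a huge
pmd it returns the backing struct page, or NULL for special mappings (raw
pfnmaps, the zero pfn, pfns beyond highest_memmap_pfn). A hypothetical
caller, holding the pmd lock, would look roughly like this (sketch only;
get_huge_page() is not a real kernel helper):

    static struct page *get_huge_page(struct vm_area_struct *vma,
                                      unsigned long addr, pmd_t *pmd)
    {
            pmd_t pmdval = *pmd;    /* caller holds pmd_lock() */

            if (!pmd_trans_huge(pmdval))
                    return NULL;
            /* NULL means a special pmd with no struct page behind it */
            return vm_normal_page_pmd(vma, addr, pmdval);
    }
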
@@ -1182,15 +1222,8 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
                next = pmd_addr_end(addr, end);
                if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
                        if (next - addr != HPAGE_PMD_SIZE) {
-#ifdef CONFIG_DEBUG_VM
-                               if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
-                                       pr_err("%s: mmap_sem is unlocked! addr=0x%lx end=0x%lx vma->vm_start=0x%lx vma->vm_end=0x%lx\n",
-                                               __func__, addr, end,
-                                               vma->vm_start,
-                                               vma->vm_end);
-                                       BUG();
-                               }
-#endif
+                               VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
+                                   !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
                                split_huge_pmd(vma, pmd, addr);
                        } else if (zap_huge_pmd(tlb, vma, pmd, addr))
                                goto next;
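
Besides removing the open-coded debug block, the assertion is narrowed to
anonymous vmas: file-backed pmds can be zapped without mmap_sem held (e.g.
through truncation), so only the anonymous case still implies the lock.
VM_BUG_ON_VMA() compiles away without CONFIG_DEBUG_VM and dumps the
offending vma before BUGing; paraphrasing include/linux/mmdebug.h:

    #ifdef CONFIG_DEBUG_VM
    #define VM_BUG_ON_VMA(cond, vma)                                \
            do {                                                    \
                    if (unlikely(cond)) {                           \
                            dump_vma(vma);                          \
                            BUG();                                  \
                    }                                               \
            } while (0)
    #else
    #define VM_BUG_ON_VMA(cond, vma) BUILD_BUG_ON_INVALID(cond)
    #endif
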
@@ -1711,6 +1744,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
        unsigned long next;
        unsigned long end = addr + PAGE_ALIGN(size);
        struct mm_struct *mm = vma->vm_mm;
+       unsigned long remap_pfn = pfn;
        int err;
 
        /*
@@ -1737,7 +1771,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
                vma->vm_pgoff = pfn;
        }
 
-       err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
+       err = track_pfn_remap(vma, &prot, remap_pfn, addr, PAGE_ALIGN(size));
        if (err)
                return -EINVAL;
 
@@ -1756,7 +1790,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
        } while (pgd++, addr = next, addr != end);
 
        if (err)
-               untrack_pfn(vma, pfn, PAGE_ALIGN(size));
+               untrack_pfn(vma, remap_pfn, PAGE_ALIGN(size));
 
        return err;
 }
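
The saved remap_pfn matters because remap_pfn_range() rebases pfn before
the mapping loop, so by the time the error path runs, pfn no longer holds
the value that was passed to track_pfn_remap(). The unchanged context
(roughly) reads:

    BUG_ON(addr >= end);
    pfn -= addr >> PAGE_SHIFT;      /* pfn is rebased for the loop... */

...so untrack_pfn() must be given the original value, not the rebased one.
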
@@ -2340,6 +2374,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
         * not dirty accountable.
         */
        if (PageAnon(old_page) && !PageKsm(old_page)) {
+               int total_mapcount;
                if (!trylock_page(old_page)) {
                        get_page(old_page);
                        pte_unmap_unlock(page_table, ptl);
@@ -2354,13 +2389,18 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        }
                        put_page(old_page);
                }
-               if (reuse_swap_page(old_page)) {
-                       /*
-                        * The page is all ours.  Move it to our anon_vma so
-                        * the rmap code will not search our parent or siblings.
-                        * Protected against the rmap code by the page lock.
-                        */
-                       page_move_anon_rmap(old_page, vma, address);
+               if (reuse_swap_page(old_page, &total_mapcount)) {
+                       if (total_mapcount == 1) {
+                               /*
+                                * The page is all ours. Move it to
+                                * our anon_vma so the rmap code will
+                                * not search our parent or siblings.
+                                * Protected against the rmap code by
+                                * the page lock.
+                                */
+                               page_move_anon_rmap(compound_head(old_page),
+                                                   vma, address);
+                       }
                        unlock_page(old_page);
                        return wp_page_reuse(mm, vma, address, page_table, ptl,
                                             orig_pte, old_page, 0, 0);
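
reuse_swap_page() now reports the page's total mapcount (summed over all
subpages of a THP) through an optional out-parameter; the anon_vma may only
be moved when this task is the sole mapper. Callers that need just the
boolean answer, such as do_swap_page() in the next hunk, pass NULL. A
simplified sketch of the new interface (the real function in mm/swapfile.c
also folds in swap-cache references):

    bool reuse_swap_page(struct page *page, int *total_mapcount)
    {
            int count;

            VM_BUG_ON_PAGE(!PageLocked(page), page);
            /* total_mapcount may be NULL if the caller does not care */
            count = page_trans_huge_mapcount(page, total_mapcount);
            return count <= 1;
    }
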
@@ -2584,7 +2624,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
        inc_mm_counter_fast(mm, MM_ANONPAGES);
        dec_mm_counter_fast(mm, MM_SWAPENTS);
        pte = mk_pte(page, vma->vm_page_prot);
-       if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
+       if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
                flags &= ~FAULT_FLAG_WRITE;
                ret |= VM_FAULT_WRITE;
@@ -2836,7 +2876,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
  * vm_ops->map_pages.
  */
 void do_set_pte(struct vm_area_struct *vma, unsigned long address,
-               struct page *page, pte_t *pte, bool write, bool anon)
+               struct page *page, pte_t *pte, bool write, bool anon, bool old)
 {
        pte_t entry;
 
@@ -2844,6 +2884,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
        entry = mk_pte(page, vma->vm_page_prot);
        if (write)
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+       if (old)
+               entry = pte_mkold(entry);
        if (anon) {
                inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
                page_add_new_anon_rmap(page, vma, address, false);
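
The new "old" argument asks do_set_pte() to install the entry with the
accessed bit clear, so pages pre-mapped by fault-around look idle to
reclaim until they are actually touched. pte_mkold() is a per-architecture
helper; on x86, for example, it is roughly:

    static inline pte_t pte_mkold(pte_t pte)
    {
            return pte_clear_flags(pte, _PAGE_ACCESSED);
    }
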
@@ -2857,8 +2899,16 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
        update_mmu_cache(vma, address, pte);
 }
 
+/*
+ * If the architecture emulates the "accessed"/"young" bit without HW
+ * support, there is not much gain from fault-around.
+ */
 static unsigned long fault_around_bytes __read_mostly =
+#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+       PAGE_SIZE;
+#else
        rounddown_pow_of_two(65536);
+#endif
 
 #ifdef CONFIG_DEBUG_FS
 static int fault_around_bytes_get(void *data, u64 *val)
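
Setting fault_around_bytes to PAGE_SIZE is enough to disable the feature:
the call site in do_read_fault() (next hunk) only takes the fault-around
path for a window larger than one page, since PAGE_SIZE >> PAGE_SHIFT == 1:

    if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
            /* never entered when fault_around_bytes == PAGE_SIZE */
            ...
    }

The #ifndef uses __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS as a proxy for
hardware-maintained access bits; where it is absent, every speculatively
mapped page would fault again anyway to emulate the young bit, so
pre-faulting buys nothing. The debugfs knob below still allows overriding
the default at runtime.
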
@@ -2981,9 +3031,20 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         */
        if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
                pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-               do_fault_around(vma, address, pte, pgoff, flags);
                if (!pte_same(*pte, orig_pte))
                        goto unlock_out;
+               do_fault_around(vma, address, pte, pgoff, flags);
+               /* Check if the fault was handled by fault-around */
+               if (!pte_same(*pte, orig_pte)) {
+                       /*
+                        * Fault-around produces old ptes, but the pte
+                        * we handled the fault for should be young.
+                        */
+                       pte_t entry = pte_mkyoung(*pte);
+                       if (ptep_set_access_flags(vma, address, pte, entry, 0))
+                               update_mmu_cache(vma, address, pte);
+                       goto unlock_out;
+               }
                pte_unmap_unlock(pte, ptl);
        }
 
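With the pte_same() check moved before do_fault_around(), a changed pte
afterwards now means fault-around already installed a mapping for the
faulting address, only with the accessed bit clear; the handler merely
upgrades it to young. ptep_set_access_flags() returns nonzero when it
actually modified the entry, which is exactly when update_mmu_cache() is
needed. The generic fallback (architectures may override it) looks roughly
like:

    int ptep_set_access_flags(struct vm_area_struct *vma,
                              unsigned long address, pte_t *ptep,
                              pte_t entry, int dirty)
    {
            int changed = !pte_same(*ptep, entry);

            if (changed) {
                    set_pte_at(vma->vm_mm, address, ptep, entry);
                    flush_tlb_fix_spurious_fault(vma, address);
            }
            return changed;
    }
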
@@ -2998,7 +3059,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                put_page(fault_page);
                return ret;
        }
-       do_set_pte(vma, address, fault_page, pte, false, false);
+       do_set_pte(vma, address, fault_page, pte, false, false, false);
        unlock_page(fault_page);
 unlock_out:
        pte_unmap_unlock(pte, ptl);
@@ -3050,7 +3111,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                }
                goto uncharge_out;
        }
-       do_set_pte(vma, address, new_page, pte, true, true);
+       do_set_pte(vma, address, new_page, pte, true, true, false);
        mem_cgroup_commit_charge(new_page, memcg, false, false);
        lru_cache_add_active_or_unevictable(new_page, vma);
        pte_unmap_unlock(pte, ptl);
@@ -3107,7 +3168,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                put_page(fault_page);
                return ret;
        }
-       do_set_pte(vma, address, fault_page, pte, true, false);
+       do_set_pte(vma, address, fault_page, pte, true, false, false);
        pte_unmap_unlock(pte, ptl);
 
        if (set_page_dirty(fault_page))
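
All three single-page fault paths above pass old == false, since the
faulting page is being accessed right now. The one caller expected to pass
true is the fault-around path behind vma->vm_ops->map_pages; assuming the
matching mm/filemap.c change (not part of this diff), that call site would
read roughly:

    /* speculative fault-around mappings go in old */
    do_set_pte(vma, addr, page, pte, false /* write */,
               false /* anon */, true /* old */);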