mm: disable fault around on emulated access bit architecture
diff --git a/mm/memory.c b/mm/memory.c
index 93897f2..a1b93d9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -789,6 +789,46 @@ out:
        return pfn_to_page(pfn);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+                               pmd_t pmd)
+{
+       unsigned long pfn = pmd_pfn(pmd);
+
+       /*
+        * There is no pmd_special() but there may be special pmds, e.g.
+        * in a direct-access (dax) mapping, so let's just replicate the
+        * !HAVE_PTE_SPECIAL case from vm_normal_page() here.
+        */
+       if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
+               if (vma->vm_flags & VM_MIXEDMAP) {
+                       if (!pfn_valid(pfn))
+                               return NULL;
+                       goto out;
+               } else {
+                       unsigned long off;
+                       off = (addr - vma->vm_start) >> PAGE_SHIFT;
+                       if (pfn == vma->vm_pgoff + off)
+                               return NULL;
+                       if (!is_cow_mapping(vma->vm_flags))
+                               return NULL;
+               }
+       }
+
+       if (is_zero_pfn(pfn))
+               return NULL;
+       if (unlikely(pfn > highest_memmap_pfn))
+               return NULL;
+
+       /*
+        * NOTE! We still have PageReserved() pages in the page tables.
+        * eg. VDSO mappings can cause them to exist.
+        */
+out:
+       return pfn_to_page(pfn);
+}
+#endif
+
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
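
vm_normal_page_pmd() mirrors the pte-based vm_normal_page(): given a huge
pmd it returns the backing struct page, or NULL for special mappings (raw
pfnmaps, the zero pfn, pfns beyond highest_memmap_pfn). A hypothetical
caller, holding the pmd lock, would look roughly like this (sketch only;
get_huge_page() is not a real kernel helper):

    static struct page *get_huge_page(struct vm_area_struct *vma,
                                      unsigned long addr, pmd_t *pmd)
    {
            pmd_t pmdval = *pmd;    /* caller holds pmd_lock() */

            if (!pmd_trans_huge(pmdval))
                    return NULL;
            /* NULL means a special pmd with no struct page behind it */
            return vm_normal_page_pmd(vma, addr, pmdval);
    }
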
@@ -1182,15 +1222,8 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
                next = pmd_addr_end(addr, end);
                if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
                        if (next - addr != HPAGE_PMD_SIZE) {
-#ifdef CONFIG_DEBUG_VM
-                               if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
-                                       pr_err("%s: mmap_sem is unlocked! addr=0x%lx end=0x%lx vma->vm_start=0x%lx vma->vm_end=0x%lx\n",
-                                               __func__, addr, end,
-                                               vma->vm_start,
-                                               vma->vm_end);
-                                       BUG();
-                               }
-#endif
+                               VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
+                                   !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
                                split_huge_pmd(vma, pmd, addr);
                        } else if (zap_huge_pmd(tlb, vma, pmd, addr))
                                goto next;
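
Besides removing the open-coded debug block, the assertion is narrowed to
anonymous vmas: file-backed pmds can be zapped without mmap_sem held (e.g.
through truncation), so only the anonymous case still implies the lock.
VM_BUG_ON_VMA() compiles away without CONFIG_DEBUG_VM and dumps the
offending vma before BUGing; paraphrasing include/linux/mmdebug.h:

    #ifdef CONFIG_DEBUG_VM
    #define VM_BUG_ON_VMA(cond, vma)                                \
            do {                                                    \
                    if (unlikely(cond)) {                           \
                            dump_vma(vma);                          \
                            BUG();                                  \
                    }                                               \
            } while (0)
    #else
    #define VM_BUG_ON_VMA(cond, vma) BUILD_BUG_ON_INVALID(cond)
    #endif
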
@@ -1711,6 +1744,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
        unsigned long next;
        unsigned long end = addr + PAGE_ALIGN(size);
        struct mm_struct *mm = vma->vm_mm;
+       unsigned long remap_pfn = pfn;
        int err;
 
        /*
@@ -1737,7 +1771,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
                vma->vm_pgoff = pfn;
        }
 
-       err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
+       err = track_pfn_remap(vma, &prot, remap_pfn, addr, PAGE_ALIGN(size));
        if (err)
                return -EINVAL;
 
@@ -1756,7 +1790,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
        } while (pgd++, addr = next, addr != end);
 
        if (err)
-               untrack_pfn(vma, pfn, PAGE_ALIGN(size));
+               untrack_pfn(vma, remap_pfn, PAGE_ALIGN(size));
 
        return err;
 }
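
The saved remap_pfn matters because remap_pfn_range() rebases pfn before
the mapping loop, so by the time the error path runs, pfn no longer holds
the value that was passed to track_pfn_remap(). The unchanged context
(roughly) reads:

    BUG_ON(addr >= end);
    pfn -= addr >> PAGE_SHIFT;      /* pfn is rebased for the loop... */

...so untrack_pfn() must be given the original value, not the rebased one.
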
@@ -2340,6 +2374,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
         * not dirty accountable.
         */
        if (PageAnon(old_page) && !PageKsm(old_page)) {
+               int total_mapcount;
                if (!trylock_page(old_page)) {
                        get_page(old_page);
                        pte_unmap_unlock(page_table, ptl);
@@ -2354,13 +2389,18 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        }
                        put_page(old_page);
                }
-               if (reuse_swap_page(old_page)) {
-                       /*
-                        * The page is all ours.  Move it to our anon_vma so
-                        * the rmap code will not search our parent or siblings.
-                        * Protected against the rmap code by the page lock.
-                        */
-                       page_move_anon_rmap(old_page, vma, address);
+               if (reuse_swap_page(old_page, &total_mapcount)) {
+                       if (total_mapcount == 1) {
+                               /*
+                                * The page is all ours. Move it to
+                                * our anon_vma so the rmap code will
+                                * not search our parent or siblings.
+                                * Protected against the rmap code by
+                                * the page lock.
+                                */
+                               page_move_anon_rmap(compound_head(old_page),
+                                                   vma, address);
+                       }
                        unlock_page(old_page);
                        return wp_page_reuse(mm, vma, address, page_table, ptl,
                                             orig_pte, old_page, 0, 0);
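
reuse_swap_page() now reports the page's total mapcount (summed over all
subpages of a THP) through an optional out-parameter; the anon_vma may only
be moved when this task is the sole mapper. Callers that need just the
boolean answer, such as do_swap_page() in the next hunk, pass NULL. A
simplified sketch of the new interface (the real function in mm/swapfile.c
also folds in swap-cache references):

    bool reuse_swap_page(struct page *page, int *total_mapcount)
    {
            int count;

            VM_BUG_ON_PAGE(!PageLocked(page), page);
            /* total_mapcount may be NULL if the caller does not care */
            count = page_trans_huge_mapcount(page, total_mapcount);
            return count <= 1;
    }
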
@@ -2584,7 +2624,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
        inc_mm_counter_fast(mm, MM_ANONPAGES);
        dec_mm_counter_fast(mm, MM_SWAPENTS);
        pte = mk_pte(page, vma->vm_page_prot);
-       if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
+       if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
                flags &= ~FAULT_FLAG_WRITE;
                ret |= VM_FAULT_WRITE;
@@ -2836,7 +2876,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
  * vm_ops->map_pages.
  */
 void do_set_pte(struct vm_area_struct *vma, unsigned long address,
-               struct page *page, pte_t *pte, bool write, bool anon)
+               struct page *page, pte_t *pte, bool write, bool anon, bool old)
 {
        pte_t entry;
 
@@ -2844,6 +2884,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
        entry = mk_pte(page, vma->vm_page_prot);
        if (write)
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+       if (old)
+               entry = pte_mkold(entry);
        if (anon) {
                inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
                page_add_new_anon_rmap(page, vma, address, false);
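
The new "old" argument asks do_set_pte() to install the entry with the
accessed bit clear, so pages pre-mapped by fault-around look idle to
reclaim until they are actually touched. pte_mkold() is a per-architecture
helper; on x86, for example, it is roughly:

    static inline pte_t pte_mkold(pte_t pte)
    {
            return pte_clear_flags(pte, _PAGE_ACCESSED);
    }
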
@@ -2857,8 +2899,16 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
        update_mmu_cache(vma, address, pte);
 }
 
+/*
+ * If the architecture emulates the "accessed"/"young" bit without HW
+ * support, there is not much gain from fault-around.
+ */
 static unsigned long fault_around_bytes __read_mostly =
+#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+       PAGE_SIZE;
+#else
        rounddown_pow_of_two(65536);
+#endif
 
 #ifdef CONFIG_DEBUG_FS
 static int fault_around_bytes_get(void *data, u64 *val)
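
Setting fault_around_bytes to PAGE_SIZE is enough to disable the feature:
the call site in do_read_fault() (next hunk) only takes the fault-around
path for a window larger than one page, since PAGE_SIZE >> PAGE_SHIFT == 1:

    if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
            /* never entered when fault_around_bytes == PAGE_SIZE */
            ...
    }

The #ifndef uses __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS as a proxy for
hardware-maintained access bits; where it is absent, every speculatively
mapped page would fault again anyway to emulate the young bit, so
pre-faulting buys nothing. The debugfs knob below still allows overriding
the default at runtime.
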
@@ -2981,9 +3031,20 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         */
        if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
                pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-               do_fault_around(vma, address, pte, pgoff, flags);
                if (!pte_same(*pte, orig_pte))
                        goto unlock_out;
+               do_fault_around(vma, address, pte, pgoff, flags);
+               /* Check if the fault was handled by fault-around */
+               if (!pte_same(*pte, orig_pte)) {
+                       /*
+                        * Fault-around produces old ptes, but the pte
+                        * we handled the fault for should be young.
+                        */
+                       pte_t entry = pte_mkyoung(*pte);
+                       if (ptep_set_access_flags(vma, address, pte, entry, 0))
+                               update_mmu_cache(vma, address, pte);
+                       goto unlock_out;
+               }
                pte_unmap_unlock(pte, ptl);
        }
 
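With the pte_same() check moved before do_fault_around(), a changed pte
afterwards now means fault-around already installed a mapping for the
faulting address, only with the accessed bit clear; the handler merely
upgrades it to young. ptep_set_access_flags() returns nonzero when it
actually modified the entry, which is exactly when update_mmu_cache() is
needed. The generic fallback (architectures may override it) looks roughly
like:

    int ptep_set_access_flags(struct vm_area_struct *vma,
                              unsigned long address, pte_t *ptep,
                              pte_t entry, int dirty)
    {
            int changed = !pte_same(*ptep, entry);

            if (changed) {
                    set_pte_at(vma->vm_mm, address, ptep, entry);
                    flush_tlb_fix_spurious_fault(vma, address);
            }
            return changed;
    }
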
@@ -2998,7 +3059,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                put_page(fault_page);
                return ret;
        }
-       do_set_pte(vma, address, fault_page, pte, false, false);
+       do_set_pte(vma, address, fault_page, pte, false, false, false);
        unlock_page(fault_page);
 unlock_out:
        pte_unmap_unlock(pte, ptl);
@@ -3050,7 +3111,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                }
                goto uncharge_out;
        }
-       do_set_pte(vma, address, new_page, pte, true, true);
+       do_set_pte(vma, address, new_page, pte, true, true, false);
        mem_cgroup_commit_charge(new_page, memcg, false, false);
        lru_cache_add_active_or_unevictable(new_page, vma);
        pte_unmap_unlock(pte, ptl);
@@ -3107,7 +3168,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                put_page(fault_page);
                return ret;
        }
-       do_set_pte(vma, address, fault_page, pte, true, false);
+       do_set_pte(vma, address, fault_page, pte, true, false, false);
        pte_unmap_unlock(pte, ptl);
 
        if (set_page_dirty(fault_page))
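
All three single-page fault paths above pass old == false, since the
faulting page is being accessed right now. The one caller expected to pass
true is the fault-around path behind vma->vm_ops->map_pages; assuming the
matching mm/filemap.c change (not part of this diff), that call site would
read roughly:

    /* speculative fault-around mappings go in old */
    do_set_pte(vma, addr, page, pte, false /* write */,
               false /* anon */, true /* old */);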