mm: numa: add paranoid check around pte_protnone_numa
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cb7be11..8e07342 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1211,7 +1211,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
                return ERR_PTR(-EFAULT);
 
        /* Full NUMA hinting faults to serialise migration in fault paths */
-       if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+       if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
                goto out;
 
        page = pmd_page(*pmd);
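The hunk above swaps the pmd_numa() test for pmd_protnone(): under the protnone-based
scheme a PMD that is being used for NUMA hinting is present but grants no access, so a
GUP caller passing FOLL_NUMA has to bail out and let the hinting fault path run. As a
rough illustration only, the userspace sketch below models that check; the MODEL_* flags
and model_follow_pmd() helper are hypothetical and do not reflect the kernel's actual
pmd encoding.

#include <assert.h>
#include <stddef.h>

#define MODEL_PRESENT   0x1u    /* entry maps a huge page */
#define MODEL_PROTNONE  0x2u    /* present but no access: NUMA hinting state */
#define MODEL_FOLL_NUMA 0x4u    /* caller wants hinting faults honoured */

struct model_page { int nid; };         /* stand-in for struct page */

struct model_pmd {
        unsigned int flags;
        struct model_page *page;
};

/*
 * Model of the follow_trans_huge_pmd() check: a protnone entry is still
 * "present", so a walker that respects NUMA hinting must not hand back
 * the page and instead forces the caller into the fault path.
 */
static struct model_page *model_follow_pmd(struct model_pmd *pmd,
                                           unsigned int foll_flags)
{
        if (!(pmd->flags & MODEL_PRESENT))
                return NULL;
        if ((foll_flags & MODEL_FOLL_NUMA) && (pmd->flags & MODEL_PROTNONE))
                return NULL;            /* take the hinting fault instead */
        return pmd->page;
}

int main(void)
{
        struct model_page page = { .nid = 0 };
        struct model_pmd pmd = { .flags = MODEL_PRESENT | MODEL_PROTNONE,
                                 .page = &page };

        assert(model_follow_pmd(&pmd, MODEL_FOLL_NUMA) == NULL);
        assert(model_follow_pmd(&pmd, 0) == &page);
        return 0;
}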
@@ -1262,6 +1262,9 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        bool migrated = false;
        int flags = 0;
 
+       /* A PROT_NONE fault should not end up here */
+       BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
+
        ptl = pmd_lock(mm, pmdp);
        if (unlikely(!pmd_same(pmd, *pmdp)))
                goto out_unlock;
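The new BUG_ON is the "paranoid check" of the subject line: now that NUMA hinting reuses
the PROT_NONE protection, a hinting fault on a VMA that grants none of read, write or
execute would mean a genuine PROT_NONE fault was misrouted into this handler. A minimal
sketch of the accessibility predicate, using a hypothetical model_vma_is_accessible()
helper and MODEL_VM_* values rather than the kernel's VM_* flags:

#include <assert.h>
#include <stdbool.h>

#define MODEL_VM_READ  0x1ul
#define MODEL_VM_WRITE 0x2ul
#define MODEL_VM_EXEC  0x4ul

/*
 * Model of the check: a VMA is "accessible" if it grants at least one of
 * read, write or execute.  A NUMA hinting fault against a VMA with none
 * of these would indicate a misrouted PROT_NONE fault.
 */
static bool model_vma_is_accessible(unsigned long vm_flags)
{
        return vm_flags & (MODEL_VM_READ | MODEL_VM_WRITE | MODEL_VM_EXEC);
}

int main(void)
{
        assert(model_vma_is_accessible(MODEL_VM_READ | MODEL_VM_WRITE));
        assert(!model_vma_is_accessible(0));    /* PROT_NONE-style mapping */
        return 0;
}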
@@ -1272,8 +1275,9 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         * check_same as the page may no longer be mapped.
         */
        if (unlikely(pmd_trans_migrating(*pmdp))) {
+               page = pmd_page(*pmdp);
                spin_unlock(ptl);
-               wait_migrate_huge_page(vma->anon_vma, pmdp);
+               wait_on_page_locked(page);
                goto out;
        }
 
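The migration-wait change above records which page to wait on while the page table lock
is still held, drops the lock, and only then sleeps until the page is unlocked. The
pthreads sketch below models that "snapshot under the lock, sleep outside it" pattern;
the model_page/model_ptl names are hypothetical, and the condition variable merely
stands in for the page lock wait queue.

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

struct model_page {
        pthread_mutex_t lock;
        pthread_cond_t  done;
        bool            migrating;
};

static pthread_mutex_t model_ptl = PTHREAD_MUTEX_INITIALIZER; /* stand-in for the pmd lock */
static struct model_page *pmd_target;                         /* what the pmd currently maps */

/* Waiter side: snapshot the page under the "ptl", drop it, then sleep. */
static void model_wait_for_migration(void)
{
        struct model_page *page;

        pthread_mutex_lock(&model_ptl);
        page = pmd_target;                 /* like page = pmd_page(*pmdp) */
        pthread_mutex_unlock(&model_ptl);  /* never sleep holding the ptl */

        pthread_mutex_lock(&page->lock);
        while (page->migrating)            /* like wait_on_page_locked(page) */
                pthread_cond_wait(&page->done, &page->lock);
        pthread_mutex_unlock(&page->lock);
}

/* Migration side: finish and wake any waiters. */
static void *model_migration_done(void *arg)
{
        struct model_page *page = arg;

        pthread_mutex_lock(&page->lock);
        page->migrating = false;
        pthread_cond_broadcast(&page->done);
        pthread_mutex_unlock(&page->lock);
        return NULL;
}

int main(void)
{
        struct model_page page = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .done = PTHREAD_COND_INITIALIZER,
                .migrating = true,
        };
        pthread_t thr;

        pmd_target = &page;
        pthread_create(&thr, NULL, model_migration_done, &page);
        model_wait_for_migration();
        pthread_join(thr, NULL);
        assert(!page.migrating);
        return 0;
}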
@@ -1341,7 +1345,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
        /*
         * Migrate the THP to the requested node, returns with page unlocked
-        * and pmd_numa cleared.
+        * and access rights restored.
         */
        spin_unlock(ptl);
        migrated = migrate_misplaced_transhuge_page(mm, vma,
@@ -1354,9 +1358,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        goto out;
 clear_pmdnuma:
        BUG_ON(!PageLocked(page));
-       pmd = pmd_mknonnuma(pmd);
+       pmd = pmd_modify(pmd, vma->vm_page_prot);
        set_pmd_at(mm, haddr, pmdp, pmd);
-       VM_BUG_ON(pmd_numa(*pmdp));
        update_mmu_cache_pmd(vma, addr, pmdp);
        unlock_page(page);
 out_unlock:
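In the clear_pmdnuma path above, pmd_modify(pmd, vma->vm_page_prot) restores the VMA's
normal protections on the existing entry instead of flipping a dedicated NUMA bit. A toy
model of that operation, assuming a made-up encoding in which the low 12 bits hold the
protections and the rest the page frame number (the real layout is architecture-specific):

#include <assert.h>
#include <stdint.h>

#define MODEL_PROT_MASK 0xfffull
#define MODEL_PROT_NONE 0x000ull   /* present, no access: hinting state */
#define MODEL_PROT_RW   0x003ull   /* the VMA's normal vm_page_prot */

typedef uint64_t model_pmd_t;

/* Model of pmd_modify(): keep the page frame, swap in new protections. */
static model_pmd_t model_pmd_modify(model_pmd_t pmd, uint64_t newprot)
{
        return (pmd & ~MODEL_PROT_MASK) | (newprot & MODEL_PROT_MASK);
}

int main(void)
{
        model_pmd_t pmd = (0x1234ull << 12) | MODEL_PROT_NONE;

        /* clear_pmdnuma: restore the VMA's access rights in place */
        pmd = model_pmd_modify(pmd, MODEL_PROT_RW);
        assert((pmd >> 12) == 0x1234ull);               /* same huge page */
        assert((pmd & MODEL_PROT_MASK) == MODEL_PROT_RW);
        return 0;
}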
@@ -1479,30 +1482,23 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 
        if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                pmd_t entry;
-               ret = 1;
-               if (!prot_numa) {
-                       entry = pmdp_get_and_clear_notify(mm, addr, pmd);
-                       if (pmd_numa(entry))
-                               entry = pmd_mknonnuma(entry);
-                       entry = pmd_modify(entry, newprot);
-                       ret = HPAGE_PMD_NR;
-                       set_pmd_at(mm, addr, pmd, entry);
-                       BUG_ON(pmd_write(entry));
-               } else {
-                       struct page *page = pmd_page(*pmd);
 
-                       /*
-                        * Do not trap faults against the zero page. The
-                        * read-only data is likely to be read-cached on the
-                        * local CPU cache and it is less useful to know about
-                        * local vs remote hits on the zero page.
-                        */
-                       if (!is_huge_zero_page(page) &&
-                           !pmd_numa(*pmd)) {
-                               pmdp_set_numa(mm, addr, pmd);
-                               ret = HPAGE_PMD_NR;
-                       }
+               /*
+                * Avoid trapping faults against the zero page. The read-only
+                * data is likely to be read-cached on the local CPU and
+                * local/remote hits to the zero page are not interesting.
+                */
+               if (prot_numa && is_huge_zero_pmd(*pmd)) {
+                       spin_unlock(ptl);
+                       return 0;
                }
+
+               ret = 1;
+               entry = pmdp_get_and_clear_notify(mm, addr, pmd);
+               entry = pmd_modify(entry, newprot);
+               ret = HPAGE_PMD_NR;
+               set_pmd_at(mm, addr, pmd, entry);
+               BUG_ON(pmd_write(entry));
                spin_unlock(ptl);
        }
 
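After this hunk, change_huge_pmd() handles the prot_numa case with a single early return
for the huge zero page and otherwise always goes through the
pmdp_get_and_clear_notify()/pmd_modify()/set_pmd_at() sequence. One reason for clearing
the entry first is that the caller then picks up any access/dirty bits the hardware may
have set and can rebuild the entry without racing against further updates. A simplified
userspace model of that clear-modify-publish sequence, again using a hypothetical bit
layout rather than the kernel's:

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

#define MODEL_PROT_MASK 0xfffull
#define MODEL_PROT_RW   0x003ull
#define MODEL_DIRTY     0x040ull   /* set asynchronously by "hardware" */

/* Model of pmdp_get_and_clear() + pmd_modify() + set_pmd_at(). */
static void model_change_pmd(_Atomic uint64_t *slot, uint64_t newprot)
{
        uint64_t entry = atomic_exchange(slot, 0);   /* get_and_clear */
        uint64_t pfn   = entry & ~MODEL_PROT_MASK;
        uint64_t dirty = entry & MODEL_DIRTY;        /* don't lose hw-set bits */

        /* rebuild with the new protections and publish the result */
        atomic_store(slot, pfn | dirty | (newprot & MODEL_PROT_MASK));
}

int main(void)
{
        _Atomic uint64_t slot = (0x1234ull << 12) | MODEL_DIRTY;

        model_change_pmd(&slot, MODEL_PROT_RW);
        assert(atomic_load(&slot) & MODEL_DIRTY);    /* dirty bit preserved */
        assert((atomic_load(&slot) >> 12) == 0x1234ull);
        return 0;
}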
@@ -1766,9 +1762,9 @@ static int __split_huge_page_map(struct page *page,
                        pte_t *pte, entry;
                        BUG_ON(PageCompound(page+i));
                        /*
-                        * Note that pmd_numa is not transferred deliberately
-                        * to avoid any possibility that pte_numa leaks to
-                        * a PROT_NONE VMA by accident.
+                        * Note that NUMA hinting access restrictions are not
+                        * transferred to avoid any possibility of altering
+                        * permissions across VMAs.
                         */
                        entry = mk_pte(page + i, vma->vm_page_prot);
                        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
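As the reworded comment says, the small entries created by the split take their
protections from vma->vm_page_prot and never inherit the NUMA hinting restriction, with
write access granted only through maybe_mkwrite(). A tiny model of that last helper,
using hypothetical MODEL_* bits rather than the kernel's pte and VM_* encodings:

#include <assert.h>
#include <stdint.h>

#define MODEL_PTE_WRITE 0x002ull
#define MODEL_PTE_DIRTY 0x040ull
#define MODEL_VM_WRITE  0x1ul

typedef uint64_t model_pte_t;

/* Model of maybe_mkwrite(): grant write access only if the VMA allows it. */
static model_pte_t model_maybe_mkwrite(model_pte_t pte, unsigned long vm_flags)
{
        if (vm_flags & MODEL_VM_WRITE)
                pte |= MODEL_PTE_WRITE;
        return pte;
}

int main(void)
{
        model_pte_t pte = MODEL_PTE_DIRTY;   /* as after pte_mkdirty(mk_pte(...)) */

        assert(model_maybe_mkwrite(pte, MODEL_VM_WRITE) & MODEL_PTE_WRITE);
        assert(!(model_maybe_mkwrite(pte, 0) & MODEL_PTE_WRITE));
        return 0;
}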