return ERR_PTR(-EFAULT);
/* Full NUMA hinting faults to serialise migration in fault paths */
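+ /*
+ * Bailing out here returns no page, forcing the caller into the
+ * fault path where do_huge_pmd_numa_page() handles the hint and
+ * any pending migration.
+ */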
- if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+ if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
goto out;
page = pmd_page(*pmd);
bool migrated = false;
int flags = 0;
+ /* A PROT_NONE fault should not end up here */
+ BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
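+ /*
+ * With PROT_NONE-based hinting, pmd_protnone() alone cannot tell a
+ * hint apart from a genuine PROT_NONE mapping; only the VMA flags
+ * checked above can.
+ */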
+
ptl = pmd_lock(mm, pmdp);
if (unlikely(!pmd_same(pmd, *pmdp)))
goto out_unlock;
* check_same as the page may no longer be mapped.
*/
if (unlikely(pmd_trans_migrating(*pmdp))) {
+ page = pmd_page(*pmdp);
spin_unlock(ptl);
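+ /*
+ * The page stays locked while it is being migrated, so waiting
+ * for the page lock to clear waits for migration to complete.
+ */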
- wait_migrate_huge_page(vma->anon_vma, pmdp);
+ wait_on_page_locked(page);
goto out;
}
/*
* Migrate the THP to the requested node, returns with page unlocked
- * and pmd_numa cleared.
+ * and access rights restored.
*/
spin_unlock(ptl);
migrated = migrate_misplaced_transhuge_page(mm, vma,
goto out;
clear_pmdnuma:
BUG_ON(!PageLocked(page));
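+ /* Restore the VMA's access permissions, clearing the hinting state */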
- pmd = pmd_mknonnuma(pmd);
+ pmd = pmd_modify(pmd, vma->vm_page_prot);
set_pmd_at(mm, haddr, pmdp, pmd);
- VM_BUG_ON(pmd_numa(*pmdp));
update_mmu_cache_pmd(vma, addr, pmdp);
unlock_page(page);
out_unlock:
return ret;
}
-int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end,
- unsigned char *vec)
-{
- spinlock_t *ptl;
- int ret = 0;
-
- if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
- /*
- * All logical pages in the range are present
- * if backed by a huge page.
- */
- spin_unlock(ptl);
- memset(vec, 1, (end - addr) >> PAGE_SHIFT);
- ret = 1;
- }
-
- return ret;
-}
-
int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
unsigned long old_addr,
unsigned long new_addr, unsigned long old_end,
if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
pmd_t entry;
- ret = 1;
- if (!prot_numa) {
- entry = pmdp_get_and_clear_notify(mm, addr, pmd);
- if (pmd_numa(entry))
- entry = pmd_mknonnuma(entry);
- entry = pmd_modify(entry, newprot);
- ret = HPAGE_PMD_NR;
- set_pmd_at(mm, addr, pmd, entry);
- BUG_ON(pmd_write(entry));
- } else {
- struct page *page = pmd_page(*pmd);
- /*
- * Do not trap faults against the zero page. The
- * read-only data is likely to be read-cached on the
- * local CPU cache and it is less useful to know about
- * local vs remote hits on the zero page.
- */
- if (!is_huge_zero_page(page) &&
- !pmd_numa(*pmd)) {
- pmdp_set_numa(mm, addr, pmd);
- ret = HPAGE_PMD_NR;
- }
+ /*
+ * Avoid trapping faults against the zero page. The read-only
+ * data is likely to be read-cached on the local CPU and
+ * local/remote hits to the zero page are not interesting.
+ */
+ if (prot_numa && is_huge_zero_pmd(*pmd)) {
+ spin_unlock(ptl);
+ return 0;
}
+
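+ /*
+ * For prot_numa callers, newprot is expected to be PAGE_NONE, so
+ * pmd_modify() leaves the entry protnone; access is restored later
+ * by do_huge_pmd_numa_page().
+ */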
+ entry = pmdp_get_and_clear_notify(mm, addr, pmd);
+ entry = pmd_modify(entry, newprot);
+ ret = HPAGE_PMD_NR;
+ set_pmd_at(mm, addr, pmd, entry);
+ BUG_ON(pmd_write(entry));
spin_unlock(ptl);
}
pte_t *pte, entry;
BUG_ON(PageCompound(page+i));
/*
- * Note that pmd_numa is not transferred deliberately
- * to avoid any possibility that pte_numa leaks to
- * a PROT_NONE VMA by accident.
+ * Note that NUMA hinting access restrictions are not
+ * transferred to avoid any possibility of altering
+ * permissions across VMAs.
*/
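+ /*
+ * The child ptes below are built from vm_page_prot, so a pending
+ * hint on the huge pmd is dropped rather than propagated.
+ */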
entry = mk_pte(page + i, vma->vm_page_prot);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
{
struct page *page;
pte_t *_pte;
- int referenced = 0, none = 0;
+ int none = 0;
+ bool referenced = false, writable = false;
for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
_pte++, address += PAGE_SIZE) {
pte_t pteval = *_pte;
else
goto out;
}
- if (!pte_present(pteval) || !pte_write(pteval))
+ if (!pte_present(pteval))
goto out;
goto out;
page = vm_normal_page(vma, address, pteval);
if (unlikely(!page))
VM_BUG_ON_PAGE(!PageAnon(page), page);
VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
- /* cannot use mapcount: can't collapse if there's a gup pin */
- if (page_count(page) != 1)
- goto out;
/*
* We can do it before isolate_lru_page because the
* page can't be freed from under us. NOTE: PG_lock
*/
if (!trylock_page(page))
goto out;
+
+ /*
+ * cannot use mapcount: can't collapse if there's a gup pin.
+ * The page must only be referenced by the scanned process
+ * and page swap cache.
+ */
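+ /* i.e. one ref for this mapping, plus one if in the swap cache */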
+ if (page_count(page) != 1 + !!PageSwapCache(page)) {
+ unlock_page(page);
+ goto out;
+ }
+ if (pte_write(pteval)) {
+ writable = true;
+ } else {
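+ /*
+ * A read-only page still in the swap cache may be shared through
+ * its swap entry; reuse_swap_page() checks that this process is
+ * the only user, so collapsing cannot break another task's COW.
+ */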
+ if (PageSwapCache(page) && !reuse_swap_page(page)) {
+ unlock_page(page);
+ goto out;
+ }
+ /*
+ * Page is not in the swap cache. It can be collapsed
+ * into a THP.
+ */
+ }
+
/*
* Isolate the page to avoid collapsing a hugepage
* currently in use by the VM.
/* If there is no mapped pte young, don't collapse the page */
if (pte_young(pteval) || PageReferenced(page) ||
mmu_notifier_test_young(vma->vm_mm, address))
- referenced = 1;
+ referenced = true;
}
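+ /* Collapse only if some pte was referenced and some pte is writable */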
- if (likely(referenced))
+ if (likely(referenced && writable))
return 1;
out:
release_pte_pages(pte, _pte);
{
pmd_t *pmd;
pte_t *pte, *_pte;
- int ret = 0, referenced = 0, none = 0;
+ int ret = 0, none = 0;
struct page *page;
unsigned long _address;
spinlock_t *ptl;
int node = NUMA_NO_NODE;
+ bool writable = false, referenced = false;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
else
goto out_unmap;
}
- if (!pte_present(pteval) || !pte_write(pteval))
+ if (!pte_present(pteval))
goto out_unmap;
+ if (pte_write(pteval))
+ writable = true;
+
page = vm_normal_page(vma, _address, pteval);
if (unlikely(!page))
goto out_unmap;
VM_BUG_ON_PAGE(PageCompound(page), page);
if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
goto out_unmap;
- /* cannot use mapcount: can't collapse if there's a gup pin */
- if (page_count(page) != 1)
+ /*
+ * cannot use mapcount: can't collapse if there's a gup pin.
+ * The page must only be referenced by the scanned process
+ * and page swap cache.
+ */
+ if (page_count(page) != 1 + !!PageSwapCache(page))
goto out_unmap;
if (pte_young(pteval) || PageReferenced(page) ||
mmu_notifier_test_young(vma->vm_mm, address))
- referenced = 1;
+ referenced = true;
}
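+ /* As in the isolate pass: need both a referenced and a writable pte */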
- if (referenced)
+ if (referenced && writable)
ret = 1;
out_unmap:
pte_unmap_unlock(pte, ptl);