(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
#endif
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)|
- (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG);
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
+ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
/* default scan 8*512 pte (or vmas) every 30 second */
static unsigned int khugepaged_pages_to_scan __read_mostly = HPAGE_PMD_NR*8;
zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
HPAGE_PMD_ORDER);
- if (!zero_page)
+ if (!zero_page) {
+ count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
return 0;
+ }
+ count_vm_event(THP_ZERO_PAGE_ALLOC);
preempt_disable();
if (cmpxchg(&huge_zero_pfn, 0, page_to_pfn(zero_page))) {
preempt_enable();
static struct kobj_attribute defrag_attr =
__ATTR(defrag, 0644, defrag_show, defrag_store);
+static ssize_t use_zero_page_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{ /*
+ * Show (read) callback of the "use_zero_page" kobject attribute.
+ * Forwards to single_flag_show() for
+ * TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG and returns that helper's
+ * result unchanged.
+ */
+ return single_flag_show(kobj, attr, buf,
+ TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
+}
+static ssize_t use_zero_page_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{ /*
+ * Store (write) callback of the "use_zero_page" kobject attribute.
+ * Hands buf/count to single_flag_store() for
+ * TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG and returns that helper's
+ * result unchanged.
+ */
+ return single_flag_store(kobj, attr, buf, count,
+ TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
+}
+static struct kobj_attribute use_zero_page_attr =
+ __ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store); /* "use_zero_page" attribute, mode 0644, wired to the show/store callbacks above; registered through hugepage_attr[] */
#ifdef CONFIG_DEBUG_VM
static ssize_t debug_cow_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
static struct attribute *hugepage_attr[] = {
&enabled_attr.attr,
&defrag_attr.attr,
+ &use_zero_page_attr.attr,
#ifdef CONFIG_DEBUG_VM
&debug_cow_attr.attr,
#endif
}
#endif
-static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
+static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
unsigned long zero_pfn)
{
pmd_t entry;
+ if (!pmd_none(*pmd)) /* pmd slot is already populated: do not overwrite it */
+ return false; /* nothing installed; pgtable was not deposited and nr_ptes is unchanged */
entry = pfn_pmd(zero_pfn, vma->vm_page_prot);
entry = pmd_wrprotect(entry);
entry = pmd_mkhuge(entry);
set_pmd_at(mm, haddr, pmd, entry);
pgtable_trans_huge_deposit(mm, pgtable);
mm->nr_ptes++;
+ return true; /* write-protected huge pmd for zero_pfn installed, pgtable deposited, nr_ptes incremented */
}
int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_OOM;
if (unlikely(khugepaged_enter(vma)))
return VM_FAULT_OOM;
- if (!(flags & FAULT_FLAG_WRITE)) {
+ if (!(flags & FAULT_FLAG_WRITE) &&
+ transparent_hugepage_use_zero_page()) {
pgtable_t pgtable;
unsigned long zero_pfn;
+ bool set;
pgtable = pte_alloc_one(mm, haddr);
if (unlikely(!pgtable))
return VM_FAULT_OOM;
goto out;
}
spin_lock(&mm->page_table_lock);
- set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
+ set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
zero_pfn);
spin_unlock(&mm->page_table_lock);
+ if (!set) {
+ pte_free(mm, pgtable);
+ put_huge_zero_page();
+ }
return 0;
}
page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
*/
if (is_huge_zero_pmd(pmd)) {
unsigned long zero_pfn;
+ bool set;
/*
* get_huge_zero_page() will never allocate a new page here,
* since we already have a zero page to copy. It just takes a
* reference.
*/
zero_pfn = get_huge_zero_page();
- set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
+ set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
zero_pfn);
+ BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */
ret = 0;
goto out_unlock;
}
static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmd, unsigned long haddr)
+ pmd_t *pmd, pmd_t orig_pmd, unsigned long haddr) /* orig_pmd: pmd value supplied by the caller, re-validated under the lock below */
{
pgtable_t pgtable;
pmd_t _pmd;
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
spin_lock(&mm->page_table_lock);
+ if (unlikely(!pmd_same(*pmd, orig_pmd))) /* *pmd no longer equals orig_pmd now that page_table_lock is held */
+ goto out_free_page; /* back out before the pmd is cleared */
+
pmdp_clear_flush(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
ret |= VM_FAULT_WRITE;
out:
return ret;
+out_free_page: /* bail-out path: drop the lock, end the mmu notifier range, then uncharge and release page */
+ spin_unlock(&mm->page_table_lock);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+ mem_cgroup_uncharge_page(page);
+ put_page(page);
+ goto out;
}
static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
count_vm_event(THP_FAULT_FALLBACK);
if (is_huge_zero_pmd(orig_pmd)) {
ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
- address, pmd, haddr);
+ address, pmd, orig_pmd, haddr);
} else {
ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
pmd, orig_pmd, page, haddr);