thp: add zero-page allocation counters and a use_zero_page sysfs knob, fix parallel-fault races
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d89220c..827d9c8 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -39,7 +39,8 @@ unsigned long transparent_hugepage_flags __read_mostly =
        (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
 #endif
        (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)|
-       (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG);
+       (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
+       (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
 
 /* default scan 8*512 pte (or vmas) every 30 second */
 static unsigned int khugepaged_pages_to_scan __read_mostly = HPAGE_PMD_NR*8;
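
The fault path later in this diff tests transparent_hugepage_use_zero_page(); that helper is not defined in mm/huge_memory.c and presumably lands next to the other flag predicates in include/linux/huge_mm.h. A sketch, assuming it follows the existing one-bit pattern:

#define transparent_hugepage_use_zero_page()				\
	(transparent_hugepage_flags &					\
	 (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))

Since the hunk above sets the bit in the default flags, the huge zero page stays enabled unless the new sysfs knob clears it.
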
@@ -184,8 +185,11 @@ retry:
 
        zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
                        HPAGE_PMD_ORDER);
-       if (!zero_page)
+       if (!zero_page) {
+               count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
                return 0;
+       }
+       count_vm_event(THP_ZERO_PAGE_ALLOC);
        preempt_disable();
        if (cmpxchg(&huge_zero_pfn, 0, page_to_pfn(zero_page))) {
                preempt_enable();
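
The "retry:" in the hunk header and the cmpxchg() above form a publish-or-retry scheme: any number of threads may allocate a candidate zero page, but only the first cmpxchg() publishes its pfn; losers free their copy and retry, now hitting the refcount fast path. A condensed sketch of the surrounding get_huge_zero_page(); the refcount details (huge_zero_refcount, the extra reference held for the shrinker) are inferred from the comment in copy_huge_pmd() below and are assumptions:

static unsigned long get_huge_zero_page(void)
{
	struct page *zero_page;
retry:
	/* Fast path: a zero page is already published; take a reference. */
	if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
		return ACCESS_ONCE(huge_zero_pfn);

	zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
			HPAGE_PMD_ORDER);
	if (!zero_page) {
		count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
		return 0;
	}
	count_vm_event(THP_ZERO_PAGE_ALLOC);
	preempt_disable();
	if (cmpxchg(&huge_zero_pfn, 0, page_to_pfn(zero_page))) {
		/* Lost the race: someone else published first. */
		preempt_enable();
		__free_page(zero_page);
		goto retry;
	}
	/* One reference for the caller, one kept for the shrinker. */
	atomic_set(&huge_zero_refcount, 2);
	preempt_enable();
	return ACCESS_ONCE(huge_zero_pfn);
}

Note that THP_ZERO_PAGE_ALLOC is bumped per successful alloc_pages() call, so a racing loser still counts even though its page is freed: the event measures allocation cost, not the number of live zero pages.
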
@@ -354,6 +358,20 @@ static ssize_t defrag_store(struct kobject *kobj,
 static struct kobj_attribute defrag_attr =
        __ATTR(defrag, 0644, defrag_show, defrag_store);
 
+static ssize_t use_zero_page_show(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       return single_flag_show(kobj, attr, buf,
+                               TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
+}
+static ssize_t use_zero_page_store(struct kobject *kobj,
+               struct kobj_attribute *attr, const char *buf, size_t count)
+{
+       return single_flag_store(kobj, attr, buf, count,
+                                TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
+}
+static struct kobj_attribute use_zero_page_attr =
+       __ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store);
 #ifdef CONFIG_DEBUG_VM
 static ssize_t debug_cow_show(struct kobject *kobj,
                                struct kobj_attribute *attr, char *buf)
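
single_flag_show() and single_flag_store() already exist in this file; the debug_cow attribute in the next hunk is built from the same helpers, so the new knob is pure boilerplate around one flag bit. For reference, the show side is roughly (an assumption based on the helper's name and callers):

static ssize_t single_flag_show(struct kobject *kobj,
				struct kobj_attribute *attr, char *buf,
				enum transparent_hugepage_flag flag)
{
	return sprintf(buf, "%d\n",
		       !!test_bit(flag, &transparent_hugepage_flags));
}

and the store side parses a 0/1 with kstrtoul() and flips the bit with set_bit()/clear_bit().
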
@@ -375,6 +393,7 @@ static struct kobj_attribute debug_cow_attr =
 static struct attribute *hugepage_attr[] = {
        &enabled_attr.attr,
        &defrag_attr.attr,
+       &use_zero_page_attr.attr,
 #ifdef CONFIG_DEBUG_VM
        &debug_cow_attr.attr,
 #endif
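
No registration change is needed for the new attribute: hugepage_attr[] is handed to sysfs as one group during init, presumably along these lines (the group and kobject names here are assumptions):

static struct attribute_group hugepage_attr_group = {
	.attrs = hugepage_attr,
};

	/* at init time: */
	err = sysfs_create_group(hugepage_kobj, &hugepage_attr_group);

so appending &use_zero_page_attr.attr above is enough to surface /sys/kernel/mm/transparent_hugepage/use_zero_page.
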
@@ -750,17 +769,20 @@ static inline struct page *alloc_hugepage(int defrag)
 }
 #endif
 
-static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
+static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
                struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
                unsigned long zero_pfn)
 {
        pmd_t entry;
+       if (!pmd_none(*pmd))
+               return false;
        entry = pfn_pmd(zero_pfn, vma->vm_page_prot);
        entry = pmd_wrprotect(entry);
        entry = pmd_mkhuge(entry);
        set_pmd_at(mm, haddr, pmd, entry);
        pgtable_trans_huge_deposit(mm, pgtable);
        mm->nr_ptes++;
+       return true;
 }
 
 int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
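
The void-to-bool change encodes a new contract: set_huge_zero_page() now refuses to overwrite a pmd that a parallel fault has populated since the caller last looked, and reports this so the caller can unwind whatever it staged for the mapping (see the fault path below). The matching reference drop, put_huge_zero_page(), is defined elsewhere in this file; given the refcounting scheme its shape is presumably:

static void put_huge_zero_page(void)
{
	/*
	 * The count never reaches zero here: the last reference is
	 * only dropped when a shrinker frees the page.
	 */
	BUG_ON(atomic_dec_and_test(&huge_zero_refcount));
}
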
@@ -776,9 +798,11 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        return VM_FAULT_OOM;
                if (unlikely(khugepaged_enter(vma)))
                        return VM_FAULT_OOM;
-               if (!(flags & FAULT_FLAG_WRITE)) {
+               if (!(flags & FAULT_FLAG_WRITE) &&
+                               transparent_hugepage_use_zero_page()) {
                        pgtable_t pgtable;
                        unsigned long zero_pfn;
+                       bool set;
                        pgtable = pte_alloc_one(mm, haddr);
                        if (unlikely(!pgtable))
                                return VM_FAULT_OOM;
@@ -789,9 +813,13 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                goto out;
                        }
                        spin_lock(&mm->page_table_lock);
-                       set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
+                       set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
                                        zero_pfn);
                        spin_unlock(&mm->page_table_lock);
+                       if (!set) {
+                               pte_free(mm, pgtable);
+                               put_huge_zero_page();
+                       }
                        return 0;
                }
                page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
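
Returning 0 even when set is false is deliberate: a parallel fault has already installed a mapping for this address, so the fault is satisfied and the only work left is releasing the preallocated page table and the zero-page reference. As a worked timeline:

/*
 * Two threads read-fault the same 2M range concurrently:
 *
 *   CPU0                              CPU1
 *   pgtable = pte_alloc_one()
 *                                     pgtable = pte_alloc_one()
 *   spin_lock(page_table_lock)
 *   set_huge_zero_page() -> true
 *   spin_unlock()
 *                                     spin_lock(page_table_lock)
 *                                     set_huge_zero_page() -> false
 *                                     spin_unlock()
 *                                     pte_free(mm, pgtable)
 *                                     put_huge_zero_page()
 *                                     return 0  (pmd already populated)
 */

Without the pmd_none() check, the loser would overwrite a live pmd and deposit a second page table against it, tripping page-table accounting later.
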
@@ -865,14 +893,16 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         */
        if (is_huge_zero_pmd(pmd)) {
                unsigned long zero_pfn;
+               bool set;
                /*
                 * get_huge_zero_page() will never allocate a new page here,
                 * since we already have a zero page to copy. It just takes a
                 * reference.
                 */
                zero_pfn = get_huge_zero_page();
-               set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
+               set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
                                zero_pfn);
+               BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */
                ret = 0;
                goto out_unlock;
        }
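
Unlike the fault path, copy_huge_pmd() runs with the destination pmd under lock in a context where nothing else should be populating it, so a lost race here is a real bug rather than a benign retry, hence BUG_ON(!set) instead of an unwind. The is_huge_zero_pmd() test above presumably just compares pfns against the published one, along the lines of:

static unsigned long huge_zero_pfn __read_mostly;

static inline bool is_huge_zero_pfn(unsigned long pfn)
{
	return huge_zero_pfn && pfn == huge_zero_pfn;
}

static inline bool is_huge_zero_pmd(pmd_t pmd)
{
	return is_huge_zero_pfn(pmd_pfn(pmd));
}
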
@@ -929,7 +959,7 @@ unlock:
 
 static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
                struct vm_area_struct *vma, unsigned long address,
-               pmd_t *pmd, unsigned long haddr)
+               pmd_t *pmd, pmd_t orig_pmd, unsigned long haddr)
 {
        pgtable_t pgtable;
        pmd_t _pmd;
@@ -958,6 +988,9 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
        spin_lock(&mm->page_table_lock);
+       if (unlikely(!pmd_same(*pmd, orig_pmd)))
+               goto out_free_page;
+
        pmdp_clear_flush(vma, haddr, pmd);
        /* leave pmd empty until pte is filled */
 
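
orig_pmd is the value sampled when the fault was taken; the fallback allocates pages, charges memcg, and opens an mmu-notifier range before retaking page_table_lock, and any of those waits gives a parallel fault the chance to change the pmd. pmd_same() revalidates before pmdp_clear_flush() touches anything. The caller applies the same guard on its own path, roughly:

	spin_lock(&mm->page_table_lock);
	if (unlikely(!pmd_same(*pmd, orig_pmd)))
		goto out_unlock;	/* pmd changed under us; nothing to do */

Passing orig_pmd down (previous hunk) lets the fallback repeat that check once it holds the lock.
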
@@ -990,6 +1023,12 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
        ret |= VM_FAULT_WRITE;
 out:
        return ret;
+out_free_page:
+       spin_unlock(&mm->page_table_lock);
+       mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+       mem_cgroup_uncharge_page(page);
+       put_page(page);
+       goto out;
 }
 
 static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
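
The ordering in out_free_page matters: drop page_table_lock first, then close the mmu-notifier range that was opened before the lock (notifier callbacks may sleep, so this must not happen under a spinlock), and only then release the memcg charge and the page reference. This mirrors the bracketing already visible in this function:

	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

	spin_lock(&mm->page_table_lock);
	/* ...install the zeroed ptes, or bail out to out_free_page... */
	spin_unlock(&mm->page_table_lock);

	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
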
@@ -1136,7 +1175,7 @@ alloc:
                count_vm_event(THP_FAULT_FALLBACK);
                if (is_huge_zero_pmd(orig_pmd)) {
                        ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
-                                       address, pmd, haddr);
+                                       address, pmd, orig_pmd, haddr);
                } else {
                        ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
                                        pmd, orig_pmd, page, haddr);