Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 14 Feb 2015 18:54:28 +0000 (10:54 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 14 Feb 2015 18:54:28 +0000 (10:54 -0800)
Pull ACCESS_ONCE() rule tightening from Christian Borntraeger:
 "Tighten rules for ACCESS_ONCE

  This series tightens the rules for ACCESS_ONCE to only work on scalar
  types.  It also contains the necessary fixups as indicated by build
  bots of linux-next.  Now everything is in place to prevent new
  non-scalar users of ACCESS_ONCE and we can continue to convert code to
  READ_ONCE/WRITE_ONCE"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux:
  kernel: Fix sparse warning for ACCESS_ONCE
  next: sh: Fix compile error
  kernel: tighten rules for ACCESS ONCE
  mm/gup: Replace ACCESS_ONCE with READ_ONCE
  x86/spinlock: Leftover conversion ACCESS_ONCE->READ_ONCE
  x86/xen/p2m: Replace ACCESS_ONCE with READ_ONCE
  ppc/hugetlbfs: Replace ACCESS_ONCE with READ_ONCE
  ppc/kvm: Replace ACCESS_ONCE with READ_ONCE

arch/powerpc/mm/hugetlbpage.c
arch/sh/mm/gup.c
arch/x86/xen/p2m.c
include/linux/compiler.h
mm/gup.c

diff --combined arch/powerpc/mm/hugetlbpage.c
@@@ -714,14 -714,6 +714,14 @@@ follow_huge_pmd(struct mm_struct *mm, u
        return NULL;
  }
  
 +struct page *
 +follow_huge_pud(struct mm_struct *mm, unsigned long address,
 +              pud_t *pud, int write)
 +{
 +      BUG();
 +      return NULL;
 +}
 +
  static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
                                      unsigned long sz)
  {
@@@ -986,7 -978,7 +986,7 @@@ pte_t *find_linux_pte_or_hugepte(pgd_t 
                 */
                pdshift = PUD_SHIFT;
                pudp = pud_offset(&pgd, ea);
-               pud  = ACCESS_ONCE(*pudp);
+               pud  = READ_ONCE(*pudp);
  
                if (pud_none(pud))
                        return NULL;
                else {
                        pdshift = PMD_SHIFT;
                        pmdp = pmd_offset(&pud, ea);
-                       pmd  = ACCESS_ONCE(*pmdp);
+                       pmd  = READ_ONCE(*pmdp);
                        /*
                         * A hugepage collapse is captured by pmd_none, because
                         * it marks the pmd none and does an hpte invalidate.
diff --combined arch/sh/mm/gup.c
@@@ -17,7 -17,7 +17,7 @@@
  static inline pte_t gup_get_pte(pte_t *ptep)
  {
  #ifndef CONFIG_X2TLB
-       return ACCESS_ONCE(*ptep);
+       return READ_ONCE(*ptep);
  #else
        /*
         * With get_user_pages_fast, we walk down the pagetables without
@@@ -257,8 -257,10 +257,8 @@@ slow_irqon
                start += nr << PAGE_SHIFT;
                pages += nr;
  
 -              down_read(&mm->mmap_sem);
 -              ret = get_user_pages(current, mm, start,
 -                      (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
 -              up_read(&mm->mmap_sem);
 +              ret = get_user_pages_unlocked(current, mm, start,
 +                      (end - start) >> PAGE_SHIFT, write, 0, pages);
  
                /* Have to be a bit careful with return values */
                if (nr > 0) {
diff --combined arch/x86/xen/p2m.c
@@@ -84,6 -84,8 +84,6 @@@
  
  #define PMDS_PER_MID_PAGE     (P2M_MID_PER_PAGE / PTRS_PER_PTE)
  
 -static void __init m2p_override_init(void);
 -
  unsigned long *xen_p2m_addr __read_mostly;
  EXPORT_SYMBOL_GPL(xen_p2m_addr);
  unsigned long xen_p2m_size __read_mostly;
@@@ -165,13 -167,10 +165,13 @@@ static void * __ref alloc_p2m_page(void
        return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
  }
  
 -/* Only to be called in case of a race for a page just allocated! */
 -static void free_p2m_page(void *p)
 +static void __ref free_p2m_page(void *p)
  {
 -      BUG_ON(!slab_is_available());
 +      if (unlikely(!slab_is_available())) {
 +              free_bootmem((unsigned long)p, PAGE_SIZE);
 +              return;
 +      }
 +
        free_page((unsigned long)p);
  }
  
@@@ -376,7 -375,7 +376,7 @@@ static void __init xen_rebuild_p2m_list
                        p2m_missing_pte : p2m_identity_pte;
                for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
                        pmdp = populate_extra_pmd(
 -                              (unsigned long)(p2m + pfn + i * PTRS_PER_PTE));
 +                              (unsigned long)(p2m + pfn) + i * PMD_SIZE);
                        set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE));
                }
        }
@@@ -400,6 -399,8 +400,6 @@@ void __init xen_vmalloc_p2m_tree(void
        xen_p2m_size = xen_max_p2m_pfn;
  
        xen_inv_extra_mem();
 -
 -      m2p_override_init();
  }
  
  unsigned long get_phys_to_machine(unsigned long pfn)
@@@ -435,9 -436,10 +435,9 @@@ EXPORT_SYMBOL_GPL(get_phys_to_machine)
   * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by an individual
   * pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
   */
 -static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg)
 +static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
  {
        pte_t *ptechk;
 -      pte_t *pteret = ptep;
        pte_t *pte_newpg[PMDS_PER_MID_PAGE];
        pmd_t *pmdp;
        unsigned int level;
                if (ptechk == pte_pg) {
                        set_pmd(pmdp,
                                __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
 -                      if (vaddr == (addr & ~(PMD_SIZE - 1)))
 -                              pteret = pte_offset_kernel(pmdp, addr);
                        pte_newpg[i] = NULL;
                }
  
                vaddr += PMD_SIZE;
        }
  
 -      return pteret;
 +      return lookup_address(addr, &level);
  }
  
  /*
@@@ -513,7 -517,7 +513,7 @@@ static bool alloc_p2m(unsigned long pfn
  
        if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) {
                /* PMD level is missing, allocate a new one */
 -              ptep = alloc_p2m_pmd(addr, ptep, pte_pg);
 +              ptep = alloc_p2m_pmd(addr, pte_pg);
                if (!ptep)
                        return false;
        }
                mid_mfn = NULL;
        }
  
-       p2m_pfn = pte_pfn(ACCESS_ONCE(*ptep));
+       p2m_pfn = pte_pfn(READ_ONCE(*ptep));
        if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) ||
            p2m_pfn == PFN_DOWN(__pa(p2m_missing))) {
                /* p2m leaf page is missing */
@@@ -648,21 -652,100 +648,21 @@@ bool set_phys_to_machine(unsigned long 
        return true;
  }
  
 -#define M2P_OVERRIDE_HASH_SHIFT       10
 -#define M2P_OVERRIDE_HASH     (1 << M2P_OVERRIDE_HASH_SHIFT)
 -
 -static struct list_head *m2p_overrides;
 -static DEFINE_SPINLOCK(m2p_override_lock);
 -
 -static void __init m2p_override_init(void)
 -{
 -      unsigned i;
 -
 -      m2p_overrides = alloc_bootmem_align(
 -                              sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
 -                              sizeof(unsigned long));
 -
 -      for (i = 0; i < M2P_OVERRIDE_HASH; i++)
 -              INIT_LIST_HEAD(&m2p_overrides[i]);
 -}
 -
 -static unsigned long mfn_hash(unsigned long mfn)
 -{
 -      return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
 -}
 -
 -/* Add an MFN override for a particular page */
 -static int m2p_add_override(unsigned long mfn, struct page *page,
 -                          struct gnttab_map_grant_ref *kmap_op)
 -{
 -      unsigned long flags;
 -      unsigned long pfn;
 -      unsigned long uninitialized_var(address);
 -      unsigned level;
 -      pte_t *ptep = NULL;
 -
 -      pfn = page_to_pfn(page);
 -      if (!PageHighMem(page)) {
 -              address = (unsigned long)__va(pfn << PAGE_SHIFT);
 -              ptep = lookup_address(address, &level);
 -              if (WARN(ptep == NULL || level != PG_LEVEL_4K,
 -                       "m2p_add_override: pfn %lx not mapped", pfn))
 -                      return -EINVAL;
 -      }
 -
 -      if (kmap_op != NULL) {
 -              if (!PageHighMem(page)) {
 -                      struct multicall_space mcs =
 -                              xen_mc_entry(sizeof(*kmap_op));
 -
 -                      MULTI_grant_table_op(mcs.mc,
 -                                      GNTTABOP_map_grant_ref, kmap_op, 1);
 -
 -                      xen_mc_issue(PARAVIRT_LAZY_MMU);
 -              }
 -      }
 -      spin_lock_irqsave(&m2p_override_lock, flags);
 -      list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
 -      spin_unlock_irqrestore(&m2p_override_lock, flags);
 -
 -      /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
 -       * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
 -       * pfn so that the following mfn_to_pfn(mfn) calls will return the
 -       * pfn from the m2p_override (the backend pfn) instead.
 -       * We need to do this because the pages shared by the frontend
 -       * (xen-blkfront) can be already locked (lock_page, called by
 -       * do_read_cache_page); when the userspace backend tries to use them
 -       * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
 -       * do_blockdev_direct_IO is going to try to lock the same pages
 -       * again resulting in a deadlock.
 -       * As a side effect get_user_pages_fast might not be safe on the
 -       * frontend pages while they are being shared with the backend,
 -       * because mfn_to_pfn (that ends up being called by GUPF) will
 -       * return the backend pfn rather than the frontend pfn. */
 -      pfn = mfn_to_pfn_no_overrides(mfn);
 -      if (__pfn_to_mfn(pfn) == mfn)
 -              set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
 -
 -      return 0;
 -}
 -
  int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
                            struct gnttab_map_grant_ref *kmap_ops,
                            struct page **pages, unsigned int count)
  {
        int i, ret = 0;
 -      bool lazy = false;
        pte_t *pte;
  
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 0;
  
 -      if (kmap_ops &&
 -          !in_interrupt() &&
 -          paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
 -              arch_enter_lazy_mmu_mode();
 -              lazy = true;
 +      if (kmap_ops) {
 +              ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
 +                                              kmap_ops, count);
 +              if (ret)
 +                      goto out;
        }
  
        for (i = 0; i < count; i++) {
                }
                pfn = page_to_pfn(pages[i]);
  
 -              WARN_ON(PagePrivate(pages[i]));
 -              SetPagePrivate(pages[i]);
 -              set_page_private(pages[i], mfn);
 -              pages[i]->index = pfn_to_mfn(pfn);
 +              WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned");
  
                if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
                        ret = -ENOMEM;
                        goto out;
                }
 -
 -              if (kmap_ops) {
 -                      ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
 -                      if (ret)
 -                              goto out;
 -              }
        }
  
  out:
 -      if (lazy)
 -              arch_leave_lazy_mmu_mode();
 -
        return ret;
  }
  EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
  
 -static struct page *m2p_find_override(unsigned long mfn)
 -{
 -      unsigned long flags;
 -      struct list_head *bucket;
 -      struct page *p, *ret;
 -
 -      if (unlikely(!m2p_overrides))
 -              return NULL;
 -
 -      ret = NULL;
 -      bucket = &m2p_overrides[mfn_hash(mfn)];
 -
 -      spin_lock_irqsave(&m2p_override_lock, flags);
 -
 -      list_for_each_entry(p, bucket, lru) {
 -              if (page_private(p) == mfn) {
 -                      ret = p;
 -                      break;
 -              }
 -      }
 -
 -      spin_unlock_irqrestore(&m2p_override_lock, flags);
 -
 -      return ret;
 -}
 -
 -static int m2p_remove_override(struct page *page,
 -                             struct gnttab_map_grant_ref *kmap_op,
 -                             unsigned long mfn)
 -{
 -      unsigned long flags;
 -      unsigned long pfn;
 -      unsigned long uninitialized_var(address);
 -      unsigned level;
 -      pte_t *ptep = NULL;
 -
 -      pfn = page_to_pfn(page);
 -
 -      if (!PageHighMem(page)) {
 -              address = (unsigned long)__va(pfn << PAGE_SHIFT);
 -              ptep = lookup_address(address, &level);
 -
 -              if (WARN(ptep == NULL || level != PG_LEVEL_4K,
 -                       "m2p_remove_override: pfn %lx not mapped", pfn))
 -                      return -EINVAL;
 -      }
 -
 -      spin_lock_irqsave(&m2p_override_lock, flags);
 -      list_del(&page->lru);
 -      spin_unlock_irqrestore(&m2p_override_lock, flags);
 -
 -      if (kmap_op != NULL) {
 -              if (!PageHighMem(page)) {
 -                      struct multicall_space mcs;
 -                      struct gnttab_unmap_and_replace *unmap_op;
 -                      struct page *scratch_page = get_balloon_scratch_page();
 -                      unsigned long scratch_page_address = (unsigned long)
 -                              __va(page_to_pfn(scratch_page) << PAGE_SHIFT);
 -
 -                      /*
 -                       * It might be that we queued all the m2p grant table
 -                       * hypercalls in a multicall, then m2p_remove_override
 -                       * get called before the multicall has actually been
 -                       * issued. In this case handle is going to -1 because
 -                       * it hasn't been modified yet.
 -                       */
 -                      if (kmap_op->handle == -1)
 -                              xen_mc_flush();
 -                      /*
 -                       * Now if kmap_op->handle is negative it means that the
 -                       * hypercall actually returned an error.
 -                       */
 -                      if (kmap_op->handle == GNTST_general_error) {
 -                              pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings",
 -                                      pfn, mfn);
 -                              put_balloon_scratch_page();
 -                              return -1;
 -                      }
 -
 -                      xen_mc_batch();
 -
 -                      mcs = __xen_mc_entry(
 -                              sizeof(struct gnttab_unmap_and_replace));
 -                      unmap_op = mcs.args;
 -                      unmap_op->host_addr = kmap_op->host_addr;
 -                      unmap_op->new_addr = scratch_page_address;
 -                      unmap_op->handle = kmap_op->handle;
 -
 -                      MULTI_grant_table_op(mcs.mc,
 -                              GNTTABOP_unmap_and_replace, unmap_op, 1);
 -
 -                      mcs = __xen_mc_entry(0);
 -                      MULTI_update_va_mapping(mcs.mc, scratch_page_address,
 -                                      pfn_pte(page_to_pfn(scratch_page),
 -                                      PAGE_KERNEL_RO), 0);
 -
 -                      xen_mc_issue(PARAVIRT_LAZY_MMU);
 -
 -                      kmap_op->host_addr = 0;
 -                      put_balloon_scratch_page();
 -              }
 -      }
 -
 -      /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
 -       * somewhere in this domain, even before being added to the
 -       * m2p_override (see comment above in m2p_add_override).
 -       * If there are no other entries in the m2p_override corresponding
 -       * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
 -       * the original pfn (the one shared by the frontend): the backend
 -       * cannot do any IO on this page anymore because it has been
 -       * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
 -       * the original pfn causes mfn_to_pfn(mfn) to return the frontend
 -       * pfn again. */
 -      mfn &= ~FOREIGN_FRAME_BIT;
 -      pfn = mfn_to_pfn_no_overrides(mfn);
 -      if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) &&
 -                      m2p_find_override(mfn) == NULL)
 -              set_phys_to_machine(pfn, mfn);
 -
 -      return 0;
 -}
 -
  int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 -                            struct gnttab_map_grant_ref *kmap_ops,
 +                            struct gnttab_unmap_grant_ref *kunmap_ops,
                              struct page **pages, unsigned int count)
  {
        int i, ret = 0;
 -      bool lazy = false;
  
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 0;
  
 -      if (kmap_ops &&
 -          !in_interrupt() &&
 -          paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
 -              arch_enter_lazy_mmu_mode();
 -              lazy = true;
 -      }
 -
        for (i = 0; i < count; i++) {
                unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
                unsigned long pfn = page_to_pfn(pages[i]);
                        goto out;
                }
  
 -              set_page_private(pages[i], INVALID_P2M_ENTRY);
 -              WARN_ON(!PagePrivate(pages[i]));
 -              ClearPagePrivate(pages[i]);
 -              set_phys_to_machine(pfn, pages[i]->index);
 -
 -              if (kmap_ops)
 -                      ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn);
 -              if (ret)
 -                      goto out;
 +              set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
        }
 -
 +      if (kunmap_ops)
 +              ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
 +                                              kunmap_ops, count);
  out:
 -      if (lazy)
 -              arch_leave_lazy_mmu_mode();
        return ret;
  }
  EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
  
 -unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
 -{
 -      struct page *p = m2p_find_override(mfn);
 -      unsigned long ret = pfn;
 -
 -      if (p)
 -              ret = page_to_pfn(p);
 -
 -      return ret;
 -}
 -EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
 -
  #ifdef CONFIG_XEN_DEBUG_FS
  #include <linux/debugfs.h>
  #include "debugfs.h"
diff --combined include/linux/compiler.h
@@@ -54,11 -54,7 +54,11 @@@ extern void __chk_io_ptr(const volatil
  #include <linux/compiler-gcc.h>
  #endif
  
 +#ifdef CC_USING_HOTPATCH
 +#define notrace __attribute__((hotpatch(0,0)))
 +#else
  #define notrace __attribute__((no_instrument_function))
 +#endif
  
  /* Intel compiler defines __GNUC__. So we will overwrite implementations
   * coming from above header files here
@@@ -219,7 -215,7 +219,7 @@@ static __always_inline void __read_once
        }
  }
  
 -static __always_inline void __assign_once_size(volatile void *p, void *res, int size)
 +static __always_inline void __write_once_size(volatile void *p, void *res, int size)
  {
        switch (size) {
        case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
  /*
   * Prevent the compiler from merging or refetching reads or writes. The
   * compiler is also forbidden from reordering successive instances of
 - * READ_ONCE, ASSIGN_ONCE and ACCESS_ONCE (see below), but only when the
 + * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
   * compiler is aware of some particular ordering.  One way to make the
   * compiler aware of ordering is to put the two invocations of READ_ONCE,
 - * ASSIGN_ONCE or ACCESS_ONCE() in different C statements.
 + * WRITE_ONCE or ACCESS_ONCE() in different C statements.
   *
   * In contrast to ACCESS_ONCE these two macros will also work on aggregate
   * data types like structs or unions. If the size of the accessed data
   * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
 - * READ_ONCE() and ASSIGN_ONCE()  will fall back to memcpy and print a
 + * READ_ONCE() and WRITE_ONCE()  will fall back to memcpy and print a
   * compile-time warning.
   *
   * Their two major use cases are: (1) Mediating communication between
  #define READ_ONCE(x) \
        ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; })
  
 -#define ASSIGN_ONCE(val, x) \
 -      ({ typeof(x) __val; __val = val; __assign_once_size(&x, &__val, sizeof(__val)); __val; })
 +#define WRITE_ONCE(x, val) \
 +      ({ typeof(x) __val; __val = val; __write_once_size(&x, &__val, sizeof(__val)); __val; })
  
  #endif /* __KERNEL__ */
  
  
  /* Is this type a native word size -- useful for atomic operations */
  #ifndef __native_word
 -# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
 +# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
  #endif
  
  /* Compile time object size, -1 for unknown */
   * to make the compiler aware of ordering is to put the two invocations of
   * ACCESS_ONCE() in different C statements.
   *
-  * This macro does absolutely -nothing- to prevent the CPU from reordering,
-  * merging, or refetching absolutely anything at any time.  Its main intended
-  * use is to mediate communication between process-level code and irq/NMI
-  * handlers, all running on the same CPU.
+  * ACCESS_ONCE will only work on scalar types. For union types, ACCESS_ONCE
+  * on a union member will work as long as the size of the member matches the
+  * size of the union and the size is smaller than word size.
+  *
+  * The major use cases of ACCESS_ONCE used to be (1) Mediating communication
+  * between process-level code and irq/NMI handlers, all running on the same CPU,
+  * and (2) Ensuring that the compiler does not  fold, spindle, or otherwise
+  * mutilate accesses that either do not require ordering or that interact
+  * with an explicit memory barrier or atomic instruction that provides the
+  * required ordering.
+  *
+  * If possible use READ_ONCE/WRITE_ONCE instead.
   */
- #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+ #define __ACCESS_ONCE(x) ({ \
+        __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \
+       (volatile typeof(x) *)&(x); })
+ #define ACCESS_ONCE(x) (*__ACCESS_ONCE(x))
  
  /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
  #ifdef CONFIG_KPROBES
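
[ Editorial sketch of how the new __ACCESS_ONCE() enforces the scalar-only
  rule: the otherwise unused dummy variable is initialised from a literal 0,
  which the compiler only accepts for scalar (arithmetic or pointer) types,
  so aggregate arguments now fail at build time while the volatile access
  itself is unchanged. The macro below is a simplified stand-alone
  re-implementation for illustration, not the kernel's. ]

    #include <stdio.h>

    /* Simplified stand-in for the kernel's __ACCESS_ONCE()/ACCESS_ONCE(). */
    #define MY_ACCESS_ONCE(x) (*({                                          \
            __attribute__((unused)) typeof(x) __dummy = (typeof(x))0;       \
            (volatile typeof(x) *)&(x); }))

    struct pair { int a, b; };

    int main(void)
    {
            int counter = 42;
            struct pair p = { 1, 2 };

            printf("%d\n", MY_ACCESS_ONCE(counter)); /* scalar: builds fine */

            /* struct pair q = MY_ACCESS_ONCE(p);
             * does not build: "conversion to non-scalar type requested" --
             * the same compile-time error that flushes out non-scalar
             * ACCESS_ONCE() users in the kernel. */
            (void)p;
            return 0;
    }
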
diff --combined mm/gup.c
+++ b/mm/gup.c
@@@ -55,7 -55,7 +55,7 @@@ retry
                 */
                if (likely(!(flags & FOLL_MIGRATION)))
                        goto no_page;
 -              if (pte_none(pte) || pte_file(pte))
 +              if (pte_none(pte))
                        goto no_page;
                entry = pte_to_swp_entry(pte);
                if (!is_migration_entry(entry))
@@@ -64,7 -64,7 +64,7 @@@
                migration_entry_wait(mm, pmd, address);
                goto retry;
        }
 -      if ((flags & FOLL_NUMA) && pte_numa(pte))
 +      if ((flags & FOLL_NUMA) && pte_protnone(pte))
                goto no_page;
        if ((flags & FOLL_WRITE) && !pte_write(pte)) {
                pte_unmap_unlock(ptep, ptl);
@@@ -167,10 -167,10 +167,10 @@@ struct page *follow_page_mask(struct vm
        if (pud_none(*pud))
                return no_page_table(vma, flags);
        if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
 -              if (flags & FOLL_GET)
 -                      return NULL;
 -              page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
 -              return page;
 +              page = follow_huge_pud(mm, address, pud, flags);
 +              if (page)
 +                      return page;
 +              return no_page_table(vma, flags);
        }
        if (unlikely(pud_bad(*pud)))
                return no_page_table(vma, flags);
        if (pmd_none(*pmd))
                return no_page_table(vma, flags);
        if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
 -              page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
 -              if (flags & FOLL_GET) {
 -                      /*
 -                       * Refcount on tail pages are not well-defined and
 -                       * shouldn't be taken. The caller should handle a NULL
 -                       * return when trying to follow tail pages.
 -                       */
 -                      if (PageHead(page))
 -                              get_page(page);
 -                      else
 -                              page = NULL;
 -              }
 -              return page;
 +              page = follow_huge_pmd(mm, address, pmd, flags);
 +              if (page)
 +                      return page;
 +              return no_page_table(vma, flags);
        }
 -      if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
 +      if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
                return no_page_table(vma, flags);
        if (pmd_trans_huge(*pmd)) {
                if (flags & FOLL_SPLIT) {
@@@ -287,7 -296,7 +287,7 @@@ static int faultin_page(struct task_str
                        return -ENOMEM;
                if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
                        return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
 -              if (ret & VM_FAULT_SIGBUS)
 +              if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
                        return -EFAULT;
                BUG();
        }
@@@ -562,7 -571,7 +562,7 @@@ int fixup_user_fault(struct task_struc
                        return -ENOMEM;
                if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
                        return -EHWPOISON;
 -              if (ret & VM_FAULT_SIGBUS)
 +              if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
                        return -EFAULT;
                BUG();
        }
        return 0;
  }
  
 +static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
 +                                              struct mm_struct *mm,
 +                                              unsigned long start,
 +                                              unsigned long nr_pages,
 +                                              int write, int force,
 +                                              struct page **pages,
 +                                              struct vm_area_struct **vmas,
 +                                              int *locked, bool notify_drop,
 +                                              unsigned int flags)
 +{
 +      long ret, pages_done;
 +      bool lock_dropped;
 +
 +      if (locked) {
 +              /* if VM_FAULT_RETRY can be returned, vmas become invalid */
 +              BUG_ON(vmas);
 +              /* check caller initialized locked */
 +              BUG_ON(*locked != 1);
 +      }
 +
 +      if (pages)
 +              flags |= FOLL_GET;
 +      if (write)
 +              flags |= FOLL_WRITE;
 +      if (force)
 +              flags |= FOLL_FORCE;
 +
 +      pages_done = 0;
 +      lock_dropped = false;
 +      for (;;) {
 +              ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
 +                                     vmas, locked);
 +              if (!locked)
 +                      /* VM_FAULT_RETRY couldn't trigger, bypass */
 +                      return ret;
 +
 +              /* VM_FAULT_RETRY cannot return errors */
 +              if (!*locked) {
 +                      BUG_ON(ret < 0);
 +                      BUG_ON(ret >= nr_pages);
 +              }
 +
 +              if (!pages)
 +                      /* If it's a prefault don't insist harder */
 +                      return ret;
 +
 +              if (ret > 0) {
 +                      nr_pages -= ret;
 +                      pages_done += ret;
 +                      if (!nr_pages)
 +                              break;
 +              }
 +              if (*locked) {
 +                      /* VM_FAULT_RETRY didn't trigger */
 +                      if (!pages_done)
 +                              pages_done = ret;
 +                      break;
 +              }
 +              /* VM_FAULT_RETRY triggered, so seek to the faulting offset */
 +              pages += ret;
 +              start += ret << PAGE_SHIFT;
 +
 +              /*
 +               * Repeat on the address that fired VM_FAULT_RETRY
 +               * without FAULT_FLAG_ALLOW_RETRY but with
 +               * FAULT_FLAG_TRIED.
 +               */
 +              *locked = 1;
 +              lock_dropped = true;
 +              down_read(&mm->mmap_sem);
 +              ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
 +                                     pages, NULL, NULL);
 +              if (ret != 1) {
 +                      BUG_ON(ret > 1);
 +                      if (!pages_done)
 +                              pages_done = ret;
 +                      break;
 +              }
 +              nr_pages--;
 +              pages_done++;
 +              if (!nr_pages)
 +                      break;
 +              pages++;
 +              start += PAGE_SIZE;
 +      }
 +      if (notify_drop && lock_dropped && *locked) {
 +              /*
 +               * We must let the caller know we temporarily dropped the lock
 +               * and so the critical section protected by it was lost.
 +               */
 +              up_read(&mm->mmap_sem);
 +              *locked = 0;
 +      }
 +      return pages_done;
 +}
 +
 +/*
 + * We can leverage the VM_FAULT_RETRY functionality in the page fault
 + * paths better by using either get_user_pages_locked() or
 + * get_user_pages_unlocked().
 + *
 + * get_user_pages_locked() is suitable to replace the form:
 + *
 + *      down_read(&mm->mmap_sem);
 + *      do_something()
 + *      get_user_pages(tsk, mm, ..., pages, NULL);
 + *      up_read(&mm->mmap_sem);
 + *
 + *  to:
 + *
 + *      int locked = 1;
 + *      down_read(&mm->mmap_sem);
 + *      do_something()
 + *      get_user_pages_locked(tsk, mm, ..., pages, &locked);
 + *      if (locked)
 + *          up_read(&mm->mmap_sem);
 + */
 +long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm,
 +                         unsigned long start, unsigned long nr_pages,
 +                         int write, int force, struct page **pages,
 +                         int *locked)
 +{
 +      return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
 +                                     pages, NULL, locked, true, FOLL_TOUCH);
 +}
 +EXPORT_SYMBOL(get_user_pages_locked);
 +
 +/*
 + * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows to
 + * pass additional gup_flags as last parameter (like FOLL_HWPOISON).
 + *
 + * NOTE: here FOLL_TOUCH is not set implicitly and must be set by the
 + * caller if required (just like with __get_user_pages). "FOLL_GET",
 + * "FOLL_WRITE" and "FOLL_FORCE" are set implicitly as needed
 + * according to the parameters "pages", "write", "force"
 + * respectively.
 + */
 +__always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
 +                                             unsigned long start, unsigned long nr_pages,
 +                                             int write, int force, struct page **pages,
 +                                             unsigned int gup_flags)
 +{
 +      long ret;
 +      int locked = 1;
 +      down_read(&mm->mmap_sem);
 +      ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
 +                                    pages, NULL, &locked, false, gup_flags);
 +      if (locked)
 +              up_read(&mm->mmap_sem);
 +      return ret;
 +}
 +EXPORT_SYMBOL(__get_user_pages_unlocked);
 +
 +/*
 + * get_user_pages_unlocked() is suitable to replace the form:
 + *
 + *      down_read(&mm->mmap_sem);
 + *      get_user_pages(tsk, mm, ..., pages, NULL);
 + *      up_read(&mm->mmap_sem);
 + *
 + *  with:
 + *
 + *      get_user_pages_unlocked(tsk, mm, ..., pages);
 + *
 + * It is functionally equivalent to get_user_pages_fast so
 + * get_user_pages_fast should be used instead, if the two parameters
 + * "tsk" and "mm" are respectively equal to current and current->mm,
 + * or if "force" shall be set to 1 (get_user_pages_fast misses the
 + * "force" parameter).
 + */
 +long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
 +                           unsigned long start, unsigned long nr_pages,
 +                           int write, int force, struct page **pages)
 +{
 +      return __get_user_pages_unlocked(tsk, mm, start, nr_pages, write,
 +                                       force, pages, FOLL_TOUCH);
 +}
 +EXPORT_SYMBOL(get_user_pages_unlocked);
 +
  /*
   * get_user_pages() - pin user pages in memory
   * @tsk:      the task_struct to use for page fault accounting, or
   * use the correct cache flushing APIs.
   *
   * See also get_user_pages_fast, for performance critical applications.
 + *
 + * get_user_pages should be phased out in favor of
 + * get_user_pages_locked|unlocked or get_user_pages_fast. Nothing
 + * should use get_user_pages because it cannot pass
 + * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault.
   */
  long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                unsigned long start, unsigned long nr_pages, int write,
                int force, struct page **pages, struct vm_area_struct **vmas)
  {
 -      int flags = FOLL_TOUCH;
 -
 -      if (pages)
 -              flags |= FOLL_GET;
 -      if (write)
 -              flags |= FOLL_WRITE;
 -      if (force)
 -              flags |= FOLL_FORCE;
 -
 -      return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas,
 -                              NULL);
 +      return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
 +                                     pages, vmas, NULL, false, FOLL_TOUCH);
  }
  EXPORT_SYMBOL(get_user_pages);
  
@@@ -906,10 -740,10 +906,10 @@@ static int gup_pte_range(pmd_t pmd, uns
  
                /*
                 * Similar to the PMD case below, NUMA hinting must take slow
 -               * path
 +               * path using the pte_protnone check.
                 */
                if (!pte_present(pte) || pte_special(pte) ||
 -                      pte_numa(pte) || (write && !pte_write(pte)))
 +                      pte_protnone(pte) || (write && !pte_write(pte)))
                        goto pte_unmap;
  
                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@@ -1092,7 -926,7 +1092,7 @@@ static int gup_pmd_range(pud_t pud, uns
  
        pmdp = pmd_offset(&pud, addr);
        do {
-               pmd_t pmd = ACCESS_ONCE(*pmdp);
+               pmd_t pmd = READ_ONCE(*pmdp);
  
                next = pmd_addr_end(addr, end);
                if (pmd_none(pmd) || pmd_trans_splitting(pmd))
                         * slowpath for accounting purposes and so that they
                         * can be serialised against THP migration.
                         */
 -                      if (pmd_numa(pmd))
 +                      if (pmd_protnone(pmd))
                                return 0;
  
                        if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
@@@ -1243,8 -1077,10 +1243,8 @@@ int get_user_pages_fast(unsigned long s
                start += nr << PAGE_SHIFT;
                pages += nr;
  
 -              down_read(&mm->mmap_sem);
 -              ret = get_user_pages(current, mm, start,
 -                                   nr_pages - nr, write, 0, pages, NULL);
 -              up_read(&mm->mmap_sem);
 +              ret = get_user_pages_unlocked(current, mm, start,
 +                                            nr_pages - nr, write, 0, pages);
  
                /* Have to be a bit careful with return values */
                if (nr > 0) {