x86/mm, sched/core: Turn off IRQs in switch_mm()
[cascardo/linux.git] arch/x86/include/asm/mmu_context.h
index bfd9b2a..3963481 100644
@@ -52,15 +52,15 @@ struct ldt_struct {
 /*
  * Used for LDT copy/destruction.
  */
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
-void destroy_context(struct mm_struct *mm);
+int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
+void destroy_context_ldt(struct mm_struct *mm);
 #else  /* CONFIG_MODIFY_LDT_SYSCALL */
-static inline int init_new_context(struct task_struct *tsk,
-                                  struct mm_struct *mm)
+static inline int init_new_context_ldt(struct task_struct *tsk,
+                                      struct mm_struct *mm)
 {
        return 0;
 }
-static inline void destroy_context(struct mm_struct *mm) {}
+static inline void destroy_context_ldt(struct mm_struct *mm) {}
 #endif
 
 static inline void load_mm_ldt(struct mm_struct *mm)
@@ -104,103 +104,23 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 #endif
 }
 
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
-                            struct task_struct *tsk)
+static inline int init_new_context(struct task_struct *tsk,
+                                  struct mm_struct *mm)
+{
+       return init_new_context_ldt(tsk, mm);
+}
+static inline void destroy_context(struct mm_struct *mm)
 {
-       unsigned cpu = smp_processor_id();
+       destroy_context_ldt(mm);
+}
 
-       if (likely(prev != next)) {
-#ifdef CONFIG_SMP
-               this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-               this_cpu_write(cpu_tlbstate.active_mm, next);
-#endif
-               cpumask_set_cpu(cpu, mm_cpumask(next));
-
-               /*
-                * Re-load page tables.
-                *
-                * This logic has an ordering constraint:
-                *
-                *  CPU 0: Write to a PTE for 'next'
-                *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
-                *  CPU 1: set bit 1 in next's mm_cpumask
-                *  CPU 1: load from the PTE that CPU 0 writes (implicit)
-                *
-                * We need to prevent an outcome in which CPU 1 observes
-                * the new PTE value and CPU 0 observes bit 1 clear in
-                * mm_cpumask.  (If that occurs, then the IPI will never
-                * be sent, and CPU 0's TLB will contain a stale entry.)
-                *
-                * The bad outcome can occur if either CPU's load is
-                * reordered before that CPU's store, so both CPUs must
-                * execute full barriers to prevent this from happening.
-                *
-                * Thus, switch_mm needs a full barrier between the
-                * store to mm_cpumask and any operation that could load
-                * from next->pgd.  TLB fills are special and can happen
-                * due to instruction fetches or for no reason at all,
-                * and neither LOCK nor MFENCE orders them.
-                * Fortunately, load_cr3() is serializing and gives the
-                * ordering guarantee we need.
-                *
-                */
-               load_cr3(next->pgd);
-
-               trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-
-               /* Stop flush ipis for the previous mm */
-               cpumask_clear_cpu(cpu, mm_cpumask(prev));
-
-               /* Load per-mm CR4 state */
-               load_mm_cr4(next);
+extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+                     struct task_struct *tsk);
 
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-               /*
-                * Load the LDT, if the LDT is different.
-                *
-                * It's possible that prev->context.ldt doesn't match
-                * the LDT register.  This can happen if leave_mm(prev)
-                * was called and then modify_ldt changed
-                * prev->context.ldt but suppressed an IPI to this CPU.
-                * In this case, prev->context.ldt != NULL, because we
-                * never set context.ldt to NULL while the mm still
-                * exists.  That means that next->context.ldt !=
-                * prev->context.ldt, because mms never share an LDT.
-                */
-               if (unlikely(prev->context.ldt != next->context.ldt))
-                       load_mm_ldt(next);
-#endif
-       }
-#ifdef CONFIG_SMP
-         else {
-               this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-               BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
-
-               if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
-                       /*
-                        * On established mms, the mm_cpumask is only changed
-                        * from irq context, from ptep_clear_flush() while in
-                        * lazy tlb mode, and here. Irqs are blocked during
-                        * schedule, protecting us from simultaneous changes.
-                        */
-                       cpumask_set_cpu(cpu, mm_cpumask(next));
-
-                       /*
-                        * We were in lazy tlb mode and leave_mm disabled
-                        * tlb flush IPI delivery. We must reload CR3
-                        * to make sure to use no freed page tables.
-                        *
-                        * As above, load_cr3() is serializing and orders TLB
-                        * fills with respect to the mm_cpumask write.
-                        */
-                       load_cr3(next->pgd);
-                       trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-                       load_mm_cr4(next);
-                       load_mm_ldt(next);
-               }
-       }
-#endif
-}
+extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+                              struct task_struct *tsk);
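+/* Tell the generic code that this arch provides its own switch_mm_irqs_off(). */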
+#define switch_mm_irqs_off switch_mm_irqs_off
 
 #define activate_mm(prev, next)                        \
 do {                                           \
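
The inline body removed above does not vanish: per the commit subject, switch_mm()
moves out of line so that it can disable IRQs around the real work, which now
lives in switch_mm_irqs_off(). A minimal sketch of the new wrapper, assuming the
out-of-line definitions land in arch/x86/mm/tlb.c:

        void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                       struct task_struct *tsk)
        {
                unsigned long flags;

                /* The moved body must not race with TLB flush IPIs. */
                local_irq_save(flags);
                switch_mm_irqs_off(prev, next, tsk);
                local_irq_restore(flags);
        }

Callers that already run with IRQs disabled (e.g. the scheduler's context
switch) can call switch_mm_irqs_off() directly and skip the save/restore.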
@@ -275,4 +195,68 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
                mpx_notify_unmap(mm, vma, start, end);
 }
 
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+       u16 pkey = 0;
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+       unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
+                                     VM_PKEY_BIT2 | VM_PKEY_BIT3;
+       pkey = (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
+#endif
+       return pkey;
+}
+
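+/*
+ * PKRU holds two bits per protection key: Access-Disable and
+ * Write-Disable.  A key permits reads only if its AD bit is clear,
+ * and writes only if AD and WD are both clear.
+ */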
+static inline bool __pkru_allows_pkey(u16 pkey, bool write)
+{
+       u32 pkru = read_pkru();
+
+       if (!__pkru_allows_read(pkru, pkey))
+               return false;
+       if (write && !__pkru_allows_write(pkru, pkey))
+               return false;
+
+       return true;
+}
+
+/*
+ * We only want to enforce protection keys on the current process
+ * because we effectively have no access to PKRU for other
+ * processes or any way to tell *which* PKRU in a threaded
+ * process we could use.
+ *
+ * So do not enforce things if the VMA is not from the current
+ * mm, or if we are in a kernel thread.
+ */
+static inline bool vma_is_foreign(struct vm_area_struct *vma)
+{
+       if (!current->mm)
+               return true;
+       /*
+        * Should PKRU be enforced on the access to this VMA?  If
+        * the VMA is from another process, then PKRU has no
+        * relevance and should not be enforced.
+        */
+       if (current->mm != vma->vm_mm)
+               return true;
+
+       return false;
+}
+
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+               bool write, bool execute, bool foreign)
+{
+       /* pkeys never affect instruction fetches */
+       if (execute)
+               return true;
+       /* allow access if the VMA is not one from this process */
+       if (foreign || vma_is_foreign(vma))
+               return true;
+       return __pkru_allows_pkey(vma_pkey(vma), write);
+}
+
+static inline bool arch_pte_access_permitted(pte_t pte, bool write)
+{
+       return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
+}
+
 #endif /* _ASM_X86_MMU_CONTEXT_H */
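
For reference, the __pkru_allows_read()/__pkru_allows_write() helpers used
above are defined outside this header. A minimal sketch of the checks they
perform, assuming the architectural PKRU layout of two bits per key
(Access-Disable, then Write-Disable):

        #define PKRU_AD_BIT             0x1
        #define PKRU_WD_BIT             0x2
        #define PKRU_BITS_PER_PKEY      2

        static inline bool __pkru_allows_read(u32 pkru, u16 pkey)
        {
                int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;

                /* Reads are allowed unless access-disable is set. */
                return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits));
        }

        static inline bool __pkru_allows_write(u32 pkru, u16 pkey)
        {
                int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;

                /* Access-disable blocks writes too, so check both bits. */
                return !(pkru & ((PKRU_AD_BIT | PKRU_WD_BIT) << pkru_pkey_bits));
        }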