KVM: MMU: fix ept=0/pte.u=1/pte.w=0/CR0.WP=0/CR4.SMEP=1/EFER.NX=0 combo
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e2951b6..9bd8f44 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -596,6 +596,8 @@ struct vcpu_vmx {
        /* Support for PML */
 #define PML_ENTITY_NUM         512
        struct page *pml_pg;
+
+       u64 current_tsc_ratio; /* shadow of TSC_MULTIPLIER, see vmx_vcpu_load() */
 };
 
 enum segment_cache_field {
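A note on the new field: current_tsc_ratio shadows the last value written to
the VMCS TSC_MULTIPLIER field, and is consumed by the vmx_vcpu_load() hunk at
the end of this patch. It is the usual cached write-through pattern: remember
the last value written and skip the expensive register write when nothing
changed. A minimal sketch of the pattern, assuming the surrounding vmx.c
context for u64 and vmcs_write64(); the helper name is hypothetical, not
kernel API:

	/* Hypothetical helper illustrating the pattern: keep a shadow of
	 * the last value written and only issue the costly VMWRITE when
	 * the value actually changed. */
	static inline void vmcs_write64_cached(unsigned long field, u64 value,
					       u64 *shadow)
	{
		if (*shadow != value) {
			*shadow = value;
			vmcs_write64(field, value);
		}
	}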
@@ -1811,6 +1813,13 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
                        return;
                }
                break;
+       case MSR_IA32_PEBS_ENABLE:
+               /* PEBS needs a quiescent period after being disabled (to write
+                * a record).  Disabling PEBS through VMX MSR swapping doesn't
+                * provide that period, so a CPU could write the host's
+                * record into the guest's memory.
+                */
+               wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
        }
 
        for (i = 0; i < m->nr; ++i)
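Note that the new case has no early return: clearing the MSR here, when PEBS
is first added to the switch list, gives the PMU a quiescent window well
before VM-entry, so any in-flight PEBS record drains into the host DS area
rather than into guest memory. Control then falls through to the common tail
of add_atomic_switch_msr() (the for loop kept as context above), which records
the guest/host value pair in the autoload lists that the CPU applies at
VM-entry and VM-exit. A simplified sketch of that tail, omitting the
overflow check on the list size:

	/* simplified: find (or append) the slot for this MSR, then record
	 * the value pairs hardware loads at VM-entry (guest) and
	 * VM-exit (host) */
	for (i = 0; i < m->nr; ++i)
		if (m->guest[i].index == msr)
			break;
	if (i == m->nr) {
		++m->nr;
		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
	}
	m->guest[i].index = msr;
	m->guest[i].value = guest_val;
	m->host[i].index = msr;
	m->host[i].value = host_val;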
@@ -1848,26 +1857,31 @@ static void reload_tss(void)
 
 static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 {
-       u64 guest_efer;
-       u64 ignore_bits;
+       u64 guest_efer = vmx->vcpu.arch.efer;
+       u64 ignore_bits = 0;
 
-       guest_efer = vmx->vcpu.arch.efer;
+       if (!enable_ept) {
+               /*
+                * NX is needed to handle CR0.WP=1, CR4.SMEP=1.  Testing
+                * host CPUID is more efficient than testing guest CPUID
+                * or CR4.  Host SMEP is anyway a requirement for guest SMEP.
+                */
+               if (boot_cpu_has(X86_FEATURE_SMEP))
+                       guest_efer |= EFER_NX;
+               else if (!(guest_efer & EFER_NX))
+                       ignore_bits |= EFER_NX;
+       }
 
        /*
-        * NX is emulated; LMA and LME handled by hardware; SCE meaningless
-        * outside long mode
+        * LMA and LME handled by hardware; SCE meaningless outside long mode.
         */
-       ignore_bits = EFER_NX | EFER_SCE;
+       ignore_bits |= EFER_SCE;
 #ifdef CONFIG_X86_64
        ignore_bits |= EFER_LMA | EFER_LME;
        /* SCE is meaningful only in long mode on Intel */
        if (guest_efer & EFER_LMA)
                ignore_bits &= ~(u64)EFER_SCE;
 #endif
-       guest_efer &= ~ignore_bits;
-       guest_efer |= host_efer & ignore_bits;
-       vmx->guest_msrs[efer_offset].data = guest_efer;
-       vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
 
        clear_atomic_switch_msr(vmx, MSR_EFER);
 
@@ -1878,16 +1892,21 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
         */
        if (cpu_has_load_ia32_efer ||
            (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
-               guest_efer = vmx->vcpu.arch.efer;
                if (!(guest_efer & EFER_LMA))
                        guest_efer &= ~EFER_LME;
                if (guest_efer != host_efer)
                        add_atomic_switch_msr(vmx, MSR_EFER,
                                              guest_efer, host_efer);
                return false;
-       }
+       } else {
+               guest_efer &= ~ignore_bits;
+               guest_efer |= host_efer & ignore_bits;
 
-       return true;
+               vmx->guest_msrs[efer_offset].data = guest_efer;
+               vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
+
+               return true;
+       }
 }
 
 static unsigned long segment_base(u16 selector)
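Taken together, the two hunks above split update_transition_efer() into two
strategies. If EFER can be switched atomically at VM-entry/VM-exit (or EPT is
enabled and only NX differs from the host), the real MSR is swapped and the
function returns false; otherwise the guest value, with the safely-ignorable
bits folded in from the host, goes into the guest_msrs slot and the function
returns true. The new !enable_ept block forces NX on when the host has SMEP,
because with shadow paging the MMU may need NX-marked shadow PTEs to emulate
SMEP correctly (the combination named in the subject line). A standalone
sketch of just that NX policy, not the kernel's code, assuming EFER_NX is
bit 11 of IA32_EFER:

	#include <stdint.h>
	#include <stdbool.h>

	#define EFER_NX (1ULL << 11)

	static uint64_t shadow_efer_nx_policy(uint64_t guest_efer,
					      bool host_has_smep,
					      uint64_t *ignore_bits)
	{
		if (host_has_smep)
			guest_efer |= EFER_NX;	  /* run the guest with NX on */
		else if (!(guest_efer & EFER_NX))
			*ignore_bits |= EFER_NX;  /* tolerate host NX != guest NX */
		return guest_efer;
	}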
@@ -2127,14 +2146,16 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
                vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
 
-               /* Setup TSC multiplier */
-               if (cpu_has_vmx_tsc_scaling())
-                       vmcs_write64(TSC_MULTIPLIER,
-                                    vcpu->arch.tsc_scaling_ratio);
-
                vmx->loaded_vmcs->cpu = cpu;
        }
 
+       /* Setup TSC multiplier */
+       if (kvm_has_tsc_control &&
+           vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
+               vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
+               vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+       }
+
        vmx_vcpu_pi_load(vcpu, cpu);
 }
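Finally, the TSC-multiplier write moves out of the vmx->loaded_vmcs->cpu !=
cpu block: a ratio change must reach the VMCS even when the vCPU reloads on
the same physical CPU, and conversely the cached current_tsc_ratio avoids a
redundant VMWRITE on every vmx_vcpu_load(). The generic kvm_has_tsc_control
check replaces the VMX-specific one, matching where
vcpu->arch.tsc_scaling_ratio is maintained. For reference, a sketch of how
such a ratio could be derived on VMX, assuming the hardware's 16.48
fixed-point multiplier format (so guest_tsc = (host_tsc * ratio) >> 48);
the function name is illustrative:

	/* sketch: 16.48 fixed-point ratio, widened to 128 bits so the
	 * shift cannot overflow for realistic kHz values */
	static uint64_t tsc_ratio_sketch(uint64_t guest_tsc_khz,
					 uint64_t host_tsc_khz)
	{
		return (uint64_t)(((unsigned __int128)guest_tsc_khz << 48) /
				  host_tsc_khz);
	}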