KVM: MMU: fix ept=0/pte.u=1/pte.w=0/CR0.WP=0/CR4.SMEP=1/EFER.NX=0 combo
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e2951b6..9bd8f44 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -596,6 +596,8 @@ struct vcpu_vmx {
        /* Support for PML */
 #define PML_ENTITY_NUM         512
        struct page *pml_pg;
+
+       u64 current_tsc_ratio; /* shadow of TSC_MULTIPLIER, see vmx_vcpu_load() */
 };
 
 enum segment_cache_field {
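A note on the new field: current_tsc_ratio shadows the last value written to
the VMCS TSC_MULTIPLIER field, and is consumed by the vmx_vcpu_load() hunk at
the end of this patch. It is the usual cached write-through pattern: remember
the last value written and skip the expensive register write when nothing
changed. A minimal sketch of the pattern, assuming the surrounding vmx.c
context for u64 and vmcs_write64(); the helper name is hypothetical, not
kernel API:

	/* Hypothetical helper illustrating the pattern: keep a shadow of
	 * the last value written and only issue the costly VMWRITE when
	 * the value actually changed. */
	static inline void vmcs_write64_cached(unsigned long field, u64 value,
					       u64 *shadow)
	{
		if (*shadow != value) {
			*shadow = value;
			vmcs_write64(field, value);
		}
	}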
@@ -1811,6 +1813,13 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
                        return;
                }
                break;
+       case MSR_IA32_PEBS_ENABLE:
+               /* PEBS needs a quiescent period after being disabled (to write
+                * a record).  Disabling PEBS through VMX MSR swapping doesn't
+                * provide that period, so a CPU could write the host's
+                * record into the guest's memory.
+                */
+               wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
        }
 
        for (i = 0; i < m->nr; ++i)
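Note that the new case has no early return: clearing the MSR here, when PEBS
is first added to the switch list, gives the PMU a quiescent window well
before VM-entry, so any in-flight PEBS record drains into the host DS area
rather than into guest memory. Control then falls through to the common tail
of add_atomic_switch_msr() (the for loop kept as context above), which records
the guest/host value pair in the autoload lists that the CPU applies at
VM-entry and VM-exit. A simplified sketch of that tail, omitting the
overflow check on the list size:

	/* simplified: find (or append) the slot for this MSR, then record
	 * the value pairs hardware loads at VM-entry (guest) and
	 * VM-exit (host) */
	for (i = 0; i < m->nr; ++i)
		if (m->guest[i].index == msr)
			break;
	if (i == m->nr) {
		++m->nr;
		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
	}
	m->guest[i].index = msr;
	m->guest[i].value = guest_val;
	m->host[i].index = msr;
	m->host[i].value = host_val;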
@@ -1848,26 +1857,31 @@ static void reload_tss(void)
 
 static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 {
-       u64 guest_efer;
-       u64 ignore_bits;
+       u64 guest_efer = vmx->vcpu.arch.efer;
+       u64 ignore_bits = 0;
 
-       guest_efer = vmx->vcpu.arch.efer;
+       if (!enable_ept) {
+               /*
+                * NX is needed to handle CR0.WP=1, CR4.SMEP=1.  Testing
+                * host CPUID is more efficient than testing guest CPUID
+                * or CR4.  Host SMEP is anyway a requirement for guest SMEP.
+                */
+               if (boot_cpu_has(X86_FEATURE_SMEP))
+                       guest_efer |= EFER_NX;
+               else if (!(guest_efer & EFER_NX))
+                       ignore_bits |= EFER_NX;
+       }
 
        /*
-        * NX is emulated; LMA and LME handled by hardware; SCE meaningless
-        * outside long mode
+        * LMA and LME handled by hardware; SCE meaningless outside long mode.
         */
-       ignore_bits = EFER_NX | EFER_SCE;
+       ignore_bits |= EFER_SCE;
 #ifdef CONFIG_X86_64
        ignore_bits |= EFER_LMA | EFER_LME;
        /* SCE is meaningful only in long mode on Intel */
        if (guest_efer & EFER_LMA)
                ignore_bits &= ~(u64)EFER_SCE;
 #endif
-       guest_efer &= ~ignore_bits;
-       guest_efer |= host_efer & ignore_bits;
-       vmx->guest_msrs[efer_offset].data = guest_efer;
-       vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
 
        clear_atomic_switch_msr(vmx, MSR_EFER);
 
@@ -1878,16 +1892,21 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
         */
        if (cpu_has_load_ia32_efer ||
            (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
-               guest_efer = vmx->vcpu.arch.efer;
                if (!(guest_efer & EFER_LMA))
                        guest_efer &= ~EFER_LME;
                if (guest_efer != host_efer)
                        add_atomic_switch_msr(vmx, MSR_EFER,
                                              guest_efer, host_efer);
                return false;
-       }
+       } else {
+               guest_efer &= ~ignore_bits;
+               guest_efer |= host_efer & ignore_bits;
 
-       return true;
+               vmx->guest_msrs[efer_offset].data = guest_efer;
+               vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
+
+               return true;
+       }
 }
 
 static unsigned long segment_base(u16 selector)
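Taken together, the two hunks above split update_transition_efer() into two
strategies. If EFER can be switched atomically at VM-entry/VM-exit (or EPT is
enabled and only NX differs from the host), the real MSR is swapped and the
function returns false; otherwise the guest value, with the safely-ignorable
bits folded in from the host, goes into the guest_msrs slot and the function
returns true. The new !enable_ept block forces NX on when the host has SMEP,
because with shadow paging the MMU may need NX-marked shadow PTEs to emulate
SMEP correctly (the combination named in the subject line). A standalone
sketch of just that NX policy, not the kernel's code, assuming EFER_NX is
bit 11 of IA32_EFER:

	#include <stdint.h>
	#include <stdbool.h>

	#define EFER_NX (1ULL << 11)

	static uint64_t shadow_efer_nx_policy(uint64_t guest_efer,
					      bool host_has_smep,
					      uint64_t *ignore_bits)
	{
		if (host_has_smep)
			guest_efer |= EFER_NX;	  /* run the guest with NX on */
		else if (!(guest_efer & EFER_NX))
			*ignore_bits |= EFER_NX;  /* tolerate host NX != guest NX */
		return guest_efer;
	}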
@@ -2127,14 +2146,16 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
                vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
 
-               /* Setup TSC multiplier */
-               if (cpu_has_vmx_tsc_scaling())
-                       vmcs_write64(TSC_MULTIPLIER,
-                                    vcpu->arch.tsc_scaling_ratio);
-
                vmx->loaded_vmcs->cpu = cpu;
        }
 
+       /* Setup TSC multiplier */
+       if (kvm_has_tsc_control &&
+           vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
+               vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
+               vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+       }
+
        vmx_vcpu_pi_load(vcpu, cpu);
 }
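Finally, the TSC-multiplier write moves out of the vmx->loaded_vmcs->cpu !=
cpu block: a ratio change must reach the VMCS even when the vCPU reloads on
the same physical CPU, and conversely the cached current_tsc_ratio avoids a
redundant VMWRITE on every vmx_vcpu_load(). The generic kvm_has_tsc_control
check replaces the VMX-specific one, matching where
vcpu->arch.tsc_scaling_ratio is maintained. For reference, a sketch of how
such a ratio could be derived on VMX, assuming the hardware's 16.48
fixed-point multiplier format (so guest_tsc = (host_tsc * ratio) >> 48);
the function name is illustrative:

	/* sketch: 16.48 fixed-point ratio, widened to 128 bits so the
	 * shift cannot overflow for realistic kHz values */
	static uint64_t tsc_ratio_sketch(uint64_t guest_tsc_khz,
					 uint64_t host_tsc_khz)
	{
		return (uint64_t)(((unsigned __int128)guest_tsc_khz << 48) /
				  host_tsc_khz);
	}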