KVM: x86: disable MPX if host did not enable MPX XSAVE features
Repository: cascardo/linux.git
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e2951b6..e512aa7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -861,7 +861,6 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
 static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
-static bool vmx_mpx_supported(void);
 static bool vmx_xsaves_supported(void);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@ -961,25 +960,36 @@ static const u32 vmx_msr_index[] = {
        MSR_EFER, MSR_TSC_AUX, MSR_STAR,
 };
 
-static inline bool is_page_fault(u32 intr_info)
+static inline bool is_exception_n(u32 intr_info, u8 vector)
 {
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
                             INTR_INFO_VALID_MASK)) ==
-               (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
+               (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK);
+}
+
+static inline bool is_debug(u32 intr_info)
+{
+       return is_exception_n(intr_info, DB_VECTOR);
+}
+
+static inline bool is_breakpoint(u32 intr_info)
+{
+       return is_exception_n(intr_info, BP_VECTOR);
+}
+
+static inline bool is_page_fault(u32 intr_info)
+{
+       return is_exception_n(intr_info, PF_VECTOR);
 }
 
 static inline bool is_no_device(u32 intr_info)
 {
-       return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
-                            INTR_INFO_VALID_MASK)) ==
-               (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
+       return is_exception_n(intr_info, NM_VECTOR);
 }
 
 static inline bool is_invalid_opcode(u32 intr_info)
 {
-       return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
-                            INTR_INFO_VALID_MASK)) ==
-               (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
+       return is_exception_n(intr_info, UD_VECTOR);
 }
 
 static inline bool is_external_interrupt(u32 intr_info)
@@ -2584,7 +2594,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
                VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
 
-       if (vmx_mpx_supported())
+       if (kvm_mpx_supported())
                vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
 
        /* We support free control of debug control saving. */
@@ -2605,7 +2615,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                VM_ENTRY_LOAD_IA32_PAT;
        vmx->nested.nested_vmx_entry_ctls_high |=
                (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
-       if (vmx_mpx_supported())
+       if (kvm_mpx_supported())
                vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
 
        /* We support free control of debug control loading. */
@@ -2849,7 +2859,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
                break;
        case MSR_IA32_BNDCFGS:
-               if (!vmx_mpx_supported())
+               if (!kvm_mpx_supported())
                        return 1;
                msr_info->data = vmcs_read64(GUEST_BNDCFGS);
                break;
@@ -2926,7 +2936,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                vmcs_writel(GUEST_SYSENTER_ESP, data);
                break;
        case MSR_IA32_BNDCFGS:
-               if (!vmx_mpx_supported())
+               if (!kvm_mpx_supported())
                        return 1;
                vmcs_write64(GUEST_BNDCFGS, data);
                break;
@@ -3399,7 +3409,7 @@ static void init_vmcs_shadow_fields(void)
        for (i = j = 0; i < max_shadow_read_write_fields; i++) {
                switch (shadow_read_write_fields[i]) {
                case GUEST_BNDCFGS:
-                       if (!vmx_mpx_supported())
+                       if (!kvm_mpx_supported())
                                continue;
                        break;
                default:
@@ -5608,11 +5618,8 @@ static int handle_dr(struct kvm_vcpu *vcpu)
        }
 
        if (vcpu->guest_debug == 0) {
-               u32 cpu_based_vm_exec_control;
-
-               cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-               cpu_based_vm_exec_control &= ~CPU_BASED_MOV_DR_EXITING;
-               vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+               vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
+                               CPU_BASED_MOV_DR_EXITING);
 
                /*
                 * No more DR vmexits; force a reload of the debug registers
@@ -5649,8 +5656,6 @@ static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
 
 static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
 {
-       u32 cpu_based_vm_exec_control;
-
        get_debugreg(vcpu->arch.db[0], 0);
        get_debugreg(vcpu->arch.db[1], 1);
        get_debugreg(vcpu->arch.db[2], 2);
@@ -5659,10 +5664,7 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
        vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
 
        vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
-
-       cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-       cpu_based_vm_exec_control |= CPU_BASED_MOV_DR_EXITING;
-       vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+       vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING);
 }
 
 static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
@@ -5747,8 +5749,7 @@ static int handle_halt(struct kvm_vcpu *vcpu)
 
 static int handle_vmcall(struct kvm_vcpu *vcpu)
 {
-       kvm_emulate_hypercall(vcpu);
-       return 1;
+       return kvm_emulate_hypercall(vcpu);
 }
 
 static int handle_invd(struct kvm_vcpu *vcpu)
@@ -6435,8 +6436,8 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
 
        if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
                /* Recycle the least recently used VMCS. */
-               item = list_entry(vmx->nested.vmcs02_pool.prev,
-                       struct vmcs02_list, list);
+               item = list_last_entry(&vmx->nested.vmcs02_pool,
+                                      struct vmcs02_list, list);
                item->vmptr = vmx->nested.current_vmptr;
                list_move(&item->list, &vmx->nested.vmcs02_pool);
                return &item->vmcs02;
@@ -7752,6 +7753,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                else if (is_no_device(intr_info) &&
                         !(vmcs12->guest_cr0 & X86_CR0_TS))
                        return false;
+               else if (is_debug(intr_info) &&
+                        vcpu->guest_debug &
+                        (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+                       return false;
+               else if (is_breakpoint(intr_info) &&
+                        vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+                       return false;
                return vmcs12->exception_bitmap &
                                (1u << (intr_info & INTR_INFO_VECTOR_MASK));
        case EXIT_REASON_EXTERNAL_INTERRUPT:
@@ -10256,7 +10264,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
        vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
        vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
        vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
-       if (vmx_mpx_supported())
+       if (kvm_mpx_supported())
                vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
        if (nested_cpu_has_xsaves(vmcs12))
                vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
@@ -10764,13 +10772,26 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
                 */
 
                kvm_set_msi_irq(e, &irq);
-               if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
+               if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+                       /*
+                        * Make sure the IRTE is in remapped mode if
+                        * we don't handle it in posted mode.
+                        */
+                       ret = irq_set_vcpu_affinity(host_irq, NULL);
+                       if (ret < 0) {
+                               printk(KERN_INFO
+                                  "failed to back to remapped mode, irq: %u\n",
+                                  host_irq);
+                               goto out;
+                       }
+
                        continue;
+               }
 
                vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
                vcpu_info.vector = irq.vector;
 
-               trace_kvm_pi_irte_update(vcpu->vcpu_id, e->gsi,
+               trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi,
                                vcpu_info.vector, vcpu_info.pi_desc_addr, set);
 
                if (set)