KVM: x86: ratelimit and decrease severity for guest-triggered printk
[cascardo/linux.git] / arch / x86 / kvm / vmx.c
index a45d858..2029c00 100644 (file)
@@ -422,6 +422,7 @@ struct nested_vmx {
        struct list_head vmcs02_pool;
        int vmcs02_num;
        u64 vmcs01_tsc_offset;
+       bool change_vmcs01_virtual_x2apic_mode;
        /* L2 must run next, and mustn't decide to exit to L1. */
        bool nested_run_pending;
        /*
@@ -435,6 +436,8 @@ struct nested_vmx {
        bool pi_pending;
        u16 posted_intr_nv;
 
+       unsigned long *msr_bitmap;
+
        struct hrtimer preemption_timer;
        bool preemption_timer_expired;
 
@@ -924,7 +927,6 @@ static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 static unsigned long *vmx_msr_bitmap_legacy_x2apic;
 static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_nested;
 static unsigned long *vmx_vmread_bitmap;
 static unsigned long *vmx_vmwrite_bitmap;
 
@@ -2198,6 +2200,12 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
                        new.control) != old.control);
 }
 
+static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
+{
+       vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
+       vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -2256,10 +2264,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        /* Setup TSC multiplier */
        if (kvm_has_tsc_control &&
-           vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
-               vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
-               vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
-       }
+           vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
+               decache_tsc_multiplier(vmx);
 
        vmx_vcpu_pi_load(vcpu, cpu);
        vmx->host_pkru = read_pkru();
@@ -2508,7 +2514,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
        unsigned long *msr_bitmap;
 
        if (is_guest_mode(vcpu))
-               msr_bitmap = vmx_msr_bitmap_nested;
+               msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
        else if (cpu_has_secondary_exec_ctrls() &&
                 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
                  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
@@ -6103,7 +6109,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
        gla_validity = (exit_qualification >> 7) & 0x3;
-       if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
+       if (gla_validity == 0x2) {
                printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
                printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
                        (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
@@ -6363,13 +6369,6 @@ static __init int hardware_setup(void)
        if (!vmx_msr_bitmap_longmode_x2apic)
                goto out4;
 
-       if (nested) {
-               vmx_msr_bitmap_nested =
-                       (unsigned long *)__get_free_page(GFP_KERNEL);
-               if (!vmx_msr_bitmap_nested)
-                       goto out5;
-       }
-
        vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_vmread_bitmap)
                goto out6;
@@ -6392,8 +6391,6 @@ static __init int hardware_setup(void)
 
        memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
        memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-       if (nested)
-               memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
 
        if (setup_vmcs_config(&vmcs_config) < 0) {
                r = -EIO;
@@ -6529,9 +6526,6 @@ out8:
 out7:
        free_page((unsigned long)vmx_vmread_bitmap);
 out6:
-       if (nested)
-               free_page((unsigned long)vmx_msr_bitmap_nested);
-out5:
        free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
        free_page((unsigned long)vmx_msr_bitmap_longmode);
@@ -6557,8 +6551,6 @@ static __exit void hardware_unsetup(void)
        free_page((unsigned long)vmx_io_bitmap_a);
        free_page((unsigned long)vmx_vmwrite_bitmap);
        free_page((unsigned long)vmx_vmread_bitmap);
-       if (nested)
-               free_page((unsigned long)vmx_msr_bitmap_nested);
 
        free_kvm_area();
 }
@@ -6734,7 +6726,7 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
 {
        /* TODO: not to reset guest simply here. */
        kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
-       pr_warn("kvm: nested vmx abort, indicator %d\n", indicator);
+       pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
 }
 
 static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
@@ -6995,16 +6987,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
                return 1;
        }
 
+       if (cpu_has_vmx_msr_bitmap()) {
+               vmx->nested.msr_bitmap =
+                               (unsigned long *)__get_free_page(GFP_KERNEL);
+               if (!vmx->nested.msr_bitmap)
+                       goto out_msr_bitmap;
+       }
+
        vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
        if (!vmx->nested.cached_vmcs12)
-               return -ENOMEM;
+               goto out_cached_vmcs12;
 
        if (enable_shadow_vmcs) {
                shadow_vmcs = alloc_vmcs();
-               if (!shadow_vmcs) {
-                       kfree(vmx->nested.cached_vmcs12);
-                       return -ENOMEM;
-               }
+               if (!shadow_vmcs)
+                       goto out_shadow_vmcs;
                /* mark vmcs as shadow */
                shadow_vmcs->revision_id |= (1u << 31);
                /* init shadow vmcs */
@@ -7016,7 +7013,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
        vmx->nested.vmcs02_num = 0;
 
        hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
-                    HRTIMER_MODE_REL);
+                    HRTIMER_MODE_REL_PINNED);
        vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
 
        vmx->nested.vmxon = true;
@@ -7024,6 +7021,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
        skip_emulated_instruction(vcpu);
        nested_vmx_succeed(vcpu);
        return 1;
+
+out_shadow_vmcs:
+       kfree(vmx->nested.cached_vmcs12);
+
+out_cached_vmcs12:
+       free_page((unsigned long)vmx->nested.msr_bitmap);
+
+out_msr_bitmap:
+       return -ENOMEM;
 }
 
 /*
@@ -7098,6 +7104,10 @@ static void free_nested(struct vcpu_vmx *vmx)
        vmx->nested.vmxon = false;
        free_vpid(vmx->nested.vpid02);
        nested_release_vmcs12(vmx);
+       if (vmx->nested.msr_bitmap) {
+               free_page((unsigned long)vmx->nested.msr_bitmap);
+               vmx->nested.msr_bitmap = NULL;
+       }
        if (enable_shadow_vmcs)
                free_vmcs(vmx->nested.current_shadow_vmcs);
        kfree(vmx->nested.cached_vmcs12);
@@ -8419,6 +8429,12 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 {
        u32 sec_exec_control;
 
+       /* Postpone execution until vmcs01 is the current VMCS. */
+       if (is_guest_mode(vcpu)) {
+               to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+               return;
+       }
+
        /*
         * There is not point to enable virtualize x2apic without enable
         * apicv
@@ -9472,8 +9488,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 {
        int msr;
        struct page *page;
-       unsigned long *msr_bitmap;
+       unsigned long *msr_bitmap_l1;
+       unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
 
+       /* This shortcut is ok because we support only x2APIC MSRs so far. */
        if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
                return false;
 
@@ -9482,63 +9500,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
                WARN_ON(1);
                return false;
        }
-       msr_bitmap = (unsigned long *)kmap(page);
-       if (!msr_bitmap) {
+       msr_bitmap_l1 = (unsigned long *)kmap(page);
+       if (!msr_bitmap_l1) {
                nested_release_page_clean(page);
                WARN_ON(1);
                return false;
        }
 
+       memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+
        if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
                if (nested_cpu_has_apic_reg_virt(vmcs12))
                        for (msr = 0x800; msr <= 0x8ff; msr++)
                                nested_vmx_disable_intercept_for_msr(
-                                       msr_bitmap,
-                                       vmx_msr_bitmap_nested,
+                                       msr_bitmap_l1, msr_bitmap_l0,
                                        msr, MSR_TYPE_R);
-               /* TPR is allowed */
-               nested_vmx_disable_intercept_for_msr(msr_bitmap,
-                               vmx_msr_bitmap_nested,
+
+               nested_vmx_disable_intercept_for_msr(
+                               msr_bitmap_l1, msr_bitmap_l0,
                                APIC_BASE_MSR + (APIC_TASKPRI >> 4),
                                MSR_TYPE_R | MSR_TYPE_W);
+
                if (nested_cpu_has_vid(vmcs12)) {
-                       /* EOI and self-IPI are allowed */
                        nested_vmx_disable_intercept_for_msr(
-                               msr_bitmap,
-                               vmx_msr_bitmap_nested,
+                               msr_bitmap_l1, msr_bitmap_l0,
                                APIC_BASE_MSR + (APIC_EOI >> 4),
                                MSR_TYPE_W);
                        nested_vmx_disable_intercept_for_msr(
-                               msr_bitmap,
-                               vmx_msr_bitmap_nested,
+                               msr_bitmap_l1, msr_bitmap_l0,
                                APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
                                MSR_TYPE_W);
                }
-       } else {
-               /*
-                * Enable reading intercept of all the x2apic
-                * MSRs. We should not rely on vmcs12 to do any
-                * optimizations here, it may have been modified
-                * by L1.
-                */
-               for (msr = 0x800; msr <= 0x8ff; msr++)
-                       __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               msr,
-                               MSR_TYPE_R);
-
-               __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               APIC_BASE_MSR + (APIC_TASKPRI >> 4),
-                               MSR_TYPE_W);
-               __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               APIC_BASE_MSR + (APIC_EOI >> 4),
-                               MSR_TYPE_W);
-               __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
-                               MSR_TYPE_W);
        }
        kunmap(page);
        nested_release_page_clean(page);
@@ -9606,7 +9598,7 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
        maxphyaddr = cpuid_maxphyaddr(vcpu);
        if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
            (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
-               pr_warn_ratelimited(
+               pr_debug_ratelimited(
                        "nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)",
                        addr_field, maxphyaddr, count, addr);
                return -EINVAL;
@@ -9679,13 +9671,13 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
        for (i = 0; i < count; i++) {
                if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
                                        &e, sizeof(e))) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot read MSR entry (%u, 0x%08llx)\n",
                                __func__, i, gpa + i * sizeof(e));
                        goto fail;
                }
                if (nested_vmx_load_msr_check(vcpu, &e)) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s check failed (%u, 0x%x, 0x%x)\n",
                                __func__, i, e.index, e.reserved);
                        goto fail;
@@ -9693,7 +9685,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
                msr.index = e.index;
                msr.data = e.value;
                if (kvm_set_msr(vcpu, &msr)) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
                                __func__, i, e.index, e.value);
                        goto fail;
@@ -9714,13 +9706,13 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
                if (kvm_vcpu_read_guest(vcpu,
                                        gpa + i * sizeof(e),
                                        &e, 2 * sizeof(u32))) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot read MSR entry (%u, 0x%08llx)\n",
                                __func__, i, gpa + i * sizeof(e));
                        return -EINVAL;
                }
                if (nested_vmx_store_msr_check(vcpu, &e)) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s check failed (%u, 0x%x, 0x%x)\n",
                                __func__, i, e.index, e.reserved);
                        return -EINVAL;
@@ -9728,7 +9720,7 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
                msr_info.host_initiated = false;
                msr_info.index = e.index;
                if (kvm_get_msr(vcpu, &msr_info)) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot read MSR (%u, 0x%x)\n",
                                __func__, i, e.index);
                        return -EINVAL;
@@ -9737,7 +9729,7 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
                                         gpa + i * sizeof(e) +
                                             offsetof(struct vmx_msr_entry, value),
                                         &msr_info.data, sizeof(msr_info.data))) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
                                __func__, i, e.index, msr_info.data);
                        return -EINVAL;
@@ -9957,10 +9949,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        }
 
        if (cpu_has_vmx_msr_bitmap() &&
-           exec_control & CPU_BASED_USE_MSR_BITMAPS) {
-               nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
-               /* MSR_BITMAP will be set by following vmx_set_efer. */
-       else
+           exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+           nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
+               /* MSR_BITMAP will be set by following vmx_set_efer. */
+       else
                exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
 
        /*
@@ -10011,6 +10003,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                        vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
        else
                vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+       if (kvm_has_tsc_control)
+               decache_tsc_multiplier(vmx);
 
        if (enable_vpid) {
                /*
@@ -10506,6 +10500,9 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
                vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
        }
 
+       if (nested_cpu_has_ept(vmcs12))
+               vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
+
        if (nested_cpu_has_vid(vmcs12))
                vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
 
@@ -10767,6 +10764,14 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
        else
                vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
                              PIN_BASED_VMX_PREEMPTION_TIMER);
+       if (kvm_has_tsc_control)
+               decache_tsc_multiplier(vmx);
+
+       if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
+               vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
+               vmx_set_virtual_x2apic_mode(vcpu,
+                               vcpu->arch.apic_base & X2APIC_ENABLE);
+       }
 
        /* This is needed for same reason as it was needed in prepare_vmcs02 */
        vmx->host_rsp = 0;