KVM: halt_polling: provide a way to qualify wakeups during poll
[cascardo/linux.git] / arch/x86/kvm/x86.c
index 742d0f7..bcef92f 100644
@@ -161,6 +161,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "halt_exits", VCPU_STAT(halt_exits) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
+       { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "hypercalls", VCPU_STAT(hypercalls) },
        { "request_irq", VCPU_STAT(request_irq_exits) },
@@ -700,7 +701,6 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
                if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
                        return 1;
        }
-       kvm_put_guest_xcr0(vcpu);
        vcpu->arch.xcr0 = xcr0;
 
        if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
@@ -2003,22 +2003,8 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
        vcpu->arch.pv_time_enabled = false;
 }
 
-static void accumulate_steal_time(struct kvm_vcpu *vcpu)
-{
-       u64 delta;
-
-       if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
-               return;
-
-       delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
-       vcpu->arch.st.last_steal = current->sched_info.run_delay;
-       vcpu->arch.st.accum_steal = delta;
-}
-
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
-       accumulate_steal_time(vcpu);
-
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
@@ -2026,9 +2012,26 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
                &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
                return;
 
-       vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
-       vcpu->arch.st.steal.version += 2;
-       vcpu->arch.st.accum_steal = 0;
+       if (vcpu->arch.st.steal.version & 1)
+               vcpu->arch.st.steal.version += 1;  /* first write: field was random junk, make it even */
+
+       vcpu->arch.st.steal.version += 1;
+
+       kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+               &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+
+       smp_wmb();
+
+       vcpu->arch.st.steal.steal += current->sched_info.run_delay -
+               vcpu->arch.st.last_steal;
+       vcpu->arch.st.last_steal = current->sched_info.run_delay;
+
+       kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+               &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+
+       smp_wmb();
+
+       vcpu->arch.st.steal.version += 1;
 
        kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
                &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
@@ -6095,12 +6098,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
        }
 
        /* try to inject new event if pending */
-       if (vcpu->arch.nmi_pending) {
-               if (kvm_x86_ops->nmi_allowed(vcpu)) {
-                       --vcpu->arch.nmi_pending;
-                       vcpu->arch.nmi_injected = true;
-                       kvm_x86_ops->set_nmi(vcpu);
-               }
+       if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
+               --vcpu->arch.nmi_pending;
+               vcpu->arch.nmi_injected = true;
+               kvm_x86_ops->set_nmi(vcpu);
        } else if (kvm_cpu_has_injectable_intr(vcpu)) {
                /*
                 * Because interrupts can be injected asynchronously, we are
@@ -6569,10 +6570,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                if (inject_pending_event(vcpu, req_int_win) != 0)
                        req_immediate_exit = true;
                /* enable NMI/IRQ window open exits if needed */
-               else if (vcpu->arch.nmi_pending)
-                       kvm_x86_ops->enable_nmi_window(vcpu);
-               else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
-                       kvm_x86_ops->enable_irq_window(vcpu);
+               else {
+                       if (vcpu->arch.nmi_pending)
+                               kvm_x86_ops->enable_nmi_window(vcpu);
+                       if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
+                               kvm_x86_ops->enable_irq_window(vcpu);
+               }
 
                if (kvm_lapic_enabled(vcpu)) {
                        update_cr8_intercept(vcpu);
@@ -6590,8 +6593,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        kvm_x86_ops->prepare_guest_switch(vcpu);
        if (vcpu->fpu_active)
                kvm_load_guest_fpu(vcpu);
-       kvm_load_guest_xcr0(vcpu);
-
        vcpu->mode = IN_GUEST_MODE;
 
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -6618,6 +6619,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                goto cancel_injection;
        }
 
+       kvm_load_guest_xcr0(vcpu);
+
        if (req_immediate_exit)
                smp_send_reschedule(vcpu->cpu);
 
@@ -6667,6 +6670,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        vcpu->mode = OUTSIDE_GUEST_MODE;
        smp_wmb();
 
+       kvm_put_guest_xcr0(vcpu);
+
        /* Interrupt is enabled by handle_external_intr() */
        kvm_x86_ops->handle_external_intr(vcpu);
 
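
Note: for reference, the xcr0 helpers being relocated are thin wrappers
around xsetbv(); roughly, simplified from their definitions in this file:

	static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
	{
		if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
		    !vcpu->guest_xcr0_loaded) {
			/* kvm_set_xcr() also relies on xcr0 being current */
			xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
			vcpu->guest_xcr0_loaded = 1;
		}
	}

	static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
	{
		if (vcpu->guest_xcr0_loaded) {
			if (vcpu->arch.xcr0 != host_xcr0)
				xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
			vcpu->guest_xcr0_loaded = 0;
		}
	}

Loading the guest value only after interrupts are disabled, and restoring
the host value right after guest exit, keeps the guest's xcr0 from being
live while host code (interrupt handlers, the FPU switch paths below) runs.
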
@@ -7314,7 +7319,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
         * and assume host would use all available bits.
         * Guest xcr0 would be loaded later.
         */
-       kvm_put_guest_xcr0(vcpu);
        vcpu->guest_fpu_loaded = 1;
        __kernel_fpu_begin();
        __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
@@ -7323,8 +7327,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-       kvm_put_guest_xcr0(vcpu);
-
        if (!vcpu->guest_fpu_loaded) {
                vcpu->fpu_counter = 0;
                return;
@@ -8357,19 +8359,21 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
+bool kvm_arch_has_irq_bypass(void)
+{
+       return kvm_x86_ops->update_pi_irte != NULL;
+}
+
 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
                                      struct irq_bypass_producer *prod)
 {
        struct kvm_kernel_irqfd *irqfd =
                container_of(cons, struct kvm_kernel_irqfd, consumer);
 
-       if (kvm_x86_ops->update_pi_irte) {
-               irqfd->producer = prod;
-               return kvm_x86_ops->update_pi_irte(irqfd->kvm,
-                               prod->irq, irqfd->gsi, 1);
-       }
+       irqfd->producer = prod;
 
-       return -EINVAL;
+       return kvm_x86_ops->update_pi_irte(irqfd->kvm,
+                                          prod->irq, irqfd->gsi, 1);
 }
 
 void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
@@ -8379,11 +8383,6 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
        struct kvm_kernel_irqfd *irqfd =
                container_of(cons, struct kvm_kernel_irqfd, consumer);
 
-       if (!kvm_x86_ops->update_pi_irte) {
-               WARN_ON(irqfd->producer != NULL);
-               return;
-       }
-
        WARN_ON(irqfd->producer != prod);
        irqfd->producer = NULL;
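
Note: with kvm_arch_has_irq_bypass() reporting whether posted-interrupt
updates are supported, the generic irqfd code can gate consumer
registration up front, which is what lets the callbacks above drop their
update_pi_irte NULL checks. A sketch of the caller side in
virt/kvm/eventfd.c, under that assumption:

	/* in kvm_irqfd_assign(), roughly */
	if (kvm_arch_has_irq_bypass()) {
		irqfd->consumer.token = (void *)irqfd->eventfd;
		irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
		irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
		irqfd->consumer.start = kvm_arch_irq_bypass_start;
		ret = irq_bypass_register_consumer(&irqfd->consumer);
		if (ret)
			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
				irqfd->consumer.token, ret);
	}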