KVM: halt_polling: provide a way to qualify wakeups during poll
[cascardo/linux.git] / arch/x86/kvm/x86.c
index 742d0f7..bcef92f 100644
@@ -161,6 +161,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "halt_exits", VCPU_STAT(halt_exits) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
+       { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "hypercalls", VCPU_STAT(hypercalls) },
        { "request_irq", VCPU_STAT(request_irq_exits) },
@@ -700,7 +701,6 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
                if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
                        return 1;
        }
-       kvm_put_guest_xcr0(vcpu);
        vcpu->arch.xcr0 = xcr0;
 
        if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
@@ -2003,22 +2003,8 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
        vcpu->arch.pv_time_enabled = false;
 }
 
-static void accumulate_steal_time(struct kvm_vcpu *vcpu)
-{
-       u64 delta;
-
-       if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
-               return;
-
-       delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
-       vcpu->arch.st.last_steal = current->sched_info.run_delay;
-       vcpu->arch.st.accum_steal = delta;
-}
-
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
-       accumulate_steal_time(vcpu);
-
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
@@ -2026,9 +2012,26 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
                &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
                return;
 
-       vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
-       vcpu->arch.st.steal.version += 2;
-       vcpu->arch.st.accum_steal = 0;
+       if (vcpu->arch.st.steal.version & 1)
+               vcpu->arch.st.steal.version += 1;  /* first write: field was random junk, make it even */
+
+       vcpu->arch.st.steal.version += 1;
+
+       kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+               &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+
+       smp_wmb();
+
+       vcpu->arch.st.steal.steal += current->sched_info.run_delay -
+               vcpu->arch.st.last_steal;
+       vcpu->arch.st.last_steal = current->sched_info.run_delay;
+
+       kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+               &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+
+       smp_wmb();
+
+       vcpu->arch.st.steal.version += 1;
 
        kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
                &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
@@ -6095,12 +6098,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
        }
 
        /* try to inject new event if pending */
-       if (vcpu->arch.nmi_pending) {
-               if (kvm_x86_ops->nmi_allowed(vcpu)) {
-                       --vcpu->arch.nmi_pending;
-                       vcpu->arch.nmi_injected = true;
-                       kvm_x86_ops->set_nmi(vcpu);
-               }
+       if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
+               --vcpu->arch.nmi_pending;
+               vcpu->arch.nmi_injected = true;
+               kvm_x86_ops->set_nmi(vcpu);
        } else if (kvm_cpu_has_injectable_intr(vcpu)) {
                /*
                 * Because interrupts can be injected asynchronously, we are
@@ -6569,10 +6570,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                if (inject_pending_event(vcpu, req_int_win) != 0)
                        req_immediate_exit = true;
                /* enable NMI/IRQ window open exits if needed */
-               else if (vcpu->arch.nmi_pending)
-                       kvm_x86_ops->enable_nmi_window(vcpu);
-               else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
-                       kvm_x86_ops->enable_irq_window(vcpu);
+               else {
+                       if (vcpu->arch.nmi_pending)
+                               kvm_x86_ops->enable_nmi_window(vcpu);
+                       if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
+                               kvm_x86_ops->enable_irq_window(vcpu);
+               }
 
                if (kvm_lapic_enabled(vcpu)) {
                        update_cr8_intercept(vcpu);
@@ -6590,8 +6593,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        kvm_x86_ops->prepare_guest_switch(vcpu);
        if (vcpu->fpu_active)
                kvm_load_guest_fpu(vcpu);
-       kvm_load_guest_xcr0(vcpu);
-
        vcpu->mode = IN_GUEST_MODE;
 
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -6618,6 +6619,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                goto cancel_injection;
        }
 
+       kvm_load_guest_xcr0(vcpu);
+
        if (req_immediate_exit)
                smp_send_reschedule(vcpu->cpu);
 
@@ -6667,6 +6670,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        vcpu->mode = OUTSIDE_GUEST_MODE;
        smp_wmb();
 
+       kvm_put_guest_xcr0(vcpu);
+
        /* Interrupt is enabled by handle_external_intr() */
        kvm_x86_ops->handle_external_intr(vcpu);
 
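
Note: for reference, the xcr0 helpers being relocated are thin wrappers
around xsetbv(); roughly, simplified from their definitions in this file:

	static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
	{
		if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
		    !vcpu->guest_xcr0_loaded) {
			/* kvm_set_xcr() also relies on xcr0 being current */
			xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
			vcpu->guest_xcr0_loaded = 1;
		}
	}

	static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
	{
		if (vcpu->guest_xcr0_loaded) {
			if (vcpu->arch.xcr0 != host_xcr0)
				xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
			vcpu->guest_xcr0_loaded = 0;
		}
	}

Loading the guest value only after interrupts are disabled, and restoring
the host value right after guest exit, keeps the guest's xcr0 from being
live while host code (interrupt handlers, the FPU switch paths below) runs.
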
@@ -7314,7 +7319,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
         * and assume host would use all available bits.
         * Guest xcr0 would be loaded later.
         */
-       kvm_put_guest_xcr0(vcpu);
        vcpu->guest_fpu_loaded = 1;
        __kernel_fpu_begin();
        __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
@@ -7323,8 +7327,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-       kvm_put_guest_xcr0(vcpu);
-
        if (!vcpu->guest_fpu_loaded) {
                vcpu->fpu_counter = 0;
                return;
@@ -8357,19 +8359,21 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
+bool kvm_arch_has_irq_bypass(void)
+{
+       return kvm_x86_ops->update_pi_irte != NULL;
+}
+
 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
                                      struct irq_bypass_producer *prod)
 {
        struct kvm_kernel_irqfd *irqfd =
                container_of(cons, struct kvm_kernel_irqfd, consumer);
 
-       if (kvm_x86_ops->update_pi_irte) {
-               irqfd->producer = prod;
-               return kvm_x86_ops->update_pi_irte(irqfd->kvm,
-                               prod->irq, irqfd->gsi, 1);
-       }
+       irqfd->producer = prod;
 
-       return -EINVAL;
+       return kvm_x86_ops->update_pi_irte(irqfd->kvm,
+                                          prod->irq, irqfd->gsi, 1);
 }
 
 void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
@@ -8379,11 +8383,6 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
        struct kvm_kernel_irqfd *irqfd =
                container_of(cons, struct kvm_kernel_irqfd, consumer);
 
-       if (!kvm_x86_ops->update_pi_irte) {
-               WARN_ON(irqfd->producer != NULL);
-               return;
-       }
-
        WARN_ON(irqfd->producer != prod);
        irqfd->producer = NULL;
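
Note: with kvm_arch_has_irq_bypass() reporting whether posted-interrupt
updates are supported, the generic irqfd code can gate consumer
registration up front, which is what lets the callbacks above drop their
update_pi_irte NULL checks. A sketch of the caller side in
virt/kvm/eventfd.c, under that assumption:

	/* in kvm_irqfd_assign(), roughly */
	if (kvm_arch_has_irq_bypass()) {
		irqfd->consumer.token = (void *)irqfd->eventfd;
		irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
		irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
		irqfd->consumer.start = kvm_arch_irq_bypass_start;
		ret = irq_bypass_register_consumer(&irqfd->consumer);
		if (ret)
			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
				irqfd->consumer.token, ret);
	}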