KVM: PPC: Book3S HV: Use bitmap of active threads rather than count
authorPaul Mackerras <paulus@samba.org>
Sat, 28 Mar 2015 03:21:09 +0000 (14:21 +1100)
committerAlexander Graf <agraf@suse.de>
Tue, 21 Apr 2015 13:21:33 +0000 (15:21 +0200)
Currently, the entry_exit_count field in the kvmppc_vcore struct
contains two 8-bit counts, one of the threads that have started entering
the guest, and one of the threads that have started exiting the guest.
This changes it to an entry_exit_map field which contains two bitmaps
of 8 bits each.  The advantage of doing this is that it gives us a
bitmap of which threads need to be signalled when exiting the guest.
That means that we no longer need to use the trick of setting the
HDEC to 0 to pull the other threads out of the guest, which led in
some cases to a spurious HDEC interrupt on the next guest entry.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S

index 1517faa..d67a838 100644 (file)
@@ -263,15 +263,15 @@ struct kvm_arch {
 
 /*
  * Struct for a virtual core.
- * Note: entry_exit_count combines an entry count in the bottom 8 bits
- * and an exit count in the next 8 bits.  This is so that we can
- * atomically increment the entry count iff the exit count is 0
- * without taking the lock.
+ * Note: entry_exit_map combines a bitmap of threads that have entered
+ * in the bottom 8 bits and a bitmap of threads that have exited in the
+ * next 8 bits.  This is so that we can atomically set the entry bit
+ * iff the exit map is 0 without taking a lock.
  */
 struct kvmppc_vcore {
        int n_runnable;
        int num_threads;
-       int entry_exit_count;
+       int entry_exit_map;
        int napping_threads;
        int first_vcpuid;
        u16 pcpu;
@@ -296,8 +296,9 @@ struct kvmppc_vcore {
        ulong conferring_threads;
 };
 
-#define VCORE_ENTRY_COUNT(vc)  ((vc)->entry_exit_count & 0xff)
-#define VCORE_EXIT_COUNT(vc)   ((vc)->entry_exit_count >> 8)
+#define VCORE_ENTRY_MAP(vc)    ((vc)->entry_exit_map & 0xff)
+#define VCORE_EXIT_MAP(vc)     ((vc)->entry_exit_map >> 8)
+#define VCORE_IS_EXITING(vc)   (VCORE_EXIT_MAP(vc) != 0)
 
 /* Values for vcore_state */
 #define VCORE_INACTIVE 0
index 8aa8246..0d07efb 100644 (file)
@@ -562,7 +562,7 @@ int main(void)
        DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop));
        DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort));
        DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
-       DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
+       DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map));
        DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
        DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
        DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm));
index 7c1335d..ea1600f 100644 (file)
@@ -1952,7 +1952,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
        /*
         * Initialize *vc.
         */
-       vc->entry_exit_count = 0;
+       vc->entry_exit_map = 0;
        vc->preempt_tb = TB_NIL;
        vc->in_guest = 0;
        vc->napping_threads = 0;
@@ -2119,8 +2119,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
         * this thread straight away and have it join in.
         */
        if (!signal_pending(current)) {
-               if (vc->vcore_state == VCORE_RUNNING &&
-                   VCORE_EXIT_COUNT(vc) == 0) {
+               if (vc->vcore_state == VCORE_RUNNING && !VCORE_IS_EXITING(vc)) {
                        kvmppc_create_dtl_entry(vcpu, vc);
                        kvmppc_start_thread(vcpu);
                        trace_kvm_guest_enter(vcpu);
index 1954a1c..2754251 100644 (file)
@@ -115,11 +115,11 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
        int rv = H_SUCCESS; /* => don't yield */
 
        set_bit(vcpu->arch.ptid, &vc->conferring_threads);
-       while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) {
-               threads_running = VCORE_ENTRY_COUNT(vc);
-               threads_ceded = hweight32(vc->napping_threads);
-               threads_conferring = hweight32(vc->conferring_threads);
-               if (threads_ceded + threads_conferring >= threads_running) {
+       while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
+               threads_running = VCORE_ENTRY_MAP(vc);
+               threads_ceded = vc->napping_threads;
+               threads_conferring = vc->conferring_threads;
+               if ((threads_ceded | threads_conferring) == threads_running) {
                        rv = H_TOO_HARD; /* => do yield */
                        break;
                }
index 16719af..245f5c9 100644 (file)
@@ -185,7 +185,7 @@ kvmppc_primary_no_guest:
        or      r3, r3, r0
        stwcx.  r3, 0, r6
        bne     1b
-       /* order napping_threads update vs testing entry_exit_count */
+       /* order napping_threads update vs testing entry_exit_map */
        isync
        li      r12, 0
        lwz     r7, VCORE_ENTRY_EXIT(r5)
@@ -406,19 +406,21 @@ kvmppc_hv_entry:
         * We don't have to lock against concurrent tlbies,
         * but we do have to coordinate across hardware threads.
         */
-       /* Increment entry count iff exit count is zero. */
-       ld      r5,HSTATE_KVM_VCORE(r13)
-       addi    r9,r5,VCORE_ENTRY_EXIT
-21:    lwarx   r3,0,r9
-       cmpwi   r3,0x100                /* any threads starting to exit? */
+       /* Set bit in entry map iff exit map is zero. */
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       li      r7, 1
+       lbz     r6, HSTATE_PTID(r13)
+       sld     r7, r7, r6
+       addi    r9, r5, VCORE_ENTRY_EXIT
+21:    lwarx   r3, 0, r9
+       cmpwi   r3, 0x100               /* any threads starting to exit? */
        bge     secondary_too_late      /* if so we're too late to the party */
-       addi    r3,r3,1
-       stwcx.  r3,0,r9
+       or      r3, r3, r7
+       stwcx.  r3, 0, r9
        bne     21b
 
        /* Primary thread switches to guest partition. */
        ld      r9,VCORE_KVM(r5)        /* pointer to struct kvm */
-       lbz     r6,HSTATE_PTID(r13)
        cmpwi   r6,0
        bne     20f
        ld      r6,KVM_SDR1(r9)
@@ -1477,13 +1479,16 @@ kvmhv_do_exit:                  /* r12 = trap, r13 = paca */
         * We don't have to lock against tlbies but we do
         * have to coordinate the hardware threads.
         */
-       /* Increment the threads-exiting-guest count in the 0xff00
-          bits of vcore->entry_exit_count */
-       ld      r5,HSTATE_KVM_VCORE(r13)
-       addi    r6,r5,VCORE_ENTRY_EXIT
-41:    lwarx   r3,0,r6
-       addi    r0,r3,0x100
-       stwcx.  r0,0,r6
+       /* Set our bit in the threads-exiting-guest map in the 0xff00
+          bits of vcore->entry_exit_map */
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       lbz     r4, HSTATE_PTID(r13)
+       li      r7, 0x100
+       sld     r7, r7, r4
+       addi    r6, r5, VCORE_ENTRY_EXIT
+41:    lwarx   r3, 0, r6
+       or      r0, r3, r7
+       stwcx.  r0, 0, r6
        bne     41b
        isync           /* order stwcx. vs. reading napping_threads */
 
@@ -1492,9 +1497,9 @@ kvmhv_do_exit:                    /* r12 = trap, r13 = paca */
         * up to the kernel or qemu; we can't handle it in real mode.
         * Thus we have to do a partition switch, so we have to
         * collect the other threads, if we are the first thread
-        * to take an interrupt.  To do this, we set the HDEC to 0,
-        * which causes an HDEC interrupt in all threads within 2ns
-        * because the HDEC register is shared between all 4 threads.
+        * to take an interrupt.  To do this, we send a message or
+        * IPI to all the threads that have their bit set in the entry
+        * map in vcore->entry_exit_map (other than ourselves).
         * However, we don't need to bother if this is an HDEC
         * interrupt, since the other threads will already be on their
         * way here in that case.
@@ -1503,17 +1508,8 @@ kvmhv_do_exit:                   /* r12 = trap, r13 = paca */
        bge     43f
        cmpwi   r12,BOOK3S_INTERRUPT_HV_DECREMENTER
        beq     43f
-       li      r0,0
-       mtspr   SPRN_HDEC,r0
 
-       /*
-        * Send an IPI to any napping threads, since an HDEC interrupt
-        * doesn't wake CPUs up from nap.
-        */
-       lwz     r3,VCORE_NAPPING_THREADS(r5)
-       lbz     r4,HSTATE_PTID(r13)
-       li      r0,1
-       sld     r0,r0,r4
+       srwi    r0,r7,8
        andc.   r3,r3,r0                /* no sense IPI'ing ourselves */
        beq     43f
        /* Order entry/exit update vs. IPIs */
@@ -2091,12 +2087,11 @@ _GLOBAL(kvmppc_h_cede)          /* r3 = vcpu pointer, r11 = msr, r13 = paca */
        addi    r6,r5,VCORE_NAPPING_THREADS
 31:    lwarx   r4,0,r6
        or      r4,r4,r0
-       PPC_POPCNTW(R7,R4)
-       cmpw    r7,r8
-       bge     kvm_cede_exit
+       cmpw    r4,r8
+       beq     kvm_cede_exit
        stwcx.  r4,0,r6
        bne     31b
-       /* order napping_threads update vs testing entry_exit_count */
+       /* order napping_threads update vs testing entry_exit_map */
        isync
        li      r0,NAPPING_CEDE
        stb     r0,HSTATE_NAPPING(r13)