KVM: remove kvm_guest_enter/exit wrappers
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ad93b40..d42428c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
+#include <linux/mman.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/slab.h>
@@ -98,6 +99,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
+       { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
@@ -123,6 +125,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { NULL }
 };
 
+/* allow nested virtualization in KVM (if enabled by user space) */
+static int nested;
+module_param(nested, int, S_IRUGO);
+MODULE_PARM_DESC(nested, "Nested virtualization support");
+
 /* upper facilities limit for kvm */
 unsigned long kvm_s390_fac_list_mask[16] = {
        0xffe6000000000000UL,
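
The new knob defaults to 0, and since its permissions are S_IRUGO (0444) it is
read-only at runtime, so nested SIE support has to be chosen at module load
time (e.g. modprobe kvm nested=1). A minimal userspace probe, a sketch that
assumes kvm is loaded as the module named "kvm", can read it back via sysfs:

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/sys/module/kvm/parameters/nested", "r");
            int nested;

            if (!f) {
                    perror("fopen");  /* kvm not loaded, or a pre-vsie kernel */
                    return 1;
            }
            if (fscanf(f, "%d", &nested) == 1)
                    printf("nested virtualization: %s\n", nested ? "on" : "off");
            fclose(f);
            return 0;
    }
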
@@ -141,6 +148,7 @@ static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 
 static struct gmap_notifier gmap_notifier;
+static struct gmap_notifier vsie_gmap_notifier;
 debug_info_t *kvm_s390_dbf;
 
 /* Section: not file related */
@@ -150,7 +158,8 @@ int kvm_arch_hardware_enable(void)
        return 0;
 }
 
-static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
+                             unsigned long end);
 
 /*
  * This callback is executed during stop_machine(). All CPUs are therefore
@@ -172,6 +181,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                        vcpu->arch.sie_block->epoch -= *delta;
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
+                       if (vcpu->arch.vsie_block)
+                               vcpu->arch.vsie_block->epoch -= *delta;
                }
        }
        return NOTIFY_OK;
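
The guest's TOD clock is the host TOD plus the per-VM epoch. When the machine
clock is stepped (this notifier fires under stop_machine(), e.g. after a
clock synchronization event), subtracting delta from every epoch keeps guest
time from jumping; the added lines apply the same correction to a nested
guest's shadow SIE block. The invariant, as a standalone sketch:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t host_tod = 1000, epoch = 200;          /* guest TOD == 1200 */
            uint64_t guest_before = host_tod + epoch;
            int64_t delta = 50;                             /* host clock is stepped */

            host_tod += delta;
            epoch -= delta;                                 /* the notifier's fix-up */
            assert(host_tod + epoch == guest_before);       /* guest TOD unchanged */
            return 0;
    }
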
@@ -184,7 +195,9 @@ static struct notifier_block kvm_clock_notifier = {
 int kvm_arch_hardware_setup(void)
 {
        gmap_notifier.notifier_call = kvm_gmap_notifier;
-       gmap_register_ipte_notifier(&gmap_notifier);
+       gmap_register_pte_notifier(&gmap_notifier);
+       vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
+       gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
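
The rename from "ipte" to "pte" notifier comes with the range-based callback
(see the changed kvm_gmap_notifier prototype above), and a second notifier is
now registered so vsie can flush its shadow gmaps. The hook's shape, as
implied by that prototype (a paraphrased sketch of the gmap.h declaration of
this era, with unrelated members omitted):

    struct gmap_notifier {
            struct list_head list;
            /* invoked when ptes carrying the notification bit are
             * invalidated or unprotected in [start, end] of this
             * guest address space */
            void (*notifier_call)(struct gmap *gmap, unsigned long start,
                                  unsigned long end);
    };
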
@@ -192,7 +205,8 @@ int kvm_arch_hardware_setup(void)
 
 void kvm_arch_hardware_unsetup(void)
 {
-       gmap_unregister_ipte_notifier(&gmap_notifier);
+       gmap_unregister_pte_notifier(&gmap_notifier);
+       gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
 }
@@ -250,6 +264,46 @@ static void kvm_s390_cpu_feat_init(void)
 
        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
+       /*
+        * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
+        * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
+        */
+       if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
+           !test_facility(3) || !nested)
+               return;
+       allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
+       if (sclp.has_64bscao)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
+       if (sclp.has_siif)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
+       if (sclp.has_gpere)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
+       if (sclp.has_gsls)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
+       if (sclp.has_ib)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
+       if (sclp.has_cei)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
+       if (sclp.has_ibs)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
+       /*
+        * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
+        * all skey handling functions read/set the skey from the PGSTE
+        * instead of the real storage key.
+        *
+        * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
+        * pages being detected as preserved although they are resident.
+        *
+        * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
+        * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
+        *
+        * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
+        * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
+        * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
+        *
+        * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
+        * cannot easily shadow the SCA because of the ipte lock.
+        */
 }
 
 int kvm_arch_init(void *opaque)
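
For reference, the allow_cpu_feat() helper called throughout the new block
just sets a bit in the kvm_s390_available_cpu_feat bitmap declared earlier
(paraphrased from the same file):

    static void allow_cpu_feat(unsigned long nr)
    {
            set_bit_inv(nr, kvm_s390_available_cpu_feat);
    }
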
@@ -422,7 +476,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
-               if (atomic_read(&kvm->online_vcpus)) {
+               if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
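
This and the following -EBUSY checks switch from online_vcpus to
created_vcpus. The reason, from the generic patch in this series: a vCPU only
shows up in online_vcpus once it is fully constructed, so a concurrent
KVM_CREATE_VCPU could slip past the old check while VM-wide state is being
changed; created_vcpus is bumped under kvm->lock as soon as creation begins.
Schematically (a sketch, not verbatim kernel code):

    /*
     * KVM_CREATE_VCPU, simplified:
     *
     *      mutex_lock(&kvm->lock);
     *      kvm->created_vcpus++;           <- -EBUSY checks see it at once
     *      mutex_unlock(&kvm->lock);
     *      ... long, fallible vCPU setup; VM-wide state must stay frozen ...
     *      atomic_inc(&kvm->online_vcpus); <- only here the old check fired
     */
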
@@ -437,7 +491,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
-               if (atomic_read(&kvm->online_vcpus)) {
+               if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
@@ -492,7 +546,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
                ret = -EBUSY;
                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
-               if (atomic_read(&kvm->online_vcpus) == 0) {
+               if (!kvm->created_vcpus) {
                        kvm->arch.use_cmma = 1;
                        ret = 0;
                }
@@ -530,20 +584,20 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
                if (!new_limit)
                        return -EINVAL;
 
-               /* gmap_alloc takes last usable address */
+               /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;
 
                ret = -EBUSY;
                mutex_lock(&kvm->lock);
-               if (atomic_read(&kvm->online_vcpus) == 0) {
-                       /* gmap_alloc will round the limit up */
-                       struct gmap *new = gmap_alloc(current->mm, new_limit);
+               if (!kvm->created_vcpus) {
+                       /* gmap_create will round the limit up */
+                       struct gmap *new = gmap_create(current->mm, new_limit);
 
                        if (!new) {
                                ret = -ENOMEM;
                        } else {
-                               gmap_free(kvm->arch.gmap);
+                               gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
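
gmap_create (formerly gmap_alloc) rounds the limit up to the smallest address
space a DAT table hierarchy can cover. Roughly, paraphrasing the mm/gmap.c
logic of this era (the function name here is just for the sketch):

    static unsigned long gmap_round_limit(unsigned long limit)
    {
            if (limit < (1UL << 31))        /* 2 GB: segment table only */
                    return (1UL << 31) - 1;
            if (limit < (1UL << 42))        /* 4 TB: region-third table */
                    return (1UL << 42) - 1;
            if (limit < (1UL << 53))        /* 8 PB: region-second table */
                    return (1UL << 53) - 1;
            return -1UL;                    /* full space: region-first table */
    }
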
@@ -713,7 +767,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
        int ret = 0;
 
        mutex_lock(&kvm->lock);
-       if (atomic_read(&kvm->online_vcpus)) {
+       if (kvm->created_vcpus) {
                ret = -EBUSY;
                goto out;
        }
@@ -1029,7 +1083,6 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 {
        uint8_t *keys;
        uint64_t hva;
-       unsigned long curkey;
        int i, r = 0;
 
        if (args->flags != 0)
@@ -1050,26 +1103,27 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
        if (!keys)
                return -ENOMEM;
 
+       down_read(&current->mm->mmap_sem);
        for (i = 0; i < args->count; i++) {
                hva = gfn_to_hva(kvm, args->start_gfn + i);
                if (kvm_is_error_hva(hva)) {
                        r = -EFAULT;
-                       goto out;
+                       break;
                }
 
-               curkey = get_guest_storage_key(current->mm, hva);
-               if (IS_ERR_VALUE(curkey)) {
-                       r = curkey;
-                       goto out;
-               }
-               keys[i] = curkey;
+               r = get_guest_storage_key(current->mm, hva, &keys[i]);
+               if (r)
+                       break;
+       }
+       up_read(&current->mm->mmap_sem);
+
+       if (!r) {
+               r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
+                                sizeof(uint8_t) * args->count);
+               if (r)
+                       r = -EFAULT;
        }
 
-       r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
-                        sizeof(uint8_t) * args->count);
-       if (r)
-               r = -EFAULT;
-out:
        kvfree(keys);
        return r;
 }
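
Two things change in the storage-key paths (here and in the set path below):
get_guest_storage_key() now returns the key through a pointer and the status
separately, instead of overloading one unsigned long for both (hence the old
IS_ERR_VALUE dance), and mmap_sem is taken once around the whole walk rather
than per page inside the helper. The goto-to-break conversion keeps the
semaphore released on every error path. The new contract, sketched (assuming
the helper takes an unsigned char pointer, as the &keys[i] use suggests):

    unsigned char key;
    int rc;

    down_read(&mm->mmap_sem);                  /* caller now holds the lock */
    rc = get_guest_storage_key(mm, hva, &key); /* 0 on success, or -E... */
    up_read(&mm->mmap_sem);
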
@@ -1106,24 +1160,25 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
        if (r)
                goto out;
 
+       down_read(&current->mm->mmap_sem);
        for (i = 0; i < args->count; i++) {
                hva = gfn_to_hva(kvm, args->start_gfn + i);
                if (kvm_is_error_hva(hva)) {
                        r = -EFAULT;
-                       goto out;
+                       break;
                }
 
                /* Lowest order bit is reserved */
                if (keys[i] & 0x01) {
                        r = -EINVAL;
-                       goto out;
+                       break;
                }
 
-               r = set_guest_storage_key(current->mm, hva,
-                                         (unsigned long)keys[i], 0);
+               r = set_guest_storage_key(current->mm, hva, keys[i], 0);
                if (r)
-                       goto out;
+                       break;
        }
+       up_read(&current->mm->mmap_sem);
 out:
        kvfree(keys);
        return r;
@@ -1391,7 +1446,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
                else
                        kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
                                                    sclp.hamax + 1);
-               kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
+               kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
                if (!kvm->arch.gmap)
                        goto out_err;
                kvm->arch.gmap->private = kvm;
@@ -1403,6 +1458,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        kvm->arch.epoch = 0;
 
        spin_lock_init(&kvm->arch.start_stop_lock);
+       kvm_s390_vsie_init(kvm);
        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
 
        return 0;
@@ -1424,7 +1480,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
                sca_del_vcpu(vcpu);
 
        if (kvm_is_ucontrol(vcpu->kvm))
-               gmap_free(vcpu->arch.gmap);
+               gmap_remove(vcpu->arch.gmap);
 
        if (vcpu->kvm->arch.use_cmma)
                kvm_s390_vcpu_unsetup_cmma(vcpu);
@@ -1457,16 +1513,17 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
        debug_unregister(kvm->arch.dbf);
        free_page((unsigned long)kvm->arch.sie_page2);
        if (!kvm_is_ucontrol(kvm))
-               gmap_free(kvm->arch.gmap);
+               gmap_remove(kvm->arch.gmap);
        kvm_s390_destroy_adapters(kvm);
        kvm_s390_clear_float_irqs(kvm);
+       kvm_s390_vsie_destroy(kvm);
        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
 }
 
 /* Section: vcpu related */
 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
 {
-       vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
+       vcpu->arch.gmap = gmap_create(current->mm, -1UL);
        if (!vcpu->arch.gmap)
                return -ENOMEM;
        vcpu->arch.gmap->private = vcpu->kvm;
@@ -1716,7 +1773,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        save_access_regs(vcpu->arch.host_acrs);
        restore_access_regs(vcpu->run->s.regs.acrs);
-       gmap_enable(vcpu->arch.gmap);
+       gmap_enable(vcpu->arch.enabled_gmap);
        atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
                __start_cpu_timer_accounting(vcpu);
@@ -1729,7 +1786,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
                __stop_cpu_timer_accounting(vcpu);
        atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
-       gmap_disable(vcpu->arch.gmap);
+       vcpu->arch.enabled_gmap = gmap_get_enabled();
+       gmap_disable(vcpu->arch.enabled_gmap);
 
        /* Save guest register state */
        save_fpu_regs();
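
With vsie, the gmap that is live on the CPU while this vCPU runs may be a
shadow gmap (the nested guest's address space) rather than vcpu->arch.gmap.
vcpu_put therefore saves whatever gmap is currently enabled, and vcpu_load
re-enables exactly that, so a vCPU preempted inside the vsie loop resumes
with the correct ASCE. The pairing, schematically (assumption: the new
gmap_get_enabled() returns the gmap currently loaded for this CPU):

    /*
     *  vcpu_put:   vcpu->arch.enabled_gmap = gmap_get_enabled();
     *              gmap_disable(vcpu->arch.enabled_gmap);
     *  vcpu_load:  gmap_enable(vcpu->arch.enabled_gmap);
     */
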
@@ -1778,7 +1836,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
                vcpu->arch.gmap = vcpu->kvm->arch.gmap;
                sca_add_vcpu(vcpu);
        }
-
+       /* make vcpu_load load the right gmap on the first trigger */
+       vcpu->arch.enabled_gmap = vcpu->arch.gmap;
 }
 
 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
@@ -1837,10 +1896,12 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 
        kvm_s390_vcpu_setup_model(vcpu);
 
-       vcpu->arch.sie_block->ecb = 0x02;
+       /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
+       if (MACHINE_HAS_ESOP)
+               vcpu->arch.sie_block->ecb |= 0x02;
        if (test_kvm_facility(vcpu->kvm, 9))
                vcpu->arch.sie_block->ecb |= 0x04;
-       if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
+       if (test_kvm_facility(vcpu->kvm, 73))
                vcpu->arch.sie_block->ecb |= 0x10;
 
        if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
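
For orientation, the ecb bits touched in this hunk, with the symbolic names
later kernels introduce for them (this tree still uses raw hex; the names and
comments are an assumption based on a newer kvm_host.h):

    #define ECB_HOSTPROTINT 0x02    /* host-protection interruption; needs ESOP */
    #define ECB_SRSI        0x04    /* sense-running-status indication (fac. 9) */
    #define ECB_TE          0x10    /* transactional execution (facility 73) */
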
@@ -1973,16 +2034,25 @@ void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
        kvm_s390_vcpu_request(vcpu);
 }
 
-static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
+                             unsigned long end)
 {
-       int i;
        struct kvm *kvm = gmap->private;
        struct kvm_vcpu *vcpu;
+       unsigned long prefix;
+       int i;
 
+       if (gmap_is_shadow(gmap))
+               return;
+       if (start >= 1UL << 31)
+               /* We are only interested in prefix pages */
+               return;
        kvm_for_each_vcpu(i, vcpu, kvm) {
                /* match against both prefix pages */
-               if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
-                       VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
+               prefix = kvm_s390_get_prefix(vcpu);
+               if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
+                       VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
+                                  start, end);
                        kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
                }
        }
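
The notifier now receives a range, and a vCPU must reload if that range
intersects its 8 KB prefix area (two 4 KB pages). The interval-overlap
predicate can be checked standalone (hits_prefix is just a name for this
sketch):

    #include <assert.h>

    #define PAGE_SIZE 4096UL

    static int hits_prefix(unsigned long start, unsigned long end,
                           unsigned long prefix)
    {
            return prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1;
    }

    int main(void)
    {
            unsigned long prefix = 0x2000;  /* prefix area: 0x2000..0x3fff */

            assert(hits_prefix(0x3000, 0x3fff, prefix));    /* second prefix page */
            assert(!hits_prefix(0x4000, 0x4fff, prefix));   /* just past the area */
            assert(!hits_prefix(0x0000, 0x1fff, prefix));   /* just below it */
            return 0;
    }
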
@@ -2261,16 +2331,16 @@ retry:
                return 0;
        /*
         * We use MMU_RELOAD just to re-arm the ipte notifier for the
-        * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
+        * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
         * This ensures that the ipte instruction for this request has
         * already finished. We might race against a second unmapper that
         * wants to set the blocking bit. Lets just retry the request loop.
         */
        if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
                int rc;
-               rc = gmap_ipte_notify(vcpu->arch.gmap,
-                                     kvm_s390_get_prefix(vcpu),
-                                     PAGE_SIZE * 2);
+               rc = gmap_mprotect_notify(vcpu->arch.gmap,
+                                         kvm_s390_get_prefix(vcpu),
+                                         PAGE_SIZE * 2, PROT_WRITE);
                if (rc)
                        return rc;
                goto retry;
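
gmap_mprotect_notify (formerly gmap_ipte_notify) write-protects the range and
sets the notification bit, so the notifier is one-shot per protection event
and has to be re-armed here. The cycle, schematically:

    /*
     *  gmap_mprotect_notify(gmap, prefix, 2 * PAGE_SIZE, PROT_WRITE);
     *      -> prefix pages write-protected, marked for notification
     *  write to a prefix page
     *      -> kvm_gmap_notifier(gmap, start, end)
     *      -> kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu)
     *  next vcpu entry handles KVM_REQ_MMU_RELOAD and protects again
     */
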
@@ -2553,14 +2623,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                 * guest_enter and guest_exit should be no uaccess.
                 */
                local_irq_disable();
-               __kvm_guest_enter();
+               guest_enter_irqoff();
                __disable_cpu_timer_accounting(vcpu);
                local_irq_enable();
                exit_reason = sie64a(vcpu->arch.sie_block,
                                     vcpu->run->s.regs.gprs);
                local_irq_disable();
                __enable_cpu_timer_accounting(vcpu);
-               __kvm_guest_exit();
+               guest_exit_irqoff();
                local_irq_enable();
                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
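
This last hunk is what the commit subject refers to: s390's local
__kvm_guest_enter/__kvm_guest_exit wrappers are replaced by the generic
guest_enter_irqoff()/guest_exit_irqoff(), which expect to be called with
interrupts disabled, hence the surrounding local_irq_disable() and
local_irq_enable(). Roughly what the enter side does (paraphrased from
include/linux/context_tracking.h of this era, not verbatim):

    static inline void guest_enter_irqoff(void)
    {
            /* account CPU time to the guest... */
            if (vtime_accounting_cpu_enabled())
                    vtime_guest_enter(current);
            else
                    current->flags |= PF_VCPU;

            /* ...and tell context tracking that guest code runs next */
            if (context_tracking_is_enabled())
                    __context_tracking_enter(CONTEXT_GUEST);

            /* treat guest mode as an RCU quiescent state, like user mode */
            if (!context_tracking_cpu_is_enabled())
                    rcu_virt_note_context_switch(smp_processor_id());
    }
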