KVM: remove kvm_guest_enter/exit wrappers
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ad93b40..d42428c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
+#include <linux/mman.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/slab.h>
@@ -98,6 +99,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
+       { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
@@ -123,6 +125,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { NULL }
 };
 
+/* allow nested virtualization in KVM (if enabled by user space) */
+static int nested;
+module_param(nested, int, S_IRUGO);
+MODULE_PARM_DESC(nested, "Nested virtualization support");
+
 /* upper facilities limit for kvm */
 unsigned long kvm_s390_fac_list_mask[16] = {
        0xffe6000000000000UL,
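
The new knob defaults to 0, and since its permissions are S_IRUGO (0444) it is
read-only at runtime, so nested SIE support has to be chosen at module load
time (e.g. modprobe kvm nested=1). A minimal userspace probe, a sketch that
assumes kvm is loaded as the module named "kvm", can read it back via sysfs:

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/sys/module/kvm/parameters/nested", "r");
            int nested;

            if (!f) {
                    perror("fopen");  /* kvm not loaded, or a pre-vsie kernel */
                    return 1;
            }
            if (fscanf(f, "%d", &nested) == 1)
                    printf("nested virtualization: %s\n", nested ? "on" : "off");
            fclose(f);
            return 0;
    }
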
@@ -141,6 +148,7 @@ static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 
 static struct gmap_notifier gmap_notifier;
+static struct gmap_notifier vsie_gmap_notifier;
 debug_info_t *kvm_s390_dbf;
 
 /* Section: not file related */
@@ -150,7 +158,8 @@ int kvm_arch_hardware_enable(void)
        return 0;
 }
 
-static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
+                             unsigned long end);
 
 /*
  * This callback is executed during stop_machine(). All CPUs are therefore
@@ -172,6 +181,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                        vcpu->arch.sie_block->epoch -= *delta;
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
+                       if (vcpu->arch.vsie_block)
+                               vcpu->arch.vsie_block->epoch -= *delta;
                }
        }
        return NOTIFY_OK;
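
The guest's TOD clock is the host TOD plus the per-VM epoch. When the machine
clock is stepped (this notifier fires under stop_machine(), e.g. after a
clock synchronization event), subtracting delta from every epoch keeps guest
time from jumping; the added lines apply the same correction to a nested
guest's shadow SIE block. The invariant, as a standalone sketch:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t host_tod = 1000, epoch = 200;          /* guest TOD == 1200 */
            uint64_t guest_before = host_tod + epoch;
            int64_t delta = 50;                             /* host clock is stepped */

            host_tod += delta;
            epoch -= delta;                                 /* the notifier's fix-up */
            assert(host_tod + epoch == guest_before);       /* guest TOD unchanged */
            return 0;
    }
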
@@ -184,7 +195,9 @@ static struct notifier_block kvm_clock_notifier = {
 int kvm_arch_hardware_setup(void)
 {
        gmap_notifier.notifier_call = kvm_gmap_notifier;
-       gmap_register_ipte_notifier(&gmap_notifier);
+       gmap_register_pte_notifier(&gmap_notifier);
+       vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
+       gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
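
The rename from "ipte" to "pte" notifier comes with the range-based callback
(see the changed kvm_gmap_notifier prototype above), and a second notifier is
now registered so vsie can flush its shadow gmaps. The hook's shape, as
implied by that prototype (a paraphrased sketch of the gmap.h declaration of
this era, with unrelated members omitted):

    struct gmap_notifier {
            struct list_head list;
            /* invoked when ptes carrying the notification bit are
             * invalidated or unprotected in [start, end] of this
             * guest address space */
            void (*notifier_call)(struct gmap *gmap, unsigned long start,
                                  unsigned long end);
    };
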
@@ -192,7 +205,8 @@ int kvm_arch_hardware_setup(void)
 
 void kvm_arch_hardware_unsetup(void)
 {
-       gmap_unregister_ipte_notifier(&gmap_notifier);
+       gmap_unregister_pte_notifier(&gmap_notifier);
+       gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
 }
@@ -250,6 +264,46 @@ static void kvm_s390_cpu_feat_init(void)
 
        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
+       /*
+        * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
+        * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
+        */
+       if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
+           !test_facility(3) || !nested)
+               return;
+       allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
+       if (sclp.has_64bscao)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
+       if (sclp.has_siif)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
+       if (sclp.has_gpere)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
+       if (sclp.has_gsls)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
+       if (sclp.has_ib)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
+       if (sclp.has_cei)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
+       if (sclp.has_ibs)
+               allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
+       /*
+        * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
+        * all skey handling functions read/set the skey from the PGSTE
+        * instead of the real storage key.
+        *
+        * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
+        * pages being detected as preserved although they are resident.
+        *
+        * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
+        * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
+        *
+        * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
+        * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
+        * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
+        *
+        * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
+        * cannot easily shadow the SCA because of the ipte lock.
+        */
 }
 
 int kvm_arch_init(void *opaque)
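
For reference, the allow_cpu_feat() helper called throughout the new block
just sets a bit in the kvm_s390_available_cpu_feat bitmap declared earlier
(paraphrased from the same file):

    static void allow_cpu_feat(unsigned long nr)
    {
            set_bit_inv(nr, kvm_s390_available_cpu_feat);
    }
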
@@ -422,7 +476,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
-               if (atomic_read(&kvm->online_vcpus)) {
+               if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
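
This and the following -EBUSY checks switch from online_vcpus to
created_vcpus. The reason, from the generic patch in this series: a vCPU only
shows up in online_vcpus once it is fully constructed, so a concurrent
KVM_CREATE_VCPU could slip past the old check while VM-wide state is being
changed; created_vcpus is bumped under kvm->lock as soon as creation begins.
Schematically (a sketch, not verbatim kernel code):

    /*
     * KVM_CREATE_VCPU, simplified:
     *
     *      mutex_lock(&kvm->lock);
     *      kvm->created_vcpus++;           <- -EBUSY checks see it at once
     *      mutex_unlock(&kvm->lock);
     *      ... long, fallible vCPU setup; VM-wide state must stay frozen ...
     *      atomic_inc(&kvm->online_vcpus); <- only here the old check fired
     */
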
@@ -437,7 +491,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
-               if (atomic_read(&kvm->online_vcpus)) {
+               if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
@@ -492,7 +546,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
                ret = -EBUSY;
                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
-               if (atomic_read(&kvm->online_vcpus) == 0) {
+               if (!kvm->created_vcpus) {
                        kvm->arch.use_cmma = 1;
                        ret = 0;
                }
@@ -530,20 +584,20 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
                if (!new_limit)
                        return -EINVAL;
 
-               /* gmap_alloc takes last usable address */
+               /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;
 
                ret = -EBUSY;
                mutex_lock(&kvm->lock);
-               if (atomic_read(&kvm->online_vcpus) == 0) {
-                       /* gmap_alloc will round the limit up */
-                       struct gmap *new = gmap_alloc(current->mm, new_limit);
+               if (!kvm->created_vcpus) {
+                       /* gmap_create will round the limit up */
+                       struct gmap *new = gmap_create(current->mm, new_limit);
 
                        if (!new) {
                                ret = -ENOMEM;
                        } else {
-                               gmap_free(kvm->arch.gmap);
+                               gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
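
gmap_create (formerly gmap_alloc) rounds the limit up to the smallest address
space a DAT table hierarchy can cover. Roughly, paraphrasing the mm/gmap.c
logic of this era (the function name here is just for the sketch):

    static unsigned long gmap_round_limit(unsigned long limit)
    {
            if (limit < (1UL << 31))        /* 2 GB: segment table only */
                    return (1UL << 31) - 1;
            if (limit < (1UL << 42))        /* 4 TB: region-third table */
                    return (1UL << 42) - 1;
            if (limit < (1UL << 53))        /* 8 PB: region-second table */
                    return (1UL << 53) - 1;
            return -1UL;                    /* full space: region-first table */
    }
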
@@ -713,7 +767,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
        int ret = 0;
 
        mutex_lock(&kvm->lock);
-       if (atomic_read(&kvm->online_vcpus)) {
+       if (kvm->created_vcpus) {
                ret = -EBUSY;
                goto out;
        }
@@ -1029,7 +1083,6 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 {
        uint8_t *keys;
        uint64_t hva;
-       unsigned long curkey;
        int i, r = 0;
 
        if (args->flags != 0)
@@ -1050,26 +1103,27 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
        if (!keys)
                return -ENOMEM;
 
+       down_read(&current->mm->mmap_sem);
        for (i = 0; i < args->count; i++) {
                hva = gfn_to_hva(kvm, args->start_gfn + i);
                if (kvm_is_error_hva(hva)) {
                        r = -EFAULT;
-                       goto out;
+                       break;
                }
 
-               curkey = get_guest_storage_key(current->mm, hva);
-               if (IS_ERR_VALUE(curkey)) {
-                       r = curkey;
-                       goto out;
-               }
-               keys[i] = curkey;
+               r = get_guest_storage_key(current->mm, hva, &keys[i]);
+               if (r)
+                       break;
+       }
+       up_read(&current->mm->mmap_sem);
+
+       if (!r) {
+               r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
+                                sizeof(uint8_t) * args->count);
+               if (r)
+                       r = -EFAULT;
        }
 
-       r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
-                        sizeof(uint8_t) * args->count);
-       if (r)
-               r = -EFAULT;
-out:
        kvfree(keys);
        return r;
 }
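
Two things change in the storage-key paths (here and in the set path below):
get_guest_storage_key() now returns the key through a pointer and the status
separately, instead of overloading one unsigned long for both (hence the old
IS_ERR_VALUE dance), and mmap_sem is taken once around the whole walk rather
than per page inside the helper. The goto-to-break conversion keeps the
semaphore released on every error path. The new contract, sketched (assuming
the helper takes an unsigned char pointer, as the &keys[i] use suggests):

    unsigned char key;
    int rc;

    down_read(&mm->mmap_sem);                  /* caller now holds the lock */
    rc = get_guest_storage_key(mm, hva, &key); /* 0 on success, or -E... */
    up_read(&mm->mmap_sem);
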
@@ -1106,24 +1160,25 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
        if (r)
                goto out;
 
+       down_read(&current->mm->mmap_sem);
        for (i = 0; i < args->count; i++) {
                hva = gfn_to_hva(kvm, args->start_gfn + i);
                if (kvm_is_error_hva(hva)) {
                        r = -EFAULT;
-                       goto out;
+                       break;
                }
 
                /* Lowest order bit is reserved */
                if (keys[i] & 0x01) {
                        r = -EINVAL;
-                       goto out;
+                       break;
                }
 
-               r = set_guest_storage_key(current->mm, hva,
-                                         (unsigned long)keys[i], 0);
+               r = set_guest_storage_key(current->mm, hva, keys[i], 0);
                if (r)
-                       goto out;
+                       break;
        }
+       up_read(&current->mm->mmap_sem);
 out:
        kvfree(keys);
        return r;
@@ -1391,7 +1446,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
                else
                        kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
                                                    sclp.hamax + 1);
-               kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
+               kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
                if (!kvm->arch.gmap)
                        goto out_err;
                kvm->arch.gmap->private = kvm;
@@ -1403,6 +1458,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        kvm->arch.epoch = 0;
 
        spin_lock_init(&kvm->arch.start_stop_lock);
+       kvm_s390_vsie_init(kvm);
        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
 
        return 0;
@@ -1424,7 +1480,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
                sca_del_vcpu(vcpu);
 
        if (kvm_is_ucontrol(vcpu->kvm))
-               gmap_free(vcpu->arch.gmap);
+               gmap_remove(vcpu->arch.gmap);
 
        if (vcpu->kvm->arch.use_cmma)
                kvm_s390_vcpu_unsetup_cmma(vcpu);
@@ -1457,16 +1513,17 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
        debug_unregister(kvm->arch.dbf);
        free_page((unsigned long)kvm->arch.sie_page2);
        if (!kvm_is_ucontrol(kvm))
-               gmap_free(kvm->arch.gmap);
+               gmap_remove(kvm->arch.gmap);
        kvm_s390_destroy_adapters(kvm);
        kvm_s390_clear_float_irqs(kvm);
+       kvm_s390_vsie_destroy(kvm);
        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
 }
 
 /* Section: vcpu related */
 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
 {
-       vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
+       vcpu->arch.gmap = gmap_create(current->mm, -1UL);
        if (!vcpu->arch.gmap)
                return -ENOMEM;
        vcpu->arch.gmap->private = vcpu->kvm;
@@ -1716,7 +1773,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        save_access_regs(vcpu->arch.host_acrs);
        restore_access_regs(vcpu->run->s.regs.acrs);
-       gmap_enable(vcpu->arch.gmap);
+       gmap_enable(vcpu->arch.enabled_gmap);
        atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
                __start_cpu_timer_accounting(vcpu);
@@ -1729,7 +1786,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
                __stop_cpu_timer_accounting(vcpu);
        atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
-       gmap_disable(vcpu->arch.gmap);
+       vcpu->arch.enabled_gmap = gmap_get_enabled();
+       gmap_disable(vcpu->arch.enabled_gmap);
 
        /* Save guest register state */
        save_fpu_regs();
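
With vsie, the gmap that is live on the CPU while this vCPU runs may be a
shadow gmap (the nested guest's address space) rather than vcpu->arch.gmap.
vcpu_put therefore saves whatever gmap is currently enabled, and vcpu_load
re-enables exactly that, so a vCPU preempted inside the vsie loop resumes
with the correct ASCE. The pairing, schematically (assumption: the new
gmap_get_enabled() returns the gmap currently loaded for this CPU):

    /*
     *  vcpu_put:   vcpu->arch.enabled_gmap = gmap_get_enabled();
     *              gmap_disable(vcpu->arch.enabled_gmap);
     *  vcpu_load:  gmap_enable(vcpu->arch.enabled_gmap);
     */
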
@@ -1778,7 +1836,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
                vcpu->arch.gmap = vcpu->kvm->arch.gmap;
                sca_add_vcpu(vcpu);
        }
-
+       /* make vcpu_load load the right gmap on the first trigger */
+       vcpu->arch.enabled_gmap = vcpu->arch.gmap;
 }
 
 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
@@ -1837,10 +1896,12 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 
        kvm_s390_vcpu_setup_model(vcpu);
 
-       vcpu->arch.sie_block->ecb = 0x02;
+       /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
+       if (MACHINE_HAS_ESOP)
+               vcpu->arch.sie_block->ecb |= 0x02;
        if (test_kvm_facility(vcpu->kvm, 9))
                vcpu->arch.sie_block->ecb |= 0x04;
-       if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
+       if (test_kvm_facility(vcpu->kvm, 73))
                vcpu->arch.sie_block->ecb |= 0x10;
 
        if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
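
For orientation, the ecb bits touched in this hunk, with the symbolic names
later kernels introduce for them (this tree still uses raw hex; the names and
comments are an assumption based on a newer kvm_host.h):

    #define ECB_HOSTPROTINT 0x02    /* host-protection interruption; needs ESOP */
    #define ECB_SRSI        0x04    /* sense-running-status indication (fac. 9) */
    #define ECB_TE          0x10    /* transactional execution (facility 73) */
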
@@ -1973,16 +2034,25 @@ void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
        kvm_s390_vcpu_request(vcpu);
 }
 
-static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
+                             unsigned long end)
 {
-       int i;
        struct kvm *kvm = gmap->private;
        struct kvm_vcpu *vcpu;
+       unsigned long prefix;
+       int i;
 
+       if (gmap_is_shadow(gmap))
+               return;
+       if (start >= 1UL << 31)
+               /* We are only interested in prefix pages */
+               return;
        kvm_for_each_vcpu(i, vcpu, kvm) {
                /* match against both prefix pages */
-               if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
-                       VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
+               prefix = kvm_s390_get_prefix(vcpu);
+               if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
+                       VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
+                                  start, end);
                        kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
                }
        }
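
The notifier now receives a range, and a vCPU must reload if that range
intersects its 8 KB prefix area (two 4 KB pages). The interval-overlap
predicate can be checked standalone (hits_prefix is just a name for this
sketch):

    #include <assert.h>

    #define PAGE_SIZE 4096UL

    static int hits_prefix(unsigned long start, unsigned long end,
                           unsigned long prefix)
    {
            return prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1;
    }

    int main(void)
    {
            unsigned long prefix = 0x2000;  /* prefix area: 0x2000..0x3fff */

            assert(hits_prefix(0x3000, 0x3fff, prefix));    /* second prefix page */
            assert(!hits_prefix(0x4000, 0x4fff, prefix));   /* just past the area */
            assert(!hits_prefix(0x0000, 0x1fff, prefix));   /* just below it */
            return 0;
    }
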
@@ -2261,16 +2331,16 @@ retry:
                return 0;
        /*
         * We use MMU_RELOAD just to re-arm the ipte notifier for the
-        * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
+        * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
         * This ensures that the ipte instruction for this request has
         * already finished. We might race against a second unmapper that
         * wants to set the blocking bit. Lets just retry the request loop.
         */
        if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
                int rc;
-               rc = gmap_ipte_notify(vcpu->arch.gmap,
-                                     kvm_s390_get_prefix(vcpu),
-                                     PAGE_SIZE * 2);
+               rc = gmap_mprotect_notify(vcpu->arch.gmap,
+                                         kvm_s390_get_prefix(vcpu),
+                                         PAGE_SIZE * 2, PROT_WRITE);
                if (rc)
                        return rc;
                goto retry;
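
gmap_mprotect_notify (formerly gmap_ipte_notify) write-protects the range and
sets the notification bit, so the notifier is one-shot per protection event
and has to be re-armed here. The cycle, schematically:

    /*
     *  gmap_mprotect_notify(gmap, prefix, 2 * PAGE_SIZE, PROT_WRITE);
     *      -> prefix pages write-protected, marked for notification
     *  write to a prefix page
     *      -> kvm_gmap_notifier(gmap, start, end)
     *      -> kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu)
     *  next vcpu entry handles KVM_REQ_MMU_RELOAD and protects again
     */
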
@@ -2553,14 +2623,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                 * guest_enter and guest_exit should be no uaccess.
                 */
                local_irq_disable();
-               __kvm_guest_enter();
+               guest_enter_irqoff();
                __disable_cpu_timer_accounting(vcpu);
                local_irq_enable();
                exit_reason = sie64a(vcpu->arch.sie_block,
                                     vcpu->run->s.regs.gprs);
                local_irq_disable();
                __enable_cpu_timer_accounting(vcpu);
-               __kvm_guest_exit();
+               guest_exit_irqoff();
                local_irq_enable();
                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
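
This last hunk is what the commit subject refers to: s390's local
__kvm_guest_enter/__kvm_guest_exit wrappers are replaced by the generic
guest_enter_irqoff()/guest_exit_irqoff(), which expect to be called with
interrupts disabled, hence the surrounding local_irq_disable() and
local_irq_enable(). Roughly what the enter side does (paraphrased from
include/linux/context_tracking.h of this era, not verbatim):

    static inline void guest_enter_irqoff(void)
    {
            /* account CPU time to the guest... */
            if (vtime_accounting_cpu_enabled())
                    vtime_guest_enter(current);
            else
                    current->flags |= PF_VCPU;

            /* ...and tell context tracking that guest code runs next */
            if (context_tracking_is_enabled())
                    __context_tracking_enter(CONTEXT_GUEST);

            /* treat guest mode as an RCU quiescent state, like user mode */
            if (!context_tracking_cpu_is_enabled())
                    rcu_virt_note_context_switch(smp_processor_id());
    }
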