arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/module.h>
  25 #include <linux/random.h>
  26 #include <linux/slab.h>
  27 #include <linux/timer.h>
  28 #include <linux/vmalloc.h>
  29 #include <linux/bitmap.h>
  30 #include <asm/asm-offsets.h>
  31 #include <asm/lowcore.h>
  32 #include <asm/etr.h>
  33 #include <asm/pgtable.h>
  34 #include <asm/gmap.h>
  35 #include <asm/nmi.h>
  36 #include <asm/switch_to.h>
  37 #include <asm/isc.h>
  38 #include <asm/sclp.h>
  39 #include <asm/cpacf.h>
  40 #include <asm/etr.h>
  41 #include "kvm-s390.h"
  42 #include "gaccess.h"
  43
  44 #define KMSG_COMPONENT "kvm-s390"
  45 #undef pr_fmt
  46 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  47
  48 #define CREATE_TRACE_POINTS
  49 #include "trace.h"
  50 #include "trace-s390.h"
  51
  52 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  53 #define LOCAL_IRQS 32
  54 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  55                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  56
  57 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  58
  59 struct kvm_stats_debugfs_item debugfs_entries[] = {
  60         { "userspace_handled", VCPU_STAT(exit_userspace) },
  61         { "exit_null", VCPU_STAT(exit_null) },
  62         { "exit_validity", VCPU_STAT(exit_validity) },
  63         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  64         { "exit_external_request", VCPU_STAT(exit_external_request) },
  65         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  66         { "exit_instruction", VCPU_STAT(exit_instruction) },
  67         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  68         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  69         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  70         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  71         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  72         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  73         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  74         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  75         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  76         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  77         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  78         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  79         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  80         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  81         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  82         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  83         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  84         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  85         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  86         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  87         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  88         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  89         { "instruction_spx", VCPU_STAT(instruction_spx) },
  90         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  91         { "instruction_stap", VCPU_STAT(instruction_stap) },
  92         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  93         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
  94         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
  95         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
  96         { "instruction_essa", VCPU_STAT(instruction_essa) },
  97         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
  98         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
  99         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 100         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 101         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 102         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 103         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 104         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 105         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 106         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 107         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 108         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 109         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 110         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 111         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 112         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 113         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 114         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 115         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 116         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 117         { "diagnose_10", VCPU_STAT(diagnose_10) },
 118         { "diagnose_44", VCPU_STAT(diagnose_44) },
 119         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 120         { "diagnose_258", VCPU_STAT(diagnose_258) },
 121         { "diagnose_308", VCPU_STAT(diagnose_308) },
 122         { "diagnose_500", VCPU_STAT(diagnose_500) },
 123         { NULL }
 124 };
 125
 126 /* upper facilities limit for kvm */
 127 unsigned long kvm_s390_fac_list_mask[16] = {
 128         0xffe6000000000000UL,
 129         0x005e000000000000UL,
 130 };
 131
 132 unsigned long kvm_s390_fac_list_mask_size(void)
 133 {
 134         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 135         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 136 }
 137
 138 /* available cpu features supported by kvm */
 139 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 140 /* available subfunctions indicated via query / "test bit" */
 141 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 142
 143 static struct gmap_notifier gmap_notifier;
 144 debug_info_t *kvm_s390_dbf;
 145
 146 /* Section: not file related */
 147 int kvm_arch_hardware_enable(void)
 148 {
 149         /* every s390 is virtualization enabled ;-) */
 150         return 0;
 151 }
 152
 153 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
 154
 155 /*
 156  * This callback is executed during stop_machine(). All CPUs are therefore
 157  * temporarily stopped. In order not to change guest behavior, we have to
 158  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 159  * so a CPU won't be stopped while calculating with the epoch.
 160  */
 161 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 162                           void *v)
 163 {
 164         struct kvm *kvm;
 165         struct kvm_vcpu *vcpu;
 166         int i;
 167         unsigned long long *delta = v;
 168
 169         list_for_each_entry(kvm, &vm_list, vm_list) {
 170                 kvm->arch.epoch -= *delta;
 171                 kvm_for_each_vcpu(i, vcpu, kvm) {
 172                         vcpu->arch.sie_block->epoch -= *delta;
 173                         if (vcpu->arch.cputm_enabled)
 174                                 vcpu->arch.cputm_start += *delta;
 175                 }
 176         }
 177         return NOTIFY_OK;
 178 }
 179
 180 static struct notifier_block kvm_clock_notifier = {
 181         .notifier_call = kvm_clock_sync,
 182 };
 183
 184 int kvm_arch_hardware_setup(void)
 185 {
 186         gmap_notifier.notifier_call = kvm_gmap_notifier;
 187         gmap_register_ipte_notifier(&gmap_notifier);
 188         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 189                                        &kvm_clock_notifier);
 190         return 0;
 191 }
 192
 193 void kvm_arch_hardware_unsetup(void)
 194 {
 195         gmap_unregister_ipte_notifier(&gmap_notifier);
 196         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 197                                          &kvm_clock_notifier);
 198 }
 199
 200 static void allow_cpu_feat(unsigned long nr)
 201 {
 202         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 203 }
 204
 205 static inline int plo_test_bit(unsigned char nr)
 206 {
 207         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 208         int cc = 3; /* subfunction not available */
 209
 210         asm volatile(
 211                 /* Parameter registers are ignored for "test bit" */
 212                 "       plo     0,0,0,0(0)\n"
 213                 "       ipm     %0\n"
 214                 "       srl     %0,28\n"
 215                 : "=d" (cc)
 216                 : "d" (r0)
 217                 : "cc");
 218         return cc == 0;
 219 }
 220
 221 static void kvm_s390_cpu_feat_init(void)
 222 {
 223         int i;
 224
 225         for (i = 0; i < 256; ++i) {
 226                 if (plo_test_bit(i))
 227                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 228         }
 229
 230         if (test_facility(28)) /* TOD-clock steering */
 231                 etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);
 232
 233         if (test_facility(17)) { /* MSA */
 234                 __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
 235                 __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
 236                 __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
 237                 __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
 238                 __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
 239         }
 240         if (test_facility(76)) /* MSA3 */
 241                 __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
 242         if (test_facility(77)) { /* MSA4 */
 243                 __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
 244                 __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
 245                 __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
 246                 __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
 247         }
 248         if (test_facility(57)) /* MSA5 */
 249                 __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
 250
 251         if (MACHINE_HAS_ESOP)
 252                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 253 }
 254
 255 int kvm_arch_init(void *opaque)
 256 {
 257         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 258         if (!kvm_s390_dbf)
 259                 return -ENOMEM;
 260
 261         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 262                 debug_unregister(kvm_s390_dbf);
 263                 return -ENOMEM;
 264         }
 265
 266         kvm_s390_cpu_feat_init();
 267
 268         /* Register floating interrupt controller interface. */
 269         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 270 }
 271
 272 void kvm_arch_exit(void)
 273 {
 274         debug_unregister(kvm_s390_dbf);
 275 }
 276
 277 /* Section: device related */
 278 long kvm_arch_dev_ioctl(struct file *filp,
 279                         unsigned int ioctl, unsigned long arg)
 280 {
 281         if (ioctl == KVM_S390_ENABLE_SIE)
 282                 return s390_enable_sie();
 283         return -EINVAL;
 284 }
 285
 286 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 287 {
 288         int r;
 289
 290         switch (ext) {
 291         case KVM_CAP_S390_PSW:
 292         case KVM_CAP_S390_GMAP:
 293         case KVM_CAP_SYNC_MMU:
 294 #ifdef CONFIG_KVM_S390_UCONTROL
 295         case KVM_CAP_S390_UCONTROL:
 296 #endif
 297         case KVM_CAP_ASYNC_PF:
 298         case KVM_CAP_SYNC_REGS:
 299         case KVM_CAP_ONE_REG:
 300         case KVM_CAP_ENABLE_CAP:
 301         case KVM_CAP_S390_CSS_SUPPORT:
 302         case KVM_CAP_IOEVENTFD:
 303         case KVM_CAP_DEVICE_CTRL:
 304         case KVM_CAP_ENABLE_CAP_VM:
 305         case KVM_CAP_S390_IRQCHIP:
 306         case KVM_CAP_VM_ATTRIBUTES:
 307         case KVM_CAP_MP_STATE:
 308         case KVM_CAP_S390_INJECT_IRQ:
 309         case KVM_CAP_S390_USER_SIGP:
 310         case KVM_CAP_S390_USER_STSI:
 311         case KVM_CAP_S390_SKEYS:
 312         case KVM_CAP_S390_IRQ_STATE:
 313                 r = 1;
 314                 break;
 315         case KVM_CAP_S390_MEM_OP:
 316                 r = MEM_OP_MAX_SIZE;
 317                 break;
 318         case KVM_CAP_NR_VCPUS:
 319         case KVM_CAP_MAX_VCPUS:
 320                 r = KVM_S390_BSCA_CPU_SLOTS;
 321                 if (sclp.has_esca && sclp.has_64bscao)
 322                         r = KVM_S390_ESCA_CPU_SLOTS;
 323                 break;
 324         case KVM_CAP_NR_MEMSLOTS:
 325                 r = KVM_USER_MEM_SLOTS;
 326                 break;
 327         case KVM_CAP_S390_COW:
 328                 r = MACHINE_HAS_ESOP;
 329                 break;
 330         case KVM_CAP_S390_VECTOR_REGISTERS:
 331                 r = MACHINE_HAS_VX;
 332                 break;
 333         case KVM_CAP_S390_RI:
 334                 r = test_facility(64);
 335                 break;
 336         default:
 337                 r = 0;
 338         }
 339         return r;
 340 }
 341
 342 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 343                                         struct kvm_memory_slot *memslot)
 344 {
 345         gfn_t cur_gfn, last_gfn;
 346         unsigned long address;
 347         struct gmap *gmap = kvm->arch.gmap;
 348
 349         /* Loop over all guest pages */
 350         last_gfn = memslot->base_gfn + memslot->npages;
 351         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 352                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 353
 354                 if (test_and_clear_guest_dirty(gmap->mm, address))
 355                         mark_page_dirty(kvm, cur_gfn);
 356                 if (fatal_signal_pending(current))
 357                         return;
 358                 cond_resched();
 359         }
 360 }
 361
 362 /* Section: vm related */
 363 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 364
 365 /*
 366  * Get (and clear) the dirty memory log for a memory slot.
 367  */
 368 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 369                                struct kvm_dirty_log *log)
 370 {
 371         int r;
 372         unsigned long n;
 373         struct kvm_memslots *slots;
 374         struct kvm_memory_slot *memslot;
 375         int is_dirty = 0;
 376
 377         mutex_lock(&kvm->slots_lock);
 378
 379         r = -EINVAL;
 380         if (log->slot >= KVM_USER_MEM_SLOTS)
 381                 goto out;
 382
 383         slots = kvm_memslots(kvm);
 384         memslot = id_to_memslot(slots, log->slot);
 385         r = -ENOENT;
 386         if (!memslot->dirty_bitmap)
 387                 goto out;
 388
 389         kvm_s390_sync_dirty_log(kvm, memslot);
 390         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 391         if (r)
 392                 goto out;
 393
 394         /* Clear the dirty log */
 395         if (is_dirty) {
 396                 n = kvm_dirty_bitmap_bytes(memslot);
 397                 memset(memslot->dirty_bitmap, 0, n);
 398         }
 399         r = 0;
 400 out:
 401         mutex_unlock(&kvm->slots_lock);
 402         return r;
 403 }
 404
 405 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 406 {
 407         int r;
 408
 409         if (cap->flags)
 410                 return -EINVAL;
 411
 412         switch (cap->cap) {
 413         case KVM_CAP_S390_IRQCHIP:
 414                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 415                 kvm->arch.use_irqchip = 1;
 416                 r = 0;
 417                 break;
 418         case KVM_CAP_S390_USER_SIGP:
 419                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 420                 kvm->arch.user_sigp = 1;
 421                 r = 0;
 422                 break;
 423         case KVM_CAP_S390_VECTOR_REGISTERS:
 424                 mutex_lock(&kvm->lock);
 425                 if (atomic_read(&kvm->online_vcpus)) {
 426                         r = -EBUSY;
 427                 } else if (MACHINE_HAS_VX) {
 428                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 429                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 430                         r = 0;
 431                 } else
 432                         r = -EINVAL;
 433                 mutex_unlock(&kvm->lock);
 434                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 435                          r ? "(not available)" : "(success)");
 436                 break;
 437         case KVM_CAP_S390_RI:
 438                 r = -EINVAL;
 439                 mutex_lock(&kvm->lock);
 440                 if (atomic_read(&kvm->online_vcpus)) {
 441                         r = -EBUSY;
 442                 } else if (test_facility(64)) {
 443                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 444                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 445                         r = 0;
 446                 }
 447                 mutex_unlock(&kvm->lock);
 448                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 449                          r ? "(not available)" : "(success)");
 450                 break;
 451         case KVM_CAP_S390_USER_STSI:
 452                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 453                 kvm->arch.user_stsi = 1;
 454                 r = 0;
 455                 break;
 456         default:
 457                 r = -EINVAL;
 458                 break;
 459         }
 460         return r;
 461 }
 462
 463 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 464 {
 465         int ret;
 466
 467         switch (attr->attr) {
 468         case KVM_S390_VM_MEM_LIMIT_SIZE:
 469                 ret = 0;
 470                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 471                          kvm->arch.mem_limit);
 472                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 473                         ret = -EFAULT;
 474                 break;
 475         default:
 476                 ret = -ENXIO;
 477                 break;
 478         }
 479         return ret;
 480 }
 481
 482 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 483 {
 484         int ret;
 485         unsigned int idx;
 486         switch (attr->attr) {
 487         case KVM_S390_VM_MEM_ENABLE_CMMA:
 488                 ret = -EINVAL;
 489                 if (!sclp.has_cmma)
 490                         break;
 491
 492                 ret = -EBUSY;
 493                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 494                 mutex_lock(&kvm->lock);
 495                 if (atomic_read(&kvm->online_vcpus) == 0) {
 496                         kvm->arch.use_cmma = 1;
 497                         ret = 0;
 498                 }
 499                 mutex_unlock(&kvm->lock);
 500                 break;
 501         case KVM_S390_VM_MEM_CLR_CMMA:
 502                 ret = -EINVAL;
 503                 if (!kvm->arch.use_cmma)
 504                         break;
 505
 506                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 507                 mutex_lock(&kvm->lock);
 508                 idx = srcu_read_lock(&kvm->srcu);
 509                 s390_reset_cmma(kvm->arch.gmap->mm);
 510                 srcu_read_unlock(&kvm->srcu, idx);
 511                 mutex_unlock(&kvm->lock);
 512                 ret = 0;
 513                 break;
 514         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 515                 unsigned long new_limit;
 516
 517                 if (kvm_is_ucontrol(kvm))
 518                         return -EINVAL;
 519
 520                 if (get_user(new_limit, (u64 __user *)attr->addr))
 521                         return -EFAULT;
 522
 523                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 524                     new_limit > kvm->arch.mem_limit)
 525                         return -E2BIG;
 526
 527                 if (!new_limit)
 528                         return -EINVAL;
 529
 530                 /* gmap_alloc takes last usable address */
 531                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 532                         new_limit -= 1;
 533
 534                 ret = -EBUSY;
 535                 mutex_lock(&kvm->lock);
 536                 if (atomic_read(&kvm->online_vcpus) == 0) {
 537                         /* gmap_alloc will round the limit up */
 538                         struct gmap *new = gmap_alloc(current->mm, new_limit);
 539
 540                         if (!new) {
 541                                 ret = -ENOMEM;
 542                         } else {
 543                                 gmap_free(kvm->arch.gmap);
 544                                 new->private = kvm;
 545                                 kvm->arch.gmap = new;
 546                                 ret = 0;
 547                         }
 548                 }
 549                 mutex_unlock(&kvm->lock);
 550                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 551                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 552                          (void *) kvm->arch.gmap->asce);
 553                 break;
 554         }
 555         default:
 556                 ret = -ENXIO;
 557                 break;
 558         }
 559         return ret;
 560 }
 561
 562 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 563
 564 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 565 {
 566         struct kvm_vcpu *vcpu;
 567         int i;
 568
 569         if (!test_kvm_facility(kvm, 76))
 570                 return -EINVAL;
 571
 572         mutex_lock(&kvm->lock);
 573         switch (attr->attr) {
 574         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 575                 get_random_bytes(
 576                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 577                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 578                 kvm->arch.crypto.aes_kw = 1;
 579                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 580                 break;
 581         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 582                 get_random_bytes(
 583                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 584                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 585                 kvm->arch.crypto.dea_kw = 1;
 586                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 587                 break;
 588         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 589                 kvm->arch.crypto.aes_kw = 0;
 590                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 591                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 592                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 593                 break;
 594         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 595                 kvm->arch.crypto.dea_kw = 0;
 596                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 597                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 598                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 599                 break;
 600         default:
 601                 mutex_unlock(&kvm->lock);
 602                 return -ENXIO;
 603         }
 604
 605         kvm_for_each_vcpu(i, vcpu, kvm) {
 606                 kvm_s390_vcpu_crypto_setup(vcpu);
 607                 exit_sie(vcpu);
 608         }
 609         mutex_unlock(&kvm->lock);
 610         return 0;
 611 }
 612
 613 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 614 {
 615         u8 gtod_high;
 616
 617         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 618                                            sizeof(gtod_high)))
 619                 return -EFAULT;
 620
 621         if (gtod_high != 0)
 622                 return -EINVAL;
 623         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 624
 625         return 0;
 626 }
 627
 628 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 629 {
 630         u64 gtod;
 631
 632         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 633                 return -EFAULT;
 634
 635         kvm_s390_set_tod_clock(kvm, gtod);
 636         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 637         return 0;
 638 }
 639
 640 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 641 {
 642         int ret;
 643
 644         if (attr->flags)
 645                 return -EINVAL;
 646
 647         switch (attr->attr) {
 648         case KVM_S390_VM_TOD_HIGH:
 649                 ret = kvm_s390_set_tod_high(kvm, attr);
 650                 break;
 651         case KVM_S390_VM_TOD_LOW:
 652                 ret = kvm_s390_set_tod_low(kvm, attr);
 653                 break;
 654         default:
 655                 ret = -ENXIO;
 656                 break;
 657         }
 658         return ret;
 659 }
 660
 661 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 662 {
 663         u8 gtod_high = 0;
 664
 665         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 666                                          sizeof(gtod_high)))
 667                 return -EFAULT;
 668         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 669
 670         return 0;
 671 }
 672
 673 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 674 {
 675         u64 gtod;
 676
 677         gtod = kvm_s390_get_tod_clock_fast(kvm);
 678         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 679                 return -EFAULT;
 680         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 681
 682         return 0;
 683 }
 684
 685 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 686 {
 687         int ret;
 688
 689         if (attr->flags)
 690                 return -EINVAL;
 691
 692         switch (attr->attr) {
 693         case KVM_S390_VM_TOD_HIGH:
 694                 ret = kvm_s390_get_tod_high(kvm, attr);
 695                 break;
 696         case KVM_S390_VM_TOD_LOW:
 697                 ret = kvm_s390_get_tod_low(kvm, attr);
 698                 break;
 699         default:
 700                 ret = -ENXIO;
 701                 break;
 702         }
 703         return ret;
 704 }
 705
 706 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 707 {
 708         struct kvm_s390_vm_cpu_processor *proc;
 709         u16 lowest_ibc, unblocked_ibc;
 710         int ret = 0;
 711
 712         mutex_lock(&kvm->lock);
 713         if (atomic_read(&kvm->online_vcpus)) {
 714                 ret = -EBUSY;
 715                 goto out;
 716         }
 717         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 718         if (!proc) {
 719                 ret = -ENOMEM;
 720                 goto out;
 721         }
 722         if (!copy_from_user(proc, (void __user *)attr->addr,
 723                             sizeof(*proc))) {
 724                 kvm->arch.model.cpuid = proc->cpuid;
 725                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
 726                 unblocked_ibc = sclp.ibc & 0xfff;
 727                 if (lowest_ibc) {
 728                         if (proc->ibc > unblocked_ibc)
 729                                 kvm->arch.model.ibc = unblocked_ibc;
 730                         else if (proc->ibc < lowest_ibc)
 731                                 kvm->arch.model.ibc = lowest_ibc;
 732                         else
 733                                 kvm->arch.model.ibc = proc->ibc;
 734                 }
 735                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
 736                        S390_ARCH_FAC_LIST_SIZE_BYTE);
 737         } else
 738                 ret = -EFAULT;
 739         kfree(proc);
 740 out:
 741         mutex_unlock(&kvm->lock);
 742         return ret;
 743 }
 744
 745 static int kvm_s390_set_processor_feat(struct kvm *kvm,
 746                                        struct kvm_device_attr *attr)
 747 {
 748         struct kvm_s390_vm_cpu_feat data;
 749         int ret = -EBUSY;
 750
 751         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
 752                 return -EFAULT;
 753         if (!bitmap_subset((unsigned long *) data.feat,
 754                            kvm_s390_available_cpu_feat,
 755                            KVM_S390_VM_CPU_FEAT_NR_BITS))
 756                 return -EINVAL;
 757
 758         mutex_lock(&kvm->lock);
 759         if (!atomic_read(&kvm->online_vcpus)) {
 760                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
 761                             KVM_S390_VM_CPU_FEAT_NR_BITS);
 762                 ret = 0;
 763         }
 764         mutex_unlock(&kvm->lock);
 765         return ret;
 766 }
 767
 768 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
 769                                           struct kvm_device_attr *attr)
 770 {
 771         /*
 772          * Once supported by kernel + hw, we have to store the subfunctions
 773          * in kvm->arch and remember that user space configured them.
 774          */
 775         return -ENXIO;
 776 }
 777
 778 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 779 {
 780         int ret = -ENXIO;
 781
 782         switch (attr->attr) {
 783         case KVM_S390_VM_CPU_PROCESSOR:
 784                 ret = kvm_s390_set_processor(kvm, attr);
 785                 break;
 786         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 787                 ret = kvm_s390_set_processor_feat(kvm, attr);
 788                 break;
 789         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 790                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
 791                 break;
 792         }
 793         return ret;
 794 }
 795
 796 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 797 {
 798         struct kvm_s390_vm_cpu_processor *proc;
 799         int ret = 0;
 800
 801         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 802         if (!proc) {
 803                 ret = -ENOMEM;
 804                 goto out;
 805         }
 806         proc->cpuid = kvm->arch.model.cpuid;
 807         proc->ibc = kvm->arch.model.ibc;
 808         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
 809                S390_ARCH_FAC_LIST_SIZE_BYTE);
 810         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 811                 ret = -EFAULT;
 812         kfree(proc);
 813 out:
 814         return ret;
 815 }
 816
 817 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 818 {
 819         struct kvm_s390_vm_cpu_machine *mach;
 820         int ret = 0;
 821
 822         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 823         if (!mach) {
 824                 ret = -ENOMEM;
 825                 goto out;
 826         }
 827         get_cpu_id((struct cpuid *) &mach->cpuid);
 828         mach->ibc = sclp.ibc;
 829         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
 830                S390_ARCH_FAC_LIST_SIZE_BYTE);
 831         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 832                S390_ARCH_FAC_LIST_SIZE_BYTE);
 833         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 834                 ret = -EFAULT;
 835         kfree(mach);
 836 out:
 837         return ret;
 838 }
 839
 840 static int kvm_s390_get_processor_feat(struct kvm *kvm,
 841                                        struct kvm_device_attr *attr)
 842 {
 843         struct kvm_s390_vm_cpu_feat data;
 844
 845         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
 846                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 847         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 848                 return -EFAULT;
 849         return 0;
 850 }
 851
 852 static int kvm_s390_get_machine_feat(struct kvm *kvm,
 853                                      struct kvm_device_attr *attr)
 854 {
 855         struct kvm_s390_vm_cpu_feat data;
 856
 857         bitmap_copy((unsigned long *) data.feat,
 858                     kvm_s390_available_cpu_feat,
 859                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 860         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 861                 return -EFAULT;
 862         return 0;
 863 }
 864
 865 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
 866                                           struct kvm_device_attr *attr)
 867 {
 868         /*
 869          * Once we can actually configure subfunctions (kernel + hw support),
 870          * we have to check if they were already set by user space, if so copy
 871          * them from kvm->arch.
 872          */
 873         return -ENXIO;
 874 }
 875
 876 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
 877                                         struct kvm_device_attr *attr)
 878 {
 879         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
 880             sizeof(struct kvm_s390_vm_cpu_subfunc)))
 881                 return -EFAULT;
 882         return 0;
 883 }
 884 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 885 {
 886         int ret = -ENXIO;
 887
 888         switch (attr->attr) {
 889         case KVM_S390_VM_CPU_PROCESSOR:
 890                 ret = kvm_s390_get_processor(kvm, attr);
 891                 break;
 892         case KVM_S390_VM_CPU_MACHINE:
 893                 ret = kvm_s390_get_machine(kvm, attr);
 894                 break;
 895         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 896                 ret = kvm_s390_get_processor_feat(kvm, attr);
 897                 break;
 898         case KVM_S390_VM_CPU_MACHINE_FEAT:
 899                 ret = kvm_s390_get_machine_feat(kvm, attr);
 900                 break;
 901         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 902                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
 903                 break;
 904         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 905                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
 906                 break;
 907         }
 908         return ret;
 909 }
 910
 911 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 912 {
 913         int ret;
 914
 915         switch (attr->group) {
 916         case KVM_S390_VM_MEM_CTRL:
 917                 ret = kvm_s390_set_mem_control(kvm, attr);
 918                 break;
 919         case KVM_S390_VM_TOD:
 920                 ret = kvm_s390_set_tod(kvm, attr);
 921                 break;
 922         case KVM_S390_VM_CPU_MODEL:
 923                 ret = kvm_s390_set_cpu_model(kvm, attr);
 924                 break;
 925         case KVM_S390_VM_CRYPTO:
 926                 ret = kvm_s390_vm_set_crypto(kvm, attr);
 927                 break;
 928         default:
 929                 ret = -ENXIO;
 930                 break;
 931         }
 932
 933         return ret;
 934 }
 935
 936 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 937 {
 938         int ret;
 939
 940         switch (attr->group) {
 941         case KVM_S390_VM_MEM_CTRL:
 942                 ret = kvm_s390_get_mem_control(kvm, attr);
 943                 break;
 944         case KVM_S390_VM_TOD:
 945                 ret = kvm_s390_get_tod(kvm, attr);
 946                 break;
 947         case KVM_S390_VM_CPU_MODEL:
 948                 ret = kvm_s390_get_cpu_model(kvm, attr);
 949                 break;
 950         default:
 951                 ret = -ENXIO;
 952                 break;
 953         }
 954
 955         return ret;
 956 }
 957
 958 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 959 {
 960         int ret;
 961
 962         switch (attr->group) {
 963         case KVM_S390_VM_MEM_CTRL:
 964                 switch (attr->attr) {
 965                 case KVM_S390_VM_MEM_ENABLE_CMMA:
 966                 case KVM_S390_VM_MEM_CLR_CMMA:
 967                 case KVM_S390_VM_MEM_LIMIT_SIZE:
 968                         ret = 0;
 969                         break;
 970                 default:
 971                         ret = -ENXIO;
 972                         break;
 973                 }
 974                 break;
 975         case KVM_S390_VM_TOD:
 976                 switch (attr->attr) {
 977                 case KVM_S390_VM_TOD_LOW:
 978                 case KVM_S390_VM_TOD_HIGH:
 979                         ret = 0;
 980                         break;
 981                 default:
 982                         ret = -ENXIO;
 983                         break;
 984                 }
 985                 break;
 986         case KVM_S390_VM_CPU_MODEL:
 987                 switch (attr->attr) {
 988                 case KVM_S390_VM_CPU_PROCESSOR:
 989                 case KVM_S390_VM_CPU_MACHINE:
 990                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 991                 case KVM_S390_VM_CPU_MACHINE_FEAT:
 992                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 993                         ret = 0;
 994                         break;
 995                 /* configuring subfunctions is not supported yet */
 996                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 997                 default:
 998                         ret = -ENXIO;
 999                         break;
1000                 }
1001                 break;
1002         case KVM_S390_VM_CRYPTO:
1003                 switch (attr->attr) {
1004                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1005                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1006                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1007                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1008                         ret = 0;
1009                         break;
1010                 default:
1011                         ret = -ENXIO;
1012                         break;
1013                 }
1014                 break;
1015         default:
1016                 ret = -ENXIO;
1017                 break;
1018         }
1019
1020         return ret;
1021 }
1022
1023 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1024 {
1025         uint8_t *keys;
1026         uint64_t hva;
1027         unsigned long curkey;
1028         int i, r = 0;
1029
1030         if (args->flags != 0)
1031                 return -EINVAL;
1032
1033         /* Is this guest using storage keys? */
1034         if (!mm_use_skey(current->mm))
1035                 return KVM_S390_GET_SKEYS_NONE;
1036
1037         /* Enforce sane limit on memory allocation */
1038         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1039                 return -EINVAL;
1040
1041         keys = kmalloc_array(args->count, sizeof(uint8_t),
1042                              GFP_KERNEL | __GFP_NOWARN);
1043         if (!keys)
1044                 keys = vmalloc(sizeof(uint8_t) * args->count);
1045         if (!keys)
1046                 return -ENOMEM;
1047
1048         for (i = 0; i < args->count; i++) {
1049                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1050                 if (kvm_is_error_hva(hva)) {
1051                         r = -EFAULT;
1052                         goto out;
1053                 }
1054
1055                 curkey = get_guest_storage_key(current->mm, hva);
1056                 if (IS_ERR_VALUE(curkey)) {
1057                         r = curkey;
1058                         goto out;
1059                 }
1060                 keys[i] = curkey;
1061         }
1062
1063         r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1064                          sizeof(uint8_t) * args->count);
1065         if (r)
1066                 r = -EFAULT;
1067 out:
1068         kvfree(keys);
1069         return r;
1070 }
1071
1072 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1073 {
1074         uint8_t *keys;
1075         uint64_t hva;
1076         int i, r = 0;
1077
1078         if (args->flags != 0)
1079                 return -EINVAL;
1080
1081         /* Enforce sane limit on memory allocation */
1082         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1083                 return -EINVAL;
1084
1085         keys = kmalloc_array(args->count, sizeof(uint8_t),
1086                              GFP_KERNEL | __GFP_NOWARN);
1087         if (!keys)
1088                 keys = vmalloc(sizeof(uint8_t) * args->count);
1089         if (!keys)
1090                 return -ENOMEM;
1091
1092         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1093                            sizeof(uint8_t) * args->count);
1094         if (r) {
1095                 r = -EFAULT;
1096                 goto out;
1097         }
1098
1099         /* Enable storage key handling for the guest */
1100         r = s390_enable_skey();
1101         if (r)
1102                 goto out;
1103
1104         for (i = 0; i < args->count; i++) {
1105                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1106                 if (kvm_is_error_hva(hva)) {
1107                         r = -EFAULT;
1108                         goto out;
1109                 }
1110
1111                 /* Lowest order bit is reserved */
1112                 if (keys[i] & 0x01) {
1113                         r = -EINVAL;
1114                         goto out;
1115                 }
1116
1117                 r = set_guest_storage_key(current->mm, hva,
1118                                           (unsigned long)keys[i], 0);
1119                 if (r)
1120                         goto out;
1121         }
1122 out:
1123         kvfree(keys);
1124         return r;
1125 }
1126
1127 long kvm_arch_vm_ioctl(struct file *filp,
1128                        unsigned int ioctl, unsigned long arg)
1129 {
1130         struct kvm *kvm = filp->private_data;
1131         void __user *argp = (void __user *)arg;
1132         struct kvm_device_attr attr;
1133         int r;
1134
1135         switch (ioctl) {
1136         case KVM_S390_INTERRUPT: {
1137                 struct kvm_s390_interrupt s390int;
1138
1139                 r = -EFAULT;
1140                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1141                         break;
1142                 r = kvm_s390_inject_vm(kvm, &s390int);
1143                 break;
1144         }
1145         case KVM_ENABLE_CAP: {
1146                 struct kvm_enable_cap cap;
1147                 r = -EFAULT;
1148                 if (copy_from_user(&cap, argp, sizeof(cap)))
1149                         break;
1150                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1151                 break;
1152         }
1153         case KVM_CREATE_IRQCHIP: {
1154                 struct kvm_irq_routing_entry routing;
1155
1156                 r = -EINVAL;
1157                 if (kvm->arch.use_irqchip) {
1158                         /* Set up dummy routing. */
1159                         memset(&routing, 0, sizeof(routing));
1160                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1161                 }
1162                 break;
1163         }
1164         case KVM_SET_DEVICE_ATTR: {
1165                 r = -EFAULT;
1166                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1167                         break;
1168                 r = kvm_s390_vm_set_attr(kvm, &attr);
1169                 break;
1170         }
1171         case KVM_GET_DEVICE_ATTR: {
1172                 r = -EFAULT;
1173                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1174                         break;
1175                 r = kvm_s390_vm_get_attr(kvm, &attr);
1176                 break;
1177         }
1178         case KVM_HAS_DEVICE_ATTR: {
1179                 r = -EFAULT;
1180                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1181                         break;
1182                 r = kvm_s390_vm_has_attr(kvm, &attr);
1183                 break;
1184         }
1185         case KVM_S390_GET_SKEYS: {
1186                 struct kvm_s390_skeys args;
1187
1188                 r = -EFAULT;
1189                 if (copy_from_user(&args, argp,
1190                                    sizeof(struct kvm_s390_skeys)))
1191                         break;
1192                 r = kvm_s390_get_skeys(kvm, &args);
1193                 break;
1194         }
1195         case KVM_S390_SET_SKEYS: {
1196                 struct kvm_s390_skeys args;
1197
1198                 r = -EFAULT;
1199                 if (copy_from_user(&args, argp,
1200                                    sizeof(struct kvm_s390_skeys)))
1201                         break;
1202                 r = kvm_s390_set_skeys(kvm, &args);
1203                 break;
1204         }
1205         default:
1206                 r = -ENOTTY;
1207         }
1208
1209         return r;
1210 }
1211
1212 static int kvm_s390_query_ap_config(u8 *config)
1213 {
1214         u32 fcn_code = 0x04000000UL;
1215         u32 cc = 0;
1216
1217         memset(config, 0, 128);
1218         asm volatile(
1219                 "lgr 0,%1\n"
1220                 "lgr 2,%2\n"
1221                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1222                 "0: ipm %0\n"
1223                 "srl %0,28\n"
1224                 "1:\n"
1225                 EX_TABLE(0b, 1b)
1226                 : "+r" (cc)
1227                 : "r" (fcn_code), "r" (config)
1228                 : "cc", "0", "2", "memory"
1229         );
1230
1231         return cc;
1232 }
1233
1234 static int kvm_s390_apxa_installed(void)
1235 {
1236         u8 config[128];
1237         int cc;
1238
1239         if (test_facility(12)) {
1240                 cc = kvm_s390_query_ap_config(config);
1241
1242                 if (cc)
1243                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1244                 else
1245                         return config[0] & 0x40;
1246         }
1247
1248         return 0;
1249 }
1250
1251 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1252 {
1253         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1254
1255         if (kvm_s390_apxa_installed())
1256                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1257         else
1258                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1259 }
1260
1261 static u64 kvm_s390_get_initial_cpuid(void)
1262 {
1263         struct cpuid cpuid;
1264
1265         get_cpu_id(&cpuid);
1266         cpuid.version = 0xff;
1267         return *((u64 *) &cpuid);
1268 }
1269
1270 static void kvm_s390_crypto_init(struct kvm *kvm)
1271 {
1272         if (!test_kvm_facility(kvm, 76))
1273                 return;
1274
1275         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1276         kvm_s390_set_crycb_format(kvm);
1277
1278         /* Enable AES/DEA protected key functions by default */
1279         kvm->arch.crypto.aes_kw = 1;
1280         kvm->arch.crypto.dea_kw = 1;
1281         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1282                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1283         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1284                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1285 }
1286
1287 static void sca_dispose(struct kvm *kvm)
1288 {
1289         if (kvm->arch.use_esca)
1290                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1291         else
1292                 free_page((unsigned long)(kvm->arch.sca));
1293         kvm->arch.sca = NULL;
1294 }
1295
1296 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1297 {
1298         gfp_t alloc_flags = GFP_KERNEL;
1299         int i, rc;
1300         char debug_name[16];
1301         static unsigned long sca_offset;
1302
1303         rc = -EINVAL;
1304 #ifdef CONFIG_KVM_S390_UCONTROL
1305         if (type & ~KVM_VM_S390_UCONTROL)
1306                 goto out_err;
1307         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1308                 goto out_err;
1309 #else
1310         if (type)
1311                 goto out_err;
1312 #endif
1313
1314         rc = s390_enable_sie();
1315         if (rc)
1316                 goto out_err;
1317
1318         rc = -ENOMEM;
1319
1320         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1321
1322         kvm->arch.use_esca = 0; /* start with basic SCA */
1323         if (!sclp.has_64bscao)
1324                 alloc_flags |= GFP_DMA;
1325         rwlock_init(&kvm->arch.sca_lock);
1326         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1327         if (!kvm->arch.sca)
1328                 goto out_err;
1329         spin_lock(&kvm_lock);
1330         sca_offset += 16;
1331         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1332                 sca_offset = 0;
1333         kvm->arch.sca = (struct bsca_block *)
1334                         ((char *) kvm->arch.sca + sca_offset);
1335         spin_unlock(&kvm_lock);
1336
1337         sprintf(debug_name, "kvm-%u", current->pid);
1338
1339         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1340         if (!kvm->arch.dbf)
1341                 goto out_err;
1342
1343         kvm->arch.sie_page2 =
1344              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1345         if (!kvm->arch.sie_page2)
1346                 goto out_err;
1347
1348         /* Populate the facility mask initially. */
1349         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1350                S390_ARCH_FAC_LIST_SIZE_BYTE);
1351         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1352                 if (i < kvm_s390_fac_list_mask_size())
1353                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1354                 else
1355                         kvm->arch.model.fac_mask[i] = 0UL;
1356         }
1357
1358         /* Populate the facility list initially. */
1359         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1360         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1361                S390_ARCH_FAC_LIST_SIZE_BYTE);
1362
1363         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1364         set_kvm_facility(kvm->arch.model.fac_list, 74);
1365
1366         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1367         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1368
1369         kvm_s390_crypto_init(kvm);
1370
1371         spin_lock_init(&kvm->arch.float_int.lock);
1372         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1373                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1374         init_waitqueue_head(&kvm->arch.ipte_wq);
1375         mutex_init(&kvm->arch.ipte_mutex);
1376
1377         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1378         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1379
1380         if (type & KVM_VM_S390_UCONTROL) {
1381                 kvm->arch.gmap = NULL;
1382                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1383         } else {
1384                 if (sclp.hamax == U64_MAX)
1385                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1386                 else
1387                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1388                                                     sclp.hamax + 1);
1389                 kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1390                 if (!kvm->arch.gmap)
1391                         goto out_err;
1392                 kvm->arch.gmap->private = kvm;
1393                 kvm->arch.gmap->pfault_enabled = 0;
1394         }
1395
1396         kvm->arch.css_support = 0;
1397         kvm->arch.use_irqchip = 0;
1398         kvm->arch.epoch = 0;
1399
1400         spin_lock_init(&kvm->arch.start_stop_lock);
1401         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1402
1403         return 0;
1404 out_err:
1405         free_page((unsigned long)kvm->arch.sie_page2);
1406         debug_unregister(kvm->arch.dbf);
1407         sca_dispose(kvm);
1408         KVM_EVENT(3, "creation of vm failed: %d", rc);
1409         return rc;
1410 }
1411
1412 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1413 {
1414         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1415         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1416         kvm_s390_clear_local_irqs(vcpu);
1417         kvm_clear_async_pf_completion_queue(vcpu);
1418         if (!kvm_is_ucontrol(vcpu->kvm))
1419                 sca_del_vcpu(vcpu);
1420
1421         if (kvm_is_ucontrol(vcpu->kvm))
1422                 gmap_free(vcpu->arch.gmap);
1423
1424         if (vcpu->kvm->arch.use_cmma)
1425                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1426         free_page((unsigned long)(vcpu->arch.sie_block));
1427
1428         kvm_vcpu_uninit(vcpu);
1429         kmem_cache_free(kvm_vcpu_cache, vcpu);
1430 }
1431
1432 static void kvm_free_vcpus(struct kvm *kvm)
1433 {
1434         unsigned int i;
1435         struct kvm_vcpu *vcpu;
1436
1437         kvm_for_each_vcpu(i, vcpu, kvm)
1438                 kvm_arch_vcpu_destroy(vcpu);
1439
1440         mutex_lock(&kvm->lock);
1441         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1442                 kvm->vcpus[i] = NULL;
1443
1444         atomic_set(&kvm->online_vcpus, 0);
1445         mutex_unlock(&kvm->lock);
1446 }
1447
1448 void kvm_arch_destroy_vm(struct kvm *kvm)
1449 {
1450         kvm_free_vcpus(kvm);
1451         sca_dispose(kvm);
1452         debug_unregister(kvm->arch.dbf);
1453         free_page((unsigned long)kvm->arch.sie_page2);
1454         if (!kvm_is_ucontrol(kvm))
1455                 gmap_free(kvm->arch.gmap);
1456         kvm_s390_destroy_adapters(kvm);
1457         kvm_s390_clear_float_irqs(kvm);
1458         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1459 }
1460
1461 /* Section: vcpu related */
1462 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1463 {
1464         vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1465         if (!vcpu->arch.gmap)
1466                 return -ENOMEM;
1467         vcpu->arch.gmap->private = vcpu->kvm;
1468
1469         return 0;
1470 }
1471
1472 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1473 {
1474         read_lock(&vcpu->kvm->arch.sca_lock);
1475         if (vcpu->kvm->arch.use_esca) {
1476                 struct esca_block *sca = vcpu->kvm->arch.sca;
1477
1478                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1479                 sca->cpu[vcpu->vcpu_id].sda = 0;
1480         } else {
1481                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1482
1483                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1484                 sca->cpu[vcpu->vcpu_id].sda = 0;
1485         }
1486         read_unlock(&vcpu->kvm->arch.sca_lock);
1487 }
1488
1489 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1490 {
1491         read_lock(&vcpu->kvm->arch.sca_lock);
1492         if (vcpu->kvm->arch.use_esca) {
1493                 struct esca_block *sca = vcpu->kvm->arch.sca;
1494
1495                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1496                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1497                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1498                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1499                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1500         } else {
1501                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1502
1503                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1504                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1505                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1506                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1507         }
1508         read_unlock(&vcpu->kvm->arch.sca_lock);
1509 }
1510
1511 /* Basic SCA to Extended SCA data copy routines */
1512 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1513 {
1514         d->sda = s->sda;
1515         d->sigp_ctrl.c = s->sigp_ctrl.c;
1516         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1517 }
1518
1519 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1520 {
1521         int i;
1522
1523         d->ipte_control = s->ipte_control;
1524         d->mcn[0] = s->mcn;
1525         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1526                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1527 }
1528
1529 static int sca_switch_to_extended(struct kvm *kvm)
1530 {
1531         struct bsca_block *old_sca = kvm->arch.sca;
1532         struct esca_block *new_sca;
1533         struct kvm_vcpu *vcpu;
1534         unsigned int vcpu_idx;
1535         u32 scaol, scaoh;
1536
1537         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1538         if (!new_sca)
1539                 return -ENOMEM;
1540
1541         scaoh = (u32)((u64)(new_sca) >> 32);
1542         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1543
1544         kvm_s390_vcpu_block_all(kvm);
1545         write_lock(&kvm->arch.sca_lock);
1546
1547         sca_copy_b_to_e(new_sca, old_sca);
1548
1549         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1550                 vcpu->arch.sie_block->scaoh = scaoh;
1551                 vcpu->arch.sie_block->scaol = scaol;
1552                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1553         }
1554         kvm->arch.sca = new_sca;
1555         kvm->arch.use_esca = 1;
1556
1557         write_unlock(&kvm->arch.sca_lock);
1558         kvm_s390_vcpu_unblock_all(kvm);
1559
1560         free_page((unsigned long)old_sca);
1561
1562         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1563                  old_sca, kvm->arch.sca);
1564         return 0;
1565 }
1566
1567 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1568 {
1569         int rc;
1570
1571         if (id < KVM_S390_BSCA_CPU_SLOTS)
1572                 return true;
1573         if (!sclp.has_esca || !sclp.has_64bscao)
1574                 return false;
1575
1576         mutex_lock(&kvm->lock);
1577         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1578         mutex_unlock(&kvm->lock);
1579
1580         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1581 }
1582
1583 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1584 {
1585         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1586         kvm_clear_async_pf_completion_queue(vcpu);
1587         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1588                                     KVM_SYNC_GPRS |
1589                                     KVM_SYNC_ACRS |
1590                                     KVM_SYNC_CRS |
1591                                     KVM_SYNC_ARCH0 |
1592                                     KVM_SYNC_PFAULT;
1593         if (test_kvm_facility(vcpu->kvm, 64))
1594                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1595         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1596          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1597          */
1598         if (MACHINE_HAS_VX)
1599                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1600         else
1601                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1602
1603         if (kvm_is_ucontrol(vcpu->kvm))
1604                 return __kvm_ucontrol_vcpu_init(vcpu);
1605
1606         return 0;
1607 }
1608
1609 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1610 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1611 {
1612         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1613         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1614         vcpu->arch.cputm_start = get_tod_clock_fast();
1615         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1616 }
1617
1618 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1619 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1620 {
1621         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1622         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1623         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1624         vcpu->arch.cputm_start = 0;
1625         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1626 }
1627
1628 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1629 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1630 {
1631         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1632         vcpu->arch.cputm_enabled = true;
1633         __start_cpu_timer_accounting(vcpu);
1634 }
1635
1636 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1637 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1638 {
1639         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1640         __stop_cpu_timer_accounting(vcpu);
1641         vcpu->arch.cputm_enabled = false;
1642 }
1643
1644 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1645 {
1646         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1647         __enable_cpu_timer_accounting(vcpu);
1648         preempt_enable();
1649 }
1650
1651 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1652 {
1653         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1654         __disable_cpu_timer_accounting(vcpu);
1655         preempt_enable();
1656 }
1657
1658 /* set the cpu timer - may only be called from the VCPU thread itself */
1659 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1660 {
1661         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1662         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1663         if (vcpu->arch.cputm_enabled)
1664                 vcpu->arch.cputm_start = get_tod_clock_fast();
1665         vcpu->arch.sie_block->cputm = cputm;
1666         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1667         preempt_enable();
1668 }
1669
1670 /* update and get the cpu timer - can also be called from other VCPU threads */
1671 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1672 {
1673         unsigned int seq;
1674         __u64 value;
1675
1676         if (unlikely(!vcpu->arch.cputm_enabled))
1677                 return vcpu->arch.sie_block->cputm;
1678
1679         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1680         do {
1681                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1682                 /*
1683                  * If the writer would ever execute a read in the critical
1684                  * section, e.g. in irq context, we have a deadlock.
1685                  */
1686                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1687                 value = vcpu->arch.sie_block->cputm;
1688                 /* if cputm_start is 0, accounting is being started/stopped */
1689                 if (likely(vcpu->arch.cputm_start))
1690                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1691         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1692         preempt_enable();
1693         return value;
1694 }
1695
1696 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1697 {
1698         /* Save host register state */
1699         save_fpu_regs();
1700         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1701         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1702
1703         if (MACHINE_HAS_VX)
1704                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1705         else
1706                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1707         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1708         if (test_fp_ctl(current->thread.fpu.fpc))
1709                 /* User space provided an invalid FPC, let's clear it */
1710                 current->thread.fpu.fpc = 0;
1711
1712         save_access_regs(vcpu->arch.host_acrs);
1713         restore_access_regs(vcpu->run->s.regs.acrs);
1714         gmap_enable(vcpu->arch.gmap);
1715         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1716         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1717                 __start_cpu_timer_accounting(vcpu);
1718         vcpu->cpu = cpu;
1719 }
1720
1721 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1722 {
1723         vcpu->cpu = -1;
1724         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1725                 __stop_cpu_timer_accounting(vcpu);
1726         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1727         gmap_disable(vcpu->arch.gmap);
1728
1729         /* Save guest register state */
1730         save_fpu_regs();
1731         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1732
1733         /* Restore host register state */
1734         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1735         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1736
1737         save_access_regs(vcpu->run->s.regs.acrs);
1738         restore_access_regs(vcpu->arch.host_acrs);
1739 }
1740
1741 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1742 {
1743         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1744         vcpu->arch.sie_block->gpsw.mask = 0UL;
1745         vcpu->arch.sie_block->gpsw.addr = 0UL;
1746         kvm_s390_set_prefix(vcpu, 0);
1747         kvm_s390_set_cpu_timer(vcpu, 0);
1748         vcpu->arch.sie_block->ckc       = 0UL;
1749         vcpu->arch.sie_block->todpr     = 0;
1750         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1751         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1752         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1753         /* make sure the new fpc will be lazily loaded */
1754         save_fpu_regs();
1755         current->thread.fpu.fpc = 0;
1756         vcpu->arch.sie_block->gbea = 1;
1757         vcpu->arch.sie_block->pp = 0;
1758         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1759         kvm_clear_async_pf_completion_queue(vcpu);
1760         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1761                 kvm_s390_vcpu_stop(vcpu);
1762         kvm_s390_clear_local_irqs(vcpu);
1763 }
1764
1765 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1766 {
1767         mutex_lock(&vcpu->kvm->lock);
1768         preempt_disable();
1769         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1770         preempt_enable();
1771         mutex_unlock(&vcpu->kvm->lock);
1772         if (!kvm_is_ucontrol(vcpu->kvm)) {
1773                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1774                 sca_add_vcpu(vcpu);
1775         }
1776
1777 }
1778
1779 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1780 {
1781         if (!test_kvm_facility(vcpu->kvm, 76))
1782                 return;
1783
1784         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1785
1786         if (vcpu->kvm->arch.crypto.aes_kw)
1787                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1788         if (vcpu->kvm->arch.crypto.dea_kw)
1789                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1790
1791         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1792 }
1793
1794 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1795 {
1796         free_page(vcpu->arch.sie_block->cbrlo);
1797         vcpu->arch.sie_block->cbrlo = 0;
1798 }
1799
1800 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1801 {
1802         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1803         if (!vcpu->arch.sie_block->cbrlo)
1804                 return -ENOMEM;
1805
1806         vcpu->arch.sie_block->ecb2 |= 0x80;
1807         vcpu->arch.sie_block->ecb2 &= ~0x08;
1808         return 0;
1809 }
1810
1811 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1812 {
1813         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1814
1815         vcpu->arch.sie_block->ibc = model->ibc;
1816         if (test_kvm_facility(vcpu->kvm, 7))
1817                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1818 }
1819
1820 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1821 {
1822         int rc = 0;
1823
1824         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1825                                                     CPUSTAT_SM |
1826                                                     CPUSTAT_STOPPED);
1827
1828         if (test_kvm_facility(vcpu->kvm, 78))
1829                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1830         else if (test_kvm_facility(vcpu->kvm, 8))
1831                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1832
1833         kvm_s390_vcpu_setup_model(vcpu);
1834
1835         vcpu->arch.sie_block->ecb = 0x02;
1836         if (test_kvm_facility(vcpu->kvm, 9))
1837                 vcpu->arch.sie_block->ecb |= 0x04;
1838         if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1839                 vcpu->arch.sie_block->ecb |= 0x10;
1840
1841         if (test_kvm_facility(vcpu->kvm, 8))
1842                 vcpu->arch.sie_block->ecb2 |= 0x08;
1843         vcpu->arch.sie_block->eca   = 0xC1002000U;
1844         if (sclp.has_siif)
1845                 vcpu->arch.sie_block->eca |= 1;
1846         if (sclp.has_sigpif)
1847                 vcpu->arch.sie_block->eca |= 0x10000000U;
1848         if (test_kvm_facility(vcpu->kvm, 64))
1849                 vcpu->arch.sie_block->ecb3 |= 0x01;
1850         if (test_kvm_facility(vcpu->kvm, 129)) {
1851                 vcpu->arch.sie_block->eca |= 0x00020000;
1852                 vcpu->arch.sie_block->ecd |= 0x20000000;
1853         }
1854         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1855         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1856         if (test_kvm_facility(vcpu->kvm, 74))
1857                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1858
1859         if (vcpu->kvm->arch.use_cmma) {
1860                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1861                 if (rc)
1862                         return rc;
1863         }
1864         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1865         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1866
1867         kvm_s390_vcpu_crypto_setup(vcpu);
1868
1869         return rc;
1870 }
1871
1872 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1873                                       unsigned int id)
1874 {
1875         struct kvm_vcpu *vcpu;
1876         struct sie_page *sie_page;
1877         int rc = -EINVAL;
1878
1879         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1880                 goto out;
1881
1882         rc = -ENOMEM;
1883
1884         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1885         if (!vcpu)
1886                 goto out;
1887
1888         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1889         if (!sie_page)
1890                 goto out_free_cpu;
1891
1892         vcpu->arch.sie_block = &sie_page->sie_block;
1893         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1894
1895         vcpu->arch.sie_block->icpua = id;
1896         spin_lock_init(&vcpu->arch.local_int.lock);
1897         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1898         vcpu->arch.local_int.wq = &vcpu->wq;
1899         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1900         seqcount_init(&vcpu->arch.cputm_seqcount);
1901
1902         rc = kvm_vcpu_init(vcpu, kvm, id);
1903         if (rc)
1904                 goto out_free_sie_block;
1905         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1906                  vcpu->arch.sie_block);
1907         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1908
1909         return vcpu;
1910 out_free_sie_block:
1911         free_page((unsigned long)(vcpu->arch.sie_block));
1912 out_free_cpu:
1913         kmem_cache_free(kvm_vcpu_cache, vcpu);
1914 out:
1915         return ERR_PTR(rc);
1916 }
1917
1918 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1919 {
1920         return kvm_s390_vcpu_has_irq(vcpu, 0);
1921 }
1922
1923 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1924 {
1925         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1926         exit_sie(vcpu);
1927 }
1928
1929 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1930 {
1931         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1932 }
1933
1934 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1935 {
1936         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1937         exit_sie(vcpu);
1938 }
1939
1940 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1941 {
1942         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1943 }
1944
1945 /*
1946  * Kick a guest cpu out of SIE and wait until SIE is not running.
1947  * If the CPU is not running (e.g. waiting as idle) the function will
1948  * return immediately. */
1949 void exit_sie(struct kvm_vcpu *vcpu)
1950 {
1951         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1952         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1953                 cpu_relax();
1954 }
1955
1956 /* Kick a guest cpu out of SIE to process a request synchronously */
1957 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1958 {
1959         kvm_make_request(req, vcpu);
1960         kvm_s390_vcpu_request(vcpu);
1961 }
1962
1963 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1964 {
1965         int i;
1966         struct kvm *kvm = gmap->private;
1967         struct kvm_vcpu *vcpu;
1968
1969         kvm_for_each_vcpu(i, vcpu, kvm) {
1970                 /* match against both prefix pages */
1971                 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1972                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1973                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1974                 }
1975         }
1976 }
1977
1978 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1979 {
1980         /* kvm common code refers to this, but never calls it */
1981         BUG();
1982         return 0;
1983 }
1984
1985 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1986                                            struct kvm_one_reg *reg)
1987 {
1988         int r = -EINVAL;
1989
1990         switch (reg->id) {
1991         case KVM_REG_S390_TODPR:
1992                 r = put_user(vcpu->arch.sie_block->todpr,
1993                              (u32 __user *)reg->addr);
1994                 break;
1995         case KVM_REG_S390_EPOCHDIFF:
1996                 r = put_user(vcpu->arch.sie_block->epoch,
1997                              (u64 __user *)reg->addr);
1998                 break;
1999         case KVM_REG_S390_CPU_TIMER:
2000                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2001                              (u64 __user *)reg->addr);
2002                 break;
2003         case KVM_REG_S390_CLOCK_COMP:
2004                 r = put_user(vcpu->arch.sie_block->ckc,
2005                              (u64 __user *)reg->addr);
2006                 break;
2007         case KVM_REG_S390_PFTOKEN:
2008                 r = put_user(vcpu->arch.pfault_token,
2009                              (u64 __user *)reg->addr);
2010                 break;
2011         case KVM_REG_S390_PFCOMPARE:
2012                 r = put_user(vcpu->arch.pfault_compare,
2013                              (u64 __user *)reg->addr);
2014                 break;
2015         case KVM_REG_S390_PFSELECT:
2016                 r = put_user(vcpu->arch.pfault_select,
2017                              (u64 __user *)reg->addr);
2018                 break;
2019         case KVM_REG_S390_PP:
2020                 r = put_user(vcpu->arch.sie_block->pp,
2021                              (u64 __user *)reg->addr);
2022                 break;
2023         case KVM_REG_S390_GBEA:
2024                 r = put_user(vcpu->arch.sie_block->gbea,
2025                              (u64 __user *)reg->addr);
2026                 break;
2027         default:
2028                 break;
2029         }
2030
2031         return r;
2032 }
2033
2034 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2035                                            struct kvm_one_reg *reg)
2036 {
2037         int r = -EINVAL;
2038         __u64 val;
2039
2040         switch (reg->id) {
2041         case KVM_REG_S390_TODPR:
2042                 r = get_user(vcpu->arch.sie_block->todpr,
2043                              (u32 __user *)reg->addr);
2044                 break;
2045         case KVM_REG_S390_EPOCHDIFF:
2046                 r = get_user(vcpu->arch.sie_block->epoch,
2047                              (u64 __user *)reg->addr);
2048                 break;
2049         case KVM_REG_S390_CPU_TIMER:
2050                 r = get_user(val, (u64 __user *)reg->addr);
2051                 if (!r)
2052                         kvm_s390_set_cpu_timer(vcpu, val);
2053                 break;
2054         case KVM_REG_S390_CLOCK_COMP:
2055                 r = get_user(vcpu->arch.sie_block->ckc,
2056                              (u64 __user *)reg->addr);
2057                 break;
2058         case KVM_REG_S390_PFTOKEN:
2059                 r = get_user(vcpu->arch.pfault_token,
2060                              (u64 __user *)reg->addr);
2061                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2062                         kvm_clear_async_pf_completion_queue(vcpu);
2063                 break;
2064         case KVM_REG_S390_PFCOMPARE:
2065                 r = get_user(vcpu->arch.pfault_compare,
2066                              (u64 __user *)reg->addr);
2067                 break;
2068         case KVM_REG_S390_PFSELECT:
2069                 r = get_user(vcpu->arch.pfault_select,
2070                              (u64 __user *)reg->addr);
2071                 break;
2072         case KVM_REG_S390_PP:
2073                 r = get_user(vcpu->arch.sie_block->pp,
2074                              (u64 __user *)reg->addr);
2075                 break;
2076         case KVM_REG_S390_GBEA:
2077                 r = get_user(vcpu->arch.sie_block->gbea,
2078                              (u64 __user *)reg->addr);
2079                 break;
2080         default:
2081                 break;
2082         }
2083
2084         return r;
2085 }
2086
2087 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2088 {
2089         kvm_s390_vcpu_initial_reset(vcpu);
2090         return 0;
2091 }
2092
2093 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2094 {
2095         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2096         return 0;
2097 }
2098
2099 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2100 {
2101         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2102         return 0;
2103 }
2104
2105 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2106                                   struct kvm_sregs *sregs)
2107 {
2108         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2109         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2110         restore_access_regs(vcpu->run->s.regs.acrs);
2111         return 0;
2112 }
2113
2114 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2115                                   struct kvm_sregs *sregs)
2116 {
2117         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2118         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2119         return 0;
2120 }
2121
2122 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2123 {
2124         /* make sure the new values will be lazily loaded */
2125         save_fpu_regs();
2126         if (test_fp_ctl(fpu->fpc))
2127                 return -EINVAL;
2128         current->thread.fpu.fpc = fpu->fpc;
2129         if (MACHINE_HAS_VX)
2130                 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
2131         else
2132                 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
2133         return 0;
2134 }
2135
2136 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2137 {
2138         /* make sure we have the latest values */
2139         save_fpu_regs();
2140         if (MACHINE_HAS_VX)
2141                 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
2142         else
2143                 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
2144         fpu->fpc = current->thread.fpu.fpc;
2145         return 0;
2146 }
2147
2148 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2149 {
2150         int rc = 0;
2151
2152         if (!is_vcpu_stopped(vcpu))
2153                 rc = -EBUSY;
2154         else {
2155                 vcpu->run->psw_mask = psw.mask;
2156                 vcpu->run->psw_addr = psw.addr;
2157         }
2158         return rc;
2159 }
2160
2161 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2162                                   struct kvm_translation *tr)
2163 {
2164         return -EINVAL; /* not implemented yet */
2165 }
2166
2167 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2168                               KVM_GUESTDBG_USE_HW_BP | \
2169                               KVM_GUESTDBG_ENABLE)
2170
2171 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2172                                         struct kvm_guest_debug *dbg)
2173 {
2174         int rc = 0;
2175
2176         vcpu->guest_debug = 0;
2177         kvm_s390_clear_bp_data(vcpu);
2178
2179         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2180                 return -EINVAL;
2181         if (!sclp.has_gpere)
2182                 return -EINVAL;
2183
2184         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2185                 vcpu->guest_debug = dbg->control;
2186                 /* enforce guest PER */
2187                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2188
2189                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2190                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2191         } else {
2192                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2193                 vcpu->arch.guestdbg.last_bp = 0;
2194         }
2195
2196         if (rc) {
2197                 vcpu->guest_debug = 0;
2198                 kvm_s390_clear_bp_data(vcpu);
2199                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2200         }
2201
2202         return rc;
2203 }
2204
2205 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2206                                     struct kvm_mp_state *mp_state)
2207 {
2208         /* CHECK_STOP and LOAD are not supported yet */
2209         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2210                                        KVM_MP_STATE_OPERATING;
2211 }
2212
2213 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2214                                     struct kvm_mp_state *mp_state)
2215 {
2216         int rc = 0;
2217
2218         /* user space knows about this interface - let it control the state */
2219         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2220
2221         switch (mp_state->mp_state) {
2222         case KVM_MP_STATE_STOPPED:
2223                 kvm_s390_vcpu_stop(vcpu);
2224                 break;
2225         case KVM_MP_STATE_OPERATING:
2226                 kvm_s390_vcpu_start(vcpu);
2227                 break;
2228         case KVM_MP_STATE_LOAD:
2229         case KVM_MP_STATE_CHECK_STOP:
2230                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2231         default:
2232                 rc = -ENXIO;
2233         }
2234
2235         return rc;
2236 }
2237
2238 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2239 {
2240         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2241 }
2242
2243 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2244 {
2245 retry:
2246         kvm_s390_vcpu_request_handled(vcpu);
2247         if (!vcpu->requests)
2248                 return 0;
2249         /*
2250          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2251          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
2252          * This ensures that the ipte instruction for this request has
2253          * already finished. We might race against a second unmapper that
2254          * wants to set the blocking bit. Lets just retry the request loop.
2255          */
2256         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2257                 int rc;
2258                 rc = gmap_ipte_notify(vcpu->arch.gmap,
2259                                       kvm_s390_get_prefix(vcpu),
2260                                       PAGE_SIZE * 2);
2261                 if (rc)
2262                         return rc;
2263                 goto retry;
2264         }
2265
2266         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2267                 vcpu->arch.sie_block->ihcpu = 0xffff;
2268                 goto retry;
2269         }
2270
2271         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2272                 if (!ibs_enabled(vcpu)) {
2273                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2274                         atomic_or(CPUSTAT_IBS,
2275                                         &vcpu->arch.sie_block->cpuflags);
2276                 }
2277                 goto retry;
2278         }
2279
2280         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2281                 if (ibs_enabled(vcpu)) {
2282                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2283                         atomic_andnot(CPUSTAT_IBS,
2284                                           &vcpu->arch.sie_block->cpuflags);
2285                 }
2286                 goto retry;
2287         }
2288
2289         /* nothing to do, just clear the request */
2290         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2291
2292         return 0;
2293 }
2294
2295 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2296 {
2297         struct kvm_vcpu *vcpu;
2298         int i;
2299
2300         mutex_lock(&kvm->lock);
2301         preempt_disable();
2302         kvm->arch.epoch = tod - get_tod_clock();
2303         kvm_s390_vcpu_block_all(kvm);
2304         kvm_for_each_vcpu(i, vcpu, kvm)
2305                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2306         kvm_s390_vcpu_unblock_all(kvm);
2307         preempt_enable();
2308         mutex_unlock(&kvm->lock);
2309 }
2310
2311 /**
2312  * kvm_arch_fault_in_page - fault-in guest page if necessary
2313  * @vcpu: The corresponding virtual cpu
2314  * @gpa: Guest physical address
2315  * @writable: Whether the page should be writable or not
2316  *
2317  * Make sure that a guest page has been faulted-in on the host.
2318  *
2319  * Return: Zero on success, negative error code otherwise.
2320  */
2321 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2322 {
2323         return gmap_fault(vcpu->arch.gmap, gpa,
2324                           writable ? FAULT_FLAG_WRITE : 0);
2325 }
2326
2327 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2328                                       unsigned long token)
2329 {
2330         struct kvm_s390_interrupt inti;
2331         struct kvm_s390_irq irq;
2332
2333         if (start_token) {
2334                 irq.u.ext.ext_params2 = token;
2335                 irq.type = KVM_S390_INT_PFAULT_INIT;
2336                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2337         } else {
2338                 inti.type = KVM_S390_INT_PFAULT_DONE;
2339                 inti.parm64 = token;
2340                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2341         }
2342 }
2343
2344 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2345                                      struct kvm_async_pf *work)
2346 {
2347         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2348         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2349 }
2350
2351 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2352                                  struct kvm_async_pf *work)
2353 {
2354         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2355         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2356 }
2357
2358 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2359                                struct kvm_async_pf *work)
2360 {
2361         /* s390 will always inject the page directly */
2362 }
2363
2364 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2365 {
2366         /*
2367          * s390 will always inject the page directly,
2368          * but we still want check_async_completion to cleanup
2369          */
2370         return true;
2371 }
2372
2373 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2374 {
2375         hva_t hva;
2376         struct kvm_arch_async_pf arch;
2377         int rc;
2378
2379         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2380                 return 0;
2381         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2382             vcpu->arch.pfault_compare)
2383                 return 0;
2384         if (psw_extint_disabled(vcpu))
2385                 return 0;
2386         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2387                 return 0;
2388         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2389                 return 0;
2390         if (!vcpu->arch.gmap->pfault_enabled)
2391                 return 0;
2392
2393         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2394         hva += current->thread.gmap_addr & ~PAGE_MASK;
2395         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2396                 return 0;
2397
2398         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2399         return rc;
2400 }
2401
2402 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2403 {
2404         int rc, cpuflags;
2405
2406         /*
2407          * On s390 notifications for arriving pages will be delivered directly
2408          * to the guest but the house keeping for completed pfaults is
2409          * handled outside the worker.
2410          */
2411         kvm_check_async_pf_completion(vcpu);
2412
2413         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2414         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2415
2416         if (need_resched())
2417                 schedule();
2418
2419         if (test_cpu_flag(CIF_MCCK_PENDING))
2420                 s390_handle_mcck();
2421
2422         if (!kvm_is_ucontrol(vcpu->kvm)) {
2423                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2424                 if (rc)
2425                         return rc;
2426         }
2427
2428         rc = kvm_s390_handle_requests(vcpu);
2429         if (rc)
2430                 return rc;
2431
2432         if (guestdbg_enabled(vcpu)) {
2433                 kvm_s390_backup_guest_per_regs(vcpu);
2434                 kvm_s390_patch_guest_per_regs(vcpu);
2435         }
2436
2437         vcpu->arch.sie_block->icptcode = 0;
2438         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2439         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2440         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2441
2442         return 0;
2443 }
2444
2445 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2446 {
2447         struct kvm_s390_pgm_info pgm_info = {
2448                 .code = PGM_ADDRESSING,
2449         };
2450         u8 opcode, ilen;
2451         int rc;
2452
2453         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2454         trace_kvm_s390_sie_fault(vcpu);
2455
2456         /*
2457          * We want to inject an addressing exception, which is defined as a
2458          * suppressing or terminating exception. However, since we came here
2459          * by a DAT access exception, the PSW still points to the faulting
2460          * instruction since DAT exceptions are nullifying. So we've got
2461          * to look up the current opcode to get the length of the instruction
2462          * to be able to forward the PSW.
2463          */
2464         rc = read_guest_instr(vcpu, &opcode, 1);
2465         ilen = insn_length(opcode);
2466         if (rc < 0) {
2467                 return rc;
2468         } else if (rc) {
2469                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2470                  * Forward by arbitrary ilc, injection will take care of
2471                  * nullification if necessary.
2472                  */
2473                 pgm_info = vcpu->arch.pgm;
2474                 ilen = 4;
2475         }
2476         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2477         kvm_s390_forward_psw(vcpu, ilen);
2478         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2479 }
2480
2481 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2482 {
2483         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2484                    vcpu->arch.sie_block->icptcode);
2485         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2486
2487         if (guestdbg_enabled(vcpu))
2488                 kvm_s390_restore_guest_per_regs(vcpu);
2489
2490         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2491         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2492
2493         if (vcpu->arch.sie_block->icptcode > 0) {
2494                 int rc = kvm_handle_sie_intercept(vcpu);
2495
2496                 if (rc != -EOPNOTSUPP)
2497                         return rc;
2498                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2499                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2500                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2501                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2502                 return -EREMOTE;
2503         } else if (exit_reason != -EFAULT) {
2504                 vcpu->stat.exit_null++;
2505                 return 0;
2506         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2507                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2508                 vcpu->run->s390_ucontrol.trans_exc_code =
2509                                                 current->thread.gmap_addr;
2510                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2511                 return -EREMOTE;
2512         } else if (current->thread.gmap_pfault) {
2513                 trace_kvm_s390_major_guest_pfault(vcpu);
2514                 current->thread.gmap_pfault = 0;
2515                 if (kvm_arch_setup_async_pf(vcpu))
2516                         return 0;
2517                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2518         }
2519         return vcpu_post_run_fault_in_sie(vcpu);
2520 }
2521
2522 static int __vcpu_run(struct kvm_vcpu *vcpu)
2523 {
2524         int rc, exit_reason;
2525
2526         /*
2527          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2528          * ning the guest), so that memslots (and other stuff) are protected
2529          */
2530         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2531
2532         do {
2533                 rc = vcpu_pre_run(vcpu);
2534                 if (rc)
2535                         break;
2536
2537                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2538                 /*
2539                  * As PF_VCPU will be used in fault handler, between
2540                  * guest_enter and guest_exit should be no uaccess.
2541                  */
2542                 local_irq_disable();
2543                 __kvm_guest_enter();
2544                 __disable_cpu_timer_accounting(vcpu);
2545                 local_irq_enable();
2546                 exit_reason = sie64a(vcpu->arch.sie_block,
2547                                      vcpu->run->s.regs.gprs);
2548                 local_irq_disable();
2549                 __enable_cpu_timer_accounting(vcpu);
2550                 __kvm_guest_exit();
2551                 local_irq_enable();
2552                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2553
2554                 rc = vcpu_post_run(vcpu, exit_reason);
2555         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2556
2557         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2558         return rc;
2559 }
2560
2561 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2562 {
2563         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2564         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2565         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2566                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2567         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2568                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2569                 /* some control register changes require a tlb flush */
2570                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2571         }
2572         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2573                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2574                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2575                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2576                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2577                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2578         }
2579         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2580                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2581                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2582                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2583                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2584                         kvm_clear_async_pf_completion_queue(vcpu);
2585         }
2586         kvm_run->kvm_dirty_regs = 0;
2587 }
2588
2589 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2590 {
2591         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2592         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2593         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2594         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2595         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2596         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2597         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2598         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2599         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2600         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2601         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2602         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2603 }
2604
2605 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2606 {
2607         int rc;
2608         sigset_t sigsaved;
2609
2610         if (guestdbg_exit_pending(vcpu)) {
2611                 kvm_s390_prepare_debug_exit(vcpu);
2612                 return 0;
2613         }
2614
2615         if (vcpu->sigset_active)
2616                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2617
2618         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2619                 kvm_s390_vcpu_start(vcpu);
2620         } else if (is_vcpu_stopped(vcpu)) {
2621                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2622                                    vcpu->vcpu_id);
2623                 return -EINVAL;
2624         }
2625
2626         sync_regs(vcpu, kvm_run);
2627         enable_cpu_timer_accounting(vcpu);
2628
2629         might_fault();
2630         rc = __vcpu_run(vcpu);
2631
2632         if (signal_pending(current) && !rc) {
2633                 kvm_run->exit_reason = KVM_EXIT_INTR;
2634                 rc = -EINTR;
2635         }
2636
2637         if (guestdbg_exit_pending(vcpu) && !rc)  {
2638                 kvm_s390_prepare_debug_exit(vcpu);
2639                 rc = 0;
2640         }
2641
2642         if (rc == -EREMOTE) {
2643                 /* userspace support is needed, kvm_run has been prepared */
2644                 rc = 0;
2645         }
2646
2647         disable_cpu_timer_accounting(vcpu);
2648         store_regs(vcpu, kvm_run);
2649
2650         if (vcpu->sigset_active)
2651                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2652
2653         vcpu->stat.exit_userspace++;
2654         return rc;
2655 }
2656
2657 /*
2658  * store status at address
2659  * we use have two special cases:
2660  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2661  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2662  */
2663 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2664 {
2665         unsigned char archmode = 1;
2666         freg_t fprs[NUM_FPRS];
2667         unsigned int px;
2668         u64 clkcomp, cputm;
2669         int rc;
2670
2671         px = kvm_s390_get_prefix(vcpu);
2672         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2673                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2674                         return -EFAULT;
2675                 gpa = 0;
2676         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2677                 if (write_guest_real(vcpu, 163, &archmode, 1))
2678                         return -EFAULT;
2679                 gpa = px;
2680         } else
2681                 gpa -= __LC_FPREGS_SAVE_AREA;
2682
2683         /* manually convert vector registers if necessary */
2684         if (MACHINE_HAS_VX) {
2685                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2686                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2687                                      fprs, 128);
2688         } else {
2689                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2690                                      vcpu->run->s.regs.fprs, 128);
2691         }
2692         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2693                               vcpu->run->s.regs.gprs, 128);
2694         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2695                               &vcpu->arch.sie_block->gpsw, 16);
2696         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2697                               &px, 4);
2698         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2699                               &vcpu->run->s.regs.fpc, 4);
2700         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2701                               &vcpu->arch.sie_block->todpr, 4);
2702         cputm = kvm_s390_get_cpu_timer(vcpu);
2703         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2704                               &cputm, 8);
2705         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2706         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2707                               &clkcomp, 8);
2708         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2709                               &vcpu->run->s.regs.acrs, 64);
2710         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2711                               &vcpu->arch.sie_block->gcr, 128);
2712         return rc ? -EFAULT : 0;
2713 }
2714
2715 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2716 {
2717         /*
2718          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2719          * copying in vcpu load/put. Lets update our copies before we save
2720          * it into the save area
2721          */
2722         save_fpu_regs();
2723         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2724         save_access_regs(vcpu->run->s.regs.acrs);
2725
2726         return kvm_s390_store_status_unloaded(vcpu, addr);
2727 }
2728
2729 /*
2730  * store additional status at address
2731  */
2732 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2733                                         unsigned long gpa)
2734 {
2735         /* Only bits 0-53 are used for address formation */
2736         if (!(gpa & ~0x3ff))
2737                 return 0;
2738
2739         return write_guest_abs(vcpu, gpa & ~0x3ff,
2740                                (void *)&vcpu->run->s.regs.vrs, 512);
2741 }
2742
2743 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2744 {
2745         if (!test_kvm_facility(vcpu->kvm, 129))
2746                 return 0;
2747
2748         /*
2749          * The guest VXRS are in the host VXRs due to the lazy
2750          * copying in vcpu load/put. We can simply call save_fpu_regs()
2751          * to save the current register state because we are in the
2752          * middle of a load/put cycle.
2753          *
2754          * Let's update our copies before we save it into the save area.
2755          */
2756         save_fpu_regs();
2757
2758         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2759 }
2760
2761 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2762 {
2763         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2764         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2765 }
2766
2767 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2768 {
2769         unsigned int i;
2770         struct kvm_vcpu *vcpu;
2771
2772         kvm_for_each_vcpu(i, vcpu, kvm) {
2773                 __disable_ibs_on_vcpu(vcpu);
2774         }
2775 }
2776
2777 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2778 {
2779         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2780         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2781 }
2782
2783 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2784 {
2785         int i, online_vcpus, started_vcpus = 0;
2786
2787         if (!is_vcpu_stopped(vcpu))
2788                 return;
2789
2790         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2791         /* Only one cpu at a time may enter/leave the STOPPED state. */
2792         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2793         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2794
2795         for (i = 0; i < online_vcpus; i++) {
2796                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2797                         started_vcpus++;
2798         }
2799
2800         if (started_vcpus == 0) {
2801                 /* we're the only active VCPU -> speed it up */
2802                 __enable_ibs_on_vcpu(vcpu);
2803         } else if (started_vcpus == 1) {
2804                 /*
2805                  * As we are starting a second VCPU, we have to disable
2806                  * the IBS facility on all VCPUs to remove potentially
2807                  * oustanding ENABLE requests.
2808                  */
2809                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2810         }
2811
2812         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2813         /*
2814          * Another VCPU might have used IBS while we were offline.
2815          * Let's play safe and flush the VCPU at startup.
2816          */
2817         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2818         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2819         return;
2820 }
2821
2822 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2823 {
2824         int i, online_vcpus, started_vcpus = 0;
2825         struct kvm_vcpu *started_vcpu = NULL;
2826
2827         if (is_vcpu_stopped(vcpu))
2828                 return;
2829
2830         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2831         /* Only one cpu at a time may enter/leave the STOPPED state. */
2832         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2833         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2834
2835         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2836         kvm_s390_clear_stop_irq(vcpu);
2837
2838         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2839         __disable_ibs_on_vcpu(vcpu);
2840
2841         for (i = 0; i < online_vcpus; i++) {
2842                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2843                         started_vcpus++;
2844                         started_vcpu = vcpu->kvm->vcpus[i];
2845                 }
2846         }
2847
2848         if (started_vcpus == 1) {
2849                 /*
2850                  * As we only have one VCPU left, we want to enable the
2851                  * IBS facility for that VCPU to speed it up.
2852                  */
2853                 __enable_ibs_on_vcpu(started_vcpu);
2854         }
2855
2856         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2857         return;
2858 }
2859
2860 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2861                                      struct kvm_enable_cap *cap)
2862 {
2863         int r;
2864
2865         if (cap->flags)
2866                 return -EINVAL;
2867
2868         switch (cap->cap) {
2869         case KVM_CAP_S390_CSS_SUPPORT:
2870                 if (!vcpu->kvm->arch.css_support) {
2871                         vcpu->kvm->arch.css_support = 1;
2872                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2873                         trace_kvm_s390_enable_css(vcpu->kvm);
2874                 }
2875                 r = 0;
2876                 break;
2877         default:
2878                 r = -EINVAL;
2879                 break;
2880         }
2881         return r;
2882 }
2883
2884 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2885                                   struct kvm_s390_mem_op *mop)
2886 {
2887         void __user *uaddr = (void __user *)mop->buf;
2888         void *tmpbuf = NULL;
2889         int r, srcu_idx;
2890         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2891                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2892
2893         if (mop->flags & ~supported_flags)
2894                 return -EINVAL;
2895
2896         if (mop->size > MEM_OP_MAX_SIZE)
2897                 return -E2BIG;
2898
2899         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2900                 tmpbuf = vmalloc(mop->size);
2901                 if (!tmpbuf)
2902                         return -ENOMEM;
2903         }
2904
2905         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2906
2907         switch (mop->op) {
2908         case KVM_S390_MEMOP_LOGICAL_READ:
2909                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2910                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2911                                             mop->size, GACC_FETCH);
2912                         break;
2913                 }
2914                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2915                 if (r == 0) {
2916                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2917                                 r = -EFAULT;
2918                 }
2919                 break;
2920         case KVM_S390_MEMOP_LOGICAL_WRITE:
2921                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2922                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2923                                             mop->size, GACC_STORE);
2924                         break;
2925                 }
2926                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2927                         r = -EFAULT;
2928                         break;
2929                 }
2930                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2931                 break;
2932         default:
2933                 r = -EINVAL;
2934         }
2935
2936         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2937
2938         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2939                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2940
2941         vfree(tmpbuf);
2942         return r;
2943 }
2944
2945 long kvm_arch_vcpu_ioctl(struct file *filp,
2946                          unsigned int ioctl, unsigned long arg)
2947 {
2948         struct kvm_vcpu *vcpu = filp->private_data;
2949         void __user *argp = (void __user *)arg;
2950         int idx;
2951         long r;
2952
2953         switch (ioctl) {
2954         case KVM_S390_IRQ: {
2955                 struct kvm_s390_irq s390irq;
2956
2957                 r = -EFAULT;
2958                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2959                         break;
2960                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2961                 break;
2962         }
2963         case KVM_S390_INTERRUPT: {
2964                 struct kvm_s390_interrupt s390int;
2965                 struct kvm_s390_irq s390irq;
2966
2967                 r = -EFAULT;
2968                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2969                         break;
2970                 if (s390int_to_s390irq(&s390int, &s390irq))
2971                         return -EINVAL;
2972                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2973                 break;
2974         }
2975         case KVM_S390_STORE_STATUS:
2976                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2977                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2978                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2979                 break;
2980         case KVM_S390_SET_INITIAL_PSW: {
2981                 psw_t psw;
2982
2983                 r = -EFAULT;
2984                 if (copy_from_user(&psw, argp, sizeof(psw)))
2985                         break;
2986                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2987                 break;
2988         }
2989         case KVM_S390_INITIAL_RESET:
2990                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2991                 break;
2992         case KVM_SET_ONE_REG:
2993         case KVM_GET_ONE_REG: {
2994                 struct kvm_one_reg reg;
2995                 r = -EFAULT;
2996                 if (copy_from_user(&reg, argp, sizeof(reg)))
2997                         break;
2998                 if (ioctl == KVM_SET_ONE_REG)
2999                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3000                 else
3001                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3002                 break;
3003         }
3004 #ifdef CONFIG_KVM_S390_UCONTROL
3005         case KVM_S390_UCAS_MAP: {
3006                 struct kvm_s390_ucas_mapping ucasmap;
3007
3008                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3009                         r = -EFAULT;
3010                         break;
3011                 }
3012
3013                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3014                         r = -EINVAL;
3015                         break;
3016                 }
3017
3018                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3019                                      ucasmap.vcpu_addr, ucasmap.length);
3020                 break;
3021         }
3022         case KVM_S390_UCAS_UNMAP: {
3023                 struct kvm_s390_ucas_mapping ucasmap;
3024
3025                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3026                         r = -EFAULT;
3027                         break;
3028                 }
3029
3030                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3031                         r = -EINVAL;
3032                         break;
3033                 }
3034
3035                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3036                         ucasmap.length);
3037                 break;
3038         }
3039 #endif
3040         case KVM_S390_VCPU_FAULT: {
3041                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3042                 break;
3043         }
3044         case KVM_ENABLE_CAP:
3045         {
3046                 struct kvm_enable_cap cap;
3047                 r = -EFAULT;
3048                 if (copy_from_user(&cap, argp, sizeof(cap)))
3049                         break;
3050                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3051                 break;
3052         }
3053         case KVM_S390_MEM_OP: {
3054                 struct kvm_s390_mem_op mem_op;
3055
3056                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3057                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3058                 else
3059                         r = -EFAULT;
3060                 break;
3061         }
3062         case KVM_S390_SET_IRQ_STATE: {
3063                 struct kvm_s390_irq_state irq_state;
3064
3065                 r = -EFAULT;
3066                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3067                         break;
3068                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3069                     irq_state.len == 0 ||
3070                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3071                         r = -EINVAL;
3072                         break;
3073                 }
3074                 r = kvm_s390_set_irq_state(vcpu,
3075                                            (void __user *) irq_state.buf,
3076                                            irq_state.len);
3077                 break;
3078         }
3079         case KVM_S390_GET_IRQ_STATE: {
3080                 struct kvm_s390_irq_state irq_state;
3081
3082                 r = -EFAULT;
3083                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3084                         break;
3085                 if (irq_state.len == 0) {
3086                         r = -EINVAL;
3087                         break;
3088                 }
3089                 r = kvm_s390_get_irq_state(vcpu,
3090                                            (__u8 __user *)  irq_state.buf,
3091                                            irq_state.len);
3092                 break;
3093         }
3094         default:
3095                 r = -ENOTTY;
3096         }
3097         return r;
3098 }
3099
3100 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3101 {
3102 #ifdef CONFIG_KVM_S390_UCONTROL
3103         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3104                  && (kvm_is_ucontrol(vcpu->kvm))) {
3105                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3106                 get_page(vmf->page);
3107                 return 0;
3108         }
3109 #endif
3110         return VM_FAULT_SIGBUS;
3111 }
3112
3113 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3114                             unsigned long npages)
3115 {
3116         return 0;
3117 }
3118
3119 /* Section: memory related */
3120 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3121                                    struct kvm_memory_slot *memslot,
3122                                    const struct kvm_userspace_memory_region *mem,
3123                                    enum kvm_mr_change change)
3124 {
3125         /* A few sanity checks. We can have memory slots which have to be
3126            located/ended at a segment boundary (1MB). The memory in userland is
3127            ok to be fragmented into various different vmas. It is okay to mmap()
3128            and munmap() stuff in this slot after doing this call at any time */
3129
3130         if (mem->userspace_addr & 0xffffful)
3131                 return -EINVAL;
3132
3133         if (mem->memory_size & 0xffffful)
3134                 return -EINVAL;
3135
3136         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3137                 return -EINVAL;
3138
3139         return 0;
3140 }
3141
3142 void kvm_arch_commit_memory_region(struct kvm *kvm,
3143                                 const struct kvm_userspace_memory_region *mem,
3144                                 const struct kvm_memory_slot *old,
3145                                 const struct kvm_memory_slot *new,
3146                                 enum kvm_mr_change change)
3147 {
3148         int rc;
3149
3150         /* If the basics of the memslot do not change, we do not want
3151          * to update the gmap. Every update causes several unnecessary
3152          * segment translation exceptions. This is usually handled just
3153          * fine by the normal fault handler + gmap, but it will also
3154          * cause faults on the prefix page of running guest CPUs.
3155          */
3156         if (old->userspace_addr == mem->userspace_addr &&
3157             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3158             old->npages * PAGE_SIZE == mem->memory_size)
3159                 return;
3160
3161         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3162                 mem->guest_phys_addr, mem->memory_size);
3163         if (rc)
3164                 pr_warn("failed to commit memory region\n");
3165         return;
3166 }
3167
3168 static inline unsigned long nonhyp_mask(int i)
3169 {
3170         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3171
3172         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3173 }
3174
3175 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3176 {
3177         vcpu->valid_wakeup = false;
3178 }
3179
3180 static int __init kvm_s390_init(void)
3181 {
3182         int i;
3183
3184         if (!sclp.has_sief2) {
3185                 pr_info("SIE not available\n");
3186                 return -ENODEV;
3187         }
3188
3189         for (i = 0; i < 16; i++)
3190                 kvm_s390_fac_list_mask[i] |=
3191                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3192
3193         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3194 }
3195
3196 static void __exit kvm_s390_exit(void)
3197 {
3198         kvm_exit();
3199 }
3200
3201 module_init(kvm_s390_init);
3202 module_exit(kvm_s390_exit);
3203
3204 /*
3205  * Enable autoloading of the kvm module.
3206  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3207  * since x86 takes a different approach.
3208  */
3209 #include <linux/miscdevice.h>
3210 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3211 MODULE_ALIAS("devname:kvm");