arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/module.h>
  25 #include <linux/random.h>
  26 #include <linux/slab.h>
  27 #include <linux/timer.h>
  28 #include <linux/vmalloc.h>
  29 #include <linux/bitmap.h>
  30 #include <asm/asm-offsets.h>
  31 #include <asm/lowcore.h>
  32 #include <asm/etr.h>
  33 #include <asm/pgtable.h>
  34 #include <asm/gmap.h>
  35 #include <asm/nmi.h>
  36 #include <asm/switch_to.h>
  37 #include <asm/isc.h>
  38 #include <asm/sclp.h>
  39 #include <asm/cpacf.h>
  40 #include <asm/etr.h>
  41 #include "kvm-s390.h"
  42 #include "gaccess.h"
  43
  44 #define KMSG_COMPONENT "kvm-s390"
  45 #undef pr_fmt
  46 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  47
  48 #define CREATE_TRACE_POINTS
  49 #include "trace.h"
  50 #include "trace-s390.h"
  51
  52 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  53 #define LOCAL_IRQS 32
  54 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  55                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  56
  57 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  58
  59 struct kvm_stats_debugfs_item debugfs_entries[] = {
  60         { "userspace_handled", VCPU_STAT(exit_userspace) },
  61         { "exit_null", VCPU_STAT(exit_null) },
  62         { "exit_validity", VCPU_STAT(exit_validity) },
  63         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  64         { "exit_external_request", VCPU_STAT(exit_external_request) },
  65         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  66         { "exit_instruction", VCPU_STAT(exit_instruction) },
  67         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  68         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  69         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  70         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  71         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  72         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  73         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  74         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  75         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  76         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  77         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  78         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  79         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  80         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  81         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  82         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  83         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  84         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  85         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  86         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  87         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  88         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  89         { "instruction_spx", VCPU_STAT(instruction_spx) },
  90         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  91         { "instruction_stap", VCPU_STAT(instruction_stap) },
  92         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  93         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
  94         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
  95         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
  96         { "instruction_essa", VCPU_STAT(instruction_essa) },
  97         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
  98         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
  99         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 100         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 101         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 102         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 103         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 104         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 105         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 106         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 107         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 108         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 109         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 110         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 111         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 112         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 113         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 114         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 115         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 116         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 117         { "diagnose_10", VCPU_STAT(diagnose_10) },
 118         { "diagnose_44", VCPU_STAT(diagnose_44) },
 119         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 120         { "diagnose_258", VCPU_STAT(diagnose_258) },
 121         { "diagnose_308", VCPU_STAT(diagnose_308) },
 122         { "diagnose_500", VCPU_STAT(diagnose_500) },
 123         { NULL }
 124 };
 125
 126 /* upper facilities limit for kvm */
 127 unsigned long kvm_s390_fac_list_mask[16] = {
 128         0xffe6000000000000UL,
 129         0x005e000000000000UL,
 130 };
 131
 132 unsigned long kvm_s390_fac_list_mask_size(void)
 133 {
 134         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 135         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 136 }
 137
 138 /* available cpu features supported by kvm */
 139 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 140 /* available subfunctions indicated via query / "test bit" */
 141 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 142
 143 static struct gmap_notifier gmap_notifier;
 144 debug_info_t *kvm_s390_dbf;
 145
 146 /* Section: not file related */
 147 int kvm_arch_hardware_enable(void)
 148 {
 149         /* every s390 is virtualization enabled ;-) */
 150         return 0;
 151 }
 152
 153 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
 154
 155 /*
 156  * This callback is executed during stop_machine(). All CPUs are therefore
 157  * temporarily stopped. In order not to change guest behavior, we have to
 158  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 159  * so a CPU won't be stopped while calculating with the epoch.
 160  */
 161 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 162                           void *v)
 163 {
 164         struct kvm *kvm;
 165         struct kvm_vcpu *vcpu;
 166         int i;
 167         unsigned long long *delta = v;
 168
 169         list_for_each_entry(kvm, &vm_list, vm_list) {
 170                 kvm->arch.epoch -= *delta;
 171                 kvm_for_each_vcpu(i, vcpu, kvm) {
 172                         vcpu->arch.sie_block->epoch -= *delta;
 173                         if (vcpu->arch.cputm_enabled)
 174                                 vcpu->arch.cputm_start += *delta;
 175                 }
 176         }
 177         return NOTIFY_OK;
 178 }
 179
 180 static struct notifier_block kvm_clock_notifier = {
 181         .notifier_call = kvm_clock_sync,
 182 };
 183
 184 int kvm_arch_hardware_setup(void)
 185 {
 186         gmap_notifier.notifier_call = kvm_gmap_notifier;
 187         gmap_register_ipte_notifier(&gmap_notifier);
 188         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 189                                        &kvm_clock_notifier);
 190         return 0;
 191 }
 192
 193 void kvm_arch_hardware_unsetup(void)
 194 {
 195         gmap_unregister_ipte_notifier(&gmap_notifier);
 196         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 197                                          &kvm_clock_notifier);
 198 }
 199
 200 static void allow_cpu_feat(unsigned long nr)
 201 {
 202         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 203 }
 204
 205 static inline int plo_test_bit(unsigned char nr)
 206 {
 207         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 208         int cc = 3; /* subfunction not available */
 209
 210         asm volatile(
 211                 /* Parameter registers are ignored for "test bit" */
 212                 "       plo     0,0,0,0(0)\n"
 213                 "       ipm     %0\n"
 214                 "       srl     %0,28\n"
 215                 : "=d" (cc)
 216                 : "d" (r0)
 217                 : "cc");
 218         return cc == 0;
 219 }
 220
 221 static void kvm_s390_cpu_feat_init(void)
 222 {
 223         int i;
 224
 225         for (i = 0; i < 256; ++i) {
 226                 if (plo_test_bit(i))
 227                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 228         }
 229
 230         if (test_facility(28)) /* TOD-clock steering */
 231                 etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);
 232
 233         if (test_facility(17)) { /* MSA */
 234                 __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
 235                 __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
 236                 __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
 237                 __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
 238                 __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
 239         }
 240         if (test_facility(76)) /* MSA3 */
 241                 __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
 242         if (test_facility(77)) { /* MSA4 */
 243                 __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
 244                 __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
 245                 __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
 246                 __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
 247         }
 248         if (test_facility(57)) /* MSA5 */
 249                 __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
 250
 251         if (MACHINE_HAS_ESOP)
 252                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 253 }
 254
 255 int kvm_arch_init(void *opaque)
 256 {
 257         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 258         if (!kvm_s390_dbf)
 259                 return -ENOMEM;
 260
 261         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 262                 debug_unregister(kvm_s390_dbf);
 263                 return -ENOMEM;
 264         }
 265
 266         kvm_s390_cpu_feat_init();
 267
 268         /* Register floating interrupt controller interface. */
 269         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 270 }
 271
 272 void kvm_arch_exit(void)
 273 {
 274         debug_unregister(kvm_s390_dbf);
 275 }
 276
 277 /* Section: device related */
 278 long kvm_arch_dev_ioctl(struct file *filp,
 279                         unsigned int ioctl, unsigned long arg)
 280 {
 281         if (ioctl == KVM_S390_ENABLE_SIE)
 282                 return s390_enable_sie();
 283         return -EINVAL;
 284 }
 285
 286 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 287 {
 288         int r;
 289
 290         switch (ext) {
 291         case KVM_CAP_S390_PSW:
 292         case KVM_CAP_S390_GMAP:
 293         case KVM_CAP_SYNC_MMU:
 294 #ifdef CONFIG_KVM_S390_UCONTROL
 295         case KVM_CAP_S390_UCONTROL:
 296 #endif
 297         case KVM_CAP_ASYNC_PF:
 298         case KVM_CAP_SYNC_REGS:
 299         case KVM_CAP_ONE_REG:
 300         case KVM_CAP_ENABLE_CAP:
 301         case KVM_CAP_S390_CSS_SUPPORT:
 302         case KVM_CAP_IOEVENTFD:
 303         case KVM_CAP_DEVICE_CTRL:
 304         case KVM_CAP_ENABLE_CAP_VM:
 305         case KVM_CAP_S390_IRQCHIP:
 306         case KVM_CAP_VM_ATTRIBUTES:
 307         case KVM_CAP_MP_STATE:
 308         case KVM_CAP_S390_INJECT_IRQ:
 309         case KVM_CAP_S390_USER_SIGP:
 310         case KVM_CAP_S390_USER_STSI:
 311         case KVM_CAP_S390_SKEYS:
 312         case KVM_CAP_S390_IRQ_STATE:
 313                 r = 1;
 314                 break;
 315         case KVM_CAP_S390_MEM_OP:
 316                 r = MEM_OP_MAX_SIZE;
 317                 break;
 318         case KVM_CAP_NR_VCPUS:
 319         case KVM_CAP_MAX_VCPUS:
 320                 r = KVM_S390_BSCA_CPU_SLOTS;
 321                 if (sclp.has_esca && sclp.has_64bscao)
 322                         r = KVM_S390_ESCA_CPU_SLOTS;
 323                 break;
 324         case KVM_CAP_NR_MEMSLOTS:
 325                 r = KVM_USER_MEM_SLOTS;
 326                 break;
 327         case KVM_CAP_S390_COW:
 328                 r = MACHINE_HAS_ESOP;
 329                 break;
 330         case KVM_CAP_S390_VECTOR_REGISTERS:
 331                 r = MACHINE_HAS_VX;
 332                 break;
 333         case KVM_CAP_S390_RI:
 334                 r = test_facility(64);
 335                 break;
 336         default:
 337                 r = 0;
 338         }
 339         return r;
 340 }
 341
 342 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 343                                         struct kvm_memory_slot *memslot)
 344 {
 345         gfn_t cur_gfn, last_gfn;
 346         unsigned long address;
 347         struct gmap *gmap = kvm->arch.gmap;
 348
 349         /* Loop over all guest pages */
 350         last_gfn = memslot->base_gfn + memslot->npages;
 351         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 352                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 353
 354                 if (test_and_clear_guest_dirty(gmap->mm, address))
 355                         mark_page_dirty(kvm, cur_gfn);
 356                 if (fatal_signal_pending(current))
 357                         return;
 358                 cond_resched();
 359         }
 360 }
 361
 362 /* Section: vm related */
 363 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 364
 365 /*
 366  * Get (and clear) the dirty memory log for a memory slot.
 367  */
 368 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 369                                struct kvm_dirty_log *log)
 370 {
 371         int r;
 372         unsigned long n;
 373         struct kvm_memslots *slots;
 374         struct kvm_memory_slot *memslot;
 375         int is_dirty = 0;
 376
 377         mutex_lock(&kvm->slots_lock);
 378
 379         r = -EINVAL;
 380         if (log->slot >= KVM_USER_MEM_SLOTS)
 381                 goto out;
 382
 383         slots = kvm_memslots(kvm);
 384         memslot = id_to_memslot(slots, log->slot);
 385         r = -ENOENT;
 386         if (!memslot->dirty_bitmap)
 387                 goto out;
 388
 389         kvm_s390_sync_dirty_log(kvm, memslot);
 390         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 391         if (r)
 392                 goto out;
 393
 394         /* Clear the dirty log */
 395         if (is_dirty) {
 396                 n = kvm_dirty_bitmap_bytes(memslot);
 397                 memset(memslot->dirty_bitmap, 0, n);
 398         }
 399         r = 0;
 400 out:
 401         mutex_unlock(&kvm->slots_lock);
 402         return r;
 403 }
 404
 405 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 406 {
 407         int r;
 408
 409         if (cap->flags)
 410                 return -EINVAL;
 411
 412         switch (cap->cap) {
 413         case KVM_CAP_S390_IRQCHIP:
 414                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 415                 kvm->arch.use_irqchip = 1;
 416                 r = 0;
 417                 break;
 418         case KVM_CAP_S390_USER_SIGP:
 419                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 420                 kvm->arch.user_sigp = 1;
 421                 r = 0;
 422                 break;
 423         case KVM_CAP_S390_VECTOR_REGISTERS:
 424                 mutex_lock(&kvm->lock);
 425                 if (atomic_read(&kvm->online_vcpus)) {
 426                         r = -EBUSY;
 427                 } else if (MACHINE_HAS_VX) {
 428                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 429                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 430                         r = 0;
 431                 } else
 432                         r = -EINVAL;
 433                 mutex_unlock(&kvm->lock);
 434                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 435                          r ? "(not available)" : "(success)");
 436                 break;
 437         case KVM_CAP_S390_RI:
 438                 r = -EINVAL;
 439                 mutex_lock(&kvm->lock);
 440                 if (atomic_read(&kvm->online_vcpus)) {
 441                         r = -EBUSY;
 442                 } else if (test_facility(64)) {
 443                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 444                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 445                         r = 0;
 446                 }
 447                 mutex_unlock(&kvm->lock);
 448                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 449                          r ? "(not available)" : "(success)");
 450                 break;
 451         case KVM_CAP_S390_USER_STSI:
 452                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 453                 kvm->arch.user_stsi = 1;
 454                 r = 0;
 455                 break;
 456         default:
 457                 r = -EINVAL;
 458                 break;
 459         }
 460         return r;
 461 }
 462
 463 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 464 {
 465         int ret;
 466
 467         switch (attr->attr) {
 468         case KVM_S390_VM_MEM_LIMIT_SIZE:
 469                 ret = 0;
 470                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 471                          kvm->arch.mem_limit);
 472                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 473                         ret = -EFAULT;
 474                 break;
 475         default:
 476                 ret = -ENXIO;
 477                 break;
 478         }
 479         return ret;
 480 }
 481
 482 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 483 {
 484         int ret;
 485         unsigned int idx;
 486         switch (attr->attr) {
 487         case KVM_S390_VM_MEM_ENABLE_CMMA:
 488                 ret = -ENXIO;
 489                 if (!sclp.has_cmma)
 490                         break;
 491
 492                 ret = -EBUSY;
 493                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 494                 mutex_lock(&kvm->lock);
 495                 if (atomic_read(&kvm->online_vcpus) == 0) {
 496                         kvm->arch.use_cmma = 1;
 497                         ret = 0;
 498                 }
 499                 mutex_unlock(&kvm->lock);
 500                 break;
 501         case KVM_S390_VM_MEM_CLR_CMMA:
 502                 ret = -ENXIO;
 503                 if (!sclp.has_cmma)
 504                         break;
 505                 ret = -EINVAL;
 506                 if (!kvm->arch.use_cmma)
 507                         break;
 508
 509                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 510                 mutex_lock(&kvm->lock);
 511                 idx = srcu_read_lock(&kvm->srcu);
 512                 s390_reset_cmma(kvm->arch.gmap->mm);
 513                 srcu_read_unlock(&kvm->srcu, idx);
 514                 mutex_unlock(&kvm->lock);
 515                 ret = 0;
 516                 break;
 517         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 518                 unsigned long new_limit;
 519
 520                 if (kvm_is_ucontrol(kvm))
 521                         return -EINVAL;
 522
 523                 if (get_user(new_limit, (u64 __user *)attr->addr))
 524                         return -EFAULT;
 525
 526                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 527                     new_limit > kvm->arch.mem_limit)
 528                         return -E2BIG;
 529
 530                 if (!new_limit)
 531                         return -EINVAL;
 532
 533                 /* gmap_alloc takes last usable address */
 534                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 535                         new_limit -= 1;
 536
 537                 ret = -EBUSY;
 538                 mutex_lock(&kvm->lock);
 539                 if (atomic_read(&kvm->online_vcpus) == 0) {
 540                         /* gmap_alloc will round the limit up */
 541                         struct gmap *new = gmap_alloc(current->mm, new_limit);
 542
 543                         if (!new) {
 544                                 ret = -ENOMEM;
 545                         } else {
 546                                 gmap_free(kvm->arch.gmap);
 547                                 new->private = kvm;
 548                                 kvm->arch.gmap = new;
 549                                 ret = 0;
 550                         }
 551                 }
 552                 mutex_unlock(&kvm->lock);
 553                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 554                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 555                          (void *) kvm->arch.gmap->asce);
 556                 break;
 557         }
 558         default:
 559                 ret = -ENXIO;
 560                 break;
 561         }
 562         return ret;
 563 }
 564
 565 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 566
 567 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 568 {
 569         struct kvm_vcpu *vcpu;
 570         int i;
 571
 572         if (!test_kvm_facility(kvm, 76))
 573                 return -EINVAL;
 574
 575         mutex_lock(&kvm->lock);
 576         switch (attr->attr) {
 577         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 578                 get_random_bytes(
 579                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 580                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 581                 kvm->arch.crypto.aes_kw = 1;
 582                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 583                 break;
 584         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 585                 get_random_bytes(
 586                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 587                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 588                 kvm->arch.crypto.dea_kw = 1;
 589                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 590                 break;
 591         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 592                 kvm->arch.crypto.aes_kw = 0;
 593                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 594                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 595                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 596                 break;
 597         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 598                 kvm->arch.crypto.dea_kw = 0;
 599                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 600                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 601                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 602                 break;
 603         default:
 604                 mutex_unlock(&kvm->lock);
 605                 return -ENXIO;
 606         }
 607
 608         kvm_for_each_vcpu(i, vcpu, kvm) {
 609                 kvm_s390_vcpu_crypto_setup(vcpu);
 610                 exit_sie(vcpu);
 611         }
 612         mutex_unlock(&kvm->lock);
 613         return 0;
 614 }
 615
 616 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 617 {
 618         u8 gtod_high;
 619
 620         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 621                                            sizeof(gtod_high)))
 622                 return -EFAULT;
 623
 624         if (gtod_high != 0)
 625                 return -EINVAL;
 626         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 627
 628         return 0;
 629 }
 630
 631 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 632 {
 633         u64 gtod;
 634
 635         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 636                 return -EFAULT;
 637
 638         kvm_s390_set_tod_clock(kvm, gtod);
 639         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 640         return 0;
 641 }
 642
 643 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 644 {
 645         int ret;
 646
 647         if (attr->flags)
 648                 return -EINVAL;
 649
 650         switch (attr->attr) {
 651         case KVM_S390_VM_TOD_HIGH:
 652                 ret = kvm_s390_set_tod_high(kvm, attr);
 653                 break;
 654         case KVM_S390_VM_TOD_LOW:
 655                 ret = kvm_s390_set_tod_low(kvm, attr);
 656                 break;
 657         default:
 658                 ret = -ENXIO;
 659                 break;
 660         }
 661         return ret;
 662 }
 663
 664 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 665 {
 666         u8 gtod_high = 0;
 667
 668         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 669                                          sizeof(gtod_high)))
 670                 return -EFAULT;
 671         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 672
 673         return 0;
 674 }
 675
 676 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 677 {
 678         u64 gtod;
 679
 680         gtod = kvm_s390_get_tod_clock_fast(kvm);
 681         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 682                 return -EFAULT;
 683         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 684
 685         return 0;
 686 }
 687
 688 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 689 {
 690         int ret;
 691
 692         if (attr->flags)
 693                 return -EINVAL;
 694
 695         switch (attr->attr) {
 696         case KVM_S390_VM_TOD_HIGH:
 697                 ret = kvm_s390_get_tod_high(kvm, attr);
 698                 break;
 699         case KVM_S390_VM_TOD_LOW:
 700                 ret = kvm_s390_get_tod_low(kvm, attr);
 701                 break;
 702         default:
 703                 ret = -ENXIO;
 704                 break;
 705         }
 706         return ret;
 707 }
 708
 709 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 710 {
 711         struct kvm_s390_vm_cpu_processor *proc;
 712         u16 lowest_ibc, unblocked_ibc;
 713         int ret = 0;
 714
 715         mutex_lock(&kvm->lock);
 716         if (atomic_read(&kvm->online_vcpus)) {
 717                 ret = -EBUSY;
 718                 goto out;
 719         }
 720         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 721         if (!proc) {
 722                 ret = -ENOMEM;
 723                 goto out;
 724         }
 725         if (!copy_from_user(proc, (void __user *)attr->addr,
 726                             sizeof(*proc))) {
 727                 kvm->arch.model.cpuid = proc->cpuid;
 728                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
 729                 unblocked_ibc = sclp.ibc & 0xfff;
 730                 if (lowest_ibc) {
 731                         if (proc->ibc > unblocked_ibc)
 732                                 kvm->arch.model.ibc = unblocked_ibc;
 733                         else if (proc->ibc < lowest_ibc)
 734                                 kvm->arch.model.ibc = lowest_ibc;
 735                         else
 736                                 kvm->arch.model.ibc = proc->ibc;
 737                 }
 738                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
 739                        S390_ARCH_FAC_LIST_SIZE_BYTE);
 740         } else
 741                 ret = -EFAULT;
 742         kfree(proc);
 743 out:
 744         mutex_unlock(&kvm->lock);
 745         return ret;
 746 }
 747
 748 static int kvm_s390_set_processor_feat(struct kvm *kvm,
 749                                        struct kvm_device_attr *attr)
 750 {
 751         struct kvm_s390_vm_cpu_feat data;
 752         int ret = -EBUSY;
 753
 754         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
 755                 return -EFAULT;
 756         if (!bitmap_subset((unsigned long *) data.feat,
 757                            kvm_s390_available_cpu_feat,
 758                            KVM_S390_VM_CPU_FEAT_NR_BITS))
 759                 return -EINVAL;
 760
 761         mutex_lock(&kvm->lock);
 762         if (!atomic_read(&kvm->online_vcpus)) {
 763                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
 764                             KVM_S390_VM_CPU_FEAT_NR_BITS);
 765                 ret = 0;
 766         }
 767         mutex_unlock(&kvm->lock);
 768         return ret;
 769 }
 770
 771 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
 772                                           struct kvm_device_attr *attr)
 773 {
 774         /*
 775          * Once supported by kernel + hw, we have to store the subfunctions
 776          * in kvm->arch and remember that user space configured them.
 777          */
 778         return -ENXIO;
 779 }
 780
 781 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 782 {
 783         int ret = -ENXIO;
 784
 785         switch (attr->attr) {
 786         case KVM_S390_VM_CPU_PROCESSOR:
 787                 ret = kvm_s390_set_processor(kvm, attr);
 788                 break;
 789         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 790                 ret = kvm_s390_set_processor_feat(kvm, attr);
 791                 break;
 792         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 793                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
 794                 break;
 795         }
 796         return ret;
 797 }
 798
 799 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 800 {
 801         struct kvm_s390_vm_cpu_processor *proc;
 802         int ret = 0;
 803
 804         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 805         if (!proc) {
 806                 ret = -ENOMEM;
 807                 goto out;
 808         }
 809         proc->cpuid = kvm->arch.model.cpuid;
 810         proc->ibc = kvm->arch.model.ibc;
 811         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
 812                S390_ARCH_FAC_LIST_SIZE_BYTE);
 813         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 814                 ret = -EFAULT;
 815         kfree(proc);
 816 out:
 817         return ret;
 818 }
 819
 820 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 821 {
 822         struct kvm_s390_vm_cpu_machine *mach;
 823         int ret = 0;
 824
 825         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 826         if (!mach) {
 827                 ret = -ENOMEM;
 828                 goto out;
 829         }
 830         get_cpu_id((struct cpuid *) &mach->cpuid);
 831         mach->ibc = sclp.ibc;
 832         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
 833                S390_ARCH_FAC_LIST_SIZE_BYTE);
 834         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 835                S390_ARCH_FAC_LIST_SIZE_BYTE);
 836         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 837                 ret = -EFAULT;
 838         kfree(mach);
 839 out:
 840         return ret;
 841 }
 842
 843 static int kvm_s390_get_processor_feat(struct kvm *kvm,
 844                                        struct kvm_device_attr *attr)
 845 {
 846         struct kvm_s390_vm_cpu_feat data;
 847
 848         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
 849                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 850         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 851                 return -EFAULT;
 852         return 0;
 853 }
 854
 855 static int kvm_s390_get_machine_feat(struct kvm *kvm,
 856                                      struct kvm_device_attr *attr)
 857 {
 858         struct kvm_s390_vm_cpu_feat data;
 859
 860         bitmap_copy((unsigned long *) data.feat,
 861                     kvm_s390_available_cpu_feat,
 862                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 863         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 864                 return -EFAULT;
 865         return 0;
 866 }
 867
 868 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
 869                                           struct kvm_device_attr *attr)
 870 {
 871         /*
 872          * Once we can actually configure subfunctions (kernel + hw support),
 873          * we have to check if they were already set by user space, if so copy
 874          * them from kvm->arch.
 875          */
 876         return -ENXIO;
 877 }
 878
 879 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
 880                                         struct kvm_device_attr *attr)
 881 {
 882         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
 883             sizeof(struct kvm_s390_vm_cpu_subfunc)))
 884                 return -EFAULT;
 885         return 0;
 886 }
 887 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 888 {
 889         int ret = -ENXIO;
 890
 891         switch (attr->attr) {
 892         case KVM_S390_VM_CPU_PROCESSOR:
 893                 ret = kvm_s390_get_processor(kvm, attr);
 894                 break;
 895         case KVM_S390_VM_CPU_MACHINE:
 896                 ret = kvm_s390_get_machine(kvm, attr);
 897                 break;
 898         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 899                 ret = kvm_s390_get_processor_feat(kvm, attr);
 900                 break;
 901         case KVM_S390_VM_CPU_MACHINE_FEAT:
 902                 ret = kvm_s390_get_machine_feat(kvm, attr);
 903                 break;
 904         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 905                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
 906                 break;
 907         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 908                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
 909                 break;
 910         }
 911         return ret;
 912 }
 913
 914 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 915 {
 916         int ret;
 917
 918         switch (attr->group) {
 919         case KVM_S390_VM_MEM_CTRL:
 920                 ret = kvm_s390_set_mem_control(kvm, attr);
 921                 break;
 922         case KVM_S390_VM_TOD:
 923                 ret = kvm_s390_set_tod(kvm, attr);
 924                 break;
 925         case KVM_S390_VM_CPU_MODEL:
 926                 ret = kvm_s390_set_cpu_model(kvm, attr);
 927                 break;
 928         case KVM_S390_VM_CRYPTO:
 929                 ret = kvm_s390_vm_set_crypto(kvm, attr);
 930                 break;
 931         default:
 932                 ret = -ENXIO;
 933                 break;
 934         }
 935
 936         return ret;
 937 }
 938
 939 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 940 {
 941         int ret;
 942
 943         switch (attr->group) {
 944         case KVM_S390_VM_MEM_CTRL:
 945                 ret = kvm_s390_get_mem_control(kvm, attr);
 946                 break;
 947         case KVM_S390_VM_TOD:
 948                 ret = kvm_s390_get_tod(kvm, attr);
 949                 break;
 950         case KVM_S390_VM_CPU_MODEL:
 951                 ret = kvm_s390_get_cpu_model(kvm, attr);
 952                 break;
 953         default:
 954                 ret = -ENXIO;
 955                 break;
 956         }
 957
 958         return ret;
 959 }
 960
 961 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 962 {
 963         int ret;
 964
 965         switch (attr->group) {
 966         case KVM_S390_VM_MEM_CTRL:
 967                 switch (attr->attr) {
 968                 case KVM_S390_VM_MEM_ENABLE_CMMA:
 969                 case KVM_S390_VM_MEM_CLR_CMMA:
 970                         ret = sclp.has_cmma ? 0 : -ENXIO;
 971                         break;
 972                 case KVM_S390_VM_MEM_LIMIT_SIZE:
 973                         ret = 0;
 974                         break;
 975                 default:
 976                         ret = -ENXIO;
 977                         break;
 978                 }
 979                 break;
 980         case KVM_S390_VM_TOD:
 981                 switch (attr->attr) {
 982                 case KVM_S390_VM_TOD_LOW:
 983                 case KVM_S390_VM_TOD_HIGH:
 984                         ret = 0;
 985                         break;
 986                 default:
 987                         ret = -ENXIO;
 988                         break;
 989                 }
 990                 break;
 991         case KVM_S390_VM_CPU_MODEL:
 992                 switch (attr->attr) {
 993                 case KVM_S390_VM_CPU_PROCESSOR:
 994                 case KVM_S390_VM_CPU_MACHINE:
 995                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 996                 case KVM_S390_VM_CPU_MACHINE_FEAT:
 997                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 998                         ret = 0;
 999                         break;
1000                 /* configuring subfunctions is not supported yet */
1001                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1002                 default:
1003                         ret = -ENXIO;
1004                         break;
1005                 }
1006                 break;
1007         case KVM_S390_VM_CRYPTO:
1008                 switch (attr->attr) {
1009                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1010                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1011                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1012                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1013                         ret = 0;
1014                         break;
1015                 default:
1016                         ret = -ENXIO;
1017                         break;
1018                 }
1019                 break;
1020         default:
1021                 ret = -ENXIO;
1022                 break;
1023         }
1024
1025         return ret;
1026 }
1027
1028 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1029 {
1030         uint8_t *keys;
1031         uint64_t hva;
1032         unsigned long curkey;
1033         int i, r = 0;
1034
1035         if (args->flags != 0)
1036                 return -EINVAL;
1037
1038         /* Is this guest using storage keys? */
1039         if (!mm_use_skey(current->mm))
1040                 return KVM_S390_GET_SKEYS_NONE;
1041
1042         /* Enforce sane limit on memory allocation */
1043         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1044                 return -EINVAL;
1045
1046         keys = kmalloc_array(args->count, sizeof(uint8_t),
1047                              GFP_KERNEL | __GFP_NOWARN);
1048         if (!keys)
1049                 keys = vmalloc(sizeof(uint8_t) * args->count);
1050         if (!keys)
1051                 return -ENOMEM;
1052
1053         for (i = 0; i < args->count; i++) {
1054                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1055                 if (kvm_is_error_hva(hva)) {
1056                         r = -EFAULT;
1057                         goto out;
1058                 }
1059
1060                 curkey = get_guest_storage_key(current->mm, hva);
1061                 if (IS_ERR_VALUE(curkey)) {
1062                         r = curkey;
1063                         goto out;
1064                 }
1065                 keys[i] = curkey;
1066         }
1067
1068         r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1069                          sizeof(uint8_t) * args->count);
1070         if (r)
1071                 r = -EFAULT;
1072 out:
1073         kvfree(keys);
1074         return r;
1075 }
1076
1077 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1078 {
1079         uint8_t *keys;
1080         uint64_t hva;
1081         int i, r = 0;
1082
1083         if (args->flags != 0)
1084                 return -EINVAL;
1085
1086         /* Enforce sane limit on memory allocation */
1087         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1088                 return -EINVAL;
1089
1090         keys = kmalloc_array(args->count, sizeof(uint8_t),
1091                              GFP_KERNEL | __GFP_NOWARN);
1092         if (!keys)
1093                 keys = vmalloc(sizeof(uint8_t) * args->count);
1094         if (!keys)
1095                 return -ENOMEM;
1096
1097         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1098                            sizeof(uint8_t) * args->count);
1099         if (r) {
1100                 r = -EFAULT;
1101                 goto out;
1102         }
1103
1104         /* Enable storage key handling for the guest */
1105         r = s390_enable_skey();
1106         if (r)
1107                 goto out;
1108
1109         for (i = 0; i < args->count; i++) {
1110                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1111                 if (kvm_is_error_hva(hva)) {
1112                         r = -EFAULT;
1113                         goto out;
1114                 }
1115
1116                 /* Lowest order bit is reserved */
1117                 if (keys[i] & 0x01) {
1118                         r = -EINVAL;
1119                         goto out;
1120                 }
1121
1122                 r = set_guest_storage_key(current->mm, hva,
1123                                           (unsigned long)keys[i], 0);
1124                 if (r)
1125                         goto out;
1126         }
1127 out:
1128         kvfree(keys);
1129         return r;
1130 }
1131
1132 long kvm_arch_vm_ioctl(struct file *filp,
1133                        unsigned int ioctl, unsigned long arg)
1134 {
1135         struct kvm *kvm = filp->private_data;
1136         void __user *argp = (void __user *)arg;
1137         struct kvm_device_attr attr;
1138         int r;
1139
1140         switch (ioctl) {
1141         case KVM_S390_INTERRUPT: {
1142                 struct kvm_s390_interrupt s390int;
1143
1144                 r = -EFAULT;
1145                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1146                         break;
1147                 r = kvm_s390_inject_vm(kvm, &s390int);
1148                 break;
1149         }
1150         case KVM_ENABLE_CAP: {
1151                 struct kvm_enable_cap cap;
1152                 r = -EFAULT;
1153                 if (copy_from_user(&cap, argp, sizeof(cap)))
1154                         break;
1155                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1156                 break;
1157         }
1158         case KVM_CREATE_IRQCHIP: {
1159                 struct kvm_irq_routing_entry routing;
1160
1161                 r = -EINVAL;
1162                 if (kvm->arch.use_irqchip) {
1163                         /* Set up dummy routing. */
1164                         memset(&routing, 0, sizeof(routing));
1165                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1166                 }
1167                 break;
1168         }
1169         case KVM_SET_DEVICE_ATTR: {
1170                 r = -EFAULT;
1171                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1172                         break;
1173                 r = kvm_s390_vm_set_attr(kvm, &attr);
1174                 break;
1175         }
1176         case KVM_GET_DEVICE_ATTR: {
1177                 r = -EFAULT;
1178                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1179                         break;
1180                 r = kvm_s390_vm_get_attr(kvm, &attr);
1181                 break;
1182         }
1183         case KVM_HAS_DEVICE_ATTR: {
1184                 r = -EFAULT;
1185                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1186                         break;
1187                 r = kvm_s390_vm_has_attr(kvm, &attr);
1188                 break;
1189         }
1190         case KVM_S390_GET_SKEYS: {
1191                 struct kvm_s390_skeys args;
1192
1193                 r = -EFAULT;
1194                 if (copy_from_user(&args, argp,
1195                                    sizeof(struct kvm_s390_skeys)))
1196                         break;
1197                 r = kvm_s390_get_skeys(kvm, &args);
1198                 break;
1199         }
1200         case KVM_S390_SET_SKEYS: {
1201                 struct kvm_s390_skeys args;
1202
1203                 r = -EFAULT;
1204                 if (copy_from_user(&args, argp,
1205                                    sizeof(struct kvm_s390_skeys)))
1206                         break;
1207                 r = kvm_s390_set_skeys(kvm, &args);
1208                 break;
1209         }
1210         default:
1211                 r = -ENOTTY;
1212         }
1213
1214         return r;
1215 }
1216
1217 static int kvm_s390_query_ap_config(u8 *config)
1218 {
1219         u32 fcn_code = 0x04000000UL;
1220         u32 cc = 0;
1221
1222         memset(config, 0, 128);
1223         asm volatile(
1224                 "lgr 0,%1\n"
1225                 "lgr 2,%2\n"
1226                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1227                 "0: ipm %0\n"
1228                 "srl %0,28\n"
1229                 "1:\n"
1230                 EX_TABLE(0b, 1b)
1231                 : "+r" (cc)
1232                 : "r" (fcn_code), "r" (config)
1233                 : "cc", "0", "2", "memory"
1234         );
1235
1236         return cc;
1237 }
1238
1239 static int kvm_s390_apxa_installed(void)
1240 {
1241         u8 config[128];
1242         int cc;
1243
1244         if (test_facility(12)) {
1245                 cc = kvm_s390_query_ap_config(config);
1246
1247                 if (cc)
1248                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1249                 else
1250                         return config[0] & 0x40;
1251         }
1252
1253         return 0;
1254 }
1255
1256 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1257 {
1258         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1259
1260         if (kvm_s390_apxa_installed())
1261                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1262         else
1263                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1264 }
1265
1266 static u64 kvm_s390_get_initial_cpuid(void)
1267 {
1268         struct cpuid cpuid;
1269
1270         get_cpu_id(&cpuid);
1271         cpuid.version = 0xff;
1272         return *((u64 *) &cpuid);
1273 }
1274
1275 static void kvm_s390_crypto_init(struct kvm *kvm)
1276 {
1277         if (!test_kvm_facility(kvm, 76))
1278                 return;
1279
1280         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1281         kvm_s390_set_crycb_format(kvm);
1282
1283         /* Enable AES/DEA protected key functions by default */
1284         kvm->arch.crypto.aes_kw = 1;
1285         kvm->arch.crypto.dea_kw = 1;
1286         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1287                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1288         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1289                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1290 }
1291
1292 static void sca_dispose(struct kvm *kvm)
1293 {
1294         if (kvm->arch.use_esca)
1295                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1296         else
1297                 free_page((unsigned long)(kvm->arch.sca));
1298         kvm->arch.sca = NULL;
1299 }
1300
1301 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1302 {
1303         gfp_t alloc_flags = GFP_KERNEL;
1304         int i, rc;
1305         char debug_name[16];
1306         static unsigned long sca_offset;
1307
1308         rc = -EINVAL;
1309 #ifdef CONFIG_KVM_S390_UCONTROL
1310         if (type & ~KVM_VM_S390_UCONTROL)
1311                 goto out_err;
1312         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1313                 goto out_err;
1314 #else
1315         if (type)
1316                 goto out_err;
1317 #endif
1318
1319         rc = s390_enable_sie();
1320         if (rc)
1321                 goto out_err;
1322
1323         rc = -ENOMEM;
1324
1325         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1326
1327         kvm->arch.use_esca = 0; /* start with basic SCA */
1328         if (!sclp.has_64bscao)
1329                 alloc_flags |= GFP_DMA;
1330         rwlock_init(&kvm->arch.sca_lock);
1331         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1332         if (!kvm->arch.sca)
1333                 goto out_err;
1334         spin_lock(&kvm_lock);
1335         sca_offset += 16;
1336         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1337                 sca_offset = 0;
1338         kvm->arch.sca = (struct bsca_block *)
1339                         ((char *) kvm->arch.sca + sca_offset);
1340         spin_unlock(&kvm_lock);
1341
1342         sprintf(debug_name, "kvm-%u", current->pid);
1343
1344         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1345         if (!kvm->arch.dbf)
1346                 goto out_err;
1347
1348         kvm->arch.sie_page2 =
1349              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1350         if (!kvm->arch.sie_page2)
1351                 goto out_err;
1352
1353         /* Populate the facility mask initially. */
1354         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1355                S390_ARCH_FAC_LIST_SIZE_BYTE);
1356         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1357                 if (i < kvm_s390_fac_list_mask_size())
1358                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1359                 else
1360                         kvm->arch.model.fac_mask[i] = 0UL;
1361         }
1362
1363         /* Populate the facility list initially. */
1364         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1365         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1366                S390_ARCH_FAC_LIST_SIZE_BYTE);
1367
1368         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1369         set_kvm_facility(kvm->arch.model.fac_list, 74);
1370
1371         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1372         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1373
1374         kvm_s390_crypto_init(kvm);
1375
1376         spin_lock_init(&kvm->arch.float_int.lock);
1377         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1378                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1379         init_waitqueue_head(&kvm->arch.ipte_wq);
1380         mutex_init(&kvm->arch.ipte_mutex);
1381
1382         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1383         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1384
1385         if (type & KVM_VM_S390_UCONTROL) {
1386                 kvm->arch.gmap = NULL;
1387                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1388         } else {
1389                 if (sclp.hamax == U64_MAX)
1390                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1391                 else
1392                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1393                                                     sclp.hamax + 1);
1394                 kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1395                 if (!kvm->arch.gmap)
1396                         goto out_err;
1397                 kvm->arch.gmap->private = kvm;
1398                 kvm->arch.gmap->pfault_enabled = 0;
1399         }
1400
1401         kvm->arch.css_support = 0;
1402         kvm->arch.use_irqchip = 0;
1403         kvm->arch.epoch = 0;
1404
1405         spin_lock_init(&kvm->arch.start_stop_lock);
1406         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1407
1408         return 0;
1409 out_err:
1410         free_page((unsigned long)kvm->arch.sie_page2);
1411         debug_unregister(kvm->arch.dbf);
1412         sca_dispose(kvm);
1413         KVM_EVENT(3, "creation of vm failed: %d", rc);
1414         return rc;
1415 }
1416
1417 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1418 {
1419         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1420         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1421         kvm_s390_clear_local_irqs(vcpu);
1422         kvm_clear_async_pf_completion_queue(vcpu);
1423         if (!kvm_is_ucontrol(vcpu->kvm))
1424                 sca_del_vcpu(vcpu);
1425
1426         if (kvm_is_ucontrol(vcpu->kvm))
1427                 gmap_free(vcpu->arch.gmap);
1428
1429         if (vcpu->kvm->arch.use_cmma)
1430                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1431         free_page((unsigned long)(vcpu->arch.sie_block));
1432
1433         kvm_vcpu_uninit(vcpu);
1434         kmem_cache_free(kvm_vcpu_cache, vcpu);
1435 }
1436
1437 static void kvm_free_vcpus(struct kvm *kvm)
1438 {
1439         unsigned int i;
1440         struct kvm_vcpu *vcpu;
1441
1442         kvm_for_each_vcpu(i, vcpu, kvm)
1443                 kvm_arch_vcpu_destroy(vcpu);
1444
1445         mutex_lock(&kvm->lock);
1446         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1447                 kvm->vcpus[i] = NULL;
1448
1449         atomic_set(&kvm->online_vcpus, 0);
1450         mutex_unlock(&kvm->lock);
1451 }
1452
1453 void kvm_arch_destroy_vm(struct kvm *kvm)
1454 {
1455         kvm_free_vcpus(kvm);
1456         sca_dispose(kvm);
1457         debug_unregister(kvm->arch.dbf);
1458         free_page((unsigned long)kvm->arch.sie_page2);
1459         if (!kvm_is_ucontrol(kvm))
1460                 gmap_free(kvm->arch.gmap);
1461         kvm_s390_destroy_adapters(kvm);
1462         kvm_s390_clear_float_irqs(kvm);
1463         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1464 }
1465
1466 /* Section: vcpu related */
1467 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1468 {
1469         vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1470         if (!vcpu->arch.gmap)
1471                 return -ENOMEM;
1472         vcpu->arch.gmap->private = vcpu->kvm;
1473
1474         return 0;
1475 }
1476
1477 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1478 {
1479         read_lock(&vcpu->kvm->arch.sca_lock);
1480         if (vcpu->kvm->arch.use_esca) {
1481                 struct esca_block *sca = vcpu->kvm->arch.sca;
1482
1483                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1484                 sca->cpu[vcpu->vcpu_id].sda = 0;
1485         } else {
1486                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1487
1488                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1489                 sca->cpu[vcpu->vcpu_id].sda = 0;
1490         }
1491         read_unlock(&vcpu->kvm->arch.sca_lock);
1492 }
1493
1494 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1495 {
1496         read_lock(&vcpu->kvm->arch.sca_lock);
1497         if (vcpu->kvm->arch.use_esca) {
1498                 struct esca_block *sca = vcpu->kvm->arch.sca;
1499
1500                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1501                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1502                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1503                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1504                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1505         } else {
1506                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1507
1508                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1509                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1510                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1511                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1512         }
1513         read_unlock(&vcpu->kvm->arch.sca_lock);
1514 }
1515
1516 /* Basic SCA to Extended SCA data copy routines */
1517 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1518 {
1519         d->sda = s->sda;
1520         d->sigp_ctrl.c = s->sigp_ctrl.c;
1521         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1522 }
1523
1524 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1525 {
1526         int i;
1527
1528         d->ipte_control = s->ipte_control;
1529         d->mcn[0] = s->mcn;
1530         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1531                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1532 }
1533
1534 static int sca_switch_to_extended(struct kvm *kvm)
1535 {
1536         struct bsca_block *old_sca = kvm->arch.sca;
1537         struct esca_block *new_sca;
1538         struct kvm_vcpu *vcpu;
1539         unsigned int vcpu_idx;
1540         u32 scaol, scaoh;
1541
1542         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1543         if (!new_sca)
1544                 return -ENOMEM;
1545
1546         scaoh = (u32)((u64)(new_sca) >> 32);
1547         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1548
1549         kvm_s390_vcpu_block_all(kvm);
1550         write_lock(&kvm->arch.sca_lock);
1551
1552         sca_copy_b_to_e(new_sca, old_sca);
1553
1554         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1555                 vcpu->arch.sie_block->scaoh = scaoh;
1556                 vcpu->arch.sie_block->scaol = scaol;
1557                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1558         }
1559         kvm->arch.sca = new_sca;
1560         kvm->arch.use_esca = 1;
1561
1562         write_unlock(&kvm->arch.sca_lock);
1563         kvm_s390_vcpu_unblock_all(kvm);
1564
1565         free_page((unsigned long)old_sca);
1566
1567         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1568                  old_sca, kvm->arch.sca);
1569         return 0;
1570 }
1571
1572 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1573 {
1574         int rc;
1575
1576         if (id < KVM_S390_BSCA_CPU_SLOTS)
1577                 return true;
1578         if (!sclp.has_esca || !sclp.has_64bscao)
1579                 return false;
1580
1581         mutex_lock(&kvm->lock);
1582         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1583         mutex_unlock(&kvm->lock);
1584
1585         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1586 }
1587
1588 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1589 {
1590         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1591         kvm_clear_async_pf_completion_queue(vcpu);
1592         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1593                                     KVM_SYNC_GPRS |
1594                                     KVM_SYNC_ACRS |
1595                                     KVM_SYNC_CRS |
1596                                     KVM_SYNC_ARCH0 |
1597                                     KVM_SYNC_PFAULT;
1598         if (test_kvm_facility(vcpu->kvm, 64))
1599                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1600         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1601          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1602          */
1603         if (MACHINE_HAS_VX)
1604                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1605         else
1606                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1607
1608         if (kvm_is_ucontrol(vcpu->kvm))
1609                 return __kvm_ucontrol_vcpu_init(vcpu);
1610
1611         return 0;
1612 }
1613
1614 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1615 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1616 {
1617         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1618         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1619         vcpu->arch.cputm_start = get_tod_clock_fast();
1620         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1621 }
1622
1623 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1624 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1625 {
1626         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1627         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1628         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1629         vcpu->arch.cputm_start = 0;
1630         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1631 }
1632
1633 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1634 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1635 {
1636         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1637         vcpu->arch.cputm_enabled = true;
1638         __start_cpu_timer_accounting(vcpu);
1639 }
1640
1641 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1642 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1643 {
1644         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1645         __stop_cpu_timer_accounting(vcpu);
1646         vcpu->arch.cputm_enabled = false;
1647 }
1648
1649 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1650 {
1651         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1652         __enable_cpu_timer_accounting(vcpu);
1653         preempt_enable();
1654 }
1655
1656 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1657 {
1658         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1659         __disable_cpu_timer_accounting(vcpu);
1660         preempt_enable();
1661 }
1662
1663 /* set the cpu timer - may only be called from the VCPU thread itself */
1664 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1665 {
1666         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1667         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1668         if (vcpu->arch.cputm_enabled)
1669                 vcpu->arch.cputm_start = get_tod_clock_fast();
1670         vcpu->arch.sie_block->cputm = cputm;
1671         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1672         preempt_enable();
1673 }
1674
1675 /* update and get the cpu timer - can also be called from other VCPU threads */
1676 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1677 {
1678         unsigned int seq;
1679         __u64 value;
1680
1681         if (unlikely(!vcpu->arch.cputm_enabled))
1682                 return vcpu->arch.sie_block->cputm;
1683
1684         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1685         do {
1686                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1687                 /*
1688                  * If the writer would ever execute a read in the critical
1689                  * section, e.g. in irq context, we have a deadlock.
1690                  */
1691                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1692                 value = vcpu->arch.sie_block->cputm;
1693                 /* if cputm_start is 0, accounting is being started/stopped */
1694                 if (likely(vcpu->arch.cputm_start))
1695                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1696         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1697         preempt_enable();
1698         return value;
1699 }
1700
1701 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1702 {
1703         /* Save host register state */
1704         save_fpu_regs();
1705         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1706         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1707
1708         if (MACHINE_HAS_VX)
1709                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1710         else
1711                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1712         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1713         if (test_fp_ctl(current->thread.fpu.fpc))
1714                 /* User space provided an invalid FPC, let's clear it */
1715                 current->thread.fpu.fpc = 0;
1716
1717         save_access_regs(vcpu->arch.host_acrs);
1718         restore_access_regs(vcpu->run->s.regs.acrs);
1719         gmap_enable(vcpu->arch.gmap);
1720         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1721         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1722                 __start_cpu_timer_accounting(vcpu);
1723         vcpu->cpu = cpu;
1724 }
1725
1726 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1727 {
1728         vcpu->cpu = -1;
1729         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1730                 __stop_cpu_timer_accounting(vcpu);
1731         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1732         gmap_disable(vcpu->arch.gmap);
1733
1734         /* Save guest register state */
1735         save_fpu_regs();
1736         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1737
1738         /* Restore host register state */
1739         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1740         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1741
1742         save_access_regs(vcpu->run->s.regs.acrs);
1743         restore_access_regs(vcpu->arch.host_acrs);
1744 }
1745
1746 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1747 {
1748         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1749         vcpu->arch.sie_block->gpsw.mask = 0UL;
1750         vcpu->arch.sie_block->gpsw.addr = 0UL;
1751         kvm_s390_set_prefix(vcpu, 0);
1752         kvm_s390_set_cpu_timer(vcpu, 0);
1753         vcpu->arch.sie_block->ckc       = 0UL;
1754         vcpu->arch.sie_block->todpr     = 0;
1755         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1756         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1757         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1758         /* make sure the new fpc will be lazily loaded */
1759         save_fpu_regs();
1760         current->thread.fpu.fpc = 0;
1761         vcpu->arch.sie_block->gbea = 1;
1762         vcpu->arch.sie_block->pp = 0;
1763         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1764         kvm_clear_async_pf_completion_queue(vcpu);
1765         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1766                 kvm_s390_vcpu_stop(vcpu);
1767         kvm_s390_clear_local_irqs(vcpu);
1768 }
1769
1770 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1771 {
1772         mutex_lock(&vcpu->kvm->lock);
1773         preempt_disable();
1774         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1775         preempt_enable();
1776         mutex_unlock(&vcpu->kvm->lock);
1777         if (!kvm_is_ucontrol(vcpu->kvm)) {
1778                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1779                 sca_add_vcpu(vcpu);
1780         }
1781
1782 }
1783
1784 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1785 {
1786         if (!test_kvm_facility(vcpu->kvm, 76))
1787                 return;
1788
1789         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1790
1791         if (vcpu->kvm->arch.crypto.aes_kw)
1792                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1793         if (vcpu->kvm->arch.crypto.dea_kw)
1794                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1795
1796         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1797 }
1798
1799 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1800 {
1801         free_page(vcpu->arch.sie_block->cbrlo);
1802         vcpu->arch.sie_block->cbrlo = 0;
1803 }
1804
1805 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1806 {
1807         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1808         if (!vcpu->arch.sie_block->cbrlo)
1809                 return -ENOMEM;
1810
1811         vcpu->arch.sie_block->ecb2 |= 0x80;
1812         vcpu->arch.sie_block->ecb2 &= ~0x08;
1813         return 0;
1814 }
1815
1816 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1817 {
1818         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1819
1820         vcpu->arch.sie_block->ibc = model->ibc;
1821         if (test_kvm_facility(vcpu->kvm, 7))
1822                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1823 }
1824
1825 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1826 {
1827         int rc = 0;
1828
1829         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1830                                                     CPUSTAT_SM |
1831                                                     CPUSTAT_STOPPED);
1832
1833         if (test_kvm_facility(vcpu->kvm, 78))
1834                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1835         else if (test_kvm_facility(vcpu->kvm, 8))
1836                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1837
1838         kvm_s390_vcpu_setup_model(vcpu);
1839
1840         vcpu->arch.sie_block->ecb = 0x02;
1841         if (test_kvm_facility(vcpu->kvm, 9))
1842                 vcpu->arch.sie_block->ecb |= 0x04;
1843         if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1844                 vcpu->arch.sie_block->ecb |= 0x10;
1845
1846         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1847                 vcpu->arch.sie_block->ecb2 |= 0x08;
1848         vcpu->arch.sie_block->eca = 0x1002000U;
1849         if (sclp.has_cei)
1850                 vcpu->arch.sie_block->eca |= 0x80000000U;
1851         if (sclp.has_ib)
1852                 vcpu->arch.sie_block->eca |= 0x40000000U;
1853         if (sclp.has_siif)
1854                 vcpu->arch.sie_block->eca |= 1;
1855         if (sclp.has_sigpif)
1856                 vcpu->arch.sie_block->eca |= 0x10000000U;
1857         if (test_kvm_facility(vcpu->kvm, 64))
1858                 vcpu->arch.sie_block->ecb3 |= 0x01;
1859         if (test_kvm_facility(vcpu->kvm, 129)) {
1860                 vcpu->arch.sie_block->eca |= 0x00020000;
1861                 vcpu->arch.sie_block->ecd |= 0x20000000;
1862         }
1863         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1864         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1865         if (test_kvm_facility(vcpu->kvm, 74))
1866                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1867
1868         if (vcpu->kvm->arch.use_cmma) {
1869                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1870                 if (rc)
1871                         return rc;
1872         }
1873         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1874         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1875
1876         kvm_s390_vcpu_crypto_setup(vcpu);
1877
1878         return rc;
1879 }
1880
1881 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1882                                       unsigned int id)
1883 {
1884         struct kvm_vcpu *vcpu;
1885         struct sie_page *sie_page;
1886         int rc = -EINVAL;
1887
1888         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1889                 goto out;
1890
1891         rc = -ENOMEM;
1892
1893         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1894         if (!vcpu)
1895                 goto out;
1896
1897         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1898         if (!sie_page)
1899                 goto out_free_cpu;
1900
1901         vcpu->arch.sie_block = &sie_page->sie_block;
1902         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1903
1904         /* the real guest size will always be smaller than msl */
1905         vcpu->arch.sie_block->mso = 0;
1906         vcpu->arch.sie_block->msl = sclp.hamax;
1907
1908         vcpu->arch.sie_block->icpua = id;
1909         spin_lock_init(&vcpu->arch.local_int.lock);
1910         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1911         vcpu->arch.local_int.wq = &vcpu->wq;
1912         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1913         seqcount_init(&vcpu->arch.cputm_seqcount);
1914
1915         rc = kvm_vcpu_init(vcpu, kvm, id);
1916         if (rc)
1917                 goto out_free_sie_block;
1918         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1919                  vcpu->arch.sie_block);
1920         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1921
1922         return vcpu;
1923 out_free_sie_block:
1924         free_page((unsigned long)(vcpu->arch.sie_block));
1925 out_free_cpu:
1926         kmem_cache_free(kvm_vcpu_cache, vcpu);
1927 out:
1928         return ERR_PTR(rc);
1929 }
1930
1931 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1932 {
1933         return kvm_s390_vcpu_has_irq(vcpu, 0);
1934 }
1935
1936 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1937 {
1938         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1939         exit_sie(vcpu);
1940 }
1941
1942 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1943 {
1944         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1945 }
1946
1947 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1948 {
1949         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1950         exit_sie(vcpu);
1951 }
1952
1953 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1954 {
1955         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1956 }
1957
1958 /*
1959  * Kick a guest cpu out of SIE and wait until SIE is not running.
1960  * If the CPU is not running (e.g. waiting as idle) the function will
1961  * return immediately. */
1962 void exit_sie(struct kvm_vcpu *vcpu)
1963 {
1964         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1965         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1966                 cpu_relax();
1967 }
1968
1969 /* Kick a guest cpu out of SIE to process a request synchronously */
1970 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1971 {
1972         kvm_make_request(req, vcpu);
1973         kvm_s390_vcpu_request(vcpu);
1974 }
1975
1976 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1977 {
1978         int i;
1979         struct kvm *kvm = gmap->private;
1980         struct kvm_vcpu *vcpu;
1981
1982         kvm_for_each_vcpu(i, vcpu, kvm) {
1983                 /* match against both prefix pages */
1984                 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1985                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1986                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1987                 }
1988         }
1989 }
1990
1991 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1992 {
1993         /* kvm common code refers to this, but never calls it */
1994         BUG();
1995         return 0;
1996 }
1997
1998 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1999                                            struct kvm_one_reg *reg)
2000 {
2001         int r = -EINVAL;
2002
2003         switch (reg->id) {
2004         case KVM_REG_S390_TODPR:
2005                 r = put_user(vcpu->arch.sie_block->todpr,
2006                              (u32 __user *)reg->addr);
2007                 break;
2008         case KVM_REG_S390_EPOCHDIFF:
2009                 r = put_user(vcpu->arch.sie_block->epoch,
2010                              (u64 __user *)reg->addr);
2011                 break;
2012         case KVM_REG_S390_CPU_TIMER:
2013                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2014                              (u64 __user *)reg->addr);
2015                 break;
2016         case KVM_REG_S390_CLOCK_COMP:
2017                 r = put_user(vcpu->arch.sie_block->ckc,
2018                              (u64 __user *)reg->addr);
2019                 break;
2020         case KVM_REG_S390_PFTOKEN:
2021                 r = put_user(vcpu->arch.pfault_token,
2022                              (u64 __user *)reg->addr);
2023                 break;
2024         case KVM_REG_S390_PFCOMPARE:
2025                 r = put_user(vcpu->arch.pfault_compare,
2026                              (u64 __user *)reg->addr);
2027                 break;
2028         case KVM_REG_S390_PFSELECT:
2029                 r = put_user(vcpu->arch.pfault_select,
2030                              (u64 __user *)reg->addr);
2031                 break;
2032         case KVM_REG_S390_PP:
2033                 r = put_user(vcpu->arch.sie_block->pp,
2034                              (u64 __user *)reg->addr);
2035                 break;
2036         case KVM_REG_S390_GBEA:
2037                 r = put_user(vcpu->arch.sie_block->gbea,
2038                              (u64 __user *)reg->addr);
2039                 break;
2040         default:
2041                 break;
2042         }
2043
2044         return r;
2045 }
2046
2047 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2048                                            struct kvm_one_reg *reg)
2049 {
2050         int r = -EINVAL;
2051         __u64 val;
2052
2053         switch (reg->id) {
2054         case KVM_REG_S390_TODPR:
2055                 r = get_user(vcpu->arch.sie_block->todpr,
2056                              (u32 __user *)reg->addr);
2057                 break;
2058         case KVM_REG_S390_EPOCHDIFF:
2059                 r = get_user(vcpu->arch.sie_block->epoch,
2060                              (u64 __user *)reg->addr);
2061                 break;
2062         case KVM_REG_S390_CPU_TIMER:
2063                 r = get_user(val, (u64 __user *)reg->addr);
2064                 if (!r)
2065                         kvm_s390_set_cpu_timer(vcpu, val);
2066                 break;
2067         case KVM_REG_S390_CLOCK_COMP:
2068                 r = get_user(vcpu->arch.sie_block->ckc,
2069                              (u64 __user *)reg->addr);
2070                 break;
2071         case KVM_REG_S390_PFTOKEN:
2072                 r = get_user(vcpu->arch.pfault_token,
2073                              (u64 __user *)reg->addr);
2074                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2075                         kvm_clear_async_pf_completion_queue(vcpu);
2076                 break;
2077         case KVM_REG_S390_PFCOMPARE:
2078                 r = get_user(vcpu->arch.pfault_compare,
2079                              (u64 __user *)reg->addr);
2080                 break;
2081         case KVM_REG_S390_PFSELECT:
2082                 r = get_user(vcpu->arch.pfault_select,
2083                              (u64 __user *)reg->addr);
2084                 break;
2085         case KVM_REG_S390_PP:
2086                 r = get_user(vcpu->arch.sie_block->pp,
2087                              (u64 __user *)reg->addr);
2088                 break;
2089         case KVM_REG_S390_GBEA:
2090                 r = get_user(vcpu->arch.sie_block->gbea,
2091                              (u64 __user *)reg->addr);
2092                 break;
2093         default:
2094                 break;
2095         }
2096
2097         return r;
2098 }
2099
2100 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2101 {
2102         kvm_s390_vcpu_initial_reset(vcpu);
2103         return 0;
2104 }
2105
2106 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2107 {
2108         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2109         return 0;
2110 }
2111
2112 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2113 {
2114         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2115         return 0;
2116 }
2117
2118 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2119                                   struct kvm_sregs *sregs)
2120 {
2121         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2122         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2123         restore_access_regs(vcpu->run->s.regs.acrs);
2124         return 0;
2125 }
2126
2127 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2128                                   struct kvm_sregs *sregs)
2129 {
2130         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2131         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2132         return 0;
2133 }
2134
2135 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2136 {
2137         /* make sure the new values will be lazily loaded */
2138         save_fpu_regs();
2139         if (test_fp_ctl(fpu->fpc))
2140                 return -EINVAL;
2141         current->thread.fpu.fpc = fpu->fpc;
2142         if (MACHINE_HAS_VX)
2143                 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
2144         else
2145                 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
2146         return 0;
2147 }
2148
2149 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2150 {
2151         /* make sure we have the latest values */
2152         save_fpu_regs();
2153         if (MACHINE_HAS_VX)
2154                 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
2155         else
2156                 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
2157         fpu->fpc = current->thread.fpu.fpc;
2158         return 0;
2159 }
2160
2161 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2162 {
2163         int rc = 0;
2164
2165         if (!is_vcpu_stopped(vcpu))
2166                 rc = -EBUSY;
2167         else {
2168                 vcpu->run->psw_mask = psw.mask;
2169                 vcpu->run->psw_addr = psw.addr;
2170         }
2171         return rc;
2172 }
2173
2174 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2175                                   struct kvm_translation *tr)
2176 {
2177         return -EINVAL; /* not implemented yet */
2178 }
2179
2180 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2181                               KVM_GUESTDBG_USE_HW_BP | \
2182                               KVM_GUESTDBG_ENABLE)
2183
2184 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2185                                         struct kvm_guest_debug *dbg)
2186 {
2187         int rc = 0;
2188
2189         vcpu->guest_debug = 0;
2190         kvm_s390_clear_bp_data(vcpu);
2191
2192         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2193                 return -EINVAL;
2194         if (!sclp.has_gpere)
2195                 return -EINVAL;
2196
2197         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2198                 vcpu->guest_debug = dbg->control;
2199                 /* enforce guest PER */
2200                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2201
2202                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2203                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2204         } else {
2205                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2206                 vcpu->arch.guestdbg.last_bp = 0;
2207         }
2208
2209         if (rc) {
2210                 vcpu->guest_debug = 0;
2211                 kvm_s390_clear_bp_data(vcpu);
2212                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2213         }
2214
2215         return rc;
2216 }
2217
2218 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2219                                     struct kvm_mp_state *mp_state)
2220 {
2221         /* CHECK_STOP and LOAD are not supported yet */
2222         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2223                                        KVM_MP_STATE_OPERATING;
2224 }
2225
2226 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2227                                     struct kvm_mp_state *mp_state)
2228 {
2229         int rc = 0;
2230
2231         /* user space knows about this interface - let it control the state */
2232         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2233
2234         switch (mp_state->mp_state) {
2235         case KVM_MP_STATE_STOPPED:
2236                 kvm_s390_vcpu_stop(vcpu);
2237                 break;
2238         case KVM_MP_STATE_OPERATING:
2239                 kvm_s390_vcpu_start(vcpu);
2240                 break;
2241         case KVM_MP_STATE_LOAD:
2242         case KVM_MP_STATE_CHECK_STOP:
2243                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2244         default:
2245                 rc = -ENXIO;
2246         }
2247
2248         return rc;
2249 }
2250
2251 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2252 {
2253         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2254 }
2255
2256 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2257 {
2258 retry:
2259         kvm_s390_vcpu_request_handled(vcpu);
2260         if (!vcpu->requests)
2261                 return 0;
2262         /*
2263          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2264          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
2265          * This ensures that the ipte instruction for this request has
2266          * already finished. We might race against a second unmapper that
2267          * wants to set the blocking bit. Lets just retry the request loop.
2268          */
2269         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2270                 int rc;
2271                 rc = gmap_ipte_notify(vcpu->arch.gmap,
2272                                       kvm_s390_get_prefix(vcpu),
2273                                       PAGE_SIZE * 2);
2274                 if (rc)
2275                         return rc;
2276                 goto retry;
2277         }
2278
2279         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2280                 vcpu->arch.sie_block->ihcpu = 0xffff;
2281                 goto retry;
2282         }
2283
2284         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2285                 if (!ibs_enabled(vcpu)) {
2286                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2287                         atomic_or(CPUSTAT_IBS,
2288                                         &vcpu->arch.sie_block->cpuflags);
2289                 }
2290                 goto retry;
2291         }
2292
2293         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2294                 if (ibs_enabled(vcpu)) {
2295                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2296                         atomic_andnot(CPUSTAT_IBS,
2297                                           &vcpu->arch.sie_block->cpuflags);
2298                 }
2299                 goto retry;
2300         }
2301
2302         /* nothing to do, just clear the request */
2303         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2304
2305         return 0;
2306 }
2307
2308 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2309 {
2310         struct kvm_vcpu *vcpu;
2311         int i;
2312
2313         mutex_lock(&kvm->lock);
2314         preempt_disable();
2315         kvm->arch.epoch = tod - get_tod_clock();
2316         kvm_s390_vcpu_block_all(kvm);
2317         kvm_for_each_vcpu(i, vcpu, kvm)
2318                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2319         kvm_s390_vcpu_unblock_all(kvm);
2320         preempt_enable();
2321         mutex_unlock(&kvm->lock);
2322 }
2323
2324 /**
2325  * kvm_arch_fault_in_page - fault-in guest page if necessary
2326  * @vcpu: The corresponding virtual cpu
2327  * @gpa: Guest physical address
2328  * @writable: Whether the page should be writable or not
2329  *
2330  * Make sure that a guest page has been faulted-in on the host.
2331  *
2332  * Return: Zero on success, negative error code otherwise.
2333  */
2334 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2335 {
2336         return gmap_fault(vcpu->arch.gmap, gpa,
2337                           writable ? FAULT_FLAG_WRITE : 0);
2338 }
2339
2340 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2341                                       unsigned long token)
2342 {
2343         struct kvm_s390_interrupt inti;
2344         struct kvm_s390_irq irq;
2345
2346         if (start_token) {
2347                 irq.u.ext.ext_params2 = token;
2348                 irq.type = KVM_S390_INT_PFAULT_INIT;
2349                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2350         } else {
2351                 inti.type = KVM_S390_INT_PFAULT_DONE;
2352                 inti.parm64 = token;
2353                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2354         }
2355 }
2356
2357 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2358                                      struct kvm_async_pf *work)
2359 {
2360         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2361         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2362 }
2363
2364 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2365                                  struct kvm_async_pf *work)
2366 {
2367         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2368         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2369 }
2370
2371 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2372                                struct kvm_async_pf *work)
2373 {
2374         /* s390 will always inject the page directly */
2375 }
2376
2377 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2378 {
2379         /*
2380          * s390 will always inject the page directly,
2381          * but we still want check_async_completion to cleanup
2382          */
2383         return true;
2384 }
2385
2386 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2387 {
2388         hva_t hva;
2389         struct kvm_arch_async_pf arch;
2390         int rc;
2391
2392         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2393                 return 0;
2394         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2395             vcpu->arch.pfault_compare)
2396                 return 0;
2397         if (psw_extint_disabled(vcpu))
2398                 return 0;
2399         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2400                 return 0;
2401         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2402                 return 0;
2403         if (!vcpu->arch.gmap->pfault_enabled)
2404                 return 0;
2405
2406         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2407         hva += current->thread.gmap_addr & ~PAGE_MASK;
2408         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2409                 return 0;
2410
2411         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2412         return rc;
2413 }
2414
2415 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2416 {
2417         int rc, cpuflags;
2418
2419         /*
2420          * On s390 notifications for arriving pages will be delivered directly
2421          * to the guest but the house keeping for completed pfaults is
2422          * handled outside the worker.
2423          */
2424         kvm_check_async_pf_completion(vcpu);
2425
2426         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2427         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2428
2429         if (need_resched())
2430                 schedule();
2431
2432         if (test_cpu_flag(CIF_MCCK_PENDING))
2433                 s390_handle_mcck();
2434
2435         if (!kvm_is_ucontrol(vcpu->kvm)) {
2436                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2437                 if (rc)
2438                         return rc;
2439         }
2440
2441         rc = kvm_s390_handle_requests(vcpu);
2442         if (rc)
2443                 return rc;
2444
2445         if (guestdbg_enabled(vcpu)) {
2446                 kvm_s390_backup_guest_per_regs(vcpu);
2447                 kvm_s390_patch_guest_per_regs(vcpu);
2448         }
2449
2450         vcpu->arch.sie_block->icptcode = 0;
2451         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2452         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2453         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2454
2455         return 0;
2456 }
2457
2458 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2459 {
2460         struct kvm_s390_pgm_info pgm_info = {
2461                 .code = PGM_ADDRESSING,
2462         };
2463         u8 opcode, ilen;
2464         int rc;
2465
2466         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2467         trace_kvm_s390_sie_fault(vcpu);
2468
2469         /*
2470          * We want to inject an addressing exception, which is defined as a
2471          * suppressing or terminating exception. However, since we came here
2472          * by a DAT access exception, the PSW still points to the faulting
2473          * instruction since DAT exceptions are nullifying. So we've got
2474          * to look up the current opcode to get the length of the instruction
2475          * to be able to forward the PSW.
2476          */
2477         rc = read_guest_instr(vcpu, &opcode, 1);
2478         ilen = insn_length(opcode);
2479         if (rc < 0) {
2480                 return rc;
2481         } else if (rc) {
2482                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2483                  * Forward by arbitrary ilc, injection will take care of
2484                  * nullification if necessary.
2485                  */
2486                 pgm_info = vcpu->arch.pgm;
2487                 ilen = 4;
2488         }
2489         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2490         kvm_s390_forward_psw(vcpu, ilen);
2491         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2492 }
2493
2494 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2495 {
2496         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2497                    vcpu->arch.sie_block->icptcode);
2498         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2499
2500         if (guestdbg_enabled(vcpu))
2501                 kvm_s390_restore_guest_per_regs(vcpu);
2502
2503         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2504         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2505
2506         if (vcpu->arch.sie_block->icptcode > 0) {
2507                 int rc = kvm_handle_sie_intercept(vcpu);
2508
2509                 if (rc != -EOPNOTSUPP)
2510                         return rc;
2511                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2512                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2513                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2514                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2515                 return -EREMOTE;
2516         } else if (exit_reason != -EFAULT) {
2517                 vcpu->stat.exit_null++;
2518                 return 0;
2519         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2520                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2521                 vcpu->run->s390_ucontrol.trans_exc_code =
2522                                                 current->thread.gmap_addr;
2523                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2524                 return -EREMOTE;
2525         } else if (current->thread.gmap_pfault) {
2526                 trace_kvm_s390_major_guest_pfault(vcpu);
2527                 current->thread.gmap_pfault = 0;
2528                 if (kvm_arch_setup_async_pf(vcpu))
2529                         return 0;
2530                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2531         }
2532         return vcpu_post_run_fault_in_sie(vcpu);
2533 }
2534
2535 static int __vcpu_run(struct kvm_vcpu *vcpu)
2536 {
2537         int rc, exit_reason;
2538
2539         /*
2540          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2541          * ning the guest), so that memslots (and other stuff) are protected
2542          */
2543         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2544
2545         do {
2546                 rc = vcpu_pre_run(vcpu);
2547                 if (rc)
2548                         break;
2549
2550                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2551                 /*
2552                  * As PF_VCPU will be used in fault handler, between
2553                  * guest_enter and guest_exit should be no uaccess.
2554                  */
2555                 local_irq_disable();
2556                 __kvm_guest_enter();
2557                 __disable_cpu_timer_accounting(vcpu);
2558                 local_irq_enable();
2559                 exit_reason = sie64a(vcpu->arch.sie_block,
2560                                      vcpu->run->s.regs.gprs);
2561                 local_irq_disable();
2562                 __enable_cpu_timer_accounting(vcpu);
2563                 __kvm_guest_exit();
2564                 local_irq_enable();
2565                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2566
2567                 rc = vcpu_post_run(vcpu, exit_reason);
2568         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2569
2570         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2571         return rc;
2572 }
2573
2574 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2575 {
2576         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2577         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2578         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2579                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2580         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2581                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2582                 /* some control register changes require a tlb flush */
2583                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2584         }
2585         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2586                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2587                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2588                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2589                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2590                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2591         }
2592         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2593                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2594                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2595                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2596                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2597                         kvm_clear_async_pf_completion_queue(vcpu);
2598         }
2599         kvm_run->kvm_dirty_regs = 0;
2600 }
2601
2602 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2603 {
2604         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2605         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2606         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2607         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2608         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2609         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2610         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2611         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2612         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2613         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2614         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2615         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2616 }
2617
2618 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2619 {
2620         int rc;
2621         sigset_t sigsaved;
2622
2623         if (guestdbg_exit_pending(vcpu)) {
2624                 kvm_s390_prepare_debug_exit(vcpu);
2625                 return 0;
2626         }
2627
2628         if (vcpu->sigset_active)
2629                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2630
2631         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2632                 kvm_s390_vcpu_start(vcpu);
2633         } else if (is_vcpu_stopped(vcpu)) {
2634                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2635                                    vcpu->vcpu_id);
2636                 return -EINVAL;
2637         }
2638
2639         sync_regs(vcpu, kvm_run);
2640         enable_cpu_timer_accounting(vcpu);
2641
2642         might_fault();
2643         rc = __vcpu_run(vcpu);
2644
2645         if (signal_pending(current) && !rc) {
2646                 kvm_run->exit_reason = KVM_EXIT_INTR;
2647                 rc = -EINTR;
2648         }
2649
2650         if (guestdbg_exit_pending(vcpu) && !rc)  {
2651                 kvm_s390_prepare_debug_exit(vcpu);
2652                 rc = 0;
2653         }
2654
2655         if (rc == -EREMOTE) {
2656                 /* userspace support is needed, kvm_run has been prepared */
2657                 rc = 0;
2658         }
2659
2660         disable_cpu_timer_accounting(vcpu);
2661         store_regs(vcpu, kvm_run);
2662
2663         if (vcpu->sigset_active)
2664                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2665
2666         vcpu->stat.exit_userspace++;
2667         return rc;
2668 }
2669
2670 /*
2671  * store status at address
2672  * we use have two special cases:
2673  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2674  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2675  */
2676 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2677 {
2678         unsigned char archmode = 1;
2679         freg_t fprs[NUM_FPRS];
2680         unsigned int px;
2681         u64 clkcomp, cputm;
2682         int rc;
2683
2684         px = kvm_s390_get_prefix(vcpu);
2685         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2686                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2687                         return -EFAULT;
2688                 gpa = 0;
2689         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2690                 if (write_guest_real(vcpu, 163, &archmode, 1))
2691                         return -EFAULT;
2692                 gpa = px;
2693         } else
2694                 gpa -= __LC_FPREGS_SAVE_AREA;
2695
2696         /* manually convert vector registers if necessary */
2697         if (MACHINE_HAS_VX) {
2698                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2699                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2700                                      fprs, 128);
2701         } else {
2702                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2703                                      vcpu->run->s.regs.fprs, 128);
2704         }
2705         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2706                               vcpu->run->s.regs.gprs, 128);
2707         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2708                               &vcpu->arch.sie_block->gpsw, 16);
2709         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2710                               &px, 4);
2711         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2712                               &vcpu->run->s.regs.fpc, 4);
2713         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2714                               &vcpu->arch.sie_block->todpr, 4);
2715         cputm = kvm_s390_get_cpu_timer(vcpu);
2716         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2717                               &cputm, 8);
2718         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2719         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2720                               &clkcomp, 8);
2721         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2722                               &vcpu->run->s.regs.acrs, 64);
2723         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2724                               &vcpu->arch.sie_block->gcr, 128);
2725         return rc ? -EFAULT : 0;
2726 }
2727
2728 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2729 {
2730         /*
2731          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2732          * copying in vcpu load/put. Lets update our copies before we save
2733          * it into the save area
2734          */
2735         save_fpu_regs();
2736         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2737         save_access_regs(vcpu->run->s.regs.acrs);
2738
2739         return kvm_s390_store_status_unloaded(vcpu, addr);
2740 }
2741
2742 /*
2743  * store additional status at address
2744  */
2745 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2746                                         unsigned long gpa)
2747 {
2748         /* Only bits 0-53 are used for address formation */
2749         if (!(gpa & ~0x3ff))
2750                 return 0;
2751
2752         return write_guest_abs(vcpu, gpa & ~0x3ff,
2753                                (void *)&vcpu->run->s.regs.vrs, 512);
2754 }
2755
2756 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2757 {
2758         if (!test_kvm_facility(vcpu->kvm, 129))
2759                 return 0;
2760
2761         /*
2762          * The guest VXRS are in the host VXRs due to the lazy
2763          * copying in vcpu load/put. We can simply call save_fpu_regs()
2764          * to save the current register state because we are in the
2765          * middle of a load/put cycle.
2766          *
2767          * Let's update our copies before we save it into the save area.
2768          */
2769         save_fpu_regs();
2770
2771         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2772 }
2773
2774 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2775 {
2776         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2777         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2778 }
2779
2780 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2781 {
2782         unsigned int i;
2783         struct kvm_vcpu *vcpu;
2784
2785         kvm_for_each_vcpu(i, vcpu, kvm) {
2786                 __disable_ibs_on_vcpu(vcpu);
2787         }
2788 }
2789
2790 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2791 {
2792         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2793         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2794 }
2795
2796 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2797 {
2798         int i, online_vcpus, started_vcpus = 0;
2799
2800         if (!is_vcpu_stopped(vcpu))
2801                 return;
2802
2803         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2804         /* Only one cpu at a time may enter/leave the STOPPED state. */
2805         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2806         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2807
2808         for (i = 0; i < online_vcpus; i++) {
2809                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2810                         started_vcpus++;
2811         }
2812
2813         if (started_vcpus == 0) {
2814                 /* we're the only active VCPU -> speed it up */
2815                 __enable_ibs_on_vcpu(vcpu);
2816         } else if (started_vcpus == 1) {
2817                 /*
2818                  * As we are starting a second VCPU, we have to disable
2819                  * the IBS facility on all VCPUs to remove potentially
2820                  * oustanding ENABLE requests.
2821                  */
2822                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2823         }
2824
2825         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2826         /*
2827          * Another VCPU might have used IBS while we were offline.
2828          * Let's play safe and flush the VCPU at startup.
2829          */
2830         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2831         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2832         return;
2833 }
2834
2835 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2836 {
2837         int i, online_vcpus, started_vcpus = 0;
2838         struct kvm_vcpu *started_vcpu = NULL;
2839
2840         if (is_vcpu_stopped(vcpu))
2841                 return;
2842
2843         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2844         /* Only one cpu at a time may enter/leave the STOPPED state. */
2845         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2846         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2847
2848         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2849         kvm_s390_clear_stop_irq(vcpu);
2850
2851         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2852         __disable_ibs_on_vcpu(vcpu);
2853
2854         for (i = 0; i < online_vcpus; i++) {
2855                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2856                         started_vcpus++;
2857                         started_vcpu = vcpu->kvm->vcpus[i];
2858                 }
2859         }
2860
2861         if (started_vcpus == 1) {
2862                 /*
2863                  * As we only have one VCPU left, we want to enable the
2864                  * IBS facility for that VCPU to speed it up.
2865                  */
2866                 __enable_ibs_on_vcpu(started_vcpu);
2867         }
2868
2869         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2870         return;
2871 }
2872
2873 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2874                                      struct kvm_enable_cap *cap)
2875 {
2876         int r;
2877
2878         if (cap->flags)
2879                 return -EINVAL;
2880
2881         switch (cap->cap) {
2882         case KVM_CAP_S390_CSS_SUPPORT:
2883                 if (!vcpu->kvm->arch.css_support) {
2884                         vcpu->kvm->arch.css_support = 1;
2885                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2886                         trace_kvm_s390_enable_css(vcpu->kvm);
2887                 }
2888                 r = 0;
2889                 break;
2890         default:
2891                 r = -EINVAL;
2892                 break;
2893         }
2894         return r;
2895 }
2896
2897 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2898                                   struct kvm_s390_mem_op *mop)
2899 {
2900         void __user *uaddr = (void __user *)mop->buf;
2901         void *tmpbuf = NULL;
2902         int r, srcu_idx;
2903         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2904                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2905
2906         if (mop->flags & ~supported_flags)
2907                 return -EINVAL;
2908
2909         if (mop->size > MEM_OP_MAX_SIZE)
2910                 return -E2BIG;
2911
2912         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2913                 tmpbuf = vmalloc(mop->size);
2914                 if (!tmpbuf)
2915                         return -ENOMEM;
2916         }
2917
2918         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2919
2920         switch (mop->op) {
2921         case KVM_S390_MEMOP_LOGICAL_READ:
2922                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2923                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2924                                             mop->size, GACC_FETCH);
2925                         break;
2926                 }
2927                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2928                 if (r == 0) {
2929                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2930                                 r = -EFAULT;
2931                 }
2932                 break;
2933         case KVM_S390_MEMOP_LOGICAL_WRITE:
2934                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2935                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2936                                             mop->size, GACC_STORE);
2937                         break;
2938                 }
2939                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2940                         r = -EFAULT;
2941                         break;
2942                 }
2943                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2944                 break;
2945         default:
2946                 r = -EINVAL;
2947         }
2948
2949         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2950
2951         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2952                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2953
2954         vfree(tmpbuf);
2955         return r;
2956 }
2957
2958 long kvm_arch_vcpu_ioctl(struct file *filp,
2959                          unsigned int ioctl, unsigned long arg)
2960 {
2961         struct kvm_vcpu *vcpu = filp->private_data;
2962         void __user *argp = (void __user *)arg;
2963         int idx;
2964         long r;
2965
2966         switch (ioctl) {
2967         case KVM_S390_IRQ: {
2968                 struct kvm_s390_irq s390irq;
2969
2970                 r = -EFAULT;
2971                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2972                         break;
2973                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2974                 break;
2975         }
2976         case KVM_S390_INTERRUPT: {
2977                 struct kvm_s390_interrupt s390int;
2978                 struct kvm_s390_irq s390irq;
2979
2980                 r = -EFAULT;
2981                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2982                         break;
2983                 if (s390int_to_s390irq(&s390int, &s390irq))
2984                         return -EINVAL;
2985                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2986                 break;
2987         }
2988         case KVM_S390_STORE_STATUS:
2989                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2990                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2991                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2992                 break;
2993         case KVM_S390_SET_INITIAL_PSW: {
2994                 psw_t psw;
2995
2996                 r = -EFAULT;
2997                 if (copy_from_user(&psw, argp, sizeof(psw)))
2998                         break;
2999                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3000                 break;
3001         }
3002         case KVM_S390_INITIAL_RESET:
3003                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3004                 break;
3005         case KVM_SET_ONE_REG:
3006         case KVM_GET_ONE_REG: {
3007                 struct kvm_one_reg reg;
3008                 r = -EFAULT;
3009                 if (copy_from_user(&reg, argp, sizeof(reg)))
3010                         break;
3011                 if (ioctl == KVM_SET_ONE_REG)
3012                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3013                 else
3014                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3015                 break;
3016         }
3017 #ifdef CONFIG_KVM_S390_UCONTROL
3018         case KVM_S390_UCAS_MAP: {
3019                 struct kvm_s390_ucas_mapping ucasmap;
3020
3021                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3022                         r = -EFAULT;
3023                         break;
3024                 }
3025
3026                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3027                         r = -EINVAL;
3028                         break;
3029                 }
3030
3031                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3032                                      ucasmap.vcpu_addr, ucasmap.length);
3033                 break;
3034         }
3035         case KVM_S390_UCAS_UNMAP: {
3036                 struct kvm_s390_ucas_mapping ucasmap;
3037
3038                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3039                         r = -EFAULT;
3040                         break;
3041                 }
3042
3043                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3044                         r = -EINVAL;
3045                         break;
3046                 }
3047
3048                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3049                         ucasmap.length);
3050                 break;
3051         }
3052 #endif
3053         case KVM_S390_VCPU_FAULT: {
3054                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3055                 break;
3056         }
3057         case KVM_ENABLE_CAP:
3058         {
3059                 struct kvm_enable_cap cap;
3060                 r = -EFAULT;
3061                 if (copy_from_user(&cap, argp, sizeof(cap)))
3062                         break;
3063                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3064                 break;
3065         }
3066         case KVM_S390_MEM_OP: {
3067                 struct kvm_s390_mem_op mem_op;
3068
3069                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3070                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3071                 else
3072                         r = -EFAULT;
3073                 break;
3074         }
3075         case KVM_S390_SET_IRQ_STATE: {
3076                 struct kvm_s390_irq_state irq_state;
3077
3078                 r = -EFAULT;
3079                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3080                         break;
3081                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3082                     irq_state.len == 0 ||
3083                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3084                         r = -EINVAL;
3085                         break;
3086                 }
3087                 r = kvm_s390_set_irq_state(vcpu,
3088                                            (void __user *) irq_state.buf,
3089                                            irq_state.len);
3090                 break;
3091         }
3092         case KVM_S390_GET_IRQ_STATE: {
3093                 struct kvm_s390_irq_state irq_state;
3094
3095                 r = -EFAULT;
3096                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3097                         break;
3098                 if (irq_state.len == 0) {
3099                         r = -EINVAL;
3100                         break;
3101                 }
3102                 r = kvm_s390_get_irq_state(vcpu,
3103                                            (__u8 __user *)  irq_state.buf,
3104                                            irq_state.len);
3105                 break;
3106         }
3107         default:
3108                 r = -ENOTTY;
3109         }
3110         return r;
3111 }
3112
3113 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3114 {
3115 #ifdef CONFIG_KVM_S390_UCONTROL
3116         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3117                  && (kvm_is_ucontrol(vcpu->kvm))) {
3118                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3119                 get_page(vmf->page);
3120                 return 0;
3121         }
3122 #endif
3123         return VM_FAULT_SIGBUS;
3124 }
3125
3126 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3127                             unsigned long npages)
3128 {
3129         return 0;
3130 }
3131
3132 /* Section: memory related */
3133 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3134                                    struct kvm_memory_slot *memslot,
3135                                    const struct kvm_userspace_memory_region *mem,
3136                                    enum kvm_mr_change change)
3137 {
3138         /* A few sanity checks. We can have memory slots which have to be
3139            located/ended at a segment boundary (1MB). The memory in userland is
3140            ok to be fragmented into various different vmas. It is okay to mmap()
3141            and munmap() stuff in this slot after doing this call at any time */
3142
3143         if (mem->userspace_addr & 0xffffful)
3144                 return -EINVAL;
3145
3146         if (mem->memory_size & 0xffffful)
3147                 return -EINVAL;
3148
3149         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3150                 return -EINVAL;
3151
3152         return 0;
3153 }
3154
3155 void kvm_arch_commit_memory_region(struct kvm *kvm,
3156                                 const struct kvm_userspace_memory_region *mem,
3157                                 const struct kvm_memory_slot *old,
3158                                 const struct kvm_memory_slot *new,
3159                                 enum kvm_mr_change change)
3160 {
3161         int rc;
3162
3163         /* If the basics of the memslot do not change, we do not want
3164          * to update the gmap. Every update causes several unnecessary
3165          * segment translation exceptions. This is usually handled just
3166          * fine by the normal fault handler + gmap, but it will also
3167          * cause faults on the prefix page of running guest CPUs.
3168          */
3169         if (old->userspace_addr == mem->userspace_addr &&
3170             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3171             old->npages * PAGE_SIZE == mem->memory_size)
3172                 return;
3173
3174         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3175                 mem->guest_phys_addr, mem->memory_size);
3176         if (rc)
3177                 pr_warn("failed to commit memory region\n");
3178         return;
3179 }
3180
3181 static inline unsigned long nonhyp_mask(int i)
3182 {
3183         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3184
3185         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3186 }
3187
3188 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3189 {
3190         vcpu->valid_wakeup = false;
3191 }
3192
3193 static int __init kvm_s390_init(void)
3194 {
3195         int i;
3196
3197         if (!sclp.has_sief2) {
3198                 pr_info("SIE not available\n");
3199                 return -ENODEV;
3200         }
3201
3202         for (i = 0; i < 16; i++)
3203                 kvm_s390_fac_list_mask[i] |=
3204                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3205
3206         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3207 }
3208
3209 static void __exit kvm_s390_exit(void)
3210 {
3211         kvm_exit();
3212 }
3213
3214 module_init(kvm_s390_init);
3215 module_exit(kvm_s390_exit);
3216
3217 /*
3218  * Enable autoloading of the kvm module.
3219  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3220  * since x86 takes a different approach.
3221  */
3222 #include <linux/miscdevice.h>
3223 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3224 MODULE_ALIAS("devname:kvm");