/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
#include <asm/cpacf.h>
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
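
/*
 * VCPU statistics exported via debugfs: each entry maps a file name in
 * the kvm debugfs directory to a counter in struct kvm_vcpu's stat area.
 */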
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = {
	0xffe6000000000000UL,
	0x005e000000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc = 3; /* subfunction not available */

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
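
/*
 * Probe the host for CPU features and crypto subfunctions that can be
 * offered to guests: PERFORM LOCKED OPERATION subfunctions, PTFF
 * (TOD-clock steering) and the CPACF query masks of the MSA extensions.
 */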
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
}
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
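
/*
 * Report whether a KVM capability is supported; for some capabilities
 * the return value carries extra information, e.g. the number of
 * available VCPU slots for KVM_CAP_MAX_VCPUS.
 */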
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	default:
		r = 0;
	}
	return r;
}
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_alloc takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			/* gmap_alloc will round the limit up */
			struct gmap *new = gmap_alloc(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_free(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
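
/*
 * Toggle AES/DEA key wrapping for the whole VM. The updated wrapping
 * key masks are propagated by re-running crypto setup on every VCPU;
 * exit_sie() kicks each VCPU out of SIE so the change takes effect.
 */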
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (atomic_read(&kvm->online_vcpus)) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
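
/*
 * Read guest storage keys into a user buffer. Illustrative caller
 * (a sketch, not code from this file): fill struct kvm_s390_skeys
 * with start_gfn, count and skeydata_addr, then issue
 * ioctl(vm_fd, KVM_S390_GET_SKEYS, &args); a return value of
 * KVM_S390_GET_SKEYS_NONE means the guest does not use storage keys.
 */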
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva,
					  (unsigned long)keys[i], 0);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
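
/*
 * Dispatcher for VM-scoped ioctls on the VM file descriptor; copies the
 * argument structs from user space and hands off to the helpers above.
 */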
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
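
/*
 * VM creation: allocate the SCA (staggered within its page to spread
 * cache line usage), the s390 debug feature area, the facility
 * mask/list and - unless this is a ucontrol VM - the guest address
 * space (gmap).
 */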
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_MAX_SIZE;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
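
/*
 * Switch a running VM from the basic SCA to the extended SCA while all
 * VCPUs are blocked, then point every SIE control block at the new
 * origin. Creating a VCPU with an id beyond the basic SCA's slots
 * (see sca_can_add_vcpu() below) triggers this switch.
 */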
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}

static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}

/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Save host register state */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;

	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);

	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;

	/* Restore host register state */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= 0x02;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= 0x04;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= 0x08;
	vcpu->arch.sie_block->eca = 0x1002000U;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= 0x80000000U;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= 0x40000000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->arch.sie_block->ecb3 |= 0x01;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
	if (test_kvm_facility(vcpu->kvm, 74))
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
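
/*
 * gmap (guest mapping) notifier: when a mapped guest prefix page is
 * invalidated, force the affected VCPU to reload its MMU state before
 * it re-enters SIE.
 */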
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure the new values will be lazily loaded */
	save_fpu_regs();
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	current->thread.fpu.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
	else
		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
	else
		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
	fpu->fpc = current->thread.fpu.fpc;
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;
	if (!sclp.has_gpere)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}
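
/*
 * Process pending VCPU requests before entering SIE: re-arm the ipte
 * notifier for the prefix page, flush the TLB, or toggle the IBS state
 * bit, retrying the loop after each handled request.
 */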
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      kvm_s390_get_prefix(vcpu),
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}

void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/*
		 * Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
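/*
 * Background note (informal): insn_length() derives the length from the
 * two leftmost bits of the first opcode byte, as defined by the
 * architecture: 00 -> 2 bytes, 01/10 -> 4 bytes, 11 -> 6 bytes. For
 * example, opcode byte 0xb2 (topmost bits 10) gives ilen = 4, so the PSW
 * is forwarded by 4 before the addressing exception is injected.
 */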
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit there should be no uaccess.
		 */
		local_irq_disable();
		__kvm_guest_enter();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		__kvm_guest_exit();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * it into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}
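/*
 * Worked example (informal): since only bits 0-53 take part in address
 * formation, the low 10 bits of gpa are ignored. A gpa of 0x12345 stores
 * the 512-byte vector save area at 0x12345 & ~0x3ff = 0x12000, while a
 * gpa of 0x200 has no bits set above bit 9 and is silently skipped.
 */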
int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest VXRS are in the host VXRS due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save it into the save area.
	 */
	save_fpu_regs();

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
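/*
 * Informal summary of the IBS policy implemented by the start/stop code
 * above: IBS may only be in effect while exactly one VCPU is running.
 * Going from 0 to 1 running VCPUs enables IBS on that VCPU, going from
 * 1 to 2 disables it on all VCPUs (including not-yet-handled ENABLE
 * requests), and going from 2 back to 1 re-enables it on the survivor.
 */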
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
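/*
 * Illustrative sketch, not part of this file: reading 256 bytes of guest
 * memory at logical address 0x1000 with KVM_S390_MEM_OP from user space.
 * "vcpu_fd" and "buf" are assumptions of the example.
 *
 *	__u8 buf[256];
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = 0x1000,			// guest logical address
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,	// user buffer
 *		.ar    = 0,				// access register 0
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop) < 0)
 *		err(1, "KVM_S390_MEM_OP");
 */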
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
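/*
 * Informal example of the segment (1MB) alignment rule above: a slot with
 * userspace_addr = 0x80000000 and memory_size = 0x100000 is accepted,
 * while userspace_addr = 0x80001000 or memory_size = 0x80000 is rejected
 * with -EINVAL because the low 20 bits are not all zero.
 */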
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
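/*
 * Worked example (informal): sclp.hmfai carries one 2-bit count per
 * facility word, consumed from the leftmost bits. For i = 0 and
 * sclp.hmfai = 0x40000000, nonhyp_fai = (0x40000000 << 0) >> 30 = 1, so
 * the mask is 0x0000ffffffffffffUL >> 16 = 0x00000000ffffffffUL and only
 * the lower half of facility doubleword 0 is passed through into
 * kvm_s390_fac_list_mask below.
 */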
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");