/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/etr.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
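
/*
 * Per-vcpu statistics exported via debugfs. Each entry maps a file name
 * under <debugfs>/kvm/ to a counter in struct kvm_vcpu_stat; the counters
 * are incremented by the exit and interception handlers elsewhere in
 * kvm-s390.
 */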

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
	0xffe6fffbfcfdfc40UL,
	0x005e800000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
				  : KVM_S390_BSCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	default:
		r = 0;
	}
	return r;
}
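
/*
 * Synchronize the KVM dirty log with the gmap dirty state: every guest
 * page whose dirty bit is set (and cleared) in the host page tables is
 * marked dirty in the memslot's dirty bitmap.
 */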

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	down_read(&gmap->mm->mmap_sem);
	/* Loop over all guest pages; last_gfn is one past the final gfn */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (gmap_test_and_clear_dirty(address, gmap))
			mark_page_dirty(kvm, cur_gfn);
	}
	up_read(&gmap->mm->mmap_sem);
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
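
/*
 * Handler for KVM_ENABLE_CAP on the VM fd. Capabilities that change the
 * CPU model (for example vector registers) may only be enabled while no
 * VCPUs have been created yet.
 */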

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac->mask, 129);
			set_kvm_facility(kvm->arch.model.fac->list, 129);
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		/* enable CMMA only for z10 and later (EDAT_1) */
		ret = -EINVAL;
		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_alloc takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			/* gmap_alloc will round the limit up */
			struct gmap *new = gmap_alloc(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_free(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
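
/*
 * The guest TOD clock is expressed as an offset (the "epoch") relative to
 * the host TOD clock: guest TOD = host TOD + sie_block->epoch. The
 * handlers below set and query that clock through the KVM_S390_VM_TOD
 * device attributes.
 */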

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (atomic_read(&kvm->online_vcpus)) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
		       sizeof(struct cpuid));
		kvm->arch.model.ibc = proc->ibc;
		memcpy(kvm->arch.model.fac->list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
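
/*
 * Storage keys are a per-page s390 concept (4 access-control bits plus
 * fetch-protection, reference and change bits). They are only materialized
 * for a guest once it actually uses them; until then KVM_S390_GET_SKEYS
 * reports KVM_S390_GET_SKEYS_NONE.
 */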

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	unsigned long curkey;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		curkey = get_guest_storage_key(current->mm, hva);
		if (IS_ERR_VALUE(curkey)) {
			r = curkey;
			goto out;
		}
		keys[i] = curkey;
	}

	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
			 sizeof(uint8_t) * args->count);
	if (r)
		r = -EFAULT;
out:
	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			goto out;
		}

		r = set_guest_storage_key(current->mm, hva,
					  (unsigned long)keys[i], 0);
		if (r)
			goto out;
	}
out:
	kvfree(keys);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
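
/*
 * Query the AP (crypto adapter) configuration via the PQAP(QCI)
 * instruction. PQAP has no assembler mnemonic here, hence the hard-coded
 * opcode 0xb2af; the function code 0x04000000 loaded into register 0
 * selects QCI, and the 128-byte configuration block is stored at the
 * address held in register 2.
 */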

static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
{
	get_cpu_id(cpu_id);
	cpu_id->version = 0xff;
}

static int kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return 0;

	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
					 GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.crypto.crycb)
		return -ENOMEM;

	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));

	return 0;
}
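
/*
 * The SCA (system control area) is a shared block used by the SIE
 * instruction for inter-vcpu signalling. The basic SCA (BSCA) covers up
 * to KVM_S390_BSCA_CPU_SLOTS (64) VCPUs; machines with the ESCA facility
 * can switch to the extended SCA with KVM_S390_ESCA_CPU_SLOTS (248).
 */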

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.use_esca = 0; /* start with basic SCA */
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	/*
	 * The architectural maximum amount of facilities is 16 kbit. To store
	 * this amount, 2 kbyte of memory is required. Thus we need a full
	 * page to hold the guest facility list (arch.model.fac->list) and the
	 * facility mask (arch.model.fac->mask). Its address size has to be
	 * 31 bits and word aligned.
	 */
	kvm->arch.model.fac =
		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.model.fac)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac->mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	if (kvm_s390_crypto_init(kvm) < 0)
		goto out_err;

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_MAX_SIZE;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	kfree(kvm->arch.crypto.crycb);
	free_page((unsigned long)kvm->arch.model.fac);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	/* free the FP/VX save area that was allocated in vcpu_create */
	kfree(vcpu->arch.guest_fpregs.fprs);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)kvm->arch.model.fac);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kfree(kvm->arch.crypto.crycb);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
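
/*
 * Migrate a VM from the basic to the extended SCA format. All VCPUs are
 * blocked and kicked out of SIE while the entries are copied and every
 * SIE control block is repointed (scaoh/scaol) to the new block, so no
 * CPU can use the old SCA concurrently.
 */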

static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}

static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 129))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}

/*
 * Backs up the current FP/VX register save area on a particular
 * destination.  Used to switch between different register save
 * areas.
 */
static inline void save_fpu_to(struct fpu *dst)
{
	dst->fpc = current->thread.fpu.fpc;
	dst->regs = current->thread.fpu.regs;
}

/*
 * Switches the FP/VX register save area from which to lazy
 * restore register contents.
 */
static inline void load_fpu_from(struct fpu *from)
{
	current->thread.fpu.fpc = from->fpc;
	current->thread.fpu.regs = from->regs;
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Save host register state */
	save_fpu_regs();
	save_fpu_to(&vcpu->arch.host_fpregs);

	if (test_kvm_facility(vcpu->kvm, 129)) {
		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
		/*
		 * Use the register save area in the SIE-control block
		 * for register restore and save in kvm_arch_vcpu_put()
		 */
		current->thread.fpu.vxrs =
			(__vector128 *)&vcpu->run->s.regs.vrs;
	} else
		load_fpu_from(&vcpu->arch.guest_fpregs);

	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);

	save_fpu_regs();

	if (test_kvm_facility(vcpu->kvm, 129))
		/*
		 * kvm_arch_vcpu_load() set up the register save area to
		 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
		 * are already saved.  Only the floating-point control must be
		 * copied.
		 */
		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	else
		save_fpu_to(&vcpu->arch.guest_fpregs);
	load_fpu_from(&vcpu->arch.host_fpregs);

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm = 0UL;
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.cpu_id = model->cpu_id;
	vcpu->arch.sie_block->ibc = model->ibc;
	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
}
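
/*
 * First-time setup of a VCPU's SIE control block: initial CPU state
 * flags, execution-control bits (ecb/ecb2/eca) keyed off the available
 * facilities, CMMA state, the clock-comparator wakeup timer and the
 * crypto control block.
 */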

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	vcpu->arch.sie_block->ecb = 6;
	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	vcpu->arch.sie_block->ecb2 = 8;
	vcpu->arch.sie_block->eca = 0xC1002000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

	/*
	 * Allocate a save area for floating-point registers. If the vector
	 * extension is available, register contents are saved in the SIE
	 * control block.  The allocated save area is still required in
	 * particular places, for example, in kvm_s390_vcpu_store_status().
	 */
	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
					       GFP_KERNEL);
	if (!vcpu->arch.guest_fpregs.fprs)
		goto out_free_sie_block;

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
	save_fpu_regs();
	load_fpu_from(&vcpu->arch.guest_fpregs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      kvm_s390_get_prefix(vcpu),
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}
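
/*
 * Set the guest TOD clock by recomputing the VM-wide epoch from the
 * requested TOD value and propagating it to all VCPUs. The VCPUs are
 * blocked (kicked out of SIE) so none of them computes with a stale
 * epoch while it is being updated.
 */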

void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}

/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
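
/*
 * Deliver the "pfault" handshake interrupts for asynchronous page faults:
 * an INIT external interrupt tells the guest that a page is not present
 * and that it may schedule a different task; the matching DONE interrupt
 * reports that the page has become available again. Both carry the
 * guest-supplied token so the guest can match the two events.
 */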

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
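
/*
 * Decide whether a host page fault may be handled asynchronously: the
 * guest must have registered a valid pfault token, external interrupts
 * and the relevant external-interruption subclass mask bit in CR0 (0x200)
 * must be enabled, and the PSW mask must match the guest's
 * pfault_select/pfault_compare criteria. The token is read from guest
 * real storage at the registered address.
 */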

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	u8 opcode;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
	if (rc)
		return kvm_s390_inject_prog_cond(vcpu, rc);
	psw->addr = __rewind_psw(*psw, -insn_length(opcode));

	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		__kvm_guest_enter();
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__kvm_guest_exit();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
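
/*
 * Registers synced via kvm_run: before entering the guest, sync_regs()
 * copies the register state that user space marked dirty into the SIE
 * control block; after the run loop, store_regs() copies the current
 * guest state back for user space to inspect.
 */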

static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}

static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}
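
/*
 * Main entry point for KVM_RUN: synchronize register state, run the
 * guest via __vcpu_run() and translate the result into an exit reason
 * for user space.
 */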

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	unsigned int px;
	u64 clkcomp;
	int rc;

	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = SAVE_AREA_BASE;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
	}
	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
			     vcpu->arch.guest_fpregs.fprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
			      &vcpu->arch.sie_block->gpsw, 16);
	px = kvm_s390_get_prefix(vcpu);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
			      &px, 4);
	rc |= write_guest_abs(vcpu,
			      gpa + offsetof(struct save_area, fp_ctrl_reg),
			      &vcpu->arch.guest_fpregs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
			      &vcpu->arch.sie_block->todpr, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
			      &vcpu->arch.sie_block->cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
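/*
 * Example (userspace sketch): the two special gpa values are passed as the
 * plain ioctl argument; any other value is used as an absolute guest
 * address ("vcpu_fd" assumed):
 *
 *	// store at the 64-bit save area (0x1200) and set the arch mode byte
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 *	// store relative to the vcpu's current prefix
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_PREFIXED);
 */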
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	if (test_kvm_facility(vcpu->kvm, 129)) {
		/*
		 * If the vector extension is available, the vector registers
		 * that overlap with the floating-point registers are saved in
		 * the SIE-control block. Hence, extract the floating-point
		 * registers and the FPC value and store them in the
		 * guest_fpregs structure.
		 */
		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
				 current->thread.fpu.vxrs);
	} else
		save_fpu_to(&vcpu->arch.guest_fpregs);
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}
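/*
 * Illustration of the mask above: bits 0-53 address a 1 KB aligned
 * additional-status area, so the low ten bits are dropped, e.g.:
 *
 *	0x12345 & ~0x3ffUL == 0x12000	// store starts at 0x12000
 *	0x00200 & ~0x3ffUL == 0x0	// treated as "no address": nothing stored
 */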
int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest VXRS are in the host VXRS due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save them into the save area.
	 */
	save_fpu_regs();

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
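/*
 * Example (userspace sketch): the capability is enabled through the vcpu
 * KVM_ENABLE_CAP ioctl; flags and args stay zero ("vcpu_fd" assumed):
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_CSS_SUPPORT,
 *	};
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */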
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
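/*
 * Example (userspace sketch): reading 256 bytes from guest logical address
 * 0x1000 via access register 0; setting KVM_S390_MEMOP_F_CHECK_ONLY in
 * .flags instead would only test translation ("vcpu_fd" assumed):
 *
 *	unsigned char data[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr	= 0x1000,
 *		.size	= sizeof(data),
 *		.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf	= (unsigned long)data,
 *		.ar	= 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */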
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
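/*
 * Example (userspace sketch): the KVM_S390_SET_IRQ_STATE check above
 * rejects buffers that are empty or not a whole multiple of
 * sizeof(struct kvm_s390_irq), so a save/restore pair sizes the buffer in
 * units of that struct ("vcpu_fd" assumed; the get ioctl is documented to
 * return the number of bytes copied):
 *
 *	struct kvm_s390_irq irqs[64];
 *	struct kvm_s390_irq_state state = {
 *		.buf = (unsigned long)irqs,
 *		.len = sizeof(irqs),	// multiple of sizeof(struct kvm_s390_irq)
 *	};
 *	int copied = ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &state);
 *	if (copied > 0) {
 *		state.len = copied;
 *		ioctl(vcpu_fd, KVM_S390_SET_IRQ_STATE, &state);
 *	}
 */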
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
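/*
 * Example (hypothetical userspace sketch): for user-controlled VMs this
 * fault handler lets the VMM map the SIE control block by mmap()ing the
 * vcpu fd at the page offset checked above (assuming a 4 KB page size):
 *
 *	void *sie = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			 vcpu_fd, KVM_S390_SIE_PAGE_OFFSET * 4096);
 */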
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	   segment boundary (1 MB). The memory in userland may be fragmented
	   into arbitrary vmas, and it is fine to mmap() and munmap() ranges
	   within this slot at any time after this call. */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
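/*
 * Example (userspace sketch): a memslot that passes the checks above has a
 * 1 MB aligned userspace address and size and fits below the configured
 * guest memory limit ("vm_fd" and a suitably aligned "mem" mapping
 * assumed):
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot		 = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size	 = 256 << 20,	// 256 MB, 1 MB aligned
 *		.userspace_addr	 = (unsigned long)mem,
 *	};
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */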
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static int __init kvm_s390_init(void)
{
	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");