/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
#include <asm/cpacf.h>
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = {
	0xffe6000000000000UL,
	0x005e000000000000UL,

unsigned long kvm_s390_fac_list_mask_size(void)
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
	/* every s390 is virtualization enabled ;-) */

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
	struct kvm_vcpu *vcpu;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,

int kvm_arch_hardware_setup(void)
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);

void kvm_arch_hardware_unsetup(void)
	gmap_unregister_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);

static void allow_cpu_feat(unsigned long nr)
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
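
/*
 * Test availability of a PERFORM LOCKED OPERATION subfunction: setting
 * bit 0x100 in r0 selects the "test bit" form, which only sets the
 * condition code; cc == 0 means the subfunction is available.
 */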
static inline int plo_test_bit(unsigned char nr)
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc = 3; /* subfunction not available */

	/* Parameter registers are ignored for "test bit" */
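
/*
 * Probe which optional facilities and crypto subfunctions the host
 * provides, and record those that can be offered to guests.
 */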
static void kvm_s390_cpu_feat_init(void)
	for (i = 0; i < 256; ++i) {
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);

	if (test_facility(28)) /* TOD-clock steering */
		etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
int kvm_arch_init(void *opaque)
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);

void kvm_arch_exit(void)
	debug_unregister(kvm_s390_dbf);
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_MEM_OP:
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
	case KVM_CAP_S390_VECTOR_REGISTERS:
	case KVM_CAP_S390_RI:
		r = test_facility(64);
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;

	mutex_lock(&kvm->slots_lock);

	if (log->slot >= KVM_USER_MEM_SLOTS)

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	if (!memslot->dirty_bitmap)

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);

	/* Clear the dirty log */
	n = kvm_dirty_bitmap_bytes(memslot);
	memset(memslot->dirty_bitmap, 0, n);

	mutex_unlock(&kvm->slots_lock);
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
	case KVM_CAP_S390_RI:
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
		mutex_unlock(&kvm->lock);
	case KVM_S390_VM_MEM_CLR_CMMA:
		if (!kvm->arch.use_cmma)

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
		if (get_user(new_limit, (u64 __user *)attr->addr))
		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)

		/* gmap_alloc takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)

		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			/* gmap_alloc will round the limit up */
			struct gmap *new = gmap_alloc(current->mm, new_limit);

			gmap_free(kvm->arch.gmap);
			kvm->arch.gmap = new;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_vcpu *vcpu;

	if (!test_kvm_facility(kvm, 76))

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		mutex_unlock(&kvm->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);

	mutex_unlock(&kvm->lock);
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;

	mutex_lock(&kvm->lock);
	if (atomic_read(&kvm->online_vcpus)) {

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);

	if (!copy_from_user(proc, (void __user *)attr->addr,
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (proc->ibc > unblocked_ibc)
			kvm->arch.model.ibc = unblocked_ibc;
		else if (proc->ibc < lowest_ibc)
			kvm->arch.model.ibc = lowest_ibc;
			kvm->arch.model.ibc = proc->ibc;
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);

	mutex_unlock(&kvm->lock);

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_processor *proc;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);

	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_machine *mach;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);

	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
		case KVM_S390_VM_MEM_LIMIT_SIZE:
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
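
/*
 * Read the storage keys of a range of guest pages into a buffer and copy
 * them to user space; bails out early with KVM_S390_GET_SKEYS_NONE when
 * the guest is not using storage keys at all.
 */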
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
	unsigned long curkey;

	if (args->flags != 0)

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
		keys = vmalloc(sizeof(uint8_t) * args->count);

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {

		curkey = get_guest_storage_key(current->mm, hva);
		if (IS_ERR_VALUE(curkey)) {

	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
			 sizeof(uint8_t) * args->count);

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
	if (args->flags != 0)

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
		keys = vmalloc(sizeof(uint8_t) * args->count);

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {

		r = set_guest_storage_key(current->mm, hva,
					  (unsigned long)keys[i], 0);
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;

	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
		r = kvm_s390_inject_vm(kvm, &s390int);
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		if (copy_from_user(&cap, argp, sizeof(cap)))
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
	case KVM_SET_DEVICE_ATTR: {
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
		r = kvm_s390_vm_set_attr(kvm, &attr);
	case KVM_GET_DEVICE_ATTR: {
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
		r = kvm_s390_vm_get_attr(kvm, &attr);
	case KVM_HAS_DEVICE_ATTR: {
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
		r = kvm_s390_vm_has_attr(kvm, &attr);
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
		r = kvm_s390_get_skeys(kvm, &args);
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
		r = kvm_s390_set_skeys(kvm, &args);
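
/*
 * Query the AP configuration with PQAP(QCI) into the 128 byte buffer at
 * config; the condition code of the instruction is returned.
 */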
static int kvm_s390_query_ap_config(u8 *config)
	u32 fcn_code = 0x04000000UL;

	memset(config, 0, 128);
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"

static int kvm_s390_apxa_installed(void)
	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

			pr_err("PQAP(QCI) failed with cc=%d", cc);
			return config[0] & 0x40;

static void kvm_s390_set_crycb_format(struct kvm *kvm)
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;

static u64 kvm_s390_get_initial_cpuid(void)
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
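
/*
 * If facility 76 (MSA3) is available to the guest, set up the CRYCB and
 * enable AES/DEA key wrapping with freshly generated random wrapping keys.
 */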
static void kvm_s390_crypto_init(struct kvm *kvm)
	if (!test_kvm_facility(kvm, 76))

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));

static void sca_dispose(struct kvm *kvm)
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
	gfp_t alloc_flags = GFP_KERNEL;
	char debug_name[16];
	static unsigned long sca_offset;

#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))

	rc = s390_enable_sie();

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);

	spin_lock(&kvm_lock);
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
			kvm->arch.model.fac_mask[i] = 0UL;

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_MAX_SIZE;
			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
		kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	KVM_EVENT(3, "creation of vm failed: %d", rc);

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);

static void kvm_free_vcpus(struct kvm *kvm)
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);

void kvm_arch_destroy_vm(struct kvm *kvm)
	kvm_free_vcpus(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
	if (!vcpu->arch.gmap)
	vcpu->arch.gmap->private = vcpu->kvm;

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	read_unlock(&vcpu->kvm->arch.sca_lock);
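
/*
 * Register a VCPU in its (basic or extended) SCA: store the SIE block
 * address in the VCPU's SCA entry and flag the CPU in the MCN bitmap.
 */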
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	read_unlock(&vcpu->kvm->arch.sca_lock);

/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
	d->ipte_control = s->ipte_control;

	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
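
/*
 * Replace the basic SCA with an extended SCA. All VCPUs are blocked and
 * the sca_lock is held for writing while the entries are copied over and
 * every SIE block is retargeted, so no CPU can still use the old SCA.
 */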
static int sca_switch_to_extended(struct kvm *kvm)
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);

static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
	if (id < KVM_S390_BSCA_CPU_SLOTS)
	if (!sclp.has_esca || !sclp.has_64bscao)

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);

/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);

/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
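
/*
 * On vcpu_load, make the guest FPU/vector and access register state
 * current (saving the host state first); the host state is restored
 * again in kvm_arch_vcpu_put.
 */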
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
	/* Save host register state */
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;

		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);

	/* Save guest register state */
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;

	/* Restore host register state */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
	mutex_lock(&vcpu->kvm->lock);
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
	if (!test_kvm_facility(vcpu->kvm, 76))

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
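
/* Allocate the cbrlo page used for CMMA handling of this VCPU. */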
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	vcpu->arch.sie_block->ecb = 0x02;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= 0x04;
	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	if (test_kvm_facility(vcpu->kvm, 8))
		vcpu->arch.sie_block->ecb2 |= 0x08;
	vcpu->arch.sie_block->eca = 0xC1002000U;
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->arch.sie_block->ecb3 |= 0x01;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
	if (test_kvm_facility(vcpu->kvm, 74))
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);

	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
	kmem_cache_free(kvm_vcpu_cache, vcpu);

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
	return kvm_s390_vcpu_has_irq(vcpu, 0);

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
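
/*
 * gmap ipte notifier callback: if a mapping of the guest prefix area is
 * invalidated, request a MMU reload on the affected VCPU so the notifier
 * is re-armed before the guest re-enters SIE.
 */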
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
	/* kvm common code refers to this, but never calls it */

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
			kvm_s390_set_cpu_timer(vcpu, val);
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
	kvm_s390_vcpu_initial_reset(vcpu);
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
	/* make sure the new values will be lazily loaded */
	if (test_fp_ctl(fpu->fpc))
	current->thread.fpu.fpc = fpu->fpc;
		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
	/* make sure we have the latest values */
		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
	fpu->fpc = current->thread.fpu.fpc;
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
	if (!is_vcpu_stopped(vcpu))

	vcpu->run->psw_mask = psw.mask;
	vcpu->run->psw_addr = psw.addr;

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
	return -EINVAL; /* not implemented yet */

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
	if (!sclp.has_gpere)

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;

		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */

static bool ibs_enabled(struct kvm_vcpu *vcpu)
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)

	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      kvm_s390_get_prefix(vcpu),

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
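
/*
 * Set the guest TOD clock by recomputing the epoch as the offset between
 * the requested TOD value and the host TOD clock; the new epoch is
 * propagated to all VCPUs while they are blocked.
 */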
2295 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2297 struct kvm_vcpu *vcpu;
2300 mutex_lock(&kvm->lock);
2302 kvm->arch.epoch = tod - get_tod_clock();
2303 kvm_s390_vcpu_block_all(kvm);
2304 kvm_for_each_vcpu(i, vcpu, kvm)
2305 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2306 kvm_s390_vcpu_unblock_all(kvm);
2308 mutex_unlock(&kvm->lock);
2312 * kvm_arch_fault_in_page - fault-in guest page if necessary
2313 * @vcpu: The corresponding virtual cpu
2314 * @gpa: Guest physical address
2315 * @writable: Whether the page should be writable or not
2317 * Make sure that a guest page has been faulted-in on the host.
2319 * Return: Zero on success, negative error code otherwise.
2321 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2323 return gmap_fault(vcpu->arch.gmap, gpa,
2324 writable ? FAULT_FLAG_WRITE : 0);
2327 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2328 unsigned long token)
2330 struct kvm_s390_interrupt inti;
2331 struct kvm_s390_irq irq;
2334 irq.u.ext.ext_params2 = token;
2335 irq.type = KVM_S390_INT_PFAULT_INIT;
2336 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2338 inti.type = KVM_S390_INT_PFAULT_DONE;
2339 inti.parm64 = token;
2340 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;
	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
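/*
 * Note (added for clarity): the early exits above encode the pfault
 * handshake preconditions - a valid token has been registered, the guest
 * PSW matches the select/compare mask, external interrupts and the
 * matching subclass in CR0 are enabled, and no other interrupt is
 * already pending.
 */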
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
	if (need_resched())
		schedule();
	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}
	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;
	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}
	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);
	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in the fault handler, there
		 * should be no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		__kvm_guest_enter();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		__kvm_guest_exit();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}
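/*
 * Illustrative sketch, not part of the original source: sync_regs() and
 * store_regs() implement the kvm_run synced-register protocol; userspace
 * flags what it changed before KVM_RUN, e.g.:
 *
 *	run->s.regs.prefix = new_prefix;
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;	// picked up by sync_regs()
 */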
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}
	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}
	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
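/*
 * Illustrative sketch, not part of the original source: a minimal
 * userspace run loop against this handler. "vcpu_fd", "run" (the mmap'ed
 * kvm_run structure) and handle_exit() are assumptions of the example:
 *
 *	while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
 *		if (handle_exit(run))	// e.g. KVM_EXIT_S390_SIEIC
 *			break;
 *	}
 */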
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0x1200;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);
	return kvm_s390_store_status_unloaded(vcpu, addr);
}
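/*
 * Illustrative sketch, not part of the original source: userspace reaches
 * this path through the KVM_S390_STORE_STATUS vcpu ioctl, passing either
 * an absolute address or one of the special values handled above:
 *
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 */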
/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}
int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest VXRS are in the host VXRS due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save them into the save area.
	 */
	save_fpu_regs();
	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		return 0;
	default:
		return -EINVAL;
	}
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
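/*
 * Illustrative sketch, not part of the original source: reading 256 bytes
 * of guest logical memory through this handler. "vcpu_fd" and "buf" are
 * assumptions of the example:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x1000,
 *		.size  = 256,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */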
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
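/*
 * Illustrative sketch, not part of the original source: for ucontrol VMs,
 * userspace hits this fault handler by mmap()ing the SIE control block
 * at the documented page offset of the vcpu file descriptor:
 *
 *	void *sie = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			 vcpu_fd, KVM_S390_SIE_PAGE_OFFSET * 4096);
 */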
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end at a
	   segment boundary (1 MB). The memory in userland may be fragmented
	   into various different vmas. It is okay to mmap() and munmap()
	   stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
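/*
 * Illustrative sketch, not part of the original source: a memslot that
 * passes the checks above - both address and size are 1 MB aligned.
 * "vm_fd" and "backing" are assumptions of the example:
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot            = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = 256 << 20,
 *		.userspace_addr  = (__u64)backing,
 *	};
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */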
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
}
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
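/*
 * Worked example for the mask above: sclp.hmfai holds one 2-bit field per
 * facility doubleword i; "(sclp.hmfai << i * 2) >> 30" extracts field i
 * into nonhyp_fai (0..3). Each increment shifts the mask by 16 bits:
 * nonhyp_fai == 0 keeps facility bits 16-63 of the doubleword, 1 keeps
 * 32-63, 2 keeps 48-63, and 3 keeps none.
 */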
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}
	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c,
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");