kvm/s390: drop unpaired smp_mb
arch/s390/kvm/kvm-s390.c
/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/etr.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

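/*
 * Each entry below maps a debugfs file name to the offset of a counter
 * inside struct kvm_vcpu; the generic KVM code consumes this table and
 * creates one debugfs file per counter.
 */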
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
        { "diagnose_258", VCPU_STAT(diagnose_258) },
        { "diagnose_308", VCPU_STAT(diagnose_308) },
        { "diagnose_500", VCPU_STAT(diagnose_500) },
        { NULL }
};

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
        0xffe6fffbfcfdfc40UL,
        0x005e800000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
        BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
        return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
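
/*
 * This mask is ANDed into the host's STFLE bits when a VM is created
 * (see kvm_arch_init_vm()), so a guest never sees a facility that the
 * host lacks or that KVM does not virtualize.
 */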

static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm->arch.epoch -= *delta;
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        vcpu->arch.sie_block->epoch -= *delta;
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};
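
/*
 * The s390_epoch_delta_notifier chain is invoked by the ETR/STP
 * time-synchronization code with the applied TOD clock delta, so the
 * callback above keeps all guest epochs consistent with the host TOD.
 */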

int kvm_arch_hardware_setup(void)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_ipte_notifier(&gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_ipte_notifier(&gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

int kvm_arch_init(void *opaque)
{
        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
                debug_unregister(kvm_s390_dbf);
                return -ENOMEM;
        }

        /* Register floating interrupt controller interface. */
        return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
        debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_ENABLE_CAP_VM:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
                r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
                                  : KVM_S390_BSCA_CPU_SLOTS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_USER_MEM_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        default:
                r = 0;
        }
        return r;
}
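
/*
 * Note for userspace: the KVM_CHECK_EXTENSION return value doubles as
 * capability data here, e.g. querying KVM_CAP_S390_MEM_OP yields the
 * maximum transfer size rather than a plain 0/1 flag.
 */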

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
                                        struct kvm_memory_slot *memslot)
{
        gfn_t cur_gfn, last_gfn;
        unsigned long address;
        struct gmap *gmap = kvm->arch.gmap;

        down_read(&gmap->mm->mmap_sem);
        /* Loop over all guest pages */
        last_gfn = memslot->base_gfn + memslot->npages;
        for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
                address = gfn_to_hva_memslot(memslot, cur_gfn);

                if (gmap_test_and_clear_dirty(address, gmap))
                        mark_page_dirty(kvm, cur_gfn);
        }
        up_read(&gmap->mm->mmap_sem);
}
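
/*
 * Dirtiness is tracked in the gmap (guest address space) page tables;
 * the per-memslot dirty bitmap is only filled in on demand by the
 * function above when userspace requests the dirty log.
 */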

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        slots = kvm_memslots(kvm);
        memslot = id_to_memslot(slots, log->slot);
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;

        kvm_s390_sync_dirty_log(kvm, memslot);
        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (atomic_read(&kvm->online_vcpus)) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac->mask, 129);
                        set_kvm_facility(kvm->arch.model.fac->list, 129);
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;
        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                /* enable CMMA only for z10 and later (EDAT_1) */
                ret = -EINVAL;
                if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
                        break;

                ret = -EBUSY;
                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (atomic_read(&kvm->online_vcpus) == 0) {
                        kvm->arch.use_cmma = 1;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_alloc takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (atomic_read(&kvm->online_vcpus) == 0) {
                        /* gmap_alloc will round the limit up */
                        struct gmap *new = gmap_alloc(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_free(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}
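
/*
 * Changing the memory limit replaces the VM's gmap altogether, which
 * is why KVM_S390_VM_MEM_LIMIT_SIZE (like enabling CMMA) is refused
 * with -EBUSY once the first VCPU exists.
 */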

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_vcpu *vcpu;
        int i;

        if (!test_kvm_facility(kvm, 76))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                exit_sie(vcpu);
        }
        mutex_unlock(&kvm->lock);
        return 0;
}
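
/*
 * Enabling key wrapping installs a fresh random wrapping key mask,
 * disabling it clears the mask; every VCPU is then kicked out of SIE
 * so that the updated crycb takes effect on the next SIE entry.
 */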

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        kvm_s390_set_tod_clock(kvm, gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
        return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;

        mutex_lock(&kvm->lock);
        if (atomic_read(&kvm->online_vcpus)) {
                ret = -EBUSY;
                goto out;
        }
        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        if (!copy_from_user(proc, (void __user *)attr->addr,
                            sizeof(*proc))) {
                memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
                       sizeof(struct cpuid));
                kvm->arch.model.ibc = proc->ibc;
                memcpy(kvm->arch.model.fac->list, proc->fac_list,
                       S390_ARCH_FAC_LIST_SIZE_BYTE);
        } else
                ret = -EFAULT;
        kfree(proc);
out:
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_set_processor(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;

        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
        proc->ibc = kvm->arch.model.ibc;
        memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
                ret = -EFAULT;
        kfree(proc);
out:
        return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_machine *mach;
        int ret = 0;

        mach = kzalloc(sizeof(*mach), GFP_KERNEL);
        if (!mach) {
                ret = -ENOMEM;
                goto out;
        }
        get_cpu_id((struct cpuid *) &mach->cpuid);
        mach->ibc = sclp.ibc;
        memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
                ret = -EFAULT;
        kfree(mach);
out:
        return ret;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_get_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE:
                ret = kvm_s390_get_machine(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_set_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_set_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_set_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_CRYPTO:
                ret = kvm_s390_vm_set_crypto(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_get_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_get_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_get_cpu_model(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                switch (attr->attr) {
                case KVM_S390_VM_MEM_ENABLE_CMMA:
                case KVM_S390_VM_MEM_CLR_CMMA:
                case KVM_S390_VM_MEM_LIMIT_SIZE:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_TOD:
                switch (attr->attr) {
                case KVM_S390_VM_TOD_LOW:
                case KVM_S390_VM_TOD_HIGH:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CPU_MODEL:
                switch (attr->attr) {
                case KVM_S390_VM_CPU_PROCESSOR:
                case KVM_S390_VM_CPU_MACHINE:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CRYPTO:
                switch (attr->attr) {
                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
        uint8_t *keys;
        uint64_t hva;
        unsigned long curkey;
        int i, r = 0;

        if (args->flags != 0)
                return -EINVAL;

        /* Is this guest using storage keys? */
        if (!mm_use_skey(current->mm))
                return KVM_S390_GET_SKEYS_NONE;

        /* Enforce sane limit on memory allocation */
        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
                return -EINVAL;

        keys = kmalloc_array(args->count, sizeof(uint8_t),
                             GFP_KERNEL | __GFP_NOWARN);
        if (!keys)
                keys = vmalloc(sizeof(uint8_t) * args->count);
        if (!keys)
                return -ENOMEM;

        for (i = 0; i < args->count; i++) {
                hva = gfn_to_hva(kvm, args->start_gfn + i);
                if (kvm_is_error_hva(hva)) {
                        r = -EFAULT;
                        goto out;
                }

                curkey = get_guest_storage_key(current->mm, hva);
                if (IS_ERR_VALUE(curkey)) {
                        r = curkey;
                        goto out;
                }
                keys[i] = curkey;
        }

        r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
                         sizeof(uint8_t) * args->count);
        if (r)
                r = -EFAULT;
out:
        kvfree(keys);
        return r;
}
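
/*
 * Userspace sketch (hypothetical values): fill struct kvm_s390_skeys
 * with start_gfn, count and a buffer in skeydata_addr, then issue the
 * KVM_S390_GET_SKEYS vm ioctl; KVM_S390_GET_SKEYS_NONE as the result
 * means the guest never enabled storage keys.
 */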

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
        uint8_t *keys;
        uint64_t hva;
        int i, r = 0;

        if (args->flags != 0)
                return -EINVAL;

        /* Enforce sane limit on memory allocation */
        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
                return -EINVAL;

        keys = kmalloc_array(args->count, sizeof(uint8_t),
                             GFP_KERNEL | __GFP_NOWARN);
        if (!keys)
                keys = vmalloc(sizeof(uint8_t) * args->count);
        if (!keys)
                return -ENOMEM;

        r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
                           sizeof(uint8_t) * args->count);
        if (r) {
                r = -EFAULT;
                goto out;
        }

        /* Enable storage key handling for the guest */
        r = s390_enable_skey();
        if (r)
                goto out;

        for (i = 0; i < args->count; i++) {
                hva = gfn_to_hva(kvm, args->start_gfn + i);
                if (kvm_is_error_hva(hva)) {
                        r = -EFAULT;
                        goto out;
                }

                /* Lowest order bit is reserved */
                if (keys[i] & 0x01) {
                        r = -EINVAL;
                        goto out;
                }

                r = set_guest_storage_key(current->mm, hva,
                                          (unsigned long)keys[i], 0);
                if (r)
                        goto out;
        }
out:
        kvfree(keys);
        return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
{
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
        struct kvm_device_attr attr;
        int r;

        switch (ioctl) {
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;

                r = -EFAULT;
                if (copy_from_user(&s390int, argp, sizeof(s390int)))
                        break;
                r = kvm_s390_inject_vm(kvm, &s390int);
                break;
        }
        case KVM_ENABLE_CAP: {
                struct kvm_enable_cap cap;
                r = -EFAULT;
                if (copy_from_user(&cap, argp, sizeof(cap)))
                        break;
                r = kvm_vm_ioctl_enable_cap(kvm, &cap);
                break;
        }
        case KVM_CREATE_IRQCHIP: {
                struct kvm_irq_routing_entry routing;

                r = -EINVAL;
                if (kvm->arch.use_irqchip) {
                        /* Set up dummy routing. */
                        memset(&routing, 0, sizeof(routing));
                        r = kvm_set_irq_routing(kvm, &routing, 0, 0);
                }
                break;
        }
        case KVM_SET_DEVICE_ATTR: {
                r = -EFAULT;
                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
                        break;
                r = kvm_s390_vm_set_attr(kvm, &attr);
                break;
        }
        case KVM_GET_DEVICE_ATTR: {
                r = -EFAULT;
                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
                        break;
                r = kvm_s390_vm_get_attr(kvm, &attr);
                break;
        }
        case KVM_HAS_DEVICE_ATTR: {
                r = -EFAULT;
                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
                        break;
                r = kvm_s390_vm_has_attr(kvm, &attr);
                break;
        }
        case KVM_S390_GET_SKEYS: {
                struct kvm_s390_skeys args;

                r = -EFAULT;
                if (copy_from_user(&args, argp,
                                   sizeof(struct kvm_s390_skeys)))
                        break;
                r = kvm_s390_get_skeys(kvm, &args);
                break;
        }
        case KVM_S390_SET_SKEYS: {
                struct kvm_s390_skeys args;

                r = -EFAULT;
                if (copy_from_user(&args, argp,
                                   sizeof(struct kvm_s390_skeys)))
                        break;
                r = kvm_s390_set_skeys(kvm, &args);
                break;
        }
        default:
                r = -ENOTTY;
        }

        return r;
}

static int kvm_s390_query_ap_config(u8 *config)
{
        u32 fcn_code = 0x04000000UL;
        u32 cc = 0;

        memset(config, 0, 128);
        asm volatile(
                "lgr 0,%1\n"
                "lgr 2,%2\n"
                ".long 0xb2af0000\n"            /* PQAP(QCI) */
                "0: ipm %0\n"
                "srl %0,28\n"
                "1:\n"
                EX_TABLE(0b, 1b)
                : "+r" (cc)
                : "r" (fcn_code), "r" (config)
                : "cc", "0", "2", "memory"
        );

        return cc;
}
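
/*
 * The .long 0xb2af0000 above is PQAP with the QCI subfunction, hand
 * coded because older assemblers lack the mnemonic; it fills the
 * 128-byte AP configuration block, and the EX_TABLE entry absorbs the
 * exception raised on machines without the instruction.
 */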

static int kvm_s390_apxa_installed(void)
{
        u8 config[128];
        int cc;

        if (test_facility(12)) {
                cc = kvm_s390_query_ap_config(config);

                if (cc)
                        pr_err("PQAP(QCI) failed with cc=%d", cc);
                else
                        return config[0] & 0x40;
        }

        return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
        kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

        if (kvm_s390_apxa_installed())
                kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
        else
                kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
{
        get_cpu_id(cpu_id);
        cpu_id->version = 0xff;
}

static int kvm_s390_crypto_init(struct kvm *kvm)
{
        if (!test_kvm_facility(kvm, 76))
                return 0;

        kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
                                         GFP_KERNEL | GFP_DMA);
        if (!kvm->arch.crypto.crycb)
                return -ENOMEM;

        kvm_s390_set_crycb_format(kvm);

        /* Enable AES/DEA protected key functions by default */
        kvm->arch.crypto.aes_kw = 1;
        kvm->arch.crypto.dea_kw = 1;
        get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
        get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));

        return 0;
}

static void sca_dispose(struct kvm *kvm)
{
        if (kvm->arch.use_esca)
                free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
        else
                free_page((unsigned long)(kvm->arch.sca));
        kvm->arch.sca = NULL;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
        int i, rc;
        char debug_name[16];
        static unsigned long sca_offset;

        rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
        if (type & ~KVM_VM_S390_UCONTROL)
                goto out_err;
        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
                goto out_err;
#else
        if (type)
                goto out_err;
#endif

        rc = s390_enable_sie();
        if (rc)
                goto out_err;

        rc = -ENOMEM;

        kvm->arch.use_esca = 0; /* start with basic SCA */
        rwlock_init(&kvm->arch.sca_lock);
        kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
        if (!kvm->arch.sca)
                goto out_err;
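        /*
         * Stagger newly allocated SCAs within their page in 16-byte
         * steps; the likely intent is to spread the hot leading bytes
         * of different VMs' SCAs across cache lines.
         */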
        spin_lock(&kvm_lock);
        sca_offset += 16;
        if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
                sca_offset = 0;
        kvm->arch.sca = (struct bsca_block *)
                        ((char *) kvm->arch.sca + sca_offset);
        spin_unlock(&kvm_lock);

        sprintf(debug_name, "kvm-%u", current->pid);

        kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
        if (!kvm->arch.dbf)
                goto out_err;

        /*
         * The architectural maximum amount of facilities is 16 kbit. To store
         * this amount, 2 kbyte of memory is required. Thus we need a full
         * page to hold the guest facility list (arch.model.fac->list) and the
         * facility mask (arch.model.fac->mask). Its address size has to be
         * 31 bits and word aligned.
         */
        kvm->arch.model.fac =
                (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
        if (!kvm->arch.model.fac)
                goto out_err;

        /* Populate the facility mask initially. */
        memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
                if (i < kvm_s390_fac_list_mask_size())
                        kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
                else
                        kvm->arch.model.fac->mask[i] = 0UL;
        }

        /* Populate the facility list initially. */
        memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);

        kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
        kvm->arch.model.ibc = sclp.ibc & 0x0fff;

        if (kvm_s390_crypto_init(kvm) < 0)
                goto out_err;

        spin_lock_init(&kvm->arch.float_int.lock);
        for (i = 0; i < FIRQ_LIST_COUNT; i++)
                INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
        init_waitqueue_head(&kvm->arch.ipte_wq);
        mutex_init(&kvm->arch.ipte_mutex);

        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
        VM_EVENT(kvm, 3, "vm created with type %lu", type);

        if (type & KVM_VM_S390_UCONTROL) {
                kvm->arch.gmap = NULL;
                kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
        } else {
                if (sclp.hamax == U64_MAX)
                        kvm->arch.mem_limit = TASK_MAX_SIZE;
                else
                        kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
                                                    sclp.hamax + 1);
                kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
                if (!kvm->arch.gmap)
                        goto out_err;
                kvm->arch.gmap->private = kvm;
                kvm->arch.gmap->pfault_enabled = 0;
        }

        kvm->arch.css_support = 0;
        kvm->arch.use_irqchip = 0;
        kvm->arch.epoch = 0;

        spin_lock_init(&kvm->arch.start_stop_lock);
        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

        return 0;
out_err:
        kfree(kvm->arch.crypto.crycb);
        free_page((unsigned long)kvm->arch.model.fac);
        debug_unregister(kvm->arch.dbf);
        sca_dispose(kvm);
        KVM_EVENT(3, "creation of vm failed: %d", rc);
        return rc;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
        kvm_s390_clear_local_irqs(vcpu);
        kvm_clear_async_pf_completion_queue(vcpu);
        if (!kvm_is_ucontrol(vcpu->kvm))
                sca_del_vcpu(vcpu);

        if (kvm_is_ucontrol(vcpu->kvm))
                gmap_free(vcpu->arch.gmap);

        if (vcpu->kvm->arch.use_cmma)
                kvm_s390_vcpu_unsetup_cmma(vcpu);
        free_page((unsigned long)(vcpu->arch.sie_block));

        kvm_vcpu_uninit(vcpu);
        kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_arch_vcpu_destroy(vcpu);

        mutex_lock(&kvm->lock);
        for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
                kvm->vcpus[i] = NULL;

        atomic_set(&kvm->online_vcpus, 0);
        mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
        kvm_free_vcpus(kvm);
        free_page((unsigned long)kvm->arch.model.fac);
        sca_dispose(kvm);
        debug_unregister(kvm->arch.dbf);
        kfree(kvm->arch.crypto.crycb);
        if (!kvm_is_ucontrol(kvm))
                gmap_free(kvm->arch.gmap);
        kvm_s390_destroy_adapters(kvm);
        kvm_s390_clear_float_irqs(kvm);
        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
        vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
        if (!vcpu->arch.gmap)
                return -ENOMEM;
        vcpu->arch.gmap->private = vcpu->kvm;

        return 0;
}

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
        read_lock(&vcpu->kvm->arch.sca_lock);
        if (vcpu->kvm->arch.use_esca) {
                struct esca_block *sca = vcpu->kvm->arch.sca;

                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
                sca->cpu[vcpu->vcpu_id].sda = 0;
        } else {
                struct bsca_block *sca = vcpu->kvm->arch.sca;

                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
                sca->cpu[vcpu->vcpu_id].sda = 0;
        }
        read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
        read_lock(&vcpu->kvm->arch.sca_lock);
        if (vcpu->kvm->arch.use_esca) {
                struct esca_block *sca = vcpu->kvm->arch.sca;

                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
                vcpu->arch.sie_block->ecb2 |= 0x04U;
                set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
        } else {
                struct bsca_block *sca = vcpu->kvm->arch.sca;

                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
                set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
        }
        read_unlock(&vcpu->kvm->arch.sca_lock);
}
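
/*
 * In extended (ESCA) mode the low 6 bits of the SCA origin are masked
 * off in scaol and ecb2 bit 0x04 tells the hardware to interpret the
 * block in the extended format.
 */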

/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
        d->sda = s->sda;
        d->sigp_ctrl.c = s->sigp_ctrl.c;
        d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
        int i;

        d->ipte_control = s->ipte_control;
        d->mcn[0] = s->mcn;
        for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
                sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}

static int sca_switch_to_extended(struct kvm *kvm)
{
        struct bsca_block *old_sca = kvm->arch.sca;
        struct esca_block *new_sca;
        struct kvm_vcpu *vcpu;
        unsigned int vcpu_idx;
        u32 scaol, scaoh;

        new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
        if (!new_sca)
                return -ENOMEM;

        scaoh = (u32)((u64)(new_sca) >> 32);
        scaol = (u32)(u64)(new_sca) & ~0x3fU;

        kvm_s390_vcpu_block_all(kvm);
        write_lock(&kvm->arch.sca_lock);

        sca_copy_b_to_e(new_sca, old_sca);

        kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
                vcpu->arch.sie_block->scaoh = scaoh;
                vcpu->arch.sie_block->scaol = scaol;
                vcpu->arch.sie_block->ecb2 |= 0x04U;
        }
        kvm->arch.sca = new_sca;
        kvm->arch.use_esca = 1;

        write_unlock(&kvm->arch.sca_lock);
        kvm_s390_vcpu_unblock_all(kvm);

        free_page((unsigned long)old_sca);

        VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
                 old_sca, kvm->arch.sca);
        return 0;
}
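
/*
 * The switch runs with all VCPUs blocked and sca_lock held for
 * writing, so no CPU can sit in SIE with a pointer into the old basic
 * SCA while it is being copied and freed.
 */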

static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
        int rc;

        if (id < KVM_S390_BSCA_CPU_SLOTS)
                return true;
        if (!sclp.has_esca)
                return false;

        mutex_lock(&kvm->lock);
        rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
        mutex_unlock(&kvm->lock);

        return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
        kvm_clear_async_pf_completion_queue(vcpu);
        vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
                                    KVM_SYNC_GPRS |
                                    KVM_SYNC_ACRS |
                                    KVM_SYNC_CRS |
                                    KVM_SYNC_ARCH0 |
                                    KVM_SYNC_PFAULT;
        if (test_kvm_facility(vcpu->kvm, 129))
                vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;

        if (kvm_is_ucontrol(vcpu->kvm))
                return __kvm_ucontrol_vcpu_init(vcpu);

        return 0;
}

/*
 * Backs up the current FP/VX register save area on a particular
 * destination.  Used to switch between different register save
 * areas.
 */
static inline void save_fpu_to(struct fpu *dst)
{
        dst->fpc = current->thread.fpu.fpc;
        dst->regs = current->thread.fpu.regs;
}

/*
 * Switches the FP/VX register save area from which to lazy
 * restore register contents.
 */
static inline void load_fpu_from(struct fpu *from)
{
        current->thread.fpu.fpc = from->fpc;
        current->thread.fpu.regs = from->regs;
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        /* Save host register state */
        save_fpu_regs();
        save_fpu_to(&vcpu->arch.host_fpregs);

        if (test_kvm_facility(vcpu->kvm, 129)) {
                current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
                /*
                 * Use the register save area in the SIE-control block
                 * for register restore and save in kvm_arch_vcpu_put()
                 */
                current->thread.fpu.vxrs =
                        (__vector128 *)&vcpu->run->s.regs.vrs;
        } else
                load_fpu_from(&vcpu->arch.guest_fpregs);

        if (test_fp_ctl(current->thread.fpu.fpc))
                /* User space provided an invalid FPC, let's clear it */
                current->thread.fpu.fpc = 0;

        save_access_regs(vcpu->arch.host_acrs);
        restore_access_regs(vcpu->run->s.regs.acrs);
        gmap_enable(vcpu->arch.gmap);
        atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
        atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        gmap_disable(vcpu->arch.gmap);

        save_fpu_regs();

        if (test_kvm_facility(vcpu->kvm, 129))
                /*
                 * kvm_arch_vcpu_load() set up the register save area to
                 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
                 * are already saved.  Only the floating-point control must be
                 * copied.
                 */
                vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
        else
                save_fpu_to(&vcpu->arch.guest_fpregs);
        load_fpu_from(&vcpu->arch.host_fpregs);

        save_access_regs(vcpu->run->s.regs.acrs);
        restore_access_regs(vcpu->arch.host_acrs);
}
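
/*
 * Host and guest FP/VX state share current->thread.fpu: load/put only
 * swap the save-area pointer and the fpc, while the actual register
 * contents are spilled lazily via save_fpu_regs().
 */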

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
        /* this equals initial cpu reset in pop, but we don't switch to ESA */
        vcpu->arch.sie_block->gpsw.mask = 0UL;
        vcpu->arch.sie_block->gpsw.addr = 0UL;
        kvm_s390_set_prefix(vcpu, 0);
        vcpu->arch.sie_block->cputm     = 0UL;
        vcpu->arch.sie_block->ckc       = 0UL;
        vcpu->arch.sie_block->todpr     = 0;
        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
        vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
        vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
        vcpu->arch.guest_fpregs.fpc = 0;
        asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
        vcpu->arch.sie_block->gbea = 1;
        vcpu->arch.sie_block->pp = 0;
        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
        kvm_clear_async_pf_completion_queue(vcpu);
        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
                kvm_s390_vcpu_stop(vcpu);
        kvm_s390_clear_local_irqs(vcpu);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
        mutex_lock(&vcpu->kvm->lock);
        preempt_disable();
        vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
        preempt_enable();
        mutex_unlock(&vcpu->kvm->lock);
        if (!kvm_is_ucontrol(vcpu->kvm)) {
                vcpu->arch.gmap = vcpu->kvm->arch.gmap;
                sca_add_vcpu(vcpu);
        }

}
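
/*
 * The epoch is copied under preempt_disable() so that a concurrent
 * clock synchronization (kvm_clock_sync(), which runs inside
 * stop_machine()) cannot observe a half-updated VCPU. An unpaired
 * smp_mb() that used to sit at the end of the function above provided
 * no ordering guarantee and has been dropped (see the commit subject).
 */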

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
        if (!test_kvm_facility(vcpu->kvm, 76))
                return;

        vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

        if (vcpu->kvm->arch.crypto.aes_kw)
                vcpu->arch.sie_block->ecb3 |= ECB3_AES;
        if (vcpu->kvm->arch.crypto.dea_kw)
                vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

        vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
        free_page(vcpu->arch.sie_block->cbrlo);
        vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
        vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
        if (!vcpu->arch.sie_block->cbrlo)
                return -ENOMEM;

        vcpu->arch.sie_block->ecb2 |= 0x80;
        vcpu->arch.sie_block->ecb2 &= ~0x08;
        return 0;
}
1546
1547 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1548 {
1549         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1550
1551         vcpu->arch.cpu_id = model->cpu_id;
1552         vcpu->arch.sie_block->ibc = model->ibc;
1553         vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1554 }
1555
1556 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1557 {
1558         int rc = 0;
1559
1560         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1561                                                     CPUSTAT_SM |
1562                                                     CPUSTAT_STOPPED);
1563
1564         if (test_kvm_facility(vcpu->kvm, 78))
1565                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1566         else if (test_kvm_facility(vcpu->kvm, 8))
1567                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1568
1569         kvm_s390_vcpu_setup_model(vcpu);
1570
1571         vcpu->arch.sie_block->ecb   = 6;
1572         if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1573                 vcpu->arch.sie_block->ecb |= 0x10;
1574
1575         vcpu->arch.sie_block->ecb2  = 8;
1576         vcpu->arch.sie_block->eca   = 0xC1002000U;
1577         if (sclp.has_siif)
1578                 vcpu->arch.sie_block->eca |= 1;
1579         if (sclp.has_sigpif)
1580                 vcpu->arch.sie_block->eca |= 0x10000000U;
1581         if (test_kvm_facility(vcpu->kvm, 129)) {
1582                 vcpu->arch.sie_block->eca |= 0x00020000;
1583                 vcpu->arch.sie_block->ecd |= 0x20000000;
1584         }
1585         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1586
1587         if (vcpu->kvm->arch.use_cmma) {
1588                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1589                 if (rc)
1590                         return rc;
1591         }
1592         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1593         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1594
1595         kvm_s390_vcpu_crypto_setup(vcpu);
1596
1597         return rc;
1598 }
1599
1600 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1601                                       unsigned int id)
1602 {
1603         struct kvm_vcpu *vcpu;
1604         struct sie_page *sie_page;
1605         int rc = -EINVAL;
1606
1607         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1608                 goto out;
1609
1610         rc = -ENOMEM;
1611
1612         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1613         if (!vcpu)
1614                 goto out;
1615
1616         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1617         if (!sie_page)
1618                 goto out_free_cpu;
1619
1620         vcpu->arch.sie_block = &sie_page->sie_block;
1621         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1622
1623         vcpu->arch.sie_block->icpua = id;
1624         spin_lock_init(&vcpu->arch.local_int.lock);
1625         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1626         vcpu->arch.local_int.wq = &vcpu->wq;
1627         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1628
1629         /*
1630          * Allocate a save area for floating-point registers.  If the vector
1631          * extension is available, register contents are saved in the SIE
1632          * control block.  The allocated save area is still required in
1633          * particular places, for example, in kvm_s390_vcpu_store_status().
1634          */
1635         vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1636                                                GFP_KERNEL);
1637         if (!vcpu->arch.guest_fpregs.fprs)
1638                 goto out_free_sie_block;
1639
1640         rc = kvm_vcpu_init(vcpu, kvm, id);
1641         if (rc)
1642                 goto out_free_sie_block;
1643         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1644                  vcpu->arch.sie_block);
1645         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1646
1647         return vcpu;
1648 out_free_sie_block:
1649         free_page((unsigned long)(vcpu->arch.sie_block));
1650 out_free_cpu:
1651         kmem_cache_free(kvm_vcpu_cache, vcpu);
1652 out:
1653         return ERR_PTR(rc);
1654 }
1655
1656 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1657 {
1658         return kvm_s390_vcpu_has_irq(vcpu, 0);
1659 }
1660
1661 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1662 {
1663         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1664         exit_sie(vcpu);
1665 }
1666
1667 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1668 {
1669         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1670 }
1671
1672 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1673 {
1674         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1675         exit_sie(vcpu);
1676 }
1677
1678 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1679 {
1680         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1681 }
1682
1683 /*
1684  * Kick a guest cpu out of SIE and wait until SIE is not running.
1685  * If the CPU is not running (e.g. waiting as idle) it returns immediately.
1686  */
1687 void exit_sie(struct kvm_vcpu *vcpu)
1688 {
1689         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1690         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1691                 cpu_relax();
1692 }
1693
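/*
 * Editorial sketch, not part of the original source: the helpers above are
 * meant to be paired per vcpu, as in
 *
 *	kvm_s390_vcpu_block(vcpu);	(set PROG_BLOCK_SIE, kick vcpu out of SIE)
 *	(update state the running guest must not observe half-done)
 *	kvm_s390_vcpu_unblock(vcpu);	(SIE entry is allowed again)
 *
 * which is the pattern kvm_s390_set_tod_clock() later in this file applies to
 * all vcpus via kvm_s390_vcpu_block_all()/kvm_s390_vcpu_unblock_all().
 */
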
1694 /* Kick a guest cpu out of SIE to process a request synchronously */
1695 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1696 {
1697         kvm_make_request(req, vcpu);
1698         kvm_s390_vcpu_request(vcpu);
1699 }
1700
1701 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1702 {
1703         int i;
1704         struct kvm *kvm = gmap->private;
1705         struct kvm_vcpu *vcpu;
1706
1707         kvm_for_each_vcpu(i, vcpu, kvm) {
1708                 /* match against both prefix pages */
1709                 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1710                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1711                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1712                 }
1713         }
1714 }
1715
1716 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1717 {
1718         /* kvm common code refers to this, but never calls it */
1719         BUG();
1720         return 0;
1721 }
1722
1723 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1724                                            struct kvm_one_reg *reg)
1725 {
1726         int r = -EINVAL;
1727
1728         switch (reg->id) {
1729         case KVM_REG_S390_TODPR:
1730                 r = put_user(vcpu->arch.sie_block->todpr,
1731                              (u32 __user *)reg->addr);
1732                 break;
1733         case KVM_REG_S390_EPOCHDIFF:
1734                 r = put_user(vcpu->arch.sie_block->epoch,
1735                              (u64 __user *)reg->addr);
1736                 break;
1737         case KVM_REG_S390_CPU_TIMER:
1738                 r = put_user(vcpu->arch.sie_block->cputm,
1739                              (u64 __user *)reg->addr);
1740                 break;
1741         case KVM_REG_S390_CLOCK_COMP:
1742                 r = put_user(vcpu->arch.sie_block->ckc,
1743                              (u64 __user *)reg->addr);
1744                 break;
1745         case KVM_REG_S390_PFTOKEN:
1746                 r = put_user(vcpu->arch.pfault_token,
1747                              (u64 __user *)reg->addr);
1748                 break;
1749         case KVM_REG_S390_PFCOMPARE:
1750                 r = put_user(vcpu->arch.pfault_compare,
1751                              (u64 __user *)reg->addr);
1752                 break;
1753         case KVM_REG_S390_PFSELECT:
1754                 r = put_user(vcpu->arch.pfault_select,
1755                              (u64 __user *)reg->addr);
1756                 break;
1757         case KVM_REG_S390_PP:
1758                 r = put_user(vcpu->arch.sie_block->pp,
1759                              (u64 __user *)reg->addr);
1760                 break;
1761         case KVM_REG_S390_GBEA:
1762                 r = put_user(vcpu->arch.sie_block->gbea,
1763                              (u64 __user *)reg->addr);
1764                 break;
1765         default:
1766                 break;
1767         }
1768
1769         return r;
1770 }
1771
1772 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1773                                            struct kvm_one_reg *reg)
1774 {
1775         int r = -EINVAL;
1776
1777         switch (reg->id) {
1778         case KVM_REG_S390_TODPR:
1779                 r = get_user(vcpu->arch.sie_block->todpr,
1780                              (u32 __user *)reg->addr);
1781                 break;
1782         case KVM_REG_S390_EPOCHDIFF:
1783                 r = get_user(vcpu->arch.sie_block->epoch,
1784                              (u64 __user *)reg->addr);
1785                 break;
1786         case KVM_REG_S390_CPU_TIMER:
1787                 r = get_user(vcpu->arch.sie_block->cputm,
1788                              (u64 __user *)reg->addr);
1789                 break;
1790         case KVM_REG_S390_CLOCK_COMP:
1791                 r = get_user(vcpu->arch.sie_block->ckc,
1792                              (u64 __user *)reg->addr);
1793                 break;
1794         case KVM_REG_S390_PFTOKEN:
1795                 r = get_user(vcpu->arch.pfault_token,
1796                              (u64 __user *)reg->addr);
1797                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1798                         kvm_clear_async_pf_completion_queue(vcpu);
1799                 break;
1800         case KVM_REG_S390_PFCOMPARE:
1801                 r = get_user(vcpu->arch.pfault_compare,
1802                              (u64 __user *)reg->addr);
1803                 break;
1804         case KVM_REG_S390_PFSELECT:
1805                 r = get_user(vcpu->arch.pfault_select,
1806                              (u64 __user *)reg->addr);
1807                 break;
1808         case KVM_REG_S390_PP:
1809                 r = get_user(vcpu->arch.sie_block->pp,
1810                              (u64 __user *)reg->addr);
1811                 break;
1812         case KVM_REG_S390_GBEA:
1813                 r = get_user(vcpu->arch.sie_block->gbea,
1814                              (u64 __user *)reg->addr);
1815                 break;
1816         default:
1817                 break;
1818         }
1819
1820         return r;
1821 }
1822
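/*
 * Editorial sketch, not part of the original source: userspace reaches the
 * two handlers above through the generic ONE_REG ioctls. A minimal read of
 * the CPU timer, assuming an open vcpu fd and <linux/kvm.h>:
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)(unsigned long)&cputm,
 *	};
 *	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
 *		perror("KVM_GET_ONE_REG");
 */
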
1823 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1824 {
1825         kvm_s390_vcpu_initial_reset(vcpu);
1826         return 0;
1827 }
1828
1829 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1830 {
1831         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1832         return 0;
1833 }
1834
1835 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1836 {
1837         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1838         return 0;
1839 }
1840
1841 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1842                                   struct kvm_sregs *sregs)
1843 {
1844         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1845         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1846         restore_access_regs(vcpu->run->s.regs.acrs);
1847         return 0;
1848 }
1849
1850 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1851                                   struct kvm_sregs *sregs)
1852 {
1853         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1854         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1855         return 0;
1856 }
1857
1858 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1859 {
1860         if (test_fp_ctl(fpu->fpc))
1861                 return -EINVAL;
1862         memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1863         vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1864         save_fpu_regs();
1865         load_fpu_from(&vcpu->arch.guest_fpregs);
1866         return 0;
1867 }
1868
1869 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1870 {
1871         memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1872         fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1873         return 0;
1874 }
1875
1876 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1877 {
1878         int rc = 0;
1879
1880         if (!is_vcpu_stopped(vcpu))
1881                 rc = -EBUSY;
1882         else {
1883                 vcpu->run->psw_mask = psw.mask;
1884                 vcpu->run->psw_addr = psw.addr;
1885         }
1886         return rc;
1887 }
1888
1889 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1890                                   struct kvm_translation *tr)
1891 {
1892         return -EINVAL; /* not implemented yet */
1893 }
1894
1895 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1896                               KVM_GUESTDBG_USE_HW_BP | \
1897                               KVM_GUESTDBG_ENABLE)
1898
1899 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1900                                         struct kvm_guest_debug *dbg)
1901 {
1902         int rc = 0;
1903
1904         vcpu->guest_debug = 0;
1905         kvm_s390_clear_bp_data(vcpu);
1906
1907         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1908                 return -EINVAL;
1909
1910         if (dbg->control & KVM_GUESTDBG_ENABLE) {
1911                 vcpu->guest_debug = dbg->control;
1912                 /* enforce guest PER */
1913                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1914
1915                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1916                         rc = kvm_s390_import_bp_data(vcpu, dbg);
1917         } else {
1918                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1919                 vcpu->arch.guestdbg.last_bp = 0;
1920         }
1921
1922         if (rc) {
1923                 vcpu->guest_debug = 0;
1924                 kvm_s390_clear_bp_data(vcpu);
1925                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1926         }
1927
1928         return rc;
1929 }
1930
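/*
 * Editorial sketch, not part of the original source: enabling single-step
 * debugging with the flags accepted by the handler above, assuming an open
 * vcpu fd and <linux/kvm.h>:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg) < 0)
 *		perror("KVM_SET_GUEST_DEBUG");
 */
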
1931 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1932                                     struct kvm_mp_state *mp_state)
1933 {
1934         /* CHECK_STOP and LOAD are not supported yet */
1935         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1936                                        KVM_MP_STATE_OPERATING;
1937 }
1938
1939 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1940                                     struct kvm_mp_state *mp_state)
1941 {
1942         int rc = 0;
1943
1944         /* user space knows about this interface - let it control the state */
1945         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1946
1947         switch (mp_state->mp_state) {
1948         case KVM_MP_STATE_STOPPED:
1949                 kvm_s390_vcpu_stop(vcpu);
1950                 break;
1951         case KVM_MP_STATE_OPERATING:
1952                 kvm_s390_vcpu_start(vcpu);
1953                 break;
1954         case KVM_MP_STATE_LOAD:
1955         case KVM_MP_STATE_CHECK_STOP:
1956                 /* fall through - CHECK_STOP and LOAD are not supported yet */
1957         default:
1958                 rc = -ENXIO;
1959         }
1960
1961         return rc;
1962 }
1963
1964 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1965 {
1966         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1967 }
1968
1969 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1970 {
1971 retry:
1972         kvm_s390_vcpu_request_handled(vcpu);
1973         if (!vcpu->requests)
1974                 return 0;
1975         /*
1976          * We use MMU_RELOAD just to re-arm the ipte notifier for the
1977          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1978          * This ensures that the ipte instruction for this request has
1979          * already finished. We might race against a second unmapper that
1980          * wants to set the blocking bit. Let's just retry the request loop.
1981          */
1982         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1983                 int rc;
1984                 rc = gmap_ipte_notify(vcpu->arch.gmap,
1985                                       kvm_s390_get_prefix(vcpu),
1986                                       PAGE_SIZE * 2);
1987                 if (rc)
1988                         return rc;
1989                 goto retry;
1990         }
1991
1992         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1993                 vcpu->arch.sie_block->ihcpu = 0xffff;
1994                 goto retry;
1995         }
1996
1997         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1998                 if (!ibs_enabled(vcpu)) {
1999                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2000                         atomic_or(CPUSTAT_IBS,
2001                                         &vcpu->arch.sie_block->cpuflags);
2002                 }
2003                 goto retry;
2004         }
2005
2006         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2007                 if (ibs_enabled(vcpu)) {
2008                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2009                         atomic_andnot(CPUSTAT_IBS,
2010                                           &vcpu->arch.sie_block->cpuflags);
2011                 }
2012                 goto retry;
2013         }
2014
2015         /* nothing to do, just clear the request */
2016         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2017
2018         return 0;
2019 }
2020
2021 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2022 {
2023         struct kvm_vcpu *vcpu;
2024         int i;
2025
2026         mutex_lock(&kvm->lock);
2027         preempt_disable();
2028         kvm->arch.epoch = tod - get_tod_clock();
2029         kvm_s390_vcpu_block_all(kvm);
2030         kvm_for_each_vcpu(i, vcpu, kvm)
2031                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2032         kvm_s390_vcpu_unblock_all(kvm);
2033         preempt_enable();
2034         mutex_unlock(&kvm->lock);
2035 }
2036
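/*
 * Editorial sketch, not part of the original source: kvm_s390_set_tod_clock()
 * above stores only the guest/host delta (epoch), which SIE adds to the host
 * TOD while the guest runs. Userspace typically reaches it through the
 * KVM_S390_VM_TOD device attribute group (assumed to be wired up elsewhere
 * in this file); vm_fd and new_guest_tod are placeholders:
 *
 *	__u64 tod = new_guest_tod;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)(unsigned long)&tod,
 *	};
 *	if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
 *		perror("KVM_SET_DEVICE_ATTR");
 */
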
2037 /**
2038  * kvm_arch_fault_in_page - fault-in guest page if necessary
2039  * @vcpu: The corresponding virtual cpu
2040  * @gpa: Guest physical address
2041  * @writable: Whether the page should be writable or not
2042  *
2043  * Make sure that a guest page has been faulted-in on the host.
2044  *
2045  * Return: Zero on success, negative error code otherwise.
2046  */
2047 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2048 {
2049         return gmap_fault(vcpu->arch.gmap, gpa,
2050                           writable ? FAULT_FLAG_WRITE : 0);
2051 }
2052
2053 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2054                                       unsigned long token)
2055 {
2056         struct kvm_s390_interrupt inti;
2057         struct kvm_s390_irq irq;
2058
2059         if (start_token) {
2060                 irq.u.ext.ext_params2 = token;
2061                 irq.type = KVM_S390_INT_PFAULT_INIT;
2062                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2063         } else {
2064                 inti.type = KVM_S390_INT_PFAULT_DONE;
2065                 inti.parm64 = token;
2066                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2067         }
2068 }
2069
2070 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2071                                      struct kvm_async_pf *work)
2072 {
2073         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2074         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2075 }
2076
2077 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2078                                  struct kvm_async_pf *work)
2079 {
2080         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2081         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2082 }
2083
2084 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2085                                struct kvm_async_pf *work)
2086 {
2087         /* s390 will always inject the page directly */
2088 }
2089
2090 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2091 {
2092         /*
2093          * s390 will always inject the page directly,
2094          * but we still want check_async_completion to clean up
2095          */
2096         return true;
2097 }
2098
2099 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2100 {
2101         hva_t hva;
2102         struct kvm_arch_async_pf arch;
2103         int rc;
2104
2105         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2106                 return 0;
2107         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2108             vcpu->arch.pfault_compare)
2109                 return 0;
2110         if (psw_extint_disabled(vcpu))
2111                 return 0;
2112         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2113                 return 0;
2114         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2115                 return 0;
2116         if (!vcpu->arch.gmap->pfault_enabled)
2117                 return 0;
2118
2119         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2120         hva += current->thread.gmap_addr & ~PAGE_MASK;
2121         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2122                 return 0;
2123
2124         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2125         return rc;
2126 }
2127
2128 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2129 {
2130         int rc, cpuflags;
2131
2132         /*
2133          * On s390 notifications for arriving pages will be delivered directly
2134          * to the guest, but the housekeeping for completed pfaults is
2135          * handled outside the worker.
2136          */
2137         kvm_check_async_pf_completion(vcpu);
2138
2139         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2140         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2141
2142         if (need_resched())
2143                 schedule();
2144
2145         if (test_cpu_flag(CIF_MCCK_PENDING))
2146                 s390_handle_mcck();
2147
2148         if (!kvm_is_ucontrol(vcpu->kvm)) {
2149                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2150                 if (rc)
2151                         return rc;
2152         }
2153
2154         rc = kvm_s390_handle_requests(vcpu);
2155         if (rc)
2156                 return rc;
2157
2158         if (guestdbg_enabled(vcpu)) {
2159                 kvm_s390_backup_guest_per_regs(vcpu);
2160                 kvm_s390_patch_guest_per_regs(vcpu);
2161         }
2162
2163         vcpu->arch.sie_block->icptcode = 0;
2164         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2165         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2166         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2167
2168         return 0;
2169 }
2170
2171 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2172 {
2173         psw_t *psw = &vcpu->arch.sie_block->gpsw;
2174         u8 opcode;
2175         int rc;
2176
2177         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2178         trace_kvm_s390_sie_fault(vcpu);
2179
2180         /*
2181          * We want to inject an addressing exception, which is defined as a
2182          * suppressing or terminating exception. However, since we came here
2183          * by a DAT access exception, the PSW still points to the faulting
2184          * instruction, because DAT exceptions are nullifying. So we've got
2185          * to look up the current opcode to get the length of the instruction
2186          * to be able to forward the PSW.
2187          */
2188         rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2189         if (rc)
2190                 return kvm_s390_inject_prog_cond(vcpu, rc);
2191         psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2192
2193         return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2194 }
2195
2196 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2197 {
2198         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2199                    vcpu->arch.sie_block->icptcode);
2200         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2201
2202         if (guestdbg_enabled(vcpu))
2203                 kvm_s390_restore_guest_per_regs(vcpu);
2204
2205         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2206         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2207
2208         if (vcpu->arch.sie_block->icptcode > 0) {
2209                 int rc = kvm_handle_sie_intercept(vcpu);
2210
2211                 if (rc != -EOPNOTSUPP)
2212                         return rc;
2213                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2214                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2215                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2216                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2217                 return -EREMOTE;
2218         } else if (exit_reason != -EFAULT) {
2219                 vcpu->stat.exit_null++;
2220                 return 0;
2221         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2222                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2223                 vcpu->run->s390_ucontrol.trans_exc_code =
2224                                                 current->thread.gmap_addr;
2225                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2226                 return -EREMOTE;
2227         } else if (current->thread.gmap_pfault) {
2228                 trace_kvm_s390_major_guest_pfault(vcpu);
2229                 current->thread.gmap_pfault = 0;
2230                 if (kvm_arch_setup_async_pf(vcpu))
2231                         return 0;
2232                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2233         }
2234         return vcpu_post_run_fault_in_sie(vcpu);
2235 }
2236
2237 static int __vcpu_run(struct kvm_vcpu *vcpu)
2238 {
2239         int rc, exit_reason;
2240
2241         /*
2242          * We try to hold kvm->srcu during most of vcpu_run (except when
2243          * running the guest), so that memslots (and other stuff) are protected.
2244          */
2245         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2246
2247         do {
2248                 rc = vcpu_pre_run(vcpu);
2249                 if (rc)
2250                         break;
2251
2252                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2253                 /*
2254                  * As PF_VCPU will be used in the fault handler, there
2255                  * must be no uaccess between guest_enter and guest_exit.
2256                  */
2257                 local_irq_disable();
2258                 __kvm_guest_enter();
2259                 local_irq_enable();
2260                 exit_reason = sie64a(vcpu->arch.sie_block,
2261                                      vcpu->run->s.regs.gprs);
2262                 local_irq_disable();
2263                 __kvm_guest_exit();
2264                 local_irq_enable();
2265                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2266
2267                 rc = vcpu_post_run(vcpu, exit_reason);
2268         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2269
2270         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2271         return rc;
2272 }
2273
2274 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2275 {
2276         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2277         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2278         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2279                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2280         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2281                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2282                 /* some control register changes require a tlb flush */
2283                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2284         }
2285         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2286                 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2287                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2288                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2289                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2290                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2291         }
2292         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2293                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2294                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2295                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2296                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2297                         kvm_clear_async_pf_completion_queue(vcpu);
2298         }
2299         kvm_run->kvm_dirty_regs = 0;
2300 }
2301
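/*
 * Editorial sketch, not part of the original source: userspace tells
 * sync_regs() above which synced blocks it touched by setting bits in
 * kvm_dirty_regs before KVM_RUN; run is the mmap()ed kvm_run structure and
 * new_prefix is a placeholder:
 *
 *	run->s.regs.prefix = new_prefix;
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
 *	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
 *		perror("KVM_RUN");
 */
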
2302 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2303 {
2304         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2305         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2306         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2307         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2308         kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2309         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2310         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2311         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2312         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2313         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2314         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2315         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2316 }
2317
2318 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2319 {
2320         int rc;
2321         sigset_t sigsaved;
2322
2323         if (guestdbg_exit_pending(vcpu)) {
2324                 kvm_s390_prepare_debug_exit(vcpu);
2325                 return 0;
2326         }
2327
2328         if (vcpu->sigset_active)
2329                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2330
2331         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2332                 kvm_s390_vcpu_start(vcpu);
2333         } else if (is_vcpu_stopped(vcpu)) {
2334                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2335                                    vcpu->vcpu_id);
2336                 return -EINVAL;
2337         }
2338
2339         sync_regs(vcpu, kvm_run);
2340
2341         might_fault();
2342         rc = __vcpu_run(vcpu);
2343
2344         if (signal_pending(current) && !rc) {
2345                 kvm_run->exit_reason = KVM_EXIT_INTR;
2346                 rc = -EINTR;
2347         }
2348
2349         if (guestdbg_exit_pending(vcpu) && !rc)  {
2350                 kvm_s390_prepare_debug_exit(vcpu);
2351                 rc = 0;
2352         }
2353
2354         if (rc == -EREMOTE) {
2355                 /* userspace support is needed, kvm_run has been prepared */
2356                 rc = 0;
2357         }
2358
2359         store_regs(vcpu, kvm_run);
2360
2361         if (vcpu->sigset_active)
2362                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2363
2364         vcpu->stat.exit_userspace++;
2365         return rc;
2366 }
2367
2368 /*
2369  * store status at address
2370  * we have two special cases:
2371  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2372  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2373  */
2374 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2375 {
2376         unsigned char archmode = 1;
2377         unsigned int px;
2378         u64 clkcomp;
2379         int rc;
2380
2381         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2382                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2383                         return -EFAULT;
2384                 gpa = SAVE_AREA_BASE;
2385         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2386                 if (write_guest_real(vcpu, 163, &archmode, 1))
2387                         return -EFAULT;
2388                 gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2389         }
2390         rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2391                              vcpu->arch.guest_fpregs.fprs, 128);
2392         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2393                               vcpu->run->s.regs.gprs, 128);
2394         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2395                               &vcpu->arch.sie_block->gpsw, 16);
2396         px = kvm_s390_get_prefix(vcpu);
2397         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2398                               &px, 4);
2399         rc |= write_guest_abs(vcpu,
2400                               gpa + offsetof(struct save_area, fp_ctrl_reg),
2401                               &vcpu->arch.guest_fpregs.fpc, 4);
2402         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2403                               &vcpu->arch.sie_block->todpr, 4);
2404         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2405                               &vcpu->arch.sie_block->cputm, 8);
2406         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2407         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2408                               &clkcomp, 8);
2409         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2410                               &vcpu->run->s.regs.acrs, 64);
2411         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2412                               &vcpu->arch.sie_block->gcr, 128);
2413         return rc ? -EFAULT : 0;
2414 }
2415
2416 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2417 {
2418         /*
2419          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2420          * copying in vcpu load/put. Let's update our copies before we save
2421          * them into the save area.
2422          */
2423         save_fpu_regs();
2424         if (test_kvm_facility(vcpu->kvm, 129)) {
2425                 /*
2426                  * If the vector extension is available, the vector registers
2427                  * which overlaps with floating-point registers are saved in
2428                  * which overlap with floating-point registers are saved in
2429                  * registers and the FPC value and store them in the
2430                  * guest_fpregs structure.
2431                  */
2432                 vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2433                 convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2434                                  current->thread.fpu.vxrs);
2435         } else
2436                 save_fpu_to(&vcpu->arch.guest_fpregs);
2437         save_access_regs(vcpu->run->s.regs.acrs);
2438
2439         return kvm_s390_store_status_unloaded(vcpu, addr);
2440 }
2441
2442 /*
2443  * store additional status at address
2444  */
2445 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2446                                         unsigned long gpa)
2447 {
2448         /* Only bits 0-53 are used for address formation */
2449         if (!(gpa & ~0x3ff))
2450                 return 0;
2451
2452         return write_guest_abs(vcpu, gpa & ~0x3ff,
2453                                (void *)&vcpu->run->s.regs.vrs, 512);
2454 }
2455
2456 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2457 {
2458         if (!test_kvm_facility(vcpu->kvm, 129))
2459                 return 0;
2460
2461         /*
2462          * The guest VXRS are in the host VXRS due to the lazy
2463          * copying in vcpu load/put. We can simply call save_fpu_regs()
2464          * to save the current register state because we are in the
2465          * middle of a load/put cycle.
2466          *
2467          * Let's update our copies before we save it into the save area.
2468          */
2469         save_fpu_regs();
2470
2471         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2472 }
2473
2474 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2475 {
2476         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2477         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2478 }
2479
2480 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2481 {
2482         unsigned int i;
2483         struct kvm_vcpu *vcpu;
2484
2485         kvm_for_each_vcpu(i, vcpu, kvm) {
2486                 __disable_ibs_on_vcpu(vcpu);
2487         }
2488 }
2489
2490 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2491 {
2492         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2493         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2494 }
2495
2496 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2497 {
2498         int i, online_vcpus, started_vcpus = 0;
2499
2500         if (!is_vcpu_stopped(vcpu))
2501                 return;
2502
2503         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2504         /* Only one cpu at a time may enter/leave the STOPPED state. */
2505         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2506         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2507
2508         for (i = 0; i < online_vcpus; i++) {
2509                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2510                         started_vcpus++;
2511         }
2512
2513         if (started_vcpus == 0) {
2514                 /* we're the only active VCPU -> speed it up */
2515                 __enable_ibs_on_vcpu(vcpu);
2516         } else if (started_vcpus == 1) {
2517                 /*
2518                  * As we are starting a second VCPU, we have to disable
2519                  * the IBS facility on all VCPUs to remove potentially
2520                  * outstanding ENABLE requests.
2521                  */
2522                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2523         }
2524
2525         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2526         /*
2527          * Another VCPU might have used IBS while we were offline.
2528          * Let's play safe and flush the VCPU at startup.
2529          */
2530         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2531         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2532         return;
2533 }
2534
2535 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2536 {
2537         int i, online_vcpus, started_vcpus = 0;
2538         struct kvm_vcpu *started_vcpu = NULL;
2539
2540         if (is_vcpu_stopped(vcpu))
2541                 return;
2542
2543         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2544         /* Only one cpu at a time may enter/leave the STOPPED state. */
2545         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2546         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2547
2548         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2549         kvm_s390_clear_stop_irq(vcpu);
2550
2551         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2552         __disable_ibs_on_vcpu(vcpu);
2553
2554         for (i = 0; i < online_vcpus; i++) {
2555                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2556                         started_vcpus++;
2557                         started_vcpu = vcpu->kvm->vcpus[i];
2558                 }
2559         }
2560
2561         if (started_vcpus == 1) {
2562                 /*
2563                  * As we only have one VCPU left, we want to enable the
2564                  * IBS facility for that VCPU to speed it up.
2565                  */
2566                 __enable_ibs_on_vcpu(started_vcpu);
2567         }
2568
2569         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2570         return;
2571 }
2572
2573 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2574                                      struct kvm_enable_cap *cap)
2575 {
2576         int r;
2577
2578         if (cap->flags)
2579                 return -EINVAL;
2580
2581         switch (cap->cap) {
2582         case KVM_CAP_S390_CSS_SUPPORT:
2583                 if (!vcpu->kvm->arch.css_support) {
2584                         vcpu->kvm->arch.css_support = 1;
2585                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2586                         trace_kvm_s390_enable_css(vcpu->kvm);
2587                 }
2588                 r = 0;
2589                 break;
2590         default:
2591                 r = -EINVAL;
2592                 break;
2593         }
2594         return r;
2595 }
2596
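/*
 * Editorial sketch, not part of the original source: turning on the CSS
 * support capability handled above from userspace, assuming an open vcpu fd
 * (the resulting flag is per-VM):
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_CSS_SUPPORT,
 *	};
 *	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		perror("KVM_ENABLE_CAP");
 */
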
2597 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2598                                   struct kvm_s390_mem_op *mop)
2599 {
2600         void __user *uaddr = (void __user *)mop->buf;
2601         void *tmpbuf = NULL;
2602         int r, srcu_idx;
2603         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2604                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2605
2606         if (mop->flags & ~supported_flags)
2607                 return -EINVAL;
2608
2609         if (mop->size > MEM_OP_MAX_SIZE)
2610                 return -E2BIG;
2611
2612         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2613                 tmpbuf = vmalloc(mop->size);
2614                 if (!tmpbuf)
2615                         return -ENOMEM;
2616         }
2617
2618         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2619
2620         switch (mop->op) {
2621         case KVM_S390_MEMOP_LOGICAL_READ:
2622                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2623                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2624                         break;
2625                 }
2626                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2627                 if (r == 0) {
2628                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2629                                 r = -EFAULT;
2630                 }
2631                 break;
2632         case KVM_S390_MEMOP_LOGICAL_WRITE:
2633                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2634                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2635                         break;
2636                 }
2637                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2638                         r = -EFAULT;
2639                         break;
2640                 }
2641                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2642                 break;
2643         default:
2644                 r = -EINVAL;
2645         }
2646
2647         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2648
2649         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2650                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2651
2652         vfree(tmpbuf);
2653         return r;
2654 }
2655
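/*
 * Editorial sketch, not part of the original source: a logical read of guest
 * memory through KVM_S390_MEM_OP as implemented above, assuming an open
 * vcpu fd and <linux/kvm.h>:
 *
 *	__u8 data[256];
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = 0x1000,
 *		.size  = sizeof(data),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)data,
 *		.ar    = 0,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop) < 0)
 *		perror("KVM_S390_MEM_OP");
 */
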
2656 long kvm_arch_vcpu_ioctl(struct file *filp,
2657                          unsigned int ioctl, unsigned long arg)
2658 {
2659         struct kvm_vcpu *vcpu = filp->private_data;
2660         void __user *argp = (void __user *)arg;
2661         int idx;
2662         long r;
2663
2664         switch (ioctl) {
2665         case KVM_S390_IRQ: {
2666                 struct kvm_s390_irq s390irq;
2667
2668                 r = -EFAULT;
2669                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2670                         break;
2671                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2672                 break;
2673         }
2674         case KVM_S390_INTERRUPT: {
2675                 struct kvm_s390_interrupt s390int;
2676                 struct kvm_s390_irq s390irq;
2677
2678                 r = -EFAULT;
2679                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2680                         break;
2681                 if (s390int_to_s390irq(&s390int, &s390irq))
2682                         return -EINVAL;
2683                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2684                 break;
2685         }
2686         case KVM_S390_STORE_STATUS:
2687                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2688                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2689                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2690                 break;
2691         case KVM_S390_SET_INITIAL_PSW: {
2692                 psw_t psw;
2693
2694                 r = -EFAULT;
2695                 if (copy_from_user(&psw, argp, sizeof(psw)))
2696                         break;
2697                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2698                 break;
2699         }
2700         case KVM_S390_INITIAL_RESET:
2701                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2702                 break;
2703         case KVM_SET_ONE_REG:
2704         case KVM_GET_ONE_REG: {
2705                 struct kvm_one_reg reg;
2706                 r = -EFAULT;
2707                 if (copy_from_user(&reg, argp, sizeof(reg)))
2708                         break;
2709                 if (ioctl == KVM_SET_ONE_REG)
2710                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2711                 else
2712                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2713                 break;
2714         }
2715 #ifdef CONFIG_KVM_S390_UCONTROL
2716         case KVM_S390_UCAS_MAP: {
2717                 struct kvm_s390_ucas_mapping ucasmap;
2718
2719                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2720                         r = -EFAULT;
2721                         break;
2722                 }
2723
2724                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2725                         r = -EINVAL;
2726                         break;
2727                 }
2728
2729                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2730                                      ucasmap.vcpu_addr, ucasmap.length);
2731                 break;
2732         }
2733         case KVM_S390_UCAS_UNMAP: {
2734                 struct kvm_s390_ucas_mapping ucasmap;
2735
2736                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2737                         r = -EFAULT;
2738                         break;
2739                 }
2740
2741                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2742                         r = -EINVAL;
2743                         break;
2744                 }
2745
2746                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2747                         ucasmap.length);
2748                 break;
2749         }
2750 #endif
2751         case KVM_S390_VCPU_FAULT: {
2752                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2753                 break;
2754         }
2755         case KVM_ENABLE_CAP:
2756         {
2757                 struct kvm_enable_cap cap;
2758                 r = -EFAULT;
2759                 if (copy_from_user(&cap, argp, sizeof(cap)))
2760                         break;
2761                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2762                 break;
2763         }
2764         case KVM_S390_MEM_OP: {
2765                 struct kvm_s390_mem_op mem_op;
2766
2767                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2768                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2769                 else
2770                         r = -EFAULT;
2771                 break;
2772         }
2773         case KVM_S390_SET_IRQ_STATE: {
2774                 struct kvm_s390_irq_state irq_state;
2775
2776                 r = -EFAULT;
2777                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2778                         break;
2779                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2780                     irq_state.len == 0 ||
2781                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2782                         r = -EINVAL;
2783                         break;
2784                 }
2785                 r = kvm_s390_set_irq_state(vcpu,
2786                                            (void __user *) irq_state.buf,
2787                                            irq_state.len);
2788                 break;
2789         }
2790         case KVM_S390_GET_IRQ_STATE: {
2791                 struct kvm_s390_irq_state irq_state;
2792
2793                 r = -EFAULT;
2794                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2795                         break;
2796                 if (irq_state.len == 0) {
2797                         r = -EINVAL;
2798                         break;
2799                 }
2800                 r = kvm_s390_get_irq_state(vcpu,
2801                                            (__u8 __user *)  irq_state.buf,
2802                                            irq_state.len);
2803                 break;
2804         }
2805         default:
2806                 r = -ENOTTY;
2807         }
2808         return r;
2809 }
2810
2811 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2812 {
2813 #ifdef CONFIG_KVM_S390_UCONTROL
2814         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2815                  && (kvm_is_ucontrol(vcpu->kvm))) {
2816                 vmf->page = virt_to_page(vcpu->arch.sie_block);
2817                 get_page(vmf->page);
2818                 return 0;
2819         }
2820 #endif
2821         return VM_FAULT_SIGBUS;
2822 }
2823
2824 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2825                             unsigned long npages)
2826 {
2827         return 0;
2828 }
2829
2830 /* Section: memory related */
2831 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2832                                    struct kvm_memory_slot *memslot,
2833                                    const struct kvm_userspace_memory_region *mem,
2834                                    enum kvm_mr_change change)
2835 {
2836         /* A few sanity checks. Memory slots must start and end at a segment
2837            boundary (1MB). The userland memory may be fragmented across
2838            multiple vmas, and it is okay to mmap() and munmap() memory in
2839            this slot at any time after this call. */
2840
2841         if (mem->userspace_addr & 0xffffful)
2842                 return -EINVAL;
2843
2844         if (mem->memory_size & 0xffffful)
2845                 return -EINVAL;
2846
2847         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
2848                 return -EINVAL;
2849
2850         return 0;
2851 }
2852
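/*
 * Editorial sketch, not part of the original source: a memslot that satisfies
 * the 1MB start/size alignment checks above, assuming an open vm fd and an
 * anonymous mapping mem of at least 256MB:
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot            = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = 256UL << 20,
 *		.userspace_addr  = (__u64)(unsigned long)mem,
 *	};
 *	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0)
 *		perror("KVM_SET_USER_MEMORY_REGION");
 */
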
2853 void kvm_arch_commit_memory_region(struct kvm *kvm,
2854                                 const struct kvm_userspace_memory_region *mem,
2855                                 const struct kvm_memory_slot *old,
2856                                 const struct kvm_memory_slot *new,
2857                                 enum kvm_mr_change change)
2858 {
2859         int rc;
2860
2861         /* If the basics of the memslot do not change, we do not want
2862          * to update the gmap. Every update causes several unnecessary
2863          * segment translation exceptions. This is usually handled just
2864          * fine by the normal fault handler + gmap, but it will also
2865          * cause faults on the prefix page of running guest CPUs.
2866          */
2867         if (old->userspace_addr == mem->userspace_addr &&
2868             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2869             old->npages * PAGE_SIZE == mem->memory_size)
2870                 return;
2871
2872         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2873                 mem->guest_phys_addr, mem->memory_size);
2874         if (rc)
2875                 pr_warn("failed to commit memory region\n");
2876         return;
2877 }
2878
2879 static int __init kvm_s390_init(void)
2880 {
2881         if (!sclp.has_sief2) {
2882                 pr_info("SIE not available\n");
2883                 return -ENODEV;
2884         }
2885
2886         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2887 }
2888
2889 static void __exit kvm_s390_exit(void)
2890 {
2891         kvm_exit();
2892 }
2893
2894 module_init(kvm_s390_init);
2895 module_exit(kvm_s390_exit);
2896
2897 /*
2898  * Enable autoloading of the kvm module.
2899  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2900  * since x86 takes a different approach.
2901  */
2902 #include <linux/miscdevice.h>
2903 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2904 MODULE_ALIAS("devname:kvm");