/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/switch_to.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
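/*
 * Per-VCPU statistics: each entry in the table below becomes a debugfs
 * file (typically under /sys/kernel/debug/kvm/) exporting the named
 * counter at the given offset in struct kvm_vcpu.
 */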
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
126 { "diagnose_500", VCPU_STAT(diagnose_500) },
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}
void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc = 3; /* subfunction not available */

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}
void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	default:
		r = 0;
	}
	return r;
}
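/*
 * Example (illustrative userspace sketch, not part of this file): the
 * extensions above are probed with the generic KVM_CHECK_EXTENSION ioctl,
 * e.g. on a VM file descriptor "vm_fd":
 *
 *	int slots = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_MEMSLOTS);
 *
 * A positive return value carries the capability-specific answer computed
 * in the switch above.
 */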
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
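/*
 * Example (illustrative userspace sketch, not part of this file), assuming
 * "vm_fd" is a VM file descriptor and "bitmap" is sized for the slot:
 *
 *	struct kvm_dirty_log dlog = {
 *		.slot = 0,
 *		.dirty_bitmap = bitmap,
 *	};
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &dlog);
 */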
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
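/*
 * Example (illustrative userspace sketch, not part of this file): a VMM
 * enables one of the capabilities above with the KVM_ENABLE_CAP vm ioctl:
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_USER_SIGP,
 *	};
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */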
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}
static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
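/*
 * Example (illustrative userspace sketch, not part of this file): reading
 * the storage keys of the first 16 guest pages with KVM_S390_GET_SKEYS:
 *
 *	uint8_t skeys[16];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 16,
 *		.skeydata_addr = (uint64_t) skeys,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */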
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}
static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
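/*
 * Design note: the wrapping key masks above are freshly randomized for
 * every VM, so wrapped (protected) key material generated inside one
 * guest is not usable in any other guest or on the host.
 */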
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	/* stagger SCAs of different VMs within the page */
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_MAX_SIZE;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		/* don't touch the sca entries below: they may not exist */
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
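/*
 * The switch from the basic to the extended SCA below happens at runtime:
 * all VCPUs are blocked and kicked out of SIE, the new ESCA is populated
 * from the old BSCA under the sca_lock write lock, every SIE control block
 * is repointed to the new origin, and only then do the VCPUs resume.
 */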
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
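/*
 * The cpu timer is protected by a seqcount: the writer (the VCPU thread)
 * bumps it around every update of cputm/cputm_start, and readers retry
 * until they observe an even, unchanged sequence. This lets other threads
 * compute an up-to-date timer value below without taking a lock.
 */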
/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	gmap_enable(vcpu->arch.enabled_gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}
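/*
 * Note: while CMMA is active, the CBRLO field of the SIE control block
 * points at the zeroed page allocated below; it serves as the collection
 * buffer the machine fills during interpreted ESSA execution.
 */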
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= 0x02;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= 0x04;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= 0x08;
	vcpu->arch.sie_block->eca = 0x1002000U;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= 0x80000000U;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= 0x40000000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
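/*
 * Example (illustrative userspace sketch, not part of this file): reading
 * one of the registers above via the generic ONE_REG interface on a
 * vcpu file descriptor:
 *
 *	__u64 epoch;
 *	struct kvm_one_reg reg = {
 *		.id = KVM_REG_S390_EPOCHDIFF,
 *		.addr = (__u64) &epoch,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */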
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;
	if (!sclp.has_gpere)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
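/*
 * Example (illustrative userspace sketch, not part of this file): guest
 * debugging is requested through KVM_SET_GUEST_DEBUG on the vcpu fd:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 *
 * (For KVM_GUESTDBG_USE_HW_BP the arch-specific breakpoint data must
 * also be filled in before the call.)
 */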
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;

		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}

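/*
 * Set the guest TOD clock by recomputing the per-VM epoch (the delta
 * that SIE adds to the host TOD while the guest runs) and propagating
 * it to every VCPU. All VCPUs are blocked while the epoch changes, so
 * none of them can enter SIE with a stale value.
 */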
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}

/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

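/*
 * Inject the pseudo-page-fault notification pair: the "init" interrupt
 * (carrying the token) is delivered to the faulting VCPU, while the
 * matching "done" interrupt is injected at VM scope, so completion
 * still reaches the guest even if the VCPU state has changed in the
 * meantime (a summary of the code below, not of the architecture text).
 */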
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly, but we still want
	 * check_async_completion to clean up.
	 */
	return true;
}

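/*
 * Check whether the current host fault may be handled asynchronously
 * via pfault. The bail-out conditions below mirror what the code
 * requires for a pfault-init interrupt to be deliverable: a registered
 * token, a matching PSW select/compare mask, external interrupts and
 * the 0x200 bit in CR0 enabled, and pfault enabled on the gmap.
 */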
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

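/*
 * Everything that has to happen before (re-)entering SIE: async-pf
 * housekeeping, rescheduling, pending machine checks, interrupt
 * delivery and request processing. A non-zero return value aborts the
 * run loop and drops us back to userspace.
 */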
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390, notifications for arriving pages will be delivered directly
	 * to the guest, but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}

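/*
 * The heart of the run ioctl: alternate between vcpu_pre_run(), the
 * actual SIE entry (done outside the SRCU read-side section, with guest
 * time accounted separately), and vcpu_post_run(), until an exit to
 * userspace is required, a signal is pending, or guest debugging wants
 * to exit.
 */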
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when
	 * running the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * PF_VCPU is evaluated in the fault handler, so there must
		 * be no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

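/*
 * Copy the state that userspace may have modified (via the synced part
 * of kvm_run) into the VCPU before entering SIE. Only the pieces
 * flagged in kvm_dirty_regs are taken over; the PSW is always synced.
 */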
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64)) {
		struct runtime_instr_cb *riccb =
			(struct runtime_instr_cb *) &kvm_run->s.regs.riccb;

		if (riccb->valid)
			vcpu->arch.sie_block->ecb3 |= 0x01;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	kvm_run->kvm_dirty_regs = 0;
}

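/*
 * Counterpart of sync_regs(): copy the VCPU state back into kvm_run
 * for userspace and re-establish the host's access/fp register state.
 */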
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else {
		gpa -= __LC_FPREGS_SAVE_AREA;
	}

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

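/*
 * Judging by the start/stop code below, IBS is only worthwhile while
 * exactly one VCPU is runnable: the paths count the started VCPUs and
 * enable it on a lone VCPU, or strip it from all VCPUs as soon as a
 * second one starts.
 */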
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

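/*
 * Read or write guest logical memory on behalf of userspace. A minimal
 * usage sketch from the VCPU fd side (illustrative only; error handling
 * omitted, and "vcpu_fd", "guest_addr", "local_buf" and "len" are
 * assumed to be set up by the caller):
 *
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr	= guest_addr,
 *		.buf	= (__u64)(uintptr_t)local_buf,
 *		.size	= len,
 *		.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *		.ar	= 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop);
 *
 * With KVM_S390_MEMOP_F_CHECK_ONLY set, only the access check is done;
 * no data is transferred and no bounce buffer is allocated.
 */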
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. A memory slot has to start and end on a
	   segment boundary (1 MB). The memory in userland may be fragmented
	   into various different vmas. It is okay to mmap() and munmap()
	   stuff in this slot at any time after doing this call. */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

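/*
 * Mask off facility bits beyond what can be used without hypervisor
 * assistance (judging by the function name): the i-th 2-bit indication
 * of sclp.hmfai is extracted, and the 48-bit all-ones mask is shifted
 * right by 16 bits per indication unit.
 */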
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

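/*
 * Module init: refuse to load when the machine (or the hypervisor we
 * run under) does not provide SIE with the sief2 facility, then trim
 * the advertised facility list by the non-hypervisor mask before
 * registering with the generic KVM core.
 */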
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");