/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/etr.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
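
/*
 * debugfs counters exported under the kvm debugfs directory. Each entry
 * pairs a file name with the offset of a per-vcpu statistic in
 * struct kvm_vcpu; common KVM code sums the field across all vcpus.
 */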
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
	0xffe6fffbfcfdfc40UL,
	0x005e800000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
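
/*
 * Note: bits cleared in kvm_s390_fac_list_mask are never reported to a
 * guest, no matter what the host's STFLE result contains; the mask is
 * ANDed into the per-VM facility mask at VM creation (kvm_arch_init_vm()).
 */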
static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}
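
/*
 * Note: a guest's TOD clock is computed as host TOD + epoch. When the
 * host clock jumps by *delta during a sync, subtracting *delta from
 * every epoch above keeps the guest-visible TOD unchanged.
 */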
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
				  : KVM_S390_BSCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	default:
		r = 0;
	}
	return r;
}
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	down_read(&gmap->mm->mmap_sem);
	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (gmap_test_and_clear_dirty(address, gmap))
			mark_page_dirty(kvm, cur_gfn);
	}
	up_read(&gmap->mm->mmap_sem);
}
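
/*
 * Note: dirty tracking works in two stages; the gmap dirty bits harvested
 * above only become visible to userspace once kvm_get_dirty_log() copies
 * the bitmap out in kvm_vm_ioctl_get_dirty_log() below.
 */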
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac->mask, 129);
			set_kvm_facility(kvm->arch.model.fac->list, 129);
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac->mask, 64);
			set_kvm_facility(kvm->arch.model.fac->list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		/* enable CMMA only for z10 and later (EDAT_1) */
		ret = -EINVAL;
		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_alloc takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			/* gmap_alloc will round the limit up */
			struct gmap *new = gmap_alloc(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_free(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
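
/*
 * Note: the exit_sie() in the loop above kicks every vcpu out of SIE so
 * that the kvm_s390_vcpu_crypto_setup() results (new ECB3 bits and
 * wrapping key masks) take effect before the next SIE entry.
 */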
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
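
/*
 * Example (hypothetical userspace sketch, not part of this file): setting
 * the guest TOD base through the VM attribute interface wrapped above:
 *
 *	__u64 tod = some_tod_value;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)(unsigned long)&tod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */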
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (atomic_read(&kvm->online_vcpus)) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
		       sizeof(struct cpuid));
		kvm->arch.model.ibc = proc->ibc;
		memcpy(kvm->arch.model.fac->list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	unsigned long curkey;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		curkey = get_guest_storage_key(current->mm, hva);
		if (IS_ERR_VALUE(curkey)) {
			r = curkey;
			goto out;
		}
		keys[i] = curkey;
	}

	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
			 sizeof(uint8_t) * args->count);
	if (r)
		r = -EFAULT;
out:
	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			goto out;
		}

		r = set_guest_storage_key(current->mm, hva,
					  (unsigned long)keys[i], 0);
		if (r)
			goto out;
	}
out:
	kvfree(keys);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
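
/*
 * Example (hypothetical userspace sketch, not part of this file): reading
 * the first 16 storage keys through the ioctl dispatched above:
 *
 *	__u8 buf[16];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 16,
 *		.skeydata_addr = (__u64)(unsigned long)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */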
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}
static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
{
	get_cpu_id(cpu_id);
	cpu_id->version = 0xff;
}
static int kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return 0;

	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
					 GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.crypto.crycb)
		return -ENOMEM;

	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));

	return 0;
}
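
/*
 * Note: the wrapping key masks are freshly randomized for each VM, so
 * protected-key material generated in one guest cannot be unwrapped by
 * another guest sharing the same host.
 */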
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.use_esca = 0; /* start with basic SCA */
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	/*
	 * The architectural maximum amount of facilities is 16 kbit. To store
	 * this amount, 2 kbyte of memory is required. Thus we need a full
	 * page to hold the guest facility list (arch.model.fac->list) and the
	 * facility mask (arch.model.fac->mask). Its address size has to be
	 * 31 bits and word aligned.
	 */
	kvm->arch.model.fac =
		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.model.fac)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac->mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	if (kvm_s390_crypto_init(kvm) < 0)
		goto out_err;

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_MAX_SIZE;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	kfree(kvm->arch.crypto.crycb);
	free_page((unsigned long)kvm->arch.model.fac);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)kvm->arch.model.fac);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kfree(kvm->arch.crypto.crycb);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
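
/*
 * Note: the BSCA -> ESCA switch happens with all vcpus blocked and the
 * sca_lock held for writing, so no vcpu can enter SIE with a stale SCA
 * origin while the control block is being replaced.
 */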
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Save host register state */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;

	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);

	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;

	/* Restore host register state */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm     = 0UL;
	vcpu->arch.sie_block->ckc       = 0UL;
	vcpu->arch.sie_block->todpr     = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.cpu_id = model->cpu_id;
	vcpu->arch.sie_block->ibc = model->ibc;
	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	vcpu->arch.sie_block->ecb   = 6;
	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	vcpu->arch.sie_block->ecb2  = 8;
	vcpu->arch.sie_block->eca   = 0xC1002000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->arch.sie_block->ecb3 |= 0x01;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
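
/*
 * Note: CPUSTAT_STOP_INT makes the running vcpu intercept; the busy loop
 * then waits until the PROG_IN_SIE flag in prog0c drops, i.e. until the
 * CPU has really left SIE, before the caller may touch the sie_block.
 */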
/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure the new values will be lazily loaded */
	save_fpu_regs();
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	current->thread.fpu.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
	else
		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
	else
		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
	fpu->fpc = current->thread.fpu.fpc;
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      kvm_s390_get_prefix(vcpu),
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
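
/*
 * Note: the epoch is the signed difference between the requested guest
 * TOD and the current host TOD; every vcpu gets the same epoch while
 * blocked, so the guest observes one consistent clock step.
 */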
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
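
/*
 * Note: if any of the gating checks above fail, no async pfault is set
 * up and the caller (vcpu_post_run) falls back to synchronous fault-in
 * via kvm_arch_fault_in_page().
 */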
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		__kvm_guest_enter();
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__kvm_guest_exit();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
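
/*
 * Note: srcu is dropped across sie64a() so that memslot updates can make
 * progress while the vcpu runs; interrupts are disabled only around the
 * guest_enter/guest_exit accounting, not while the guest executes.
 */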
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &vcpu->arch.sie_block->cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
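
/*
 * Example (illustrative sketch, not part of this file): the two special
 * "addresses" accepted above, as a userspace caller would pass them to
 * the KVM_S390_STORE_STATUS ioctl; "vcpu_fd" is an assumption of the
 * sketch. In both cases the architecture-mode byte at address 163 is
 * written first.
 *
 *	// store at the fixed save area (absolute 0x1200 on 64 bit)
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 *	// store into the vcpu's prefix area instead
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_PREFIXED);
 */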
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;
	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}
int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;
	/*
	 * The guest VXRS are in the host VXRS due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save them into the save area.
	 */
	save_fpu_regs();
	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
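
/*
 * Example (illustrative sketch, not part of this file): enabling CSS
 * support from userspace via KVM_ENABLE_CAP on a vcpu fd; "vcpu_fd" is an
 * assumption of the sketch. Unused flags and args must stay zero, as
 * checked above.
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_CSS_SUPPORT,
 *	};
 *	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		perror("KVM_ENABLE_CAP");
 */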
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
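
/*
 * Example (illustrative sketch, not part of this file): reading 4k from a
 * guest logical address through KVM_S390_MEM_OP; buffer and fd names are
 * assumptions of the sketch.
 *
 *	__u8 buf[4096];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr	= 0x10000,			// guest logical address
 *		.size	= sizeof(buf),			// <= MEM_OP_MAX_SIZE
 *		.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf	= (__u64)(unsigned long)buf,
 *		.ar	= 0,				// access register 0
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */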
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
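
/*
 * Example (illustrative sketch, not part of this file): a valid buffer for
 * KVM_S390_SET_IRQ_STATE must be a non-zero multiple of
 * sizeof(struct kvm_s390_irq) and no larger than the kernel's
 * VCPU_IRQS_MAX_BUF, as checked above; "irqs" and "vcpu_fd" are
 * assumptions of the sketch.
 *
 *	struct kvm_s390_irq irqs[2] = { ... };
 *	struct kvm_s390_irq_state irq_state = {
 *		.buf = (__u64)(unsigned long)irqs,
 *		.len = sizeof(irqs),	// 2 * sizeof(struct kvm_s390_irq)
 *	};
 *	ioctl(vcpu_fd, KVM_S390_SET_IRQ_STATE, &irq_state);
 */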
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to
	   start/end at a segment boundary (1MB). The memory in userland may
	   be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot at any time after this call */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
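
/*
 * Example (illustrative values, not part of this file): both the userspace
 * address and the size must be multiples of 1MB (0x100000) to pass the
 * checks above, e.g. for a KVM_SET_USER_MEMORY_REGION request:
 *
 *	memory_size    = 0x00100000 -> ok      (exactly one segment)
 *	memory_size    = 0x00180000 -> -EINVAL (0x180000 & 0xfffff != 0)
 *	userspace_addr = 0x3ff80000 -> -EINVAL (not segment aligned)
 */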
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
}
static int __init kvm_s390_init(void)
{
	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");