2 * s390host.c -- hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008,2009
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
16 #include <linux/compiler.h>
17 #include <linux/err.h>
19 #include <linux/hrtimer.h>
20 #include <linux/init.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/module.h>
24 #include <linux/slab.h>
25 #include <linux/timer.h>
26 #include <asm/lowcore.h>
27 #include <asm/pgtable.h>
29 #include <asm/system.h>
/* VCPU_STAT(x): expands to the offset of counter x inside struct kvm_vcpu
 * plus the KVM_STAT_VCPU tag, as required by kvm_stats_debugfs_item. */
33 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/* Per-vcpu statistics exported via debugfs by kvm common code; each entry
 * maps a file name to a counter in the vcpu stat structure. */
35 struct kvm_stats_debugfs_item debugfs_entries[] = {
36 { "userspace_handled", VCPU_STAT(exit_userspace) },
37 { "exit_null", VCPU_STAT(exit_null) },
38 { "exit_validity", VCPU_STAT(exit_validity) },
39 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
40 { "exit_external_request", VCPU_STAT(exit_external_request) },
41 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
42 { "exit_instruction", VCPU_STAT(exit_instruction) },
43 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
44 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
45 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
46 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
47 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
48 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
49 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
50 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
51 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
52 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
53 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
54 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
55 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
56 { "instruction_spx", VCPU_STAT(instruction_spx) },
57 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
58 { "instruction_stap", VCPU_STAT(instruction_stap) },
59 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
60 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
61 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
62 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
63 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
64 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
65 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
66 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
67 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
68 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
69 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
70 { "diagnose_44", VCPU_STAT(diagnose_44) },
/* Facility list shared by all guests; filled from stfle() at module init
 * (see kvm_s390_init) and handed to each vcpu's SIE block via ->fac. */
74 static unsigned long long *facilities;

76 /* Section: not file related */
/* The following kvm common-code hooks need no work on s390: SIE needs no
 * per-cpu enable/disable or compatibility checking (see comment below). */
77 int kvm_arch_hardware_enable(void *garbage)
79 /* every s390 is virtualization enabled ;-) */
83 void kvm_arch_hardware_disable(void *garbage)
87 int kvm_arch_hardware_setup(void)
92 void kvm_arch_hardware_unsetup(void)
96 void kvm_arch_check_processor_compat(void *rtn)
100 int kvm_arch_init(void *opaque)
105 void kvm_arch_exit(void)
109 /* Section: device related */
/* ioctl handler for /dev/kvm itself; on s390 only KVM_S390_ENABLE_SIE
 * is supported, which switches the calling process to SIE-capable mode. */
110 long kvm_arch_dev_ioctl(struct file *filp,
111 unsigned int ioctl, unsigned long arg)
113 if (ioctl == KVM_S390_ENABLE_SIE)
114 return s390_enable_sie();
/* KVM_CHECK_EXTENSION: report which optional capabilities this
 * architecture supports (KVM_CAP_S390_PSW is among them). */
118 int kvm_dev_ioctl_check_extension(long ext)
123 case KVM_CAP_S390_PSW:
132 /* Section: vm related */
134 * Get (and clear) the dirty memory log for a memory slot.
136 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
137 struct kvm_dirty_log *log)
/* VM-level ioctl handler: currently only KVM_S390_INTERRUPT, which
 * copies a kvm_s390_interrupt from userspace and injects it as a
 * floating (vm-wide) interrupt. */
142 long kvm_arch_vm_ioctl(struct file *filp,
143 unsigned int ioctl, unsigned long arg)
145 struct kvm *kvm = filp->private_data;
146 void __user *argp = (void __user *)arg;
150 case KVM_S390_INTERRUPT: {
151 struct kvm_s390_interrupt s390int;
/* copy_from_user failure must be reported to the caller */
154 if (copy_from_user(&s390int, argp, sizeof(s390int)))
156 r = kvm_s390_inject_vm(kvm, &s390int);
/*
 * Create a VM: enable SIE for the current process, allocate the kvm
 * struct, the system control area (SCA, one zeroed page) and a per-VM
 * s390 debug feature log named "kvm-<pid>", then set up the floating
 * interrupt list.
 */
166 struct kvm *kvm_arch_create_vm(void)
172 rc = s390_enable_sie();
177 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
/* SCA page: each vcpu later anchors its SIE block in sca->cpu[id].sda */
181 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
/* debug_name buffer is sized elsewhere; pid keeps names unique per VM */
185 sprintf(debug_name, "kvm-%u", current->pid);
187 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
191 spin_lock_init(&kvm->arch.float_int.lock);
192 INIT_LIST_HEAD(&kvm->arch.float_int.list);
194 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
195 VM_EVENT(kvm, 3, "%s", "vm created");
/* error path: release the SCA page allocated above */
199 free_page((unsigned long)(kvm->arch.sca));
/* Tear down one vcpu: detach its SIE block from the SCA slot (only if
 * this vcpu still owns that slot), free the SIE block page and let
 * common code uninit the vcpu. */
206 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
208 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
209 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
210 (__u64) vcpu->arch.sie_block)
211 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
213 free_page((unsigned long)(vcpu->arch.sie_block));
214 kvm_vcpu_uninit(vcpu);
/* Destroy all vcpus of a VM, then clear the vcpu table and the online
 * counter under kvm->lock so no stale pointers remain. */
218 static void kvm_free_vcpus(struct kvm *kvm)
221 struct kvm_vcpu *vcpu;
223 kvm_for_each_vcpu(i, vcpu, kvm)
224 kvm_arch_vcpu_destroy(vcpu);
226 mutex_lock(&kvm->lock);
227 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
228 kvm->vcpus[i] = NULL;
230 atomic_set(&kvm->online_vcpus, 0);
231 mutex_unlock(&kvm->lock);
/* Nothing to flush before VM destruction on s390. */
234 void kvm_arch_sync_events(struct kvm *kvm)
/* Final VM teardown: release guest memory, the SCA page and the per-VM
 * debug feature log (reverse of kvm_arch_create_vm). */
238 void kvm_arch_destroy_vm(struct kvm *kvm)
241 kvm_free_physmem(kvm);
242 free_page((unsigned long)(kvm->arch.sca));
243 debug_unregister(kvm->arch.dbf);
247 /* Section: vcpu related */
/* Per-vcpu init/uninit hooks for kvm common code; no extra work here. */
248 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
253 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
/* Scheduled in: save the host FP and access registers, then install the
 * guest's copies.  The guest FPC is masked to valid bits first so a
 * corrupt value cannot be loaded into hardware. */
258 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
260 save_fp_regs(&vcpu->arch.host_fpregs);
261 save_access_regs(vcpu->arch.host_acrs);
262 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
263 restore_fp_regs(&vcpu->arch.guest_fpregs);
264 restore_access_regs(vcpu->arch.guest_acrs);
/* Scheduled out: mirror of kvm_arch_vcpu_load — stash the guest FP and
 * access registers and restore the host's. */
267 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
269 save_fp_regs(&vcpu->arch.guest_fpregs);
270 save_access_regs(vcpu->arch.guest_acrs);
271 restore_fp_regs(&vcpu->arch.host_fpregs);
272 restore_access_regs(vcpu->arch.host_acrs);
/* Reset a vcpu's architectural state to initial-cpu-reset values
 * (PSW, prefix, timers, control registers, FPC). */
275 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
277 /* this equals initial cpu reset in pop, but we don't switch to ESA */
278 vcpu->arch.sie_block->gpsw.mask = 0UL;
279 vcpu->arch.sie_block->gpsw.addr = 0UL;
280 vcpu->arch.sie_block->prefix = 0UL;
/* 0xffff invalidates the intercept-handling cpu cache */
281 vcpu->arch.sie_block->ihcpu = 0xffff;
282 vcpu->arch.sie_block->cputm = 0UL;
283 vcpu->arch.sie_block->ckc = 0UL;
284 vcpu->arch.sie_block->todpr = 0;
285 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
/* architected reset values for CR0 and CR14 */
286 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
287 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
288 vcpu->arch.guest_fpregs.fpc = 0;
/* reload the (now zeroed) FP control register into hardware */
289 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
290 vcpu->arch.sie_block->gbea = 1;
/* One-time vcpu setup: configure the SIE control block (z/Arch mode,
 * execution controls, facility list pointer), arm the clock-comparator
 * hrtimer/tasklet pair used for idle wakeup, and stamp the cpu id. */
293 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
295 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
/* force a memory (re)registration before the first run */
296 set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests);
297 vcpu->arch.sie_block->ecb = 2;
298 vcpu->arch.sie_block->eca = 0xC1002001U;
/* SIE reads the facility list via this (31-bit) pointer */
299 vcpu->arch.sie_block->fac = (int) (long) facilities;
300 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
301 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
302 (unsigned long) vcpu);
303 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
304 get_cpu_id(&vcpu->arch.cpu_id);
/* mark the version field so guests see a KVM cpu id */
305 vcpu->arch.cpu_id.version = 0xff;
/*
 * Allocate a vcpu and its SIE control block (one zeroed page), anchor
 * the block in the VM's SCA, wire up the local interrupt structure to
 * the VM's floating interrupt state, then run common-code init.
 */
309 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
312 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
318 vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
319 get_zeroed_page(GFP_KERNEL);
321 if (!vcpu->arch.sie_block)
324 vcpu->arch.sie_block->icpua = id;
325 BUG_ON(!kvm->arch.sca);
/* claim the SCA slot only if it is still free */
326 if (!kvm->arch.sca->cpu[id].sda)
327 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
/* SCA origin is split into high/low halves in the SIE block */
328 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
329 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
331 spin_lock_init(&vcpu->arch.local_int.lock);
332 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
333 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
/* publish the local-int pointer under the floating interrupt lock */
334 spin_lock(&kvm->arch.float_int.lock);
335 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
336 init_waitqueue_head(&vcpu->arch.local_int.wq);
337 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
338 spin_unlock(&kvm->arch.float_int.lock);
340 rc = kvm_vcpu_init(vcpu, kvm, id);
343 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
344 vcpu->arch.sie_block);
353 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
355 /* kvm common code refers to this, but never calls it */
/* KVM_S390_INITIAL_RESET ioctl backend: delegate to the reset helper. */
360 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
363 kvm_s390_vcpu_initial_reset(vcpu);
368 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
371 memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs));
376 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
379 memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
/* KVM_SET_SREGS: install userspace-supplied access and control regs. */
384 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
385 struct kvm_sregs *sregs)
388 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
389 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/* KVM_GET_SREGS: read back access and control registers to userspace. */
394 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
395 struct kvm_sregs *sregs)
398 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
399 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/* KVM_SET_FPU: install userspace-supplied FP registers and FPC. */
404 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
407 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
408 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
/* KVM_GET_FPU: read back FP registers and FPC to userspace. */
413 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
416 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
417 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
/* KVM_S390_SET_INITIAL_PSW backend: store the PSW into the run struct;
 * refused while the vcpu is running (CPUSTAT_RUNNING set). */
422 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
427 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
430 vcpu->run->psw_mask = psw.mask;
431 vcpu->run->psw_addr = psw.addr;
/* The following common-code vcpu ioctls are not implemented on s390;
 * each simply reports -EINVAL to the caller. */
437 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
438 struct kvm_translation *tr)
440 return -EINVAL; /* not implemented yet */
443 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
444 struct kvm_guest_debug *dbg)
446 return -EINVAL; /* not implemented yet */
449 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
450 struct kvm_mp_state *mp_state)
452 return -EINVAL; /* not implemented yet */
455 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
456 struct kvm_mp_state *mp_state)
458 return -EINVAL; /* not implemented yet */
/*
 * Enter SIE once: sync gprs 14/15 into the SIE block, deliver pending
 * interrupts, run the guest via sie64a(), and sync gprs 14/15 back.
 * A fault in the SIE instruction itself is reflected to the guest as
 * an addressing program interrupt.
 */
461 static void __vcpu_run(struct kvm_vcpu *vcpu)
463 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
/* handle machine checks pending for the host before entering the guest */
468 if (test_thread_flag(TIF_MCCK_PENDING))
471 kvm_s390_deliver_pending_interrupts(vcpu);
473 vcpu->arch.sie_block->icptcode = 0;
477 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
478 atomic_read(&vcpu->arch.sie_block->cpuflags));
479 if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
480 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
481 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
483 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
484 vcpu->arch.sie_block->icptcode);
489 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
/*
 * KVM_RUN: main vcpu execution loop.  Handles deferred memory
 * registration, signal masking, the run/intercept loop, and translates
 * the exit cause into kvm_run for userspace.
 */
492 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* apply a memory-slot update requested while the vcpu was not running */
501 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
502 kvm_s390_vcpu_set_mem(vcpu)_
504 /* verify, that memory has been registered */
505 if (!vcpu->arch.sie_block->gmslm) {
507 VCPU_EVENT(vcpu, 3, "%s", "no memory registered to run vcpu");
/* honour the guest's signal mask for the duration of the run */
511 if (vcpu->sigset_active)
512 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
514 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
516 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
518 switch (kvm_run->exit_reason) {
519 case KVM_EXIT_S390_SIEIC:
520 case KVM_EXIT_UNKNOWN:
522 case KVM_EXIT_S390_RESET:
/* restore the PSW userspace may have modified */
528 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
529 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
/* run until a signal arrives or an intercept needs attention */
535 rc = kvm_handle_sie_intercept(vcpu);
536 } while (!signal_pending(current) && !rc);
538 if (rc == SIE_INTERCEPT_RERUNVCPU)
541 if (signal_pending(current) && !rc) {
542 kvm_run->exit_reason = KVM_EXIT_INTR;
546 if (rc == -EOPNOTSUPP) {
547 /* intercept cannot be handled in-kernel, prepare kvm-run */
548 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
549 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
550 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
551 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
555 if (rc == -EREMOTE) {
556 /* intercept was handled, but userspace support is needed
557 * kvm_run has been prepared by the handler */
/* export the final guest PSW to userspace */
561 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
562 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
564 if (vcpu->sigset_active)
565 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
569 vcpu->stat.exit_userspace++;
/* Copy n bytes to the guest at guestdest; 'prefix' selects between a
 * prefix-relative (virtual) copy and an absolute-address copy. */
573 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
574 unsigned long n, int prefix)
577 return copy_to_guest(vcpu, guestdest, from, n);
579 return copy_to_guest_absolute(vcpu, guestdest, from, n);
583 * store status at address
584 * we use have two special cases:
585 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
586 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
588 int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
/* archmode byte (1 = z/Architecture) is stored at guest offset 163 */
590 const unsigned char archmode = 1;
593 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
594 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
596 addr = SAVE_AREA_BASE;
598 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
599 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
601 addr = SAVE_AREA_BASE;
/* below: write each register group into its save_area field; any copy
 * failure aborts the store with an error */
606 if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
607 vcpu->arch.guest_fpregs.fprs, 128, prefix))
610 if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
611 vcpu->arch.guest_gprs, 128, prefix))
614 if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
615 &vcpu->arch.sie_block->gpsw, 16, prefix))
618 if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
619 &vcpu->arch.sie_block->prefix, 4, prefix))
622 if (__guestcopy(vcpu,
623 addr + offsetof(struct save_area, fp_ctrl_reg),
624 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
627 if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
628 &vcpu->arch.sie_block->todpr, 4, prefix))
631 if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
632 &vcpu->arch.sie_block->cputm, 8, prefix))
635 if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
636 &vcpu->arch.sie_block->ckc, 8, prefix))
639 if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
640 &vcpu->arch.guest_acrs, 64, prefix))
643 if (__guestcopy(vcpu,
644 addr + offsetof(struct save_area, ctrl_regs),
645 &vcpu->arch.sie_block->gcr, 128, prefix))
/* Wrapper around __kvm_s390_vcpu_store_status (ioctl entry point). */
650 static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
655 rc = __kvm_s390_vcpu_store_status(vcpu, addr);
/* Dispatcher for s390-specific vcpu ioctls: interrupt injection, store
 * status, set initial PSW and initial reset. */
660 long kvm_arch_vcpu_ioctl(struct file *filp,
661 unsigned int ioctl, unsigned long arg)
663 struct kvm_vcpu *vcpu = filp->private_data;
664 void __user *argp = (void __user *)arg;
667 case KVM_S390_INTERRUPT: {
668 struct kvm_s390_interrupt s390int;
670 if (copy_from_user(&s390int, argp, sizeof(s390int)))
672 return kvm_s390_inject_vcpu(vcpu, &s390int);
674 case KVM_S390_STORE_STATUS:
/* arg is the guest address (or a STORE_STATUS special value) */
675 return kvm_s390_vcpu_store_status(vcpu, arg);
676 case KVM_S390_SET_INITIAL_PSW: {
679 if (copy_from_user(&psw, argp, sizeof(psw)))
681 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
683 case KVM_S390_INITIAL_RESET:
684 return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
691 /* Section: memory related */
/* Validate and install the single guest memory slot, then ask every
 * vcpu to reload its SIE memory setup. */
692 int kvm_arch_set_memory_region(struct kvm *kvm,
693 struct kvm_userspace_memory_region *mem,
694 struct kvm_memory_slot old,
698 struct kvm_vcpu *vcpu;
700 /* A few sanity checks. We can have exactly one memory slot which has
701 to start at guest virtual zero and which has to be located at a
702 page boundary in userland and which has to end at a page boundary.
703 The memory in userland is ok to be fragmented into various different
704 vmas. It is okay to mmap() and munmap() stuff in this slot after
705 doing this call at any time */
710 if (mem->guest_phys_addr)
713 if (mem->userspace_addr & (PAGE_SIZE - 1))
716 if (mem->memory_size & (PAGE_SIZE - 1))
722 /* request update of sie control block for all available vcpus */
723 kvm_for_each_vcpu(i, vcpu, kvm) {
724 if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
/* kick running vcpus so they pick up the reload request */
726 kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
/* No shadow page tables and no gfn aliasing on s390. */
732 void kvm_arch_flush_shadow(struct kvm *kvm)
736 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
/* Module init: register with kvm common code, then build the masked
 * facility list advertised to guests. */
741 static int __init kvm_s390_init(void)
744 ret = kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
749 * guests can ask for up to 255+1 double words, we need a full page
750 * to hold the maximum amount of facilites. On the other hand, we
751 * only set facilities that are known to work in KVM.
/* GFP_DMA: SIE needs the page below the 31-bit addressing limit */
753 facilities = (unsigned long long *) get_zeroed_page(GFP_DMA);
758 stfle(facilities, 1);
/* mask out facilities KVM cannot (yet) virtualize */
759 facilities[0] &= 0xff00fff3f0700000ULL;
/* Module exit: release the facility-list page allocated at init. */
763 static void __exit kvm_s390_exit(void)
765 free_page((unsigned long) facilities);
/* Register module entry and exit points. */
769 module_init(kvm_s390_init);
770 module_exit(kvm_s390_exit);