git.kernelconcepts.de Git - karo-tx-linux.git/blob - arch/s390/kvm/kvm-s390.c
KVM: s390: do not block CPU on dirty logging
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/nmi.h>
34 #include <asm/switch_to.h>
35 #include <asm/isc.h>
36 #include <asm/sclp.h>
37 #include "kvm-s390.h"
38 #include "gaccess.h"
39
40 #define KMSG_COMPONENT "kvm-s390"
41 #undef pr_fmt
42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
43
44 #define CREATE_TRACE_POINTS
45 #include "trace.h"
46 #include "trace-s390.h"
47
48 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
49 #define LOCAL_IRQS 32
50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
51                            (KVM_MAX_VCPUS + LOCAL_IRQS))
52
53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
54
55 struct kvm_stats_debugfs_item debugfs_entries[] = {
56         { "userspace_handled", VCPU_STAT(exit_userspace) },
57         { "exit_null", VCPU_STAT(exit_null) },
58         { "exit_validity", VCPU_STAT(exit_validity) },
59         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
60         { "exit_external_request", VCPU_STAT(exit_external_request) },
61         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62         { "exit_instruction", VCPU_STAT(exit_instruction) },
63         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
68         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
70         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
71         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
72         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
74         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
81         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
83         { "instruction_spx", VCPU_STAT(instruction_spx) },
84         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
85         { "instruction_stap", VCPU_STAT(instruction_stap) },
86         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
89         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
90         { "instruction_essa", VCPU_STAT(instruction_essa) },
91         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
92         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
93         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
94         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110         { "diagnose_10", VCPU_STAT(diagnose_10) },
111         { "diagnose_44", VCPU_STAT(diagnose_44) },
112         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
113         { "diagnose_258", VCPU_STAT(diagnose_258) },
114         { "diagnose_308", VCPU_STAT(diagnose_308) },
115         { "diagnose_500", VCPU_STAT(diagnose_500) },
116         { NULL }
117 };
118
119 /* upper facilities limit for kvm */
120 unsigned long kvm_s390_fac_list_mask[] = {
121         0xffe6fffbfcfdfc40UL,
122         0x005e800000000000UL,
123 };
124
125 unsigned long kvm_s390_fac_list_mask_size(void)
126 {
127         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128         return ARRAY_SIZE(kvm_s390_fac_list_mask);
129 }
130
131 static struct gmap_notifier gmap_notifier;
132 debug_info_t *kvm_s390_dbf;
133
134 /* Section: not file related */
135 int kvm_arch_hardware_enable(void)
136 {
137         /* every s390 is virtualization enabled ;-) */
138         return 0;
139 }
140
141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
142
143 /*
144  * This callback is executed during stop_machine(). All CPUs are therefore
145  * temporarily stopped. In order not to change guest behavior, we have to
146  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147  * so a CPU won't be stopped while calculating with the epoch.
148  */
149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
150                           void *v)
151 {
152         struct kvm *kvm;
153         struct kvm_vcpu *vcpu;
154         int i;
155         unsigned long long *delta = v;
156
157         list_for_each_entry(kvm, &vm_list, vm_list) {
158                 kvm->arch.epoch -= *delta;
159                 kvm_for_each_vcpu(i, vcpu, kvm) {
160                         vcpu->arch.sie_block->epoch -= *delta;
161                 }
162         }
163         return NOTIFY_OK;
164 }
165
166 static struct notifier_block kvm_clock_notifier = {
167         .notifier_call = kvm_clock_sync,
168 };
169
170 int kvm_arch_hardware_setup(void)
171 {
172         gmap_notifier.notifier_call = kvm_gmap_notifier;
173         gmap_register_ipte_notifier(&gmap_notifier);
174         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175                                        &kvm_clock_notifier);
176         return 0;
177 }
178
179 void kvm_arch_hardware_unsetup(void)
180 {
181         gmap_unregister_ipte_notifier(&gmap_notifier);
182         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183                                          &kvm_clock_notifier);
184 }
185
186 int kvm_arch_init(void *opaque)
187 {
188         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
189         if (!kvm_s390_dbf)
190                 return -ENOMEM;
191
192         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
193                 debug_unregister(kvm_s390_dbf);
194                 return -ENOMEM;
195         }
196
197         /* Register floating interrupt controller interface. */
198         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
199 }
200
201 void kvm_arch_exit(void)
202 {
203         debug_unregister(kvm_s390_dbf);
204 }
205
206 /* Section: device related */
207 long kvm_arch_dev_ioctl(struct file *filp,
208                         unsigned int ioctl, unsigned long arg)
209 {
210         if (ioctl == KVM_S390_ENABLE_SIE)
211                 return s390_enable_sie();
212         return -EINVAL;
213 }
214
215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
216 {
217         int r;
218
219         switch (ext) {
220         case KVM_CAP_S390_PSW:
221         case KVM_CAP_S390_GMAP:
222         case KVM_CAP_SYNC_MMU:
223 #ifdef CONFIG_KVM_S390_UCONTROL
224         case KVM_CAP_S390_UCONTROL:
225 #endif
226         case KVM_CAP_ASYNC_PF:
227         case KVM_CAP_SYNC_REGS:
228         case KVM_CAP_ONE_REG:
229         case KVM_CAP_ENABLE_CAP:
230         case KVM_CAP_S390_CSS_SUPPORT:
231         case KVM_CAP_IOEVENTFD:
232         case KVM_CAP_DEVICE_CTRL:
233         case KVM_CAP_ENABLE_CAP_VM:
234         case KVM_CAP_S390_IRQCHIP:
235         case KVM_CAP_VM_ATTRIBUTES:
236         case KVM_CAP_MP_STATE:
237         case KVM_CAP_S390_INJECT_IRQ:
238         case KVM_CAP_S390_USER_SIGP:
239         case KVM_CAP_S390_USER_STSI:
240         case KVM_CAP_S390_SKEYS:
241         case KVM_CAP_S390_IRQ_STATE:
242                 r = 1;
243                 break;
244         case KVM_CAP_S390_MEM_OP:
245                 r = MEM_OP_MAX_SIZE;
246                 break;
247         case KVM_CAP_NR_VCPUS:
248         case KVM_CAP_MAX_VCPUS:
249                 r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
250                                   : KVM_S390_BSCA_CPU_SLOTS;
251                 break;
252         case KVM_CAP_NR_MEMSLOTS:
253                 r = KVM_USER_MEM_SLOTS;
254                 break;
255         case KVM_CAP_S390_COW:
256                 r = MACHINE_HAS_ESOP;
257                 break;
258         case KVM_CAP_S390_VECTOR_REGISTERS:
259                 r = MACHINE_HAS_VX;
260                 break;
261         case KVM_CAP_S390_RI:
262                 r = test_facility(64);
263                 break;
264         default:
265                 r = 0;
266         }
267         return r;
268 }
269
270 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
271                                         struct kvm_memory_slot *memslot)
272 {
273         gfn_t cur_gfn, last_gfn;
274         unsigned long address;
275         struct gmap *gmap = kvm->arch.gmap;
276
277         /* Loop over all guest pages */
278         last_gfn = memslot->base_gfn + memslot->npages;
279         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
280                 address = gfn_to_hva_memslot(memslot, cur_gfn);
281
282                 if (gmap_test_and_clear_dirty(address, gmap))
283                         mark_page_dirty(kvm, cur_gfn);
284                 cond_resched();
285         }
286 }
287
288 /* Section: vm related */
289 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
290
291 /*
292  * Get (and clear) the dirty memory log for a memory slot.
293  */
294 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
295                                struct kvm_dirty_log *log)
296 {
297         int r;
298         unsigned long n;
299         struct kvm_memslots *slots;
300         struct kvm_memory_slot *memslot;
301         int is_dirty = 0;
302
303         mutex_lock(&kvm->slots_lock);
304
305         r = -EINVAL;
306         if (log->slot >= KVM_USER_MEM_SLOTS)
307                 goto out;
308
309         slots = kvm_memslots(kvm);
310         memslot = id_to_memslot(slots, log->slot);
311         r = -ENOENT;
312         if (!memslot->dirty_bitmap)
313                 goto out;
314
315         kvm_s390_sync_dirty_log(kvm, memslot);
316         r = kvm_get_dirty_log(kvm, log, &is_dirty);
317         if (r)
318                 goto out;
319
320         /* Clear the dirty log */
321         if (is_dirty) {
322                 n = kvm_dirty_bitmap_bytes(memslot);
323                 memset(memslot->dirty_bitmap, 0, n);
324         }
325         r = 0;
326 out:
327         mutex_unlock(&kvm->slots_lock);
328         return r;
329 }
330
331 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
332 {
333         int r;
334
335         if (cap->flags)
336                 return -EINVAL;
337
338         switch (cap->cap) {
339         case KVM_CAP_S390_IRQCHIP:
340                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
341                 kvm->arch.use_irqchip = 1;
342                 r = 0;
343                 break;
344         case KVM_CAP_S390_USER_SIGP:
345                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
346                 kvm->arch.user_sigp = 1;
347                 r = 0;
348                 break;
349         case KVM_CAP_S390_VECTOR_REGISTERS:
350                 mutex_lock(&kvm->lock);
351                 if (atomic_read(&kvm->online_vcpus)) {
352                         r = -EBUSY;
353                 } else if (MACHINE_HAS_VX) {
354                         set_kvm_facility(kvm->arch.model.fac->mask, 129);
355                         set_kvm_facility(kvm->arch.model.fac->list, 129);
356                         r = 0;
357                 } else
358                         r = -EINVAL;
359                 mutex_unlock(&kvm->lock);
360                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
361                          r ? "(not available)" : "(success)");
362                 break;
363         case KVM_CAP_S390_RI:
364                 r = -EINVAL;
365                 mutex_lock(&kvm->lock);
366                 if (atomic_read(&kvm->online_vcpus)) {
367                         r = -EBUSY;
368                 } else if (test_facility(64)) {
369                         set_kvm_facility(kvm->arch.model.fac->mask, 64);
370                         set_kvm_facility(kvm->arch.model.fac->list, 64);
371                         r = 0;
372                 }
373                 mutex_unlock(&kvm->lock);
374                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
375                          r ? "(not available)" : "(success)");
376                 break;
377         case KVM_CAP_S390_USER_STSI:
378                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
379                 kvm->arch.user_stsi = 1;
380                 r = 0;
381                 break;
382         default:
383                 r = -EINVAL;
384                 break;
385         }
386         return r;
387 }
388
389 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
390 {
391         int ret;
392
393         switch (attr->attr) {
394         case KVM_S390_VM_MEM_LIMIT_SIZE:
395                 ret = 0;
396                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
397                          kvm->arch.mem_limit);
398                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
399                         ret = -EFAULT;
400                 break;
401         default:
402                 ret = -ENXIO;
403                 break;
404         }
405         return ret;
406 }
407
408 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
409 {
410         int ret;
411         unsigned int idx;
412         switch (attr->attr) {
413         case KVM_S390_VM_MEM_ENABLE_CMMA:
414                 /* enable CMMA only for z10 and later (EDAT_1) */
415                 ret = -EINVAL;
416                 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
417                         break;
418
419                 ret = -EBUSY;
420                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
421                 mutex_lock(&kvm->lock);
422                 if (atomic_read(&kvm->online_vcpus) == 0) {
423                         kvm->arch.use_cmma = 1;
424                         ret = 0;
425                 }
426                 mutex_unlock(&kvm->lock);
427                 break;
428         case KVM_S390_VM_MEM_CLR_CMMA:
429                 ret = -EINVAL;
430                 if (!kvm->arch.use_cmma)
431                         break;
432
433                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
434                 mutex_lock(&kvm->lock);
435                 idx = srcu_read_lock(&kvm->srcu);
436                 s390_reset_cmma(kvm->arch.gmap->mm);
437                 srcu_read_unlock(&kvm->srcu, idx);
438                 mutex_unlock(&kvm->lock);
439                 ret = 0;
440                 break;
441         case KVM_S390_VM_MEM_LIMIT_SIZE: {
442                 unsigned long new_limit;
443
444                 if (kvm_is_ucontrol(kvm))
445                         return -EINVAL;
446
447                 if (get_user(new_limit, (u64 __user *)attr->addr))
448                         return -EFAULT;
449
450                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
451                     new_limit > kvm->arch.mem_limit)
452                         return -E2BIG;
453
454                 if (!new_limit)
455                         return -EINVAL;
456
457                 /* gmap_alloc takes last usable address */
458                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
459                         new_limit -= 1;
460
461                 ret = -EBUSY;
462                 mutex_lock(&kvm->lock);
463                 if (atomic_read(&kvm->online_vcpus) == 0) {
464                         /* gmap_alloc will round the limit up */
465                         struct gmap *new = gmap_alloc(current->mm, new_limit);
466
467                         if (!new) {
468                                 ret = -ENOMEM;
469                         } else {
470                                 gmap_free(kvm->arch.gmap);
471                                 new->private = kvm;
472                                 kvm->arch.gmap = new;
473                                 ret = 0;
474                         }
475                 }
476                 mutex_unlock(&kvm->lock);
477                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
478                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
479                          (void *) kvm->arch.gmap->asce);
480                 break;
481         }
482         default:
483                 ret = -ENXIO;
484                 break;
485         }
486         return ret;
487 }
488
489 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
490
491 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
492 {
493         struct kvm_vcpu *vcpu;
494         int i;
495
496         if (!test_kvm_facility(kvm, 76))
497                 return -EINVAL;
498
499         mutex_lock(&kvm->lock);
500         switch (attr->attr) {
501         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
502                 get_random_bytes(
503                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
504                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
505                 kvm->arch.crypto.aes_kw = 1;
506                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
507                 break;
508         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
509                 get_random_bytes(
510                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
511                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
512                 kvm->arch.crypto.dea_kw = 1;
513                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
514                 break;
515         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
516                 kvm->arch.crypto.aes_kw = 0;
517                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
518                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
519                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
520                 break;
521         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
522                 kvm->arch.crypto.dea_kw = 0;
523                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
524                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
525                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
526                 break;
527         default:
528                 mutex_unlock(&kvm->lock);
529                 return -ENXIO;
530         }
531
532         kvm_for_each_vcpu(i, vcpu, kvm) {
533                 kvm_s390_vcpu_crypto_setup(vcpu);
534                 exit_sie(vcpu);
535         }
536         mutex_unlock(&kvm->lock);
537         return 0;
538 }
539
540 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
541 {
542         u8 gtod_high;
543
544         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
545                                            sizeof(gtod_high)))
546                 return -EFAULT;
547
548         if (gtod_high != 0)
549                 return -EINVAL;
550         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
551
552         return 0;
553 }
554
555 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
556 {
557         u64 gtod;
558
559         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
560                 return -EFAULT;
561
562         kvm_s390_set_tod_clock(kvm, gtod);
563         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
564         return 0;
565 }
566
567 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
568 {
569         int ret;
570
571         if (attr->flags)
572                 return -EINVAL;
573
574         switch (attr->attr) {
575         case KVM_S390_VM_TOD_HIGH:
576                 ret = kvm_s390_set_tod_high(kvm, attr);
577                 break;
578         case KVM_S390_VM_TOD_LOW:
579                 ret = kvm_s390_set_tod_low(kvm, attr);
580                 break;
581         default:
582                 ret = -ENXIO;
583                 break;
584         }
585         return ret;
586 }
587
588 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
589 {
590         u8 gtod_high = 0;
591
592         if (copy_to_user((void __user *)attr->addr, &gtod_high,
593                                          sizeof(gtod_high)))
594                 return -EFAULT;
595         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
596
597         return 0;
598 }
599
600 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
601 {
602         u64 gtod;
603
604         gtod = kvm_s390_get_tod_clock_fast(kvm);
605         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
606                 return -EFAULT;
607         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
608
609         return 0;
610 }
611
612 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
613 {
614         int ret;
615
616         if (attr->flags)
617                 return -EINVAL;
618
619         switch (attr->attr) {
620         case KVM_S390_VM_TOD_HIGH:
621                 ret = kvm_s390_get_tod_high(kvm, attr);
622                 break;
623         case KVM_S390_VM_TOD_LOW:
624                 ret = kvm_s390_get_tod_low(kvm, attr);
625                 break;
626         default:
627                 ret = -ENXIO;
628                 break;
629         }
630         return ret;
631 }
632
633 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
634 {
635         struct kvm_s390_vm_cpu_processor *proc;
636         int ret = 0;
637
638         mutex_lock(&kvm->lock);
639         if (atomic_read(&kvm->online_vcpus)) {
640                 ret = -EBUSY;
641                 goto out;
642         }
643         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
644         if (!proc) {
645                 ret = -ENOMEM;
646                 goto out;
647         }
648         if (!copy_from_user(proc, (void __user *)attr->addr,
649                             sizeof(*proc))) {
650                 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
651                        sizeof(struct cpuid));
652                 kvm->arch.model.ibc = proc->ibc;
653                 memcpy(kvm->arch.model.fac->list, proc->fac_list,
654                        S390_ARCH_FAC_LIST_SIZE_BYTE);
655         } else
656                 ret = -EFAULT;
657         kfree(proc);
658 out:
659         mutex_unlock(&kvm->lock);
660         return ret;
661 }
662
663 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
664 {
665         int ret = -ENXIO;
666
667         switch (attr->attr) {
668         case KVM_S390_VM_CPU_PROCESSOR:
669                 ret = kvm_s390_set_processor(kvm, attr);
670                 break;
671         }
672         return ret;
673 }
674
675 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
676 {
677         struct kvm_s390_vm_cpu_processor *proc;
678         int ret = 0;
679
680         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
681         if (!proc) {
682                 ret = -ENOMEM;
683                 goto out;
684         }
685         memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
686         proc->ibc = kvm->arch.model.ibc;
687         memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
688         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
689                 ret = -EFAULT;
690         kfree(proc);
691 out:
692         return ret;
693 }
694
695 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
696 {
697         struct kvm_s390_vm_cpu_machine *mach;
698         int ret = 0;
699
700         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
701         if (!mach) {
702                 ret = -ENOMEM;
703                 goto out;
704         }
705         get_cpu_id((struct cpuid *) &mach->cpuid);
706         mach->ibc = sclp.ibc;
707         memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
708                S390_ARCH_FAC_LIST_SIZE_BYTE);
709         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
710                S390_ARCH_FAC_LIST_SIZE_BYTE);
711         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
712                 ret = -EFAULT;
713         kfree(mach);
714 out:
715         return ret;
716 }
717
718 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
719 {
720         int ret = -ENXIO;
721
722         switch (attr->attr) {
723         case KVM_S390_VM_CPU_PROCESSOR:
724                 ret = kvm_s390_get_processor(kvm, attr);
725                 break;
726         case KVM_S390_VM_CPU_MACHINE:
727                 ret = kvm_s390_get_machine(kvm, attr);
728                 break;
729         }
730         return ret;
731 }
732
733 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
734 {
735         int ret;
736
737         switch (attr->group) {
738         case KVM_S390_VM_MEM_CTRL:
739                 ret = kvm_s390_set_mem_control(kvm, attr);
740                 break;
741         case KVM_S390_VM_TOD:
742                 ret = kvm_s390_set_tod(kvm, attr);
743                 break;
744         case KVM_S390_VM_CPU_MODEL:
745                 ret = kvm_s390_set_cpu_model(kvm, attr);
746                 break;
747         case KVM_S390_VM_CRYPTO:
748                 ret = kvm_s390_vm_set_crypto(kvm, attr);
749                 break;
750         default:
751                 ret = -ENXIO;
752                 break;
753         }
754
755         return ret;
756 }
757
758 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
759 {
760         int ret;
761
762         switch (attr->group) {
763         case KVM_S390_VM_MEM_CTRL:
764                 ret = kvm_s390_get_mem_control(kvm, attr);
765                 break;
766         case KVM_S390_VM_TOD:
767                 ret = kvm_s390_get_tod(kvm, attr);
768                 break;
769         case KVM_S390_VM_CPU_MODEL:
770                 ret = kvm_s390_get_cpu_model(kvm, attr);
771                 break;
772         default:
773                 ret = -ENXIO;
774                 break;
775         }
776
777         return ret;
778 }
779
780 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
781 {
782         int ret;
783
784         switch (attr->group) {
785         case KVM_S390_VM_MEM_CTRL:
786                 switch (attr->attr) {
787                 case KVM_S390_VM_MEM_ENABLE_CMMA:
788                 case KVM_S390_VM_MEM_CLR_CMMA:
789                 case KVM_S390_VM_MEM_LIMIT_SIZE:
790                         ret = 0;
791                         break;
792                 default:
793                         ret = -ENXIO;
794                         break;
795                 }
796                 break;
797         case KVM_S390_VM_TOD:
798                 switch (attr->attr) {
799                 case KVM_S390_VM_TOD_LOW:
800                 case KVM_S390_VM_TOD_HIGH:
801                         ret = 0;
802                         break;
803                 default:
804                         ret = -ENXIO;
805                         break;
806                 }
807                 break;
808         case KVM_S390_VM_CPU_MODEL:
809                 switch (attr->attr) {
810                 case KVM_S390_VM_CPU_PROCESSOR:
811                 case KVM_S390_VM_CPU_MACHINE:
812                         ret = 0;
813                         break;
814                 default:
815                         ret = -ENXIO;
816                         break;
817                 }
818                 break;
819         case KVM_S390_VM_CRYPTO:
820                 switch (attr->attr) {
821                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
822                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
823                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
824                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
825                         ret = 0;
826                         break;
827                 default:
828                         ret = -ENXIO;
829                         break;
830                 }
831                 break;
832         default:
833                 ret = -ENXIO;
834                 break;
835         }
836
837         return ret;
838 }
839
840 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
841 {
842         uint8_t *keys;
843         uint64_t hva;
844         unsigned long curkey;
845         int i, r = 0;
846
847         if (args->flags != 0)
848                 return -EINVAL;
849
850         /* Is this guest using storage keys? */
851         if (!mm_use_skey(current->mm))
852                 return KVM_S390_GET_SKEYS_NONE;
853
854         /* Enforce sane limit on memory allocation */
855         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
856                 return -EINVAL;
857
858         keys = kmalloc_array(args->count, sizeof(uint8_t),
859                              GFP_KERNEL | __GFP_NOWARN);
860         if (!keys)
861                 keys = vmalloc(sizeof(uint8_t) * args->count);
862         if (!keys)
863                 return -ENOMEM;
864
865         for (i = 0; i < args->count; i++) {
866                 hva = gfn_to_hva(kvm, args->start_gfn + i);
867                 if (kvm_is_error_hva(hva)) {
868                         r = -EFAULT;
869                         goto out;
870                 }
871
872                 curkey = get_guest_storage_key(current->mm, hva);
873                 if (IS_ERR_VALUE(curkey)) {
874                         r = curkey;
875                         goto out;
876                 }
877                 keys[i] = curkey;
878         }
879
880         r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
881                          sizeof(uint8_t) * args->count);
882         if (r)
883                 r = -EFAULT;
884 out:
885         kvfree(keys);
886         return r;
887 }
888
889 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
890 {
891         uint8_t *keys;
892         uint64_t hva;
893         int i, r = 0;
894
895         if (args->flags != 0)
896                 return -EINVAL;
897
898         /* Enforce sane limit on memory allocation */
899         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
900                 return -EINVAL;
901
902         keys = kmalloc_array(args->count, sizeof(uint8_t),
903                              GFP_KERNEL | __GFP_NOWARN);
904         if (!keys)
905                 keys = vmalloc(sizeof(uint8_t) * args->count);
906         if (!keys)
907                 return -ENOMEM;
908
909         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
910                            sizeof(uint8_t) * args->count);
911         if (r) {
912                 r = -EFAULT;
913                 goto out;
914         }
915
916         /* Enable storage key handling for the guest */
917         r = s390_enable_skey();
918         if (r)
919                 goto out;
920
921         for (i = 0; i < args->count; i++) {
922                 hva = gfn_to_hva(kvm, args->start_gfn + i);
923                 if (kvm_is_error_hva(hva)) {
924                         r = -EFAULT;
925                         goto out;
926                 }
927
928                 /* Lowest order bit is reserved */
929                 if (keys[i] & 0x01) {
930                         r = -EINVAL;
931                         goto out;
932                 }
933
934                 r = set_guest_storage_key(current->mm, hva,
935                                           (unsigned long)keys[i], 0);
936                 if (r)
937                         goto out;
938         }
939 out:
940         kvfree(keys);
941         return r;
942 }
943
944 long kvm_arch_vm_ioctl(struct file *filp,
945                        unsigned int ioctl, unsigned long arg)
946 {
947         struct kvm *kvm = filp->private_data;
948         void __user *argp = (void __user *)arg;
949         struct kvm_device_attr attr;
950         int r;
951
952         switch (ioctl) {
953         case KVM_S390_INTERRUPT: {
954                 struct kvm_s390_interrupt s390int;
955
956                 r = -EFAULT;
957                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
958                         break;
959                 r = kvm_s390_inject_vm(kvm, &s390int);
960                 break;
961         }
962         case KVM_ENABLE_CAP: {
963                 struct kvm_enable_cap cap;
964                 r = -EFAULT;
965                 if (copy_from_user(&cap, argp, sizeof(cap)))
966                         break;
967                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
968                 break;
969         }
970         case KVM_CREATE_IRQCHIP: {
971                 struct kvm_irq_routing_entry routing;
972
973                 r = -EINVAL;
974                 if (kvm->arch.use_irqchip) {
975                         /* Set up dummy routing. */
976                         memset(&routing, 0, sizeof(routing));
977                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
978                 }
979                 break;
980         }
981         case KVM_SET_DEVICE_ATTR: {
982                 r = -EFAULT;
983                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
984                         break;
985                 r = kvm_s390_vm_set_attr(kvm, &attr);
986                 break;
987         }
988         case KVM_GET_DEVICE_ATTR: {
989                 r = -EFAULT;
990                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
991                         break;
992                 r = kvm_s390_vm_get_attr(kvm, &attr);
993                 break;
994         }
995         case KVM_HAS_DEVICE_ATTR: {
996                 r = -EFAULT;
997                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
998                         break;
999                 r = kvm_s390_vm_has_attr(kvm, &attr);
1000                 break;
1001         }
1002         case KVM_S390_GET_SKEYS: {
1003                 struct kvm_s390_skeys args;
1004
1005                 r = -EFAULT;
1006                 if (copy_from_user(&args, argp,
1007                                    sizeof(struct kvm_s390_skeys)))
1008                         break;
1009                 r = kvm_s390_get_skeys(kvm, &args);
1010                 break;
1011         }
1012         case KVM_S390_SET_SKEYS: {
1013                 struct kvm_s390_skeys args;
1014
1015                 r = -EFAULT;
1016                 if (copy_from_user(&args, argp,
1017                                    sizeof(struct kvm_s390_skeys)))
1018                         break;
1019                 r = kvm_s390_set_skeys(kvm, &args);
1020                 break;
1021         }
1022         default:
1023                 r = -ENOTTY;
1024         }
1025
1026         return r;
1027 }
1028
1029 static int kvm_s390_query_ap_config(u8 *config)
1030 {
1031         u32 fcn_code = 0x04000000UL;
1032         u32 cc = 0;
1033
1034         memset(config, 0, 128);
1035         asm volatile(
1036                 "lgr 0,%1\n"
1037                 "lgr 2,%2\n"
1038                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1039                 "0: ipm %0\n"
1040                 "srl %0,28\n"
1041                 "1:\n"
1042                 EX_TABLE(0b, 1b)
1043                 : "+r" (cc)
1044                 : "r" (fcn_code), "r" (config)
1045                 : "cc", "0", "2", "memory"
1046         );
1047
1048         return cc;
1049 }
1050
1051 static int kvm_s390_apxa_installed(void)
1052 {
1053         u8 config[128];
1054         int cc;
1055
1056         if (test_facility(12)) {
1057                 cc = kvm_s390_query_ap_config(config);
1058
1059                 if (cc)
1060                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1061                 else
1062                         return config[0] & 0x40;
1063         }
1064
1065         return 0;
1066 }
1067
1068 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1069 {
1070         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1071
1072         if (kvm_s390_apxa_installed())
1073                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1074         else
1075                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1076 }
1077
1078 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1079 {
1080         get_cpu_id(cpu_id);
1081         cpu_id->version = 0xff;
1082 }
1083
1084 static int kvm_s390_crypto_init(struct kvm *kvm)
1085 {
1086         if (!test_kvm_facility(kvm, 76))
1087                 return 0;
1088
1089         kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1090                                          GFP_KERNEL | GFP_DMA);
1091         if (!kvm->arch.crypto.crycb)
1092                 return -ENOMEM;
1093
1094         kvm_s390_set_crycb_format(kvm);
1095
1096         /* Enable AES/DEA protected key functions by default */
1097         kvm->arch.crypto.aes_kw = 1;
1098         kvm->arch.crypto.dea_kw = 1;
1099         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1100                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1101         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1102                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1103
1104         return 0;
1105 }
1106
1107 static void sca_dispose(struct kvm *kvm)
1108 {
1109         if (kvm->arch.use_esca)
1110                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1111         else
1112                 free_page((unsigned long)(kvm->arch.sca));
1113         kvm->arch.sca = NULL;
1114 }
1115
1116 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1117 {
1118         int i, rc;
1119         char debug_name[16];
1120         static unsigned long sca_offset;
1121
1122         rc = -EINVAL;
1123 #ifdef CONFIG_KVM_S390_UCONTROL
1124         if (type & ~KVM_VM_S390_UCONTROL)
1125                 goto out_err;
1126         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1127                 goto out_err;
1128 #else
1129         if (type)
1130                 goto out_err;
1131 #endif
1132
1133         rc = s390_enable_sie();
1134         if (rc)
1135                 goto out_err;
1136
1137         rc = -ENOMEM;
1138
1139         kvm->arch.use_esca = 0; /* start with basic SCA */
1140         rwlock_init(&kvm->arch.sca_lock);
1141         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
1142         if (!kvm->arch.sca)
1143                 goto out_err;
1144         spin_lock(&kvm_lock);
1145         sca_offset += 16;
1146         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1147                 sca_offset = 0;
1148         kvm->arch.sca = (struct bsca_block *)
1149                         ((char *) kvm->arch.sca + sca_offset);
1150         spin_unlock(&kvm_lock);
1151
1152         sprintf(debug_name, "kvm-%u", current->pid);
1153
1154         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1155         if (!kvm->arch.dbf)
1156                 goto out_err;
1157
1158         /*
1159          * The architectural maximum amount of facilities is 16 kbit. To store
1160          * this amount, 2 kbyte of memory is required. Thus we need a full
1161          * page to hold the guest facility list (arch.model.fac->list) and the
1162          * facility mask (arch.model.fac->mask). Its address size has to be
1163          * 31 bits and word aligned.
1164          */
1165         kvm->arch.model.fac =
1166                 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1167         if (!kvm->arch.model.fac)
1168                 goto out_err;
1169
1170         /* Populate the facility mask initially. */
1171         memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1172                S390_ARCH_FAC_LIST_SIZE_BYTE);
1173         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1174                 if (i < kvm_s390_fac_list_mask_size())
1175                         kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1176                 else
1177                         kvm->arch.model.fac->mask[i] = 0UL;
1178         }
1179
1180         /* Populate the facility list initially. */
1181         memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1182                S390_ARCH_FAC_LIST_SIZE_BYTE);
1183
1184         kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1185         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1186
1187         if (kvm_s390_crypto_init(kvm) < 0)
1188                 goto out_err;
1189
1190         spin_lock_init(&kvm->arch.float_int.lock);
1191         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1192                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1193         init_waitqueue_head(&kvm->arch.ipte_wq);
1194         mutex_init(&kvm->arch.ipte_mutex);
1195
1196         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1197         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1198
1199         if (type & KVM_VM_S390_UCONTROL) {
1200                 kvm->arch.gmap = NULL;
1201                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1202         } else {
1203                 if (sclp.hamax == U64_MAX)
1204                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1205                 else
1206                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1207                                                     sclp.hamax + 1);
1208                 kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1209                 if (!kvm->arch.gmap)
1210                         goto out_err;
1211                 kvm->arch.gmap->private = kvm;
1212                 kvm->arch.gmap->pfault_enabled = 0;
1213         }
1214
1215         kvm->arch.css_support = 0;
1216         kvm->arch.use_irqchip = 0;
1217         kvm->arch.epoch = 0;
1218
1219         spin_lock_init(&kvm->arch.start_stop_lock);
1220         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1221
1222         return 0;
1223 out_err:
1224         kfree(kvm->arch.crypto.crycb);
1225         free_page((unsigned long)kvm->arch.model.fac);
1226         debug_unregister(kvm->arch.dbf);
1227         sca_dispose(kvm);
1228         KVM_EVENT(3, "creation of vm failed: %d", rc);
1229         return rc;
1230 }
1231
1232 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1233 {
1234         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1235         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1236         kvm_s390_clear_local_irqs(vcpu);
1237         kvm_clear_async_pf_completion_queue(vcpu);
1238         if (!kvm_is_ucontrol(vcpu->kvm))
1239                 sca_del_vcpu(vcpu);
1240
1241         if (kvm_is_ucontrol(vcpu->kvm))
1242                 gmap_free(vcpu->arch.gmap);
1243
1244         if (vcpu->kvm->arch.use_cmma)
1245                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1246         free_page((unsigned long)(vcpu->arch.sie_block));
1247
1248         kvm_vcpu_uninit(vcpu);
1249         kmem_cache_free(kvm_vcpu_cache, vcpu);
1250 }
1251
1252 static void kvm_free_vcpus(struct kvm *kvm)
1253 {
1254         unsigned int i;
1255         struct kvm_vcpu *vcpu;
1256
1257         kvm_for_each_vcpu(i, vcpu, kvm)
1258                 kvm_arch_vcpu_destroy(vcpu);
1259
1260         mutex_lock(&kvm->lock);
1261         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1262                 kvm->vcpus[i] = NULL;
1263
1264         atomic_set(&kvm->online_vcpus, 0);
1265         mutex_unlock(&kvm->lock);
1266 }
1267
1268 void kvm_arch_destroy_vm(struct kvm *kvm)
1269 {
1270         kvm_free_vcpus(kvm);
1271         free_page((unsigned long)kvm->arch.model.fac);
1272         sca_dispose(kvm);
1273         debug_unregister(kvm->arch.dbf);
1274         kfree(kvm->arch.crypto.crycb);
1275         if (!kvm_is_ucontrol(kvm))
1276                 gmap_free(kvm->arch.gmap);
1277         kvm_s390_destroy_adapters(kvm);
1278         kvm_s390_clear_float_irqs(kvm);
1279         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1280 }
1281
1282 /* Section: vcpu related */
1283 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1284 {
1285         vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1286         if (!vcpu->arch.gmap)
1287                 return -ENOMEM;
1288         vcpu->arch.gmap->private = vcpu->kvm;
1289
1290         return 0;
1291 }
1292
1293 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1294 {
1295         read_lock(&vcpu->kvm->arch.sca_lock);
1296         if (vcpu->kvm->arch.use_esca) {
1297                 struct esca_block *sca = vcpu->kvm->arch.sca;
1298
1299                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1300                 sca->cpu[vcpu->vcpu_id].sda = 0;
1301         } else {
1302                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1303
1304                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1305                 sca->cpu[vcpu->vcpu_id].sda = 0;
1306         }
1307         read_unlock(&vcpu->kvm->arch.sca_lock);
1308 }
1309
1310 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1311 {
1312         read_lock(&vcpu->kvm->arch.sca_lock);
1313         if (vcpu->kvm->arch.use_esca) {
1314                 struct esca_block *sca = vcpu->kvm->arch.sca;
1315
1316                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1317                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1318                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1319                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1320                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1321         } else {
1322                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1323
1324                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1325                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1326                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1327                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1328         }
1329         read_unlock(&vcpu->kvm->arch.sca_lock);
1330 }
1331
1332 /* Basic SCA to Extended SCA data copy routines */
1333 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1334 {
1335         d->sda = s->sda;
1336         d->sigp_ctrl.c = s->sigp_ctrl.c;
1337         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1338 }
1339
1340 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1341 {
1342         int i;
1343
1344         d->ipte_control = s->ipte_control;
1345         d->mcn[0] = s->mcn;
1346         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1347                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1348 }
1349
1350 static int sca_switch_to_extended(struct kvm *kvm)
1351 {
1352         struct bsca_block *old_sca = kvm->arch.sca;
1353         struct esca_block *new_sca;
1354         struct kvm_vcpu *vcpu;
1355         unsigned int vcpu_idx;
1356         u32 scaol, scaoh;
1357
1358         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1359         if (!new_sca)
1360                 return -ENOMEM;
1361
1362         scaoh = (u32)((u64)(new_sca) >> 32);
1363         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1364
1365         kvm_s390_vcpu_block_all(kvm);
1366         write_lock(&kvm->arch.sca_lock);
1367
1368         sca_copy_b_to_e(new_sca, old_sca);
1369
1370         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1371                 vcpu->arch.sie_block->scaoh = scaoh;
1372                 vcpu->arch.sie_block->scaol = scaol;
1373                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1374         }
1375         kvm->arch.sca = new_sca;
1376         kvm->arch.use_esca = 1;
1377
1378         write_unlock(&kvm->arch.sca_lock);
1379         kvm_s390_vcpu_unblock_all(kvm);
1380
1381         free_page((unsigned long)old_sca);
1382
1383         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1384                  old_sca, kvm->arch.sca);
1385         return 0;
1386 }
1387
1388 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1389 {
1390         int rc;
1391
1392         if (id < KVM_S390_BSCA_CPU_SLOTS)
1393                 return true;
1394         if (!sclp.has_esca)
1395                 return false;
1396
1397         mutex_lock(&kvm->lock);
1398         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1399         mutex_unlock(&kvm->lock);
1400
1401         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1402 }
1403
1404 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1405 {
1406         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1407         kvm_clear_async_pf_completion_queue(vcpu);
1408         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1409                                     KVM_SYNC_GPRS |
1410                                     KVM_SYNC_ACRS |
1411                                     KVM_SYNC_CRS |
1412                                     KVM_SYNC_ARCH0 |
1413                                     KVM_SYNC_PFAULT;
1414         if (test_kvm_facility(vcpu->kvm, 64))
1415                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1416         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1417          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1418          */
1419         if (MACHINE_HAS_VX)
1420                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1421         else
1422                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1423
1424         if (kvm_is_ucontrol(vcpu->kvm))
1425                 return __kvm_ucontrol_vcpu_init(vcpu);
1426
1427         return 0;
1428 }
1429
1430 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1431 {
1432         /* Save host register state */
1433         save_fpu_regs();
1434         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1435         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1436
1437         if (MACHINE_HAS_VX)
1438                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1439         else
1440                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1441         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1442         if (test_fp_ctl(current->thread.fpu.fpc))
1443                 /* User space provided an invalid FPC, let's clear it */
1444                 current->thread.fpu.fpc = 0;
1445
1446         save_access_regs(vcpu->arch.host_acrs);
1447         restore_access_regs(vcpu->run->s.regs.acrs);
1448         gmap_enable(vcpu->arch.gmap);
1449         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1450 }
1451
1452 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1453 {
1454         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1455         gmap_disable(vcpu->arch.gmap);
1456
1457         /* Save guest register state */
1458         save_fpu_regs();
1459         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1460
1461         /* Restore host register state */
1462         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1463         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1464
1465         save_access_regs(vcpu->run->s.regs.acrs);
1466         restore_access_regs(vcpu->arch.host_acrs);
1467 }
1468
1469 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1470 {
1471         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1472         vcpu->arch.sie_block->gpsw.mask = 0UL;
1473         vcpu->arch.sie_block->gpsw.addr = 0UL;
1474         kvm_s390_set_prefix(vcpu, 0);
1475         vcpu->arch.sie_block->cputm     = 0UL;
1476         vcpu->arch.sie_block->ckc       = 0UL;
1477         vcpu->arch.sie_block->todpr     = 0;
1478         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1479         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1480         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1481         /* make sure the new fpc will be lazily loaded */
1482         save_fpu_regs();
1483         current->thread.fpu.fpc = 0;
1484         vcpu->arch.sie_block->gbea = 1;
1485         vcpu->arch.sie_block->pp = 0;
1486         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1487         kvm_clear_async_pf_completion_queue(vcpu);
1488         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1489                 kvm_s390_vcpu_stop(vcpu);
1490         kvm_s390_clear_local_irqs(vcpu);
1491 }
1492
1493 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1494 {
1495         mutex_lock(&vcpu->kvm->lock);
1496         preempt_disable();
1497         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1498         preempt_enable();
1499         mutex_unlock(&vcpu->kvm->lock);
1500         if (!kvm_is_ucontrol(vcpu->kvm)) {
1501                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1502                 sca_add_vcpu(vcpu);
1503         }
1504
1505 }
1506
1507 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1508 {
1509         if (!test_kvm_facility(vcpu->kvm, 76))
1510                 return;
1511
1512         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1513
1514         if (vcpu->kvm->arch.crypto.aes_kw)
1515                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1516         if (vcpu->kvm->arch.crypto.dea_kw)
1517                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1518
1519         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1520 }
1521
1522 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1523 {
1524         free_page(vcpu->arch.sie_block->cbrlo);
1525         vcpu->arch.sie_block->cbrlo = 0;
1526 }
1527
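     /*
      * Allocate the page backing the SIE block's CBRL origin and flag CMMA
      * use in the execution controls.
      */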
1528 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1529 {
1530         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1531         if (!vcpu->arch.sie_block->cbrlo)
1532                 return -ENOMEM;
1533
1534         vcpu->arch.sie_block->ecb2 |= 0x80;
1535         vcpu->arch.sie_block->ecb2 &= ~0x08;
1536         return 0;
1537 }
1538
1539 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1540 {
1541         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1542
1543         vcpu->arch.cpu_id = model->cpu_id;
1544         vcpu->arch.sie_block->ibc = model->ibc;
1545         vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1546 }
1547
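     /*
      * Set up the SIE control block for a freshly created vcpu: initial cpu
      * flags, CPU model/facility dependent execution controls, optional CMMA
      * buffers, the clock comparator timer and the crypto settings.
      */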
1548 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1549 {
1550         int rc = 0;
1551
1552         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1553                                                     CPUSTAT_SM |
1554                                                     CPUSTAT_STOPPED);
1555
1556         if (test_kvm_facility(vcpu->kvm, 78))
1557                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1558         else if (test_kvm_facility(vcpu->kvm, 8))
1559                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1560
1561         kvm_s390_vcpu_setup_model(vcpu);
1562
1563         vcpu->arch.sie_block->ecb   = 6;
1564         if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1565                 vcpu->arch.sie_block->ecb |= 0x10;
1566
1567         vcpu->arch.sie_block->ecb2  = 8;
1568         vcpu->arch.sie_block->eca   = 0xC1002000U;
1569         if (sclp.has_siif)
1570                 vcpu->arch.sie_block->eca |= 1;
1571         if (sclp.has_sigpif)
1572                 vcpu->arch.sie_block->eca |= 0x10000000U;
1573         if (test_kvm_facility(vcpu->kvm, 64))
1574                 vcpu->arch.sie_block->ecb3 |= 0x01;
1575         if (test_kvm_facility(vcpu->kvm, 129)) {
1576                 vcpu->arch.sie_block->eca |= 0x00020000;
1577                 vcpu->arch.sie_block->ecd |= 0x20000000;
1578         }
1579         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1580         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1581
1582         if (vcpu->kvm->arch.use_cmma) {
1583                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1584                 if (rc)
1585                         return rc;
1586         }
1587         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1588         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1589
1590         kvm_s390_vcpu_crypto_setup(vcpu);
1591
1592         return rc;
1593 }
1594
1595 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1596                                       unsigned int id)
1597 {
1598         struct kvm_vcpu *vcpu;
1599         struct sie_page *sie_page;
1600         int rc = -EINVAL;
1601
1602         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1603                 goto out;
1604
1605         rc = -ENOMEM;
1606
1607         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1608         if (!vcpu)
1609                 goto out;
1610
1611         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1612         if (!sie_page)
1613                 goto out_free_cpu;
1614
1615         vcpu->arch.sie_block = &sie_page->sie_block;
1616         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1617
1618         vcpu->arch.sie_block->icpua = id;
1619         spin_lock_init(&vcpu->arch.local_int.lock);
1620         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1621         vcpu->arch.local_int.wq = &vcpu->wq;
1622         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1623
1624         rc = kvm_vcpu_init(vcpu, kvm, id);
1625         if (rc)
1626                 goto out_free_sie_block;
1627         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1628                  vcpu->arch.sie_block);
1629         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1630
1631         return vcpu;
1632 out_free_sie_block:
1633         free_page((unsigned long)(vcpu->arch.sie_block));
1634 out_free_cpu:
1635         kmem_cache_free(kvm_vcpu_cache, vcpu);
1636 out:
1637         return ERR_PTR(rc);
1638 }
1639
1640 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1641 {
1642         return kvm_s390_vcpu_has_irq(vcpu, 0);
1643 }
1644
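     /*
      * The block/unblock and request/request-handled helpers toggle bits in the
      * SIE prog20 field; setting a bit is paired with exit_sie() so a vcpu that
      * is currently in SIE notices the change immediately.
      */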
1645 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1646 {
1647         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1648         exit_sie(vcpu);
1649 }
1650
1651 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1652 {
1653         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1654 }
1655
1656 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1657 {
1658         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1659         exit_sie(vcpu);
1660 }
1661
1662 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1663 {
1664         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1665 }
1666
1667 /* Kick a guest cpu out of SIE and wait until SIE is not running.
1668  * If the CPU is not running (e.g. waiting as idle) the function
1669  * returns immediately.
1670  */
1671 void exit_sie(struct kvm_vcpu *vcpu)
1672 {
1673         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1674         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1675                 cpu_relax();
1676 }
1677
1678 /* Kick a guest cpu out of SIE to process a request synchronously */
1679 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1680 {
1681         kvm_make_request(req, vcpu);
1682         kvm_s390_vcpu_request(vcpu);
1683 }
1684
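     /*
      * gmap notifier callback: if the invalidated address hits one of a vcpu's
      * two prefix pages, ask that vcpu to re-establish its prefix mapping via
      * KVM_REQ_MMU_RELOAD.
      */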
1685 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1686 {
1687         int i;
1688         struct kvm *kvm = gmap->private;
1689         struct kvm_vcpu *vcpu;
1690
1691         kvm_for_each_vcpu(i, vcpu, kvm) {
1692                 /* match against both prefix pages */
1693                 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1694                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1695                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1696                 }
1697         }
1698 }
1699
1700 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1701 {
1702         /* kvm common code refers to this, but never calls it */
1703         BUG();
1704         return 0;
1705 }
1706
1707 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1708                                            struct kvm_one_reg *reg)
1709 {
1710         int r = -EINVAL;
1711
1712         switch (reg->id) {
1713         case KVM_REG_S390_TODPR:
1714                 r = put_user(vcpu->arch.sie_block->todpr,
1715                              (u32 __user *)reg->addr);
1716                 break;
1717         case KVM_REG_S390_EPOCHDIFF:
1718                 r = put_user(vcpu->arch.sie_block->epoch,
1719                              (u64 __user *)reg->addr);
1720                 break;
1721         case KVM_REG_S390_CPU_TIMER:
1722                 r = put_user(vcpu->arch.sie_block->cputm,
1723                              (u64 __user *)reg->addr);
1724                 break;
1725         case KVM_REG_S390_CLOCK_COMP:
1726                 r = put_user(vcpu->arch.sie_block->ckc,
1727                              (u64 __user *)reg->addr);
1728                 break;
1729         case KVM_REG_S390_PFTOKEN:
1730                 r = put_user(vcpu->arch.pfault_token,
1731                              (u64 __user *)reg->addr);
1732                 break;
1733         case KVM_REG_S390_PFCOMPARE:
1734                 r = put_user(vcpu->arch.pfault_compare,
1735                              (u64 __user *)reg->addr);
1736                 break;
1737         case KVM_REG_S390_PFSELECT:
1738                 r = put_user(vcpu->arch.pfault_select,
1739                              (u64 __user *)reg->addr);
1740                 break;
1741         case KVM_REG_S390_PP:
1742                 r = put_user(vcpu->arch.sie_block->pp,
1743                              (u64 __user *)reg->addr);
1744                 break;
1745         case KVM_REG_S390_GBEA:
1746                 r = put_user(vcpu->arch.sie_block->gbea,
1747                              (u64 __user *)reg->addr);
1748                 break;
1749         default:
1750                 break;
1751         }
1752
1753         return r;
1754 }
1755
1756 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1757                                            struct kvm_one_reg *reg)
1758 {
1759         int r = -EINVAL;
1760
1761         switch (reg->id) {
1762         case KVM_REG_S390_TODPR:
1763                 r = get_user(vcpu->arch.sie_block->todpr,
1764                              (u32 __user *)reg->addr);
1765                 break;
1766         case KVM_REG_S390_EPOCHDIFF:
1767                 r = get_user(vcpu->arch.sie_block->epoch,
1768                              (u64 __user *)reg->addr);
1769                 break;
1770         case KVM_REG_S390_CPU_TIMER:
1771                 r = get_user(vcpu->arch.sie_block->cputm,
1772                              (u64 __user *)reg->addr);
1773                 break;
1774         case KVM_REG_S390_CLOCK_COMP:
1775                 r = get_user(vcpu->arch.sie_block->ckc,
1776                              (u64 __user *)reg->addr);
1777                 break;
1778         case KVM_REG_S390_PFTOKEN:
1779                 r = get_user(vcpu->arch.pfault_token,
1780                              (u64 __user *)reg->addr);
1781                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1782                         kvm_clear_async_pf_completion_queue(vcpu);
1783                 break;
1784         case KVM_REG_S390_PFCOMPARE:
1785                 r = get_user(vcpu->arch.pfault_compare,
1786                              (u64 __user *)reg->addr);
1787                 break;
1788         case KVM_REG_S390_PFSELECT:
1789                 r = get_user(vcpu->arch.pfault_select,
1790                              (u64 __user *)reg->addr);
1791                 break;
1792         case KVM_REG_S390_PP:
1793                 r = get_user(vcpu->arch.sie_block->pp,
1794                              (u64 __user *)reg->addr);
1795                 break;
1796         case KVM_REG_S390_GBEA:
1797                 r = get_user(vcpu->arch.sie_block->gbea,
1798                              (u64 __user *)reg->addr);
1799                 break;
1800         default:
1801                 break;
1802         }
1803
1804         return r;
1805 }
1806
1807 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1808 {
1809         kvm_s390_vcpu_initial_reset(vcpu);
1810         return 0;
1811 }
1812
1813 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1814 {
1815         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1816         return 0;
1817 }
1818
1819 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1820 {
1821         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1822         return 0;
1823 }
1824
1825 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1826                                   struct kvm_sregs *sregs)
1827 {
1828         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1829         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1830         restore_access_regs(vcpu->run->s.regs.acrs);
1831         return 0;
1832 }
1833
1834 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1835                                   struct kvm_sregs *sregs)
1836 {
1837         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1838         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1839         return 0;
1840 }
1841
1842 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1843 {
1844         /* make sure the new values will be lazily loaded */
1845         save_fpu_regs();
1846         if (test_fp_ctl(fpu->fpc))
1847                 return -EINVAL;
1848         current->thread.fpu.fpc = fpu->fpc;
1849         if (MACHINE_HAS_VX)
1850                 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
1851         else
1852                 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
1853         return 0;
1854 }
1855
1856 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1857 {
1858         /* make sure we have the latest values */
1859         save_fpu_regs();
1860         if (MACHINE_HAS_VX)
1861                 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
1862         else
1863                 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
1864         fpu->fpc = current->thread.fpu.fpc;
1865         return 0;
1866 }
1867
1868 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1869 {
1870         int rc = 0;
1871
1872         if (!is_vcpu_stopped(vcpu))
1873                 rc = -EBUSY;
1874         else {
1875                 vcpu->run->psw_mask = psw.mask;
1876                 vcpu->run->psw_addr = psw.addr;
1877         }
1878         return rc;
1879 }
1880
1881 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1882                                   struct kvm_translation *tr)
1883 {
1884         return -EINVAL; /* not implemented yet */
1885 }
1886
1887 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1888                               KVM_GUESTDBG_USE_HW_BP | \
1889                               KVM_GUESTDBG_ENABLE)
1890
1891 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1892                                         struct kvm_guest_debug *dbg)
1893 {
1894         int rc = 0;
1895
1896         vcpu->guest_debug = 0;
1897         kvm_s390_clear_bp_data(vcpu);
1898
1899         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1900                 return -EINVAL;
1901
1902         if (dbg->control & KVM_GUESTDBG_ENABLE) {
1903                 vcpu->guest_debug = dbg->control;
1904                 /* enforce guest PER */
1905                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1906
1907                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1908                         rc = kvm_s390_import_bp_data(vcpu, dbg);
1909         } else {
1910                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1911                 vcpu->arch.guestdbg.last_bp = 0;
1912         }
1913
1914         if (rc) {
1915                 vcpu->guest_debug = 0;
1916                 kvm_s390_clear_bp_data(vcpu);
1917                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1918         }
1919
1920         return rc;
1921 }
1922
1923 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1924                                     struct kvm_mp_state *mp_state)
1925 {
1926         /* CHECK_STOP and LOAD are not supported yet */
1927         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1928                                        KVM_MP_STATE_OPERATING;
1929 }
1930
1931 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1932                                     struct kvm_mp_state *mp_state)
1933 {
1934         int rc = 0;
1935
1936         /* user space knows about this interface - let it control the state */
1937         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1938
1939         switch (mp_state->mp_state) {
1940         case KVM_MP_STATE_STOPPED:
1941                 kvm_s390_vcpu_stop(vcpu);
1942                 break;
1943         case KVM_MP_STATE_OPERATING:
1944                 kvm_s390_vcpu_start(vcpu);
1945                 break;
1946         case KVM_MP_STATE_LOAD:
1947         case KVM_MP_STATE_CHECK_STOP:
1948                 /* fall through - CHECK_STOP and LOAD are not supported yet */
1949         default:
1950                 rc = -ENXIO;
1951         }
1952
1953         return rc;
1954 }
1955
1956 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1957 {
1958         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1959 }
1960
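     /*
      * Process pending vcpu->requests before (re)entering SIE: re-arm the ipte
      * notifier for the prefix pages (KVM_REQ_MMU_RELOAD), flush the guest TLB
      * (KVM_REQ_TLB_FLUSH) and enable or disable IBS; KVM_REQ_UNHALT is simply
      * cleared.
      */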
1961 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1962 {
1963 retry:
1964         kvm_s390_vcpu_request_handled(vcpu);
1965         if (!vcpu->requests)
1966                 return 0;
1967         /*
1968          * We use MMU_RELOAD just to re-arm the ipte notifier for the
1969          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1970          * This ensures that the ipte instruction for this request has
1971          * already finished. We might race against a second unmapper that
1972          * wants to set the blocking bit. Lets just retry the request loop.
1973          * wants to set the blocking bit. Let's just retry the request loop.
1974         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1975                 int rc;
1976                 rc = gmap_ipte_notify(vcpu->arch.gmap,
1977                                       kvm_s390_get_prefix(vcpu),
1978                                       PAGE_SIZE * 2);
1979                 if (rc)
1980                         return rc;
1981                 goto retry;
1982         }
1983
1984         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1985                 vcpu->arch.sie_block->ihcpu = 0xffff;
1986                 goto retry;
1987         }
1988
1989         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1990                 if (!ibs_enabled(vcpu)) {
1991                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1992                         atomic_or(CPUSTAT_IBS,
1993                                         &vcpu->arch.sie_block->cpuflags);
1994                 }
1995                 goto retry;
1996         }
1997
1998         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1999                 if (ibs_enabled(vcpu)) {
2000                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2001                         atomic_andnot(CPUSTAT_IBS,
2002                                           &vcpu->arch.sie_block->cpuflags);
2003                 }
2004                 goto retry;
2005         }
2006
2007         /* nothing to do, just clear the request */
2008         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2009
2010         return 0;
2011 }
2012
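     /*
      * Set the guest TOD clock: recompute the VM-wide epoch as the delta to the
      * host TOD and propagate it to all vcpus while they are blocked.
      */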
2013 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2014 {
2015         struct kvm_vcpu *vcpu;
2016         int i;
2017
2018         mutex_lock(&kvm->lock);
2019         preempt_disable();
2020         kvm->arch.epoch = tod - get_tod_clock();
2021         kvm_s390_vcpu_block_all(kvm);
2022         kvm_for_each_vcpu(i, vcpu, kvm)
2023                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2024         kvm_s390_vcpu_unblock_all(kvm);
2025         preempt_enable();
2026         mutex_unlock(&kvm->lock);
2027 }
2028
2029 /**
2030  * kvm_arch_fault_in_page - fault-in guest page if necessary
2031  * @vcpu: The corresponding virtual cpu
2032  * @gpa: Guest physical address
2033  * @writable: Whether the page should be writable or not
2034  *
2035  * Make sure that a guest page has been faulted-in on the host.
2036  *
2037  * Return: Zero on success, negative error code otherwise.
2038  */
2039 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2040 {
2041         return gmap_fault(vcpu->arch.gmap, gpa,
2042                           writable ? FAULT_FLAG_WRITE : 0);
2043 }
2044
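     /*
      * Deliver the pseudo-page-fault notification carrying the given token: an
      * INIT interrupt on the vcpu while the fault is outstanding, a DONE
      * interrupt on the VM once it has been resolved.
      */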
2045 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2046                                       unsigned long token)
2047 {
2048         struct kvm_s390_interrupt inti;
2049         struct kvm_s390_irq irq;
2050
2051         if (start_token) {
2052                 irq.u.ext.ext_params2 = token;
2053                 irq.type = KVM_S390_INT_PFAULT_INIT;
2054                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2055         } else {
2056                 inti.type = KVM_S390_INT_PFAULT_DONE;
2057                 inti.parm64 = token;
2058                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2059         }
2060 }
2061
2062 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2063                                      struct kvm_async_pf *work)
2064 {
2065         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2066         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2067 }
2068
2069 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2070                                  struct kvm_async_pf *work)
2071 {
2072         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2073         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2074 }
2075
2076 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2077                                struct kvm_async_pf *work)
2078 {
2079         /* s390 will always inject the page directly */
2080 }
2081
2082 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2083 {
2084         /*
2085          * s390 will always inject the page directly,
2086          * but we still want kvm_check_async_pf_completion() to clean up
2087          */
2088         return true;
2089 }
2090
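     /*
      * Check whether the current host fault may be handled as an asynchronous
      * pseudo-page-fault (pfault enabled by the guest and the notification
      * interrupt can be delivered right now); if so, queue the async work.
      */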
2091 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2092 {
2093         hva_t hva;
2094         struct kvm_arch_async_pf arch;
2095         int rc;
2096
2097         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2098                 return 0;
2099         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2100             vcpu->arch.pfault_compare)
2101                 return 0;
2102         if (psw_extint_disabled(vcpu))
2103                 return 0;
2104         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2105                 return 0;
2106         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2107                 return 0;
2108         if (!vcpu->arch.gmap->pfault_enabled)
2109                 return 0;
2110
2111         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2112         hva += current->thread.gmap_addr & ~PAGE_MASK;
2113         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2114                 return 0;
2115
2116         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2117         return rc;
2118 }
2119
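     /*
      * Work done before every entry into SIE: async pfault completion handling,
      * rescheduling, pending machine checks, interrupt delivery, request
      * processing and guest-debug PER setup.
      */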
2120 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2121 {
2122         int rc, cpuflags;
2123
2124         /*
2125          * On s390 notifications for arriving pages will be delivered directly
2126          * to the guest but the housekeeping for completed pfaults is
2127          * handled outside the worker.
2128          */
2129         kvm_check_async_pf_completion(vcpu);
2130
2131         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2132         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2133
2134         if (need_resched())
2135                 schedule();
2136
2137         if (test_cpu_flag(CIF_MCCK_PENDING))
2138                 s390_handle_mcck();
2139
2140         if (!kvm_is_ucontrol(vcpu->kvm)) {
2141                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2142                 if (rc)
2143                         return rc;
2144         }
2145
2146         rc = kvm_s390_handle_requests(vcpu);
2147         if (rc)
2148                 return rc;
2149
2150         if (guestdbg_enabled(vcpu)) {
2151                 kvm_s390_backup_guest_per_regs(vcpu);
2152                 kvm_s390_patch_guest_per_regs(vcpu);
2153         }
2154
2155         vcpu->arch.sie_block->icptcode = 0;
2156         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2157         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2158         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2159
2160         return 0;
2161 }
2162
2163 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2164 {
2165         struct kvm_s390_pgm_info pgm_info = {
2166                 .code = PGM_ADDRESSING,
2167         };
2168         u8 opcode, ilen;
2169         int rc;
2170
2171         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2172         trace_kvm_s390_sie_fault(vcpu);
2173
2174         /*
2175          * We want to inject an addressing exception, which is defined as a
2176          * suppressing or terminating exception. However, since we came here
2177          * by a DAT access exception, the PSW still points to the faulting
2178          * instruction since DAT exceptions are nullifying. So we've got
2179          * to look up the current opcode to get the length of the instruction
2180          * to be able to forward the PSW.
2181          */
2182         rc = read_guest_instr(vcpu, &opcode, 1);
2183         ilen = insn_length(opcode);
2184         if (rc < 0) {
2185                 return rc;
2186         } else if (rc) {
2187                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2188                  * Forward by arbitrary ilc, injection will take care of
2189                  * nullification if necessary.
2190                  */
2191                 pgm_info = vcpu->arch.pgm;
2192                 ilen = 4;
2193         }
2194         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2195         kvm_s390_forward_psw(vcpu, ilen);
2196         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2197 }
2198
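     /*
      * Handle a SIE exit: dispatch intercepts to the intercept handlers, report
      * unhandled intercepts and ucontrol faults to userspace, and turn host
      * page faults into async pfaults or a synchronous fault-in.
      */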
2199 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2200 {
2201         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2202                    vcpu->arch.sie_block->icptcode);
2203         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2204
2205         if (guestdbg_enabled(vcpu))
2206                 kvm_s390_restore_guest_per_regs(vcpu);
2207
2208         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2209         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2210
2211         if (vcpu->arch.sie_block->icptcode > 0) {
2212                 int rc = kvm_handle_sie_intercept(vcpu);
2213
2214                 if (rc != -EOPNOTSUPP)
2215                         return rc;
2216                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2217                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2218                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2219                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2220                 return -EREMOTE;
2221         } else if (exit_reason != -EFAULT) {
2222                 vcpu->stat.exit_null++;
2223                 return 0;
2224         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2225                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2226                 vcpu->run->s390_ucontrol.trans_exc_code =
2227                                                 current->thread.gmap_addr;
2228                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2229                 return -EREMOTE;
2230         } else if (current->thread.gmap_pfault) {
2231                 trace_kvm_s390_major_guest_pfault(vcpu);
2232                 current->thread.gmap_pfault = 0;
2233                 if (kvm_arch_setup_async_pf(vcpu))
2234                         return 0;
2235                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2236         }
2237         return vcpu_post_run_fault_in_sie(vcpu);
2238 }
2239
2240 static int __vcpu_run(struct kvm_vcpu *vcpu)
2241 {
2242         int rc, exit_reason;
2243
2244         /*
2245          * We try to hold kvm->srcu during most of vcpu_run (except when
2246          * running the guest), so that memslots (and other stuff) are protected
2247          */
2248         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2249
2250         do {
2251                 rc = vcpu_pre_run(vcpu);
2252                 if (rc)
2253                         break;
2254
2255                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2256                 /*
2257                  * As PF_VCPU will be used in the fault handler, there must
2258                  * be no uaccess between guest_enter and guest_exit.
2259                  */
2260                 local_irq_disable();
2261                 __kvm_guest_enter();
2262                 local_irq_enable();
2263                 exit_reason = sie64a(vcpu->arch.sie_block,
2264                                      vcpu->run->s.regs.gprs);
2265                 local_irq_disable();
2266                 __kvm_guest_exit();
2267                 local_irq_enable();
2268                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2269
2270                 rc = vcpu_post_run(vcpu, exit_reason);
2271         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2272
2273         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2274         return rc;
2275 }
2276
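     /*
      * sync_regs()/store_regs() copy register state between the kvm_run
      * synchronization area and the vcpu/SIE block around each KVM_RUN.
      */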
2277 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2278 {
2279         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2280         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2281         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2282                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2283         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2284                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2285                 /* some control register changes require a tlb flush */
2286                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2287         }
2288         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2289                 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2290                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2291                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2292                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2293                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2294         }
2295         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2296                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2297                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2298                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2299                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2300                         kvm_clear_async_pf_completion_queue(vcpu);
2301         }
2302         kvm_run->kvm_dirty_regs = 0;
2303 }
2304
2305 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2306 {
2307         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2308         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2309         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2310         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2311         kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2312         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2313         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2314         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2315         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2316         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2317         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2318         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2319 }
2320
2321 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2322 {
2323         int rc;
2324         sigset_t sigsaved;
2325
2326         if (guestdbg_exit_pending(vcpu)) {
2327                 kvm_s390_prepare_debug_exit(vcpu);
2328                 return 0;
2329         }
2330
2331         if (vcpu->sigset_active)
2332                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2333
2334         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2335                 kvm_s390_vcpu_start(vcpu);
2336         } else if (is_vcpu_stopped(vcpu)) {
2337                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2338                                    vcpu->vcpu_id);
2339                 return -EINVAL;
2340         }
2341
2342         sync_regs(vcpu, kvm_run);
2343
2344         might_fault();
2345         rc = __vcpu_run(vcpu);
2346
2347         if (signal_pending(current) && !rc) {
2348                 kvm_run->exit_reason = KVM_EXIT_INTR;
2349                 rc = -EINTR;
2350         }
2351
2352         if (guestdbg_exit_pending(vcpu) && !rc)  {
2353                 kvm_s390_prepare_debug_exit(vcpu);
2354                 rc = 0;
2355         }
2356
2357         if (rc == -EREMOTE) {
2358                 /* userspace support is needed, kvm_run has been prepared */
2359                 rc = 0;
2360         }
2361
2362         store_regs(vcpu, kvm_run);
2363
2364         if (vcpu->sigset_active)
2365                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2366
2367         vcpu->stat.exit_userspace++;
2368         return rc;
2369 }
2370
2371 /*
2372  * store status at address
2373  * we have two special cases:
2374  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2375  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2376  */
2377 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2378 {
2379         unsigned char archmode = 1;
2380         freg_t fprs[NUM_FPRS];
2381         unsigned int px;
2382         u64 clkcomp;
2383         int rc;
2384
2385         px = kvm_s390_get_prefix(vcpu);
2386         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2387                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2388                         return -EFAULT;
2389                 gpa = 0;
2390         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2391                 if (write_guest_real(vcpu, 163, &archmode, 1))
2392                         return -EFAULT;
2393                 gpa = px;
2394         } else
2395                 gpa -= __LC_FPREGS_SAVE_AREA;
2396
2397         /* manually convert vector registers if necessary */
2398         if (MACHINE_HAS_VX) {
2399                 convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
2400                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2401                                      fprs, 128);
2402         } else {
2403                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2404                                      vcpu->run->s.regs.fprs, 128);
2405         }
2406         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2407                               vcpu->run->s.regs.gprs, 128);
2408         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2409                               &vcpu->arch.sie_block->gpsw, 16);
2410         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2411                               &px, 4);
2412         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2413                               &vcpu->run->s.regs.fpc, 4);
2414         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2415                               &vcpu->arch.sie_block->todpr, 4);
2416         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2417                               &vcpu->arch.sie_block->cputm, 8);
2418         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2419         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2420                               &clkcomp, 8);
2421         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2422                               &vcpu->run->s.regs.acrs, 64);
2423         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2424                               &vcpu->arch.sie_block->gcr, 128);
2425         return rc ? -EFAULT : 0;
2426 }
2427
2428 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2429 {
2430         /*
2431          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2432          * copying in vcpu load/put. Let's update our copies before we save
2433          * them into the save area.
2434          */
2435         save_fpu_regs();
2436         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2437         save_access_regs(vcpu->run->s.regs.acrs);
2438
2439         return kvm_s390_store_status_unloaded(vcpu, addr);
2440 }
2441
2442 /*
2443  * store additional status at address
2444  */
2445 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2446                                         unsigned long gpa)
2447 {
2448         /* Only bits 0-53 are used for address formation */
2449         if (!(gpa & ~0x3ff))
2450                 return 0;
2451
2452         return write_guest_abs(vcpu, gpa & ~0x3ff,
2453                                (void *)&vcpu->run->s.regs.vrs, 512);
2454 }
2455
2456 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2457 {
2458         if (!test_kvm_facility(vcpu->kvm, 129))
2459                 return 0;
2460
2461         /*
2462          * The guest VXRS are in the host VXRS due to the lazy
2463          * copying in vcpu load/put. We can simply call save_fpu_regs()
2464          * to save the current register state because we are in the
2465          * middle of a load/put cycle.
2466          *
2467          * Let's update our copies before we save it into the save area.
2468          */
2469         save_fpu_regs();
2470
2471         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2472 }
2473
2474 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2475 {
2476         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2477         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2478 }
2479
2480 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2481 {
2482         unsigned int i;
2483         struct kvm_vcpu *vcpu;
2484
2485         kvm_for_each_vcpu(i, vcpu, kvm) {
2486                 __disable_ibs_on_vcpu(vcpu);
2487         }
2488 }
2489
2490 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2491 {
2492         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2493         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2494 }
2495
2496 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2497 {
2498         int i, online_vcpus, started_vcpus = 0;
2499
2500         if (!is_vcpu_stopped(vcpu))
2501                 return;
2502
2503         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2504         /* Only one cpu at a time may enter/leave the STOPPED state. */
2505         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2506         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2507
2508         for (i = 0; i < online_vcpus; i++) {
2509                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2510                         started_vcpus++;
2511         }
2512
2513         if (started_vcpus == 0) {
2514                 /* we're the only active VCPU -> speed it up */
2515                 __enable_ibs_on_vcpu(vcpu);
2516         } else if (started_vcpus == 1) {
2517                 /*
2518                  * As we are starting a second VCPU, we have to disable
2519                  * the IBS facility on all VCPUs to remove potentially
2520                  * outstanding ENABLE requests.
2521                  */
2522                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2523         }
2524
2525         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2526         /*
2527          * Another VCPU might have used IBS while we were offline.
2528          * Let's play safe and flush the VCPU at startup.
2529          */
2530         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2531         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2532         return;
2533 }
2534
2535 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2536 {
2537         int i, online_vcpus, started_vcpus = 0;
2538         struct kvm_vcpu *started_vcpu = NULL;
2539
2540         if (is_vcpu_stopped(vcpu))
2541                 return;
2542
2543         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2544         /* Only one cpu at a time may enter/leave the STOPPED state. */
2545         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2546         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2547
2548         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2549         kvm_s390_clear_stop_irq(vcpu);
2550
2551         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2552         __disable_ibs_on_vcpu(vcpu);
2553
2554         for (i = 0; i < online_vcpus; i++) {
2555                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2556                         started_vcpus++;
2557                         started_vcpu = vcpu->kvm->vcpus[i];
2558                 }
2559         }
2560
2561         if (started_vcpus == 1) {
2562                 /*
2563                  * As we only have one VCPU left, we want to enable the
2564                  * IBS facility for that VCPU to speed it up.
2565                  */
2566                 __enable_ibs_on_vcpu(started_vcpu);
2567         }
2568
2569         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2570         return;
2571 }
2572
2573 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2574                                      struct kvm_enable_cap *cap)
2575 {
2576         int r;
2577
2578         if (cap->flags)
2579                 return -EINVAL;
2580
2581         switch (cap->cap) {
2582         case KVM_CAP_S390_CSS_SUPPORT:
2583                 if (!vcpu->kvm->arch.css_support) {
2584                         vcpu->kvm->arch.css_support = 1;
2585                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2586                         trace_kvm_s390_enable_css(vcpu->kvm);
2587                 }
2588                 r = 0;
2589                 break;
2590         default:
2591                 r = -EINVAL;
2592                 break;
2593         }
2594         return r;
2595 }
2596
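     /*
      * Backend for the KVM_S390_MEM_OP vcpu ioctl: read or write guest logical
      * memory through a temporary buffer, or only check accessibility when
      * KVM_S390_MEMOP_F_CHECK_ONLY is set; access exceptions may optionally be
      * injected into the guest.
      */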
2597 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2598                                   struct kvm_s390_mem_op *mop)
2599 {
2600         void __user *uaddr = (void __user *)mop->buf;
2601         void *tmpbuf = NULL;
2602         int r, srcu_idx;
2603         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2604                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2605
2606         if (mop->flags & ~supported_flags)
2607                 return -EINVAL;
2608
2609         if (mop->size > MEM_OP_MAX_SIZE)
2610                 return -E2BIG;
2611
2612         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2613                 tmpbuf = vmalloc(mop->size);
2614                 if (!tmpbuf)
2615                         return -ENOMEM;
2616         }
2617
2618         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2619
2620         switch (mop->op) {
2621         case KVM_S390_MEMOP_LOGICAL_READ:
2622                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2623                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2624                                             mop->size, GACC_FETCH);
2625                         break;
2626                 }
2627                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2628                 if (r == 0) {
2629                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2630                                 r = -EFAULT;
2631                 }
2632                 break;
2633         case KVM_S390_MEMOP_LOGICAL_WRITE:
2634                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2635                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2636                                             mop->size, GACC_STORE);
2637                         break;
2638                 }
2639                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2640                         r = -EFAULT;
2641                         break;
2642                 }
2643                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2644                 break;
2645         default:
2646                 r = -EINVAL;
2647         }
2648
2649         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2650
2651         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2652                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2653
2654         vfree(tmpbuf);
2655         return r;
2656 }
2657
2658 long kvm_arch_vcpu_ioctl(struct file *filp,
2659                          unsigned int ioctl, unsigned long arg)
2660 {
2661         struct kvm_vcpu *vcpu = filp->private_data;
2662         void __user *argp = (void __user *)arg;
2663         int idx;
2664         long r;
2665
2666         switch (ioctl) {
2667         case KVM_S390_IRQ: {
2668                 struct kvm_s390_irq s390irq;
2669
2670                 r = -EFAULT;
2671                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2672                         break;
2673                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2674                 break;
2675         }
2676         case KVM_S390_INTERRUPT: {
2677                 struct kvm_s390_interrupt s390int;
2678                 struct kvm_s390_irq s390irq;
2679
2680                 r = -EFAULT;
2681                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2682                         break;
2683                 if (s390int_to_s390irq(&s390int, &s390irq))
2684                         return -EINVAL;
2685                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2686                 break;
2687         }
2688         case KVM_S390_STORE_STATUS:
2689                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2690                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2691                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2692                 break;
2693         case KVM_S390_SET_INITIAL_PSW: {
2694                 psw_t psw;
2695
2696                 r = -EFAULT;
2697                 if (copy_from_user(&psw, argp, sizeof(psw)))
2698                         break;
2699                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2700                 break;
2701         }
2702         case KVM_S390_INITIAL_RESET:
2703                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2704                 break;
2705         case KVM_SET_ONE_REG:
2706         case KVM_GET_ONE_REG: {
2707                 struct kvm_one_reg reg;
2708                 r = -EFAULT;
2709                 if (copy_from_user(&reg, argp, sizeof(reg)))
2710                         break;
2711                 if (ioctl == KVM_SET_ONE_REG)
2712                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2713                 else
2714                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2715                 break;
2716         }
2717 #ifdef CONFIG_KVM_S390_UCONTROL
2718         case KVM_S390_UCAS_MAP: {
2719                 struct kvm_s390_ucas_mapping ucasmap;
2720
2721                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2722                         r = -EFAULT;
2723                         break;
2724                 }
2725
2726                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2727                         r = -EINVAL;
2728                         break;
2729                 }
2730
2731                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2732                                      ucasmap.vcpu_addr, ucasmap.length);
2733                 break;
2734         }
2735         case KVM_S390_UCAS_UNMAP: {
2736                 struct kvm_s390_ucas_mapping ucasmap;
2737
2738                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2739                         r = -EFAULT;
2740                         break;
2741                 }
2742
2743                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2744                         r = -EINVAL;
2745                         break;
2746                 }
2747
2748                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2749                         ucasmap.length);
2750                 break;
2751         }
2752 #endif
2753         case KVM_S390_VCPU_FAULT: {
2754                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2755                 break;
2756         }
2757         case KVM_ENABLE_CAP:
2758         {
2759                 struct kvm_enable_cap cap;
2760                 r = -EFAULT;
2761                 if (copy_from_user(&cap, argp, sizeof(cap)))
2762                         break;
2763                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2764                 break;
2765         }
2766         case KVM_S390_MEM_OP: {
2767                 struct kvm_s390_mem_op mem_op;
2768
2769                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2770                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2771                 else
2772                         r = -EFAULT;
2773                 break;
2774         }
2775         case KVM_S390_SET_IRQ_STATE: {
2776                 struct kvm_s390_irq_state irq_state;
2777
2778                 r = -EFAULT;
2779                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2780                         break;
2781                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2782                     irq_state.len == 0 ||
2783                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2784                         r = -EINVAL;
2785                         break;
2786                 }
2787                 r = kvm_s390_set_irq_state(vcpu,
2788                                            (void __user *) irq_state.buf,
2789                                            irq_state.len);
2790                 break;
2791         }
2792         case KVM_S390_GET_IRQ_STATE: {
2793                 struct kvm_s390_irq_state irq_state;
2794
2795                 r = -EFAULT;
2796                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2797                         break;
2798                 if (irq_state.len == 0) {
2799                         r = -EINVAL;
2800                         break;
2801                 }
2802                 r = kvm_s390_get_irq_state(vcpu,
2803                                            (__u8 __user *)  irq_state.buf,
2804                                            irq_state.len);
2805                 break;
2806         }
2807         default:
2808                 r = -ENOTTY;
2809         }
2810         return r;
2811 }
2812
2813 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2814 {
2815 #ifdef CONFIG_KVM_S390_UCONTROL
2816         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2817                  && (kvm_is_ucontrol(vcpu->kvm))) {
2818                 vmf->page = virt_to_page(vcpu->arch.sie_block);
2819                 get_page(vmf->page);
2820                 return 0;
2821         }
2822 #endif
2823         return VM_FAULT_SIGBUS;
2824 }
2825
2826 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2827                             unsigned long npages)
2828 {
2829         return 0;
2830 }
2831
2832 /* Section: memory related */
2833 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2834                                    struct kvm_memory_slot *memslot,
2835                                    const struct kvm_userspace_memory_region *mem,
2836                                    enum kvm_mr_change change)
2837 {
2838         /* A few sanity checks. Memory slots have to start and end at a
2839            segment boundary (1MB). The memory in userland may be fragmented
2840            into various different vmas. It is okay to mmap() and munmap()
2841            stuff in this slot at any time after doing this call */
2842
2843         if (mem->userspace_addr & 0xffffful)
2844                 return -EINVAL;
2845
2846         if (mem->memory_size & 0xffffful)
2847                 return -EINVAL;
2848
2849         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
2850                 return -EINVAL;
2851
2852         return 0;
2853 }
2854
2855 void kvm_arch_commit_memory_region(struct kvm *kvm,
2856                                 const struct kvm_userspace_memory_region *mem,
2857                                 const struct kvm_memory_slot *old,
2858                                 const struct kvm_memory_slot *new,
2859                                 enum kvm_mr_change change)
2860 {
2861         int rc;
2862
2863         /* If the basics of the memslot do not change, we do not want
2864          * to update the gmap. Every update causes several unnecessary
2865          * segment translation exceptions. This is usually handled just
2866          * fine by the normal fault handler + gmap, but it will also
2867          * cause faults on the prefix page of running guest CPUs.
2868          */
2869         if (old->userspace_addr == mem->userspace_addr &&
2870             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2871             old->npages * PAGE_SIZE == mem->memory_size)
2872                 return;
2873
2874         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2875                 mem->guest_phys_addr, mem->memory_size);
2876         if (rc)
2877                 pr_warn("failed to commit memory region\n");
2878         return;
2879 }
2880
2881 static int __init kvm_s390_init(void)
2882 {
2883         if (!sclp.has_sief2) {
2884                 pr_info("SIE not available\n");
2885                 return -ENODEV;
2886         }
2887
2888         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2889 }
2890
2891 static void __exit kvm_s390_exit(void)
2892 {
2893         kvm_exit();
2894 }
2895
2896 module_init(kvm_s390_init);
2897 module_exit(kvm_s390_exit);
2898
2899 /*
2900  * Enable autoloading of the kvm module.
2901  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2902  * since x86 takes a different approach.
2903  */
2904 #include <linux/miscdevice.h>
2905 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2906 MODULE_ALIAS("devname:kvm");