]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - arch/s390/kvm/kvm-s390.c
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
[karo-tx-linux.git] / arch / s390 / kvm / kvm-s390.c
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45
/* Prefix used by pr_fmt() below: messages are formatted as "kvm-s390: ...". */
#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

/*
 * NOTE(review): CREATE_TRACE_POINTS is defined before the trace headers are
 * included, presumably so the tracepoints are instantiated in this
 * translation unit -- confirm against the tracepoint documentation.
 */
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
/* Byte size of KVM_MAX_VCPUS + LOCAL_IRQS entries of struct kvm_s390_irq. */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

/*
 * Expands to two initializer values for a debugfs_entries[] element: the
 * offset of stat.x inside struct kvm_vcpu, followed by KVM_STAT_VCPU.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60
/*
 * Table of per-VCPU statistics counters. Each entry pairs a name string with
 * the location of the matching counter in struct kvm_vcpu's stat member (see
 * VCPU_STAT). The table is terminated by a { NULL } entry.
 * NOTE(review): presumably consumed by common KVM code to create debugfs
 * files -- confirm against the consumer of this table.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
        { "diagnose_258", VCPU_STAT(diagnose_258) },
        { "diagnose_308", VCPU_STAT(diagnose_308) },
        { "diagnose_500", VCPU_STAT(diagnose_500) },
        { NULL }
};
129
130 /* allow nested virtualization in KVM (if enabled by user space) */
131 static int nested;
132 module_param(nested, int, S_IRUGO);
133 MODULE_PARM_DESC(nested, "Nested virtualization support");
134
/*
 * Upper facilities limit for kvm: an array of 16 unsigned longs whose leading
 * elements come from FACILITIES_KVM; elements without an initializer are zero.
 */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
137
138 unsigned long kvm_s390_fac_list_mask_size(void)
139 {
140         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
141         return ARRAY_SIZE(kvm_s390_fac_list_mask);
142 }
143
/* available cpu features supported by kvm (bits set via allow_cpu_feat()) */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* gmap pte notifiers; registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* s390 debug feature handle; registered as "kvm-trace" in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
152
153 /* Section: not file related */
/*
 * Nothing needs to be switched on here: every s390 is virtualization
 * enabled ;-). Always returns 0.
 */
int kvm_arch_hardware_enable(void)
{
        return 0;
}
159
160 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
161                               unsigned long end);
162
163 /*
164  * This callback is executed during stop_machine(). All CPUs are therefore
165  * temporarily stopped. In order not to change guest behavior, we have to
166  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
167  * so a CPU won't be stopped while calculating with the epoch.
168  */
169 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
170                           void *v)
171 {
172         struct kvm *kvm;
173         struct kvm_vcpu *vcpu;
174         int i;
175         unsigned long long *delta = v;
176
177         list_for_each_entry(kvm, &vm_list, vm_list) {
178                 kvm->arch.epoch -= *delta;
179                 kvm_for_each_vcpu(i, vcpu, kvm) {
180                         vcpu->arch.sie_block->epoch -= *delta;
181                         if (vcpu->arch.cputm_enabled)
182                                 vcpu->arch.cputm_start += *delta;
183                         if (vcpu->arch.vsie_block)
184                                 vcpu->arch.vsie_block->epoch -= *delta;
185                 }
186         }
187         return NOTIFY_OK;
188 }
189
/*
 * Notifier block that invokes kvm_clock_sync(); it is added to
 * s390_epoch_delta_notifier in kvm_arch_hardware_setup().
 */
static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};
193
194 int kvm_arch_hardware_setup(void)
195 {
196         gmap_notifier.notifier_call = kvm_gmap_notifier;
197         gmap_register_pte_notifier(&gmap_notifier);
198         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
199         gmap_register_pte_notifier(&vsie_gmap_notifier);
200         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
201                                        &kvm_clock_notifier);
202         return 0;
203 }
204
205 void kvm_arch_hardware_unsetup(void)
206 {
207         gmap_unregister_pte_notifier(&gmap_notifier);
208         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
209         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
210                                          &kvm_clock_notifier);
211 }
212
213 static void allow_cpu_feat(unsigned long nr)
214 {
215         set_bit_inv(nr, kvm_s390_available_cpu_feat);
216 }
217
/*
 * Issue the PLO instruction in its "test bit" form for function code @nr.
 *
 * Returns 1 if the resulting condition code is 0, otherwise 0.
 * NOTE(review): a condition code of 0 presumably means the function code is
 * installed -- confirm against the PLO description.
 */
static inline int plo_test_bit(unsigned char nr)
{
        /* general register 0 carries the function code with bit 0x100 set */
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc;

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                /* fetch the condition code and shift it down to the low bits */
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}
233
234 static void kvm_s390_cpu_feat_init(void)
235 {
236         int i;
237
238         for (i = 0; i < 256; ++i) {
239                 if (plo_test_bit(i))
240                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
241         }
242
243         if (test_facility(28)) /* TOD-clock steering */
244                 ptff(kvm_s390_available_subfunc.ptff,
245                      sizeof(kvm_s390_available_subfunc.ptff),
246                      PTFF_QAF);
247
248         if (test_facility(17)) { /* MSA */
249                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
250                               kvm_s390_available_subfunc.kmac);
251                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
252                               kvm_s390_available_subfunc.kmc);
253                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
254                               kvm_s390_available_subfunc.km);
255                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
256                               kvm_s390_available_subfunc.kimd);
257                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
258                               kvm_s390_available_subfunc.klmd);
259         }
260         if (test_facility(76)) /* MSA3 */
261                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
262                               kvm_s390_available_subfunc.pckmo);
263         if (test_facility(77)) { /* MSA4 */
264                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
265                               kvm_s390_available_subfunc.kmctr);
266                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
267                               kvm_s390_available_subfunc.kmf);
268                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
269                               kvm_s390_available_subfunc.kmo);
270                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
271                               kvm_s390_available_subfunc.pcc);
272         }
273         if (test_facility(57)) /* MSA5 */
274                 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
275                               kvm_s390_available_subfunc.ppno);
276
277         if (MACHINE_HAS_ESOP)
278                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
279         /*
280          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
281          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
282          */
283         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
284             !test_facility(3) || !nested)
285                 return;
286         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
287         if (sclp.has_64bscao)
288                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
289         if (sclp.has_siif)
290                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
291         if (sclp.has_gpere)
292                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
293         if (sclp.has_gsls)
294                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
295         if (sclp.has_ib)
296                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
297         if (sclp.has_cei)
298                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
299         if (sclp.has_ibs)
300                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
301         /*
302          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
303          * all skey handling functions read/set the skey from the PGSTE
304          * instead of the real storage key.
305          *
306          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
307          * pages being detected as preserved although they are resident.
308          *
309          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
310          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
311          *
312          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
313          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
314          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
315          *
316          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
317          * cannot easily shadow the SCA because of the ipte lock.
318          */
319 }
320
321 int kvm_arch_init(void *opaque)
322 {
323         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
324         if (!kvm_s390_dbf)
325                 return -ENOMEM;
326
327         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
328                 debug_unregister(kvm_s390_dbf);
329                 return -ENOMEM;
330         }
331
332         kvm_s390_cpu_feat_init();
333
334         /* Register floating interrupt controller interface. */
335         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
336 }
337
338 void kvm_arch_exit(void)
339 {
340         debug_unregister(kvm_s390_dbf);
341 }
342
343 /* Section: device related */
344 long kvm_arch_dev_ioctl(struct file *filp,
345                         unsigned int ioctl, unsigned long arg)
346 {
347         if (ioctl == KVM_S390_ENABLE_SIE)
348                 return s390_enable_sie();
349         return -EINVAL;
350 }
351
352 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
353 {
354         int r;
355
356         switch (ext) {
357         case KVM_CAP_S390_PSW:
358         case KVM_CAP_S390_GMAP:
359         case KVM_CAP_SYNC_MMU:
360 #ifdef CONFIG_KVM_S390_UCONTROL
361         case KVM_CAP_S390_UCONTROL:
362 #endif
363         case KVM_CAP_ASYNC_PF:
364         case KVM_CAP_SYNC_REGS:
365         case KVM_CAP_ONE_REG:
366         case KVM_CAP_ENABLE_CAP:
367         case KVM_CAP_S390_CSS_SUPPORT:
368         case KVM_CAP_IOEVENTFD:
369         case KVM_CAP_DEVICE_CTRL:
370         case KVM_CAP_ENABLE_CAP_VM:
371         case KVM_CAP_S390_IRQCHIP:
372         case KVM_CAP_VM_ATTRIBUTES:
373         case KVM_CAP_MP_STATE:
374         case KVM_CAP_IMMEDIATE_EXIT:
375         case KVM_CAP_S390_INJECT_IRQ:
376         case KVM_CAP_S390_USER_SIGP:
377         case KVM_CAP_S390_USER_STSI:
378         case KVM_CAP_S390_SKEYS:
379         case KVM_CAP_S390_IRQ_STATE:
380         case KVM_CAP_S390_USER_INSTR0:
381                 r = 1;
382                 break;
383         case KVM_CAP_S390_MEM_OP:
384                 r = MEM_OP_MAX_SIZE;
385                 break;
386         case KVM_CAP_NR_VCPUS:
387         case KVM_CAP_MAX_VCPUS:
388                 r = KVM_S390_BSCA_CPU_SLOTS;
389                 if (!kvm_s390_use_sca_entries())
390                         r = KVM_MAX_VCPUS;
391                 else if (sclp.has_esca && sclp.has_64bscao)
392                         r = KVM_S390_ESCA_CPU_SLOTS;
393                 break;
394         case KVM_CAP_NR_MEMSLOTS:
395                 r = KVM_USER_MEM_SLOTS;
396                 break;
397         case KVM_CAP_S390_COW:
398                 r = MACHINE_HAS_ESOP;
399                 break;
400         case KVM_CAP_S390_VECTOR_REGISTERS:
401                 r = MACHINE_HAS_VX;
402                 break;
403         case KVM_CAP_S390_RI:
404                 r = test_facility(64);
405                 break;
406         default:
407                 r = 0;
408         }
409         return r;
410 }
411
412 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
413                                         struct kvm_memory_slot *memslot)
414 {
415         gfn_t cur_gfn, last_gfn;
416         unsigned long address;
417         struct gmap *gmap = kvm->arch.gmap;
418
419         /* Loop over all guest pages */
420         last_gfn = memslot->base_gfn + memslot->npages;
421         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
422                 address = gfn_to_hva_memslot(memslot, cur_gfn);
423
424                 if (test_and_clear_guest_dirty(gmap->mm, address))
425                         mark_page_dirty(kvm, cur_gfn);
426                 if (fatal_signal_pending(current))
427                         return;
428                 cond_resched();
429         }
430 }
431
432 /* Section: vm related */
433 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
434
435 /*
436  * Get (and clear) the dirty memory log for a memory slot.
437  */
438 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
439                                struct kvm_dirty_log *log)
440 {
441         int r;
442         unsigned long n;
443         struct kvm_memslots *slots;
444         struct kvm_memory_slot *memslot;
445         int is_dirty = 0;
446
447         if (kvm_is_ucontrol(kvm))
448                 return -EINVAL;
449
450         mutex_lock(&kvm->slots_lock);
451
452         r = -EINVAL;
453         if (log->slot >= KVM_USER_MEM_SLOTS)
454                 goto out;
455
456         slots = kvm_memslots(kvm);
457         memslot = id_to_memslot(slots, log->slot);
458         r = -ENOENT;
459         if (!memslot->dirty_bitmap)
460                 goto out;
461
462         kvm_s390_sync_dirty_log(kvm, memslot);
463         r = kvm_get_dirty_log(kvm, log, &is_dirty);
464         if (r)
465                 goto out;
466
467         /* Clear the dirty log */
468         if (is_dirty) {
469                 n = kvm_dirty_bitmap_bytes(memslot);
470                 memset(memslot->dirty_bitmap, 0, n);
471         }
472         r = 0;
473 out:
474         mutex_unlock(&kvm->slots_lock);
475         return r;
476 }
477
478 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
479 {
480         unsigned int i;
481         struct kvm_vcpu *vcpu;
482
483         kvm_for_each_vcpu(i, vcpu, kvm) {
484                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
485         }
486 }
487
488 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
489 {
490         int r;
491
492         if (cap->flags)
493                 return -EINVAL;
494
495         switch (cap->cap) {
496         case KVM_CAP_S390_IRQCHIP:
497                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
498                 kvm->arch.use_irqchip = 1;
499                 r = 0;
500                 break;
501         case KVM_CAP_S390_USER_SIGP:
502                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
503                 kvm->arch.user_sigp = 1;
504                 r = 0;
505                 break;
506         case KVM_CAP_S390_VECTOR_REGISTERS:
507                 mutex_lock(&kvm->lock);
508                 if (kvm->created_vcpus) {
509                         r = -EBUSY;
510                 } else if (MACHINE_HAS_VX) {
511                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
512                         set_kvm_facility(kvm->arch.model.fac_list, 129);
513                         if (test_facility(134)) {
514                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
515                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
516                         }
517                         if (test_facility(135)) {
518                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
519                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
520                         }
521                         r = 0;
522                 } else
523                         r = -EINVAL;
524                 mutex_unlock(&kvm->lock);
525                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
526                          r ? "(not available)" : "(success)");
527                 break;
528         case KVM_CAP_S390_RI:
529                 r = -EINVAL;
530                 mutex_lock(&kvm->lock);
531                 if (kvm->created_vcpus) {
532                         r = -EBUSY;
533                 } else if (test_facility(64)) {
534                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
535                         set_kvm_facility(kvm->arch.model.fac_list, 64);
536                         r = 0;
537                 }
538                 mutex_unlock(&kvm->lock);
539                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
540                          r ? "(not available)" : "(success)");
541                 break;
542         case KVM_CAP_S390_USER_STSI:
543                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
544                 kvm->arch.user_stsi = 1;
545                 r = 0;
546                 break;
547         case KVM_CAP_S390_USER_INSTR0:
548                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
549                 kvm->arch.user_instr0 = 1;
550                 icpt_operexc_on_all_vcpus(kvm);
551                 r = 0;
552                 break;
553         default:
554                 r = -EINVAL;
555                 break;
556         }
557         return r;
558 }
559
560 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
561 {
562         int ret;
563
564         switch (attr->attr) {
565         case KVM_S390_VM_MEM_LIMIT_SIZE:
566                 ret = 0;
567                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
568                          kvm->arch.mem_limit);
569                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
570                         ret = -EFAULT;
571                 break;
572         default:
573                 ret = -ENXIO;
574                 break;
575         }
576         return ret;
577 }
578
579 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
580 {
581         int ret;
582         unsigned int idx;
583         switch (attr->attr) {
584         case KVM_S390_VM_MEM_ENABLE_CMMA:
585                 ret = -ENXIO;
586                 if (!sclp.has_cmma)
587                         break;
588
589                 ret = -EBUSY;
590                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
591                 mutex_lock(&kvm->lock);
592                 if (!kvm->created_vcpus) {
593                         kvm->arch.use_cmma = 1;
594                         ret = 0;
595                 }
596                 mutex_unlock(&kvm->lock);
597                 break;
598         case KVM_S390_VM_MEM_CLR_CMMA:
599                 ret = -ENXIO;
600                 if (!sclp.has_cmma)
601                         break;
602                 ret = -EINVAL;
603                 if (!kvm->arch.use_cmma)
604                         break;
605
606                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
607                 mutex_lock(&kvm->lock);
608                 idx = srcu_read_lock(&kvm->srcu);
609                 s390_reset_cmma(kvm->arch.gmap->mm);
610                 srcu_read_unlock(&kvm->srcu, idx);
611                 mutex_unlock(&kvm->lock);
612                 ret = 0;
613                 break;
614         case KVM_S390_VM_MEM_LIMIT_SIZE: {
615                 unsigned long new_limit;
616
617                 if (kvm_is_ucontrol(kvm))
618                         return -EINVAL;
619
620                 if (get_user(new_limit, (u64 __user *)attr->addr))
621                         return -EFAULT;
622
623                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
624                     new_limit > kvm->arch.mem_limit)
625                         return -E2BIG;
626
627                 if (!new_limit)
628                         return -EINVAL;
629
630                 /* gmap_create takes last usable address */
631                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
632                         new_limit -= 1;
633
634                 ret = -EBUSY;
635                 mutex_lock(&kvm->lock);
636                 if (!kvm->created_vcpus) {
637                         /* gmap_create will round the limit up */
638                         struct gmap *new = gmap_create(current->mm, new_limit);
639
640                         if (!new) {
641                                 ret = -ENOMEM;
642                         } else {
643                                 gmap_remove(kvm->arch.gmap);
644                                 new->private = kvm;
645                                 kvm->arch.gmap = new;
646                                 ret = 0;
647                         }
648                 }
649                 mutex_unlock(&kvm->lock);
650                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
651                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
652                          (void *) kvm->arch.gmap->asce);
653                 break;
654         }
655         default:
656                 ret = -ENXIO;
657                 break;
658         }
659         return ret;
660 }
661
662 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
663
664 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
665 {
666         struct kvm_vcpu *vcpu;
667         int i;
668
669         if (!test_kvm_facility(kvm, 76))
670                 return -EINVAL;
671
672         mutex_lock(&kvm->lock);
673         switch (attr->attr) {
674         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
675                 get_random_bytes(
676                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
677                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
678                 kvm->arch.crypto.aes_kw = 1;
679                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
680                 break;
681         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
682                 get_random_bytes(
683                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
684                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
685                 kvm->arch.crypto.dea_kw = 1;
686                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
687                 break;
688         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
689                 kvm->arch.crypto.aes_kw = 0;
690                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
691                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
692                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
693                 break;
694         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
695                 kvm->arch.crypto.dea_kw = 0;
696                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
697                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
698                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
699                 break;
700         default:
701                 mutex_unlock(&kvm->lock);
702                 return -ENXIO;
703         }
704
705         kvm_for_each_vcpu(i, vcpu, kvm) {
706                 kvm_s390_vcpu_crypto_setup(vcpu);
707                 exit_sie(vcpu);
708         }
709         mutex_unlock(&kvm->lock);
710         return 0;
711 }
712
713 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
714 {
715         u8 gtod_high;
716
717         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
718                                            sizeof(gtod_high)))
719                 return -EFAULT;
720
721         if (gtod_high != 0)
722                 return -EINVAL;
723         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
724
725         return 0;
726 }
727
728 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
729 {
730         u64 gtod;
731
732         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
733                 return -EFAULT;
734
735         kvm_s390_set_tod_clock(kvm, gtod);
736         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
737         return 0;
738 }
739
740 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
741 {
742         int ret;
743
744         if (attr->flags)
745                 return -EINVAL;
746
747         switch (attr->attr) {
748         case KVM_S390_VM_TOD_HIGH:
749                 ret = kvm_s390_set_tod_high(kvm, attr);
750                 break;
751         case KVM_S390_VM_TOD_LOW:
752                 ret = kvm_s390_set_tod_low(kvm, attr);
753                 break;
754         default:
755                 ret = -ENXIO;
756                 break;
757         }
758         return ret;
759 }
760
761 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
762 {
763         u8 gtod_high = 0;
764
765         if (copy_to_user((void __user *)attr->addr, &gtod_high,
766                                          sizeof(gtod_high)))
767                 return -EFAULT;
768         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
769
770         return 0;
771 }
772
773 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
774 {
775         u64 gtod;
776
777         gtod = kvm_s390_get_tod_clock_fast(kvm);
778         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
779                 return -EFAULT;
780         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
781
782         return 0;
783 }
784
785 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
786 {
787         int ret;
788
789         if (attr->flags)
790                 return -EINVAL;
791
792         switch (attr->attr) {
793         case KVM_S390_VM_TOD_HIGH:
794                 ret = kvm_s390_get_tod_high(kvm, attr);
795                 break;
796         case KVM_S390_VM_TOD_LOW:
797                 ret = kvm_s390_get_tod_low(kvm, attr);
798                 break;
799         default:
800                 ret = -ENXIO;
801                 break;
802         }
803         return ret;
804 }
805
806 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
807 {
808         struct kvm_s390_vm_cpu_processor *proc;
809         u16 lowest_ibc, unblocked_ibc;
810         int ret = 0;
811
812         mutex_lock(&kvm->lock);
813         if (kvm->created_vcpus) {
814                 ret = -EBUSY;
815                 goto out;
816         }
817         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
818         if (!proc) {
819                 ret = -ENOMEM;
820                 goto out;
821         }
822         if (!copy_from_user(proc, (void __user *)attr->addr,
823                             sizeof(*proc))) {
824                 kvm->arch.model.cpuid = proc->cpuid;
825                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
826                 unblocked_ibc = sclp.ibc & 0xfff;
827                 if (lowest_ibc && proc->ibc) {
828                         if (proc->ibc > unblocked_ibc)
829                                 kvm->arch.model.ibc = unblocked_ibc;
830                         else if (proc->ibc < lowest_ibc)
831                                 kvm->arch.model.ibc = lowest_ibc;
832                         else
833                                 kvm->arch.model.ibc = proc->ibc;
834                 }
835                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
836                        S390_ARCH_FAC_LIST_SIZE_BYTE);
837                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
838                          kvm->arch.model.ibc,
839                          kvm->arch.model.cpuid);
840                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
841                          kvm->arch.model.fac_list[0],
842                          kvm->arch.model.fac_list[1],
843                          kvm->arch.model.fac_list[2]);
844         } else
845                 ret = -EFAULT;
846         kfree(proc);
847 out:
848         mutex_unlock(&kvm->lock);
849         return ret;
850 }
851
852 static int kvm_s390_set_processor_feat(struct kvm *kvm,
853                                        struct kvm_device_attr *attr)
854 {
855         struct kvm_s390_vm_cpu_feat data;
856         int ret = -EBUSY;
857
858         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
859                 return -EFAULT;
860         if (!bitmap_subset((unsigned long *) data.feat,
861                            kvm_s390_available_cpu_feat,
862                            KVM_S390_VM_CPU_FEAT_NR_BITS))
863                 return -EINVAL;
864
865         mutex_lock(&kvm->lock);
866         if (!atomic_read(&kvm->online_vcpus)) {
867                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
868                             KVM_S390_VM_CPU_FEAT_NR_BITS);
869                 ret = 0;
870         }
871         mutex_unlock(&kvm->lock);
872         return ret;
873 }
874
/*
 * KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (set): not implemented, always -ENXIO.
 */
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Placeholder: as soon as kernel and hardware support configuring
	 * subfunctions, they have to be stored in kvm->arch together with
	 * the fact that user space configured them.
	 */
	return -ENXIO;
}
884
885 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
886 {
887         int ret = -ENXIO;
888
889         switch (attr->attr) {
890         case KVM_S390_VM_CPU_PROCESSOR:
891                 ret = kvm_s390_set_processor(kvm, attr);
892                 break;
893         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
894                 ret = kvm_s390_set_processor_feat(kvm, attr);
895                 break;
896         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
897                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
898                 break;
899         }
900         return ret;
901 }
902
903 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
904 {
905         struct kvm_s390_vm_cpu_processor *proc;
906         int ret = 0;
907
908         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
909         if (!proc) {
910                 ret = -ENOMEM;
911                 goto out;
912         }
913         proc->cpuid = kvm->arch.model.cpuid;
914         proc->ibc = kvm->arch.model.ibc;
915         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
916                S390_ARCH_FAC_LIST_SIZE_BYTE);
917         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
918                  kvm->arch.model.ibc,
919                  kvm->arch.model.cpuid);
920         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
921                  kvm->arch.model.fac_list[0],
922                  kvm->arch.model.fac_list[1],
923                  kvm->arch.model.fac_list[2]);
924         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
925                 ret = -EFAULT;
926         kfree(proc);
927 out:
928         return ret;
929 }
930
931 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
932 {
933         struct kvm_s390_vm_cpu_machine *mach;
934         int ret = 0;
935
936         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
937         if (!mach) {
938                 ret = -ENOMEM;
939                 goto out;
940         }
941         get_cpu_id((struct cpuid *) &mach->cpuid);
942         mach->ibc = sclp.ibc;
943         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
944                S390_ARCH_FAC_LIST_SIZE_BYTE);
945         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
946                sizeof(S390_lowcore.stfle_fac_list));
947         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
948                  kvm->arch.model.ibc,
949                  kvm->arch.model.cpuid);
950         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
951                  mach->fac_mask[0],
952                  mach->fac_mask[1],
953                  mach->fac_mask[2]);
954         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
955                  mach->fac_list[0],
956                  mach->fac_list[1],
957                  mach->fac_list[2]);
958         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
959                 ret = -EFAULT;
960         kfree(mach);
961 out:
962         return ret;
963 }
964
965 static int kvm_s390_get_processor_feat(struct kvm *kvm,
966                                        struct kvm_device_attr *attr)
967 {
968         struct kvm_s390_vm_cpu_feat data;
969
970         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
971                     KVM_S390_VM_CPU_FEAT_NR_BITS);
972         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
973                 return -EFAULT;
974         return 0;
975 }
976
977 static int kvm_s390_get_machine_feat(struct kvm *kvm,
978                                      struct kvm_device_attr *attr)
979 {
980         struct kvm_s390_vm_cpu_feat data;
981
982         bitmap_copy((unsigned long *) data.feat,
983                     kvm_s390_available_cpu_feat,
984                     KVM_S390_VM_CPU_FEAT_NR_BITS);
985         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
986                 return -EFAULT;
987         return 0;
988 }
989
/*
 * KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (get): not implemented, always -ENXIO.
 */
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Placeholder: once subfunctions can be configured (kernel + hw
	 * support), check whether user space already set them and, if so,
	 * copy them from kvm->arch.
	 */
	return -ENXIO;
}
1000
1001 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1002                                         struct kvm_device_attr *attr)
1003 {
1004         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1005             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1006                 return -EFAULT;
1007         return 0;
1008 }
1009 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1010 {
1011         int ret = -ENXIO;
1012
1013         switch (attr->attr) {
1014         case KVM_S390_VM_CPU_PROCESSOR:
1015                 ret = kvm_s390_get_processor(kvm, attr);
1016                 break;
1017         case KVM_S390_VM_CPU_MACHINE:
1018                 ret = kvm_s390_get_machine(kvm, attr);
1019                 break;
1020         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1021                 ret = kvm_s390_get_processor_feat(kvm, attr);
1022                 break;
1023         case KVM_S390_VM_CPU_MACHINE_FEAT:
1024                 ret = kvm_s390_get_machine_feat(kvm, attr);
1025                 break;
1026         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1027                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1028                 break;
1029         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1030                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1031                 break;
1032         }
1033         return ret;
1034 }
1035
1036 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1037 {
1038         int ret;
1039
1040         switch (attr->group) {
1041         case KVM_S390_VM_MEM_CTRL:
1042                 ret = kvm_s390_set_mem_control(kvm, attr);
1043                 break;
1044         case KVM_S390_VM_TOD:
1045                 ret = kvm_s390_set_tod(kvm, attr);
1046                 break;
1047         case KVM_S390_VM_CPU_MODEL:
1048                 ret = kvm_s390_set_cpu_model(kvm, attr);
1049                 break;
1050         case KVM_S390_VM_CRYPTO:
1051                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1052                 break;
1053         default:
1054                 ret = -ENXIO;
1055                 break;
1056         }
1057
1058         return ret;
1059 }
1060
1061 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1062 {
1063         int ret;
1064
1065         switch (attr->group) {
1066         case KVM_S390_VM_MEM_CTRL:
1067                 ret = kvm_s390_get_mem_control(kvm, attr);
1068                 break;
1069         case KVM_S390_VM_TOD:
1070                 ret = kvm_s390_get_tod(kvm, attr);
1071                 break;
1072         case KVM_S390_VM_CPU_MODEL:
1073                 ret = kvm_s390_get_cpu_model(kvm, attr);
1074                 break;
1075         default:
1076                 ret = -ENXIO;
1077                 break;
1078         }
1079
1080         return ret;
1081 }
1082
1083 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1084 {
1085         int ret;
1086
1087         switch (attr->group) {
1088         case KVM_S390_VM_MEM_CTRL:
1089                 switch (attr->attr) {
1090                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1091                 case KVM_S390_VM_MEM_CLR_CMMA:
1092                         ret = sclp.has_cmma ? 0 : -ENXIO;
1093                         break;
1094                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1095                         ret = 0;
1096                         break;
1097                 default:
1098                         ret = -ENXIO;
1099                         break;
1100                 }
1101                 break;
1102         case KVM_S390_VM_TOD:
1103                 switch (attr->attr) {
1104                 case KVM_S390_VM_TOD_LOW:
1105                 case KVM_S390_VM_TOD_HIGH:
1106                         ret = 0;
1107                         break;
1108                 default:
1109                         ret = -ENXIO;
1110                         break;
1111                 }
1112                 break;
1113         case KVM_S390_VM_CPU_MODEL:
1114                 switch (attr->attr) {
1115                 case KVM_S390_VM_CPU_PROCESSOR:
1116                 case KVM_S390_VM_CPU_MACHINE:
1117                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1118                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1119                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1120                         ret = 0;
1121                         break;
1122                 /* configuring subfunctions is not supported yet */
1123                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1124                 default:
1125                         ret = -ENXIO;
1126                         break;
1127                 }
1128                 break;
1129         case KVM_S390_VM_CRYPTO:
1130                 switch (attr->attr) {
1131                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1132                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1133                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1134                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1135                         ret = 0;
1136                         break;
1137                 default:
1138                         ret = -ENXIO;
1139                         break;
1140                 }
1141                 break;
1142         default:
1143                 ret = -ENXIO;
1144                 break;
1145         }
1146
1147         return ret;
1148 }
1149
1150 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1151 {
1152         uint8_t *keys;
1153         uint64_t hva;
1154         int i, r = 0;
1155
1156         if (args->flags != 0)
1157                 return -EINVAL;
1158
1159         /* Is this guest using storage keys? */
1160         if (!mm_use_skey(current->mm))
1161                 return KVM_S390_GET_SKEYS_NONE;
1162
1163         /* Enforce sane limit on memory allocation */
1164         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1165                 return -EINVAL;
1166
1167         keys = kmalloc_array(args->count, sizeof(uint8_t),
1168                              GFP_KERNEL | __GFP_NOWARN);
1169         if (!keys)
1170                 keys = vmalloc(sizeof(uint8_t) * args->count);
1171         if (!keys)
1172                 return -ENOMEM;
1173
1174         down_read(&current->mm->mmap_sem);
1175         for (i = 0; i < args->count; i++) {
1176                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1177                 if (kvm_is_error_hva(hva)) {
1178                         r = -EFAULT;
1179                         break;
1180                 }
1181
1182                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1183                 if (r)
1184                         break;
1185         }
1186         up_read(&current->mm->mmap_sem);
1187
1188         if (!r) {
1189                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1190                                  sizeof(uint8_t) * args->count);
1191                 if (r)
1192                         r = -EFAULT;
1193         }
1194
1195         kvfree(keys);
1196         return r;
1197 }
1198
1199 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1200 {
1201         uint8_t *keys;
1202         uint64_t hva;
1203         int i, r = 0;
1204
1205         if (args->flags != 0)
1206                 return -EINVAL;
1207
1208         /* Enforce sane limit on memory allocation */
1209         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1210                 return -EINVAL;
1211
1212         keys = kmalloc_array(args->count, sizeof(uint8_t),
1213                              GFP_KERNEL | __GFP_NOWARN);
1214         if (!keys)
1215                 keys = vmalloc(sizeof(uint8_t) * args->count);
1216         if (!keys)
1217                 return -ENOMEM;
1218
1219         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1220                            sizeof(uint8_t) * args->count);
1221         if (r) {
1222                 r = -EFAULT;
1223                 goto out;
1224         }
1225
1226         /* Enable storage key handling for the guest */
1227         r = s390_enable_skey();
1228         if (r)
1229                 goto out;
1230
1231         down_read(&current->mm->mmap_sem);
1232         for (i = 0; i < args->count; i++) {
1233                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1234                 if (kvm_is_error_hva(hva)) {
1235                         r = -EFAULT;
1236                         break;
1237                 }
1238
1239                 /* Lowest order bit is reserved */
1240                 if (keys[i] & 0x01) {
1241                         r = -EINVAL;
1242                         break;
1243                 }
1244
1245                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1246                 if (r)
1247                         break;
1248         }
1249         up_read(&current->mm->mmap_sem);
1250 out:
1251         kvfree(keys);
1252         return r;
1253 }
1254
1255 long kvm_arch_vm_ioctl(struct file *filp,
1256                        unsigned int ioctl, unsigned long arg)
1257 {
1258         struct kvm *kvm = filp->private_data;
1259         void __user *argp = (void __user *)arg;
1260         struct kvm_device_attr attr;
1261         int r;
1262
1263         switch (ioctl) {
1264         case KVM_S390_INTERRUPT: {
1265                 struct kvm_s390_interrupt s390int;
1266
1267                 r = -EFAULT;
1268                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1269                         break;
1270                 r = kvm_s390_inject_vm(kvm, &s390int);
1271                 break;
1272         }
1273         case KVM_ENABLE_CAP: {
1274                 struct kvm_enable_cap cap;
1275                 r = -EFAULT;
1276                 if (copy_from_user(&cap, argp, sizeof(cap)))
1277                         break;
1278                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1279                 break;
1280         }
1281         case KVM_CREATE_IRQCHIP: {
1282                 struct kvm_irq_routing_entry routing;
1283
1284                 r = -EINVAL;
1285                 if (kvm->arch.use_irqchip) {
1286                         /* Set up dummy routing. */
1287                         memset(&routing, 0, sizeof(routing));
1288                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1289                 }
1290                 break;
1291         }
1292         case KVM_SET_DEVICE_ATTR: {
1293                 r = -EFAULT;
1294                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1295                         break;
1296                 r = kvm_s390_vm_set_attr(kvm, &attr);
1297                 break;
1298         }
1299         case KVM_GET_DEVICE_ATTR: {
1300                 r = -EFAULT;
1301                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1302                         break;
1303                 r = kvm_s390_vm_get_attr(kvm, &attr);
1304                 break;
1305         }
1306         case KVM_HAS_DEVICE_ATTR: {
1307                 r = -EFAULT;
1308                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1309                         break;
1310                 r = kvm_s390_vm_has_attr(kvm, &attr);
1311                 break;
1312         }
1313         case KVM_S390_GET_SKEYS: {
1314                 struct kvm_s390_skeys args;
1315
1316                 r = -EFAULT;
1317                 if (copy_from_user(&args, argp,
1318                                    sizeof(struct kvm_s390_skeys)))
1319                         break;
1320                 r = kvm_s390_get_skeys(kvm, &args);
1321                 break;
1322         }
1323         case KVM_S390_SET_SKEYS: {
1324                 struct kvm_s390_skeys args;
1325
1326                 r = -EFAULT;
1327                 if (copy_from_user(&args, argp,
1328                                    sizeof(struct kvm_s390_skeys)))
1329                         break;
1330                 r = kvm_s390_set_skeys(kvm, &args);
1331                 break;
1332         }
1333         default:
1334                 r = -ENOTTY;
1335         }
1336
1337         return r;
1338 }
1339
1340 static int kvm_s390_query_ap_config(u8 *config)
1341 {
1342         u32 fcn_code = 0x04000000UL;
1343         u32 cc = 0;
1344
1345         memset(config, 0, 128);
1346         asm volatile(
1347                 "lgr 0,%1\n"
1348                 "lgr 2,%2\n"
1349                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1350                 "0: ipm %0\n"
1351                 "srl %0,28\n"
1352                 "1:\n"
1353                 EX_TABLE(0b, 1b)
1354                 : "+r" (cc)
1355                 : "r" (fcn_code), "r" (config)
1356                 : "cc", "0", "2", "memory"
1357         );
1358
1359         return cc;
1360 }
1361
1362 static int kvm_s390_apxa_installed(void)
1363 {
1364         u8 config[128];
1365         int cc;
1366
1367         if (test_facility(12)) {
1368                 cc = kvm_s390_query_ap_config(config);
1369
1370                 if (cc)
1371                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1372                 else
1373                         return config[0] & 0x40;
1374         }
1375
1376         return 0;
1377 }
1378
1379 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1380 {
1381         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1382
1383         if (kvm_s390_apxa_installed())
1384                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1385         else
1386                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1387 }
1388
1389 static u64 kvm_s390_get_initial_cpuid(void)
1390 {
1391         struct cpuid cpuid;
1392
1393         get_cpu_id(&cpuid);
1394         cpuid.version = 0xff;
1395         return *((u64 *) &cpuid);
1396 }
1397
1398 static void kvm_s390_crypto_init(struct kvm *kvm)
1399 {
1400         if (!test_kvm_facility(kvm, 76))
1401                 return;
1402
1403         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1404         kvm_s390_set_crycb_format(kvm);
1405
1406         /* Enable AES/DEA protected key functions by default */
1407         kvm->arch.crypto.aes_kw = 1;
1408         kvm->arch.crypto.dea_kw = 1;
1409         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1410                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1411         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1412                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1413 }
1414
1415 static void sca_dispose(struct kvm *kvm)
1416 {
1417         if (kvm->arch.use_esca)
1418                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1419         else
1420                 free_page((unsigned long)(kvm->arch.sca));
1421         kvm->arch.sca = NULL;
1422 }
1423
1424 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1425 {
1426         gfp_t alloc_flags = GFP_KERNEL;
1427         int i, rc;
1428         char debug_name[16];
1429         static unsigned long sca_offset;
1430
1431         rc = -EINVAL;
1432 #ifdef CONFIG_KVM_S390_UCONTROL
1433         if (type & ~KVM_VM_S390_UCONTROL)
1434                 goto out_err;
1435         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1436                 goto out_err;
1437 #else
1438         if (type)
1439                 goto out_err;
1440 #endif
1441
1442         rc = s390_enable_sie();
1443         if (rc)
1444                 goto out_err;
1445
1446         rc = -ENOMEM;
1447
1448         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1449
1450         kvm->arch.use_esca = 0; /* start with basic SCA */
1451         if (!sclp.has_64bscao)
1452                 alloc_flags |= GFP_DMA;
1453         rwlock_init(&kvm->arch.sca_lock);
1454         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1455         if (!kvm->arch.sca)
1456                 goto out_err;
1457         spin_lock(&kvm_lock);
1458         sca_offset += 16;
1459         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1460                 sca_offset = 0;
1461         kvm->arch.sca = (struct bsca_block *)
1462                         ((char *) kvm->arch.sca + sca_offset);
1463         spin_unlock(&kvm_lock);
1464
1465         sprintf(debug_name, "kvm-%u", current->pid);
1466
1467         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1468         if (!kvm->arch.dbf)
1469                 goto out_err;
1470
1471         kvm->arch.sie_page2 =
1472              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1473         if (!kvm->arch.sie_page2)
1474                 goto out_err;
1475
1476         /* Populate the facility mask initially. */
1477         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1478                sizeof(S390_lowcore.stfle_fac_list));
1479         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1480                 if (i < kvm_s390_fac_list_mask_size())
1481                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1482                 else
1483                         kvm->arch.model.fac_mask[i] = 0UL;
1484         }
1485
1486         /* Populate the facility list initially. */
1487         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1488         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1489                S390_ARCH_FAC_LIST_SIZE_BYTE);
1490
1491         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1492         set_kvm_facility(kvm->arch.model.fac_list, 74);
1493
1494         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1495         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1496
1497         kvm_s390_crypto_init(kvm);
1498
1499         spin_lock_init(&kvm->arch.float_int.lock);
1500         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1501                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1502         init_waitqueue_head(&kvm->arch.ipte_wq);
1503         mutex_init(&kvm->arch.ipte_mutex);
1504
1505         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1506         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1507
1508         if (type & KVM_VM_S390_UCONTROL) {
1509                 kvm->arch.gmap = NULL;
1510                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1511         } else {
1512                 if (sclp.hamax == U64_MAX)
1513                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1514                 else
1515                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1516                                                     sclp.hamax + 1);
1517                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1518                 if (!kvm->arch.gmap)
1519                         goto out_err;
1520                 kvm->arch.gmap->private = kvm;
1521                 kvm->arch.gmap->pfault_enabled = 0;
1522         }
1523
1524         kvm->arch.css_support = 0;
1525         kvm->arch.use_irqchip = 0;
1526         kvm->arch.epoch = 0;
1527
1528         spin_lock_init(&kvm->arch.start_stop_lock);
1529         kvm_s390_vsie_init(kvm);
1530         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1531
1532         return 0;
1533 out_err:
1534         free_page((unsigned long)kvm->arch.sie_page2);
1535         debug_unregister(kvm->arch.dbf);
1536         sca_dispose(kvm);
1537         KVM_EVENT(3, "creation of vm failed: %d", rc);
1538         return rc;
1539 }
1540
/* s390 does not provide per-vcpu debugfs entries: always false. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	const bool has_debugfs = false;

	return has_debugfs;
}
1545
/* Nothing to create for @vcpu; always reports success (0). */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	const int rc = 0;

	return rc;
}
1550
1551 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1552 {
1553         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1554         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1555         kvm_s390_clear_local_irqs(vcpu);
1556         kvm_clear_async_pf_completion_queue(vcpu);
1557         if (!kvm_is_ucontrol(vcpu->kvm))
1558                 sca_del_vcpu(vcpu);
1559
1560         if (kvm_is_ucontrol(vcpu->kvm))
1561                 gmap_remove(vcpu->arch.gmap);
1562
1563         if (vcpu->kvm->arch.use_cmma)
1564                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1565         free_page((unsigned long)(vcpu->arch.sie_block));
1566
1567         kvm_vcpu_uninit(vcpu);
1568         kmem_cache_free(kvm_vcpu_cache, vcpu);
1569 }
1570
1571 static void kvm_free_vcpus(struct kvm *kvm)
1572 {
1573         unsigned int i;
1574         struct kvm_vcpu *vcpu;
1575
1576         kvm_for_each_vcpu(i, vcpu, kvm)
1577                 kvm_arch_vcpu_destroy(vcpu);
1578
1579         mutex_lock(&kvm->lock);
1580         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1581                 kvm->vcpus[i] = NULL;
1582
1583         atomic_set(&kvm->online_vcpus, 0);
1584         mutex_unlock(&kvm->lock);
1585 }
1586
1587 void kvm_arch_destroy_vm(struct kvm *kvm)
1588 {
1589         kvm_free_vcpus(kvm);
1590         sca_dispose(kvm);
1591         debug_unregister(kvm->arch.dbf);
1592         free_page((unsigned long)kvm->arch.sie_page2);
1593         if (!kvm_is_ucontrol(kvm))
1594                 gmap_remove(kvm->arch.gmap);
1595         kvm_s390_destroy_adapters(kvm);
1596         kvm_s390_clear_float_irqs(kvm);
1597         kvm_s390_vsie_destroy(kvm);
1598         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1599 }
1600
1601 /* Section: vcpu related */
1602 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1603 {
1604         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1605         if (!vcpu->arch.gmap)
1606                 return -ENOMEM;
1607         vcpu->arch.gmap->private = vcpu->kvm;
1608
1609         return 0;
1610 }
1611
1612 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1613 {
1614         if (!kvm_s390_use_sca_entries())
1615                 return;
1616         read_lock(&vcpu->kvm->arch.sca_lock);
1617         if (vcpu->kvm->arch.use_esca) {
1618                 struct esca_block *sca = vcpu->kvm->arch.sca;
1619
1620                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1621                 sca->cpu[vcpu->vcpu_id].sda = 0;
1622         } else {
1623                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1624
1625                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1626                 sca->cpu[vcpu->vcpu_id].sda = 0;
1627         }
1628         read_unlock(&vcpu->kvm->arch.sca_lock);
1629 }
1630
1631 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1632 {
1633         if (!kvm_s390_use_sca_entries()) {
1634                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1635
1636                 /* we still need the basic sca for the ipte control */
1637                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1638                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1639         }
1640         read_lock(&vcpu->kvm->arch.sca_lock);
1641         if (vcpu->kvm->arch.use_esca) {
1642                 struct esca_block *sca = vcpu->kvm->arch.sca;
1643
1644                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1645                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1646                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1647                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1648                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1649         } else {
1650                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1651
1652                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1653                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1654                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1655                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1656         }
1657         read_unlock(&vcpu->kvm->arch.sca_lock);
1658 }
1659
1660 /* Basic SCA to Extended SCA data copy routines */
1661 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1662 {
1663         d->sda = s->sda;
1664         d->sigp_ctrl.c = s->sigp_ctrl.c;
1665         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1666 }
1667
1668 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1669 {
1670         int i;
1671
1672         d->ipte_control = s->ipte_control;
1673         d->mcn[0] = s->mcn;
1674         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1675                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1676 }
1677
1678 static int sca_switch_to_extended(struct kvm *kvm)
1679 {
1680         struct bsca_block *old_sca = kvm->arch.sca;
1681         struct esca_block *new_sca;
1682         struct kvm_vcpu *vcpu;
1683         unsigned int vcpu_idx;
1684         u32 scaol, scaoh;
1685
1686         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1687         if (!new_sca)
1688                 return -ENOMEM;
1689
1690         scaoh = (u32)((u64)(new_sca) >> 32);
1691         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1692
1693         kvm_s390_vcpu_block_all(kvm);
1694         write_lock(&kvm->arch.sca_lock);
1695
1696         sca_copy_b_to_e(new_sca, old_sca);
1697
1698         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1699                 vcpu->arch.sie_block->scaoh = scaoh;
1700                 vcpu->arch.sie_block->scaol = scaol;
1701                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1702         }
1703         kvm->arch.sca = new_sca;
1704         kvm->arch.use_esca = 1;
1705
1706         write_unlock(&kvm->arch.sca_lock);
1707         kvm_s390_vcpu_unblock_all(kvm);
1708
1709         free_page((unsigned long)old_sca);
1710
1711         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1712                  old_sca, kvm->arch.sca);
1713         return 0;
1714 }
1715
1716 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1717 {
1718         int rc;
1719
1720         if (!kvm_s390_use_sca_entries()) {
1721                 if (id < KVM_MAX_VCPUS)
1722                         return true;
1723                 return false;
1724         }
1725         if (id < KVM_S390_BSCA_CPU_SLOTS)
1726                 return true;
1727         if (!sclp.has_esca || !sclp.has_64bscao)
1728                 return false;
1729
1730         mutex_lock(&kvm->lock);
1731         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1732         mutex_unlock(&kvm->lock);
1733
1734         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1735 }
1736
1737 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1738 {
1739         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1740         kvm_clear_async_pf_completion_queue(vcpu);
1741         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1742                                     KVM_SYNC_GPRS |
1743                                     KVM_SYNC_ACRS |
1744                                     KVM_SYNC_CRS |
1745                                     KVM_SYNC_ARCH0 |
1746                                     KVM_SYNC_PFAULT;
1747         kvm_s390_set_prefix(vcpu, 0);
1748         if (test_kvm_facility(vcpu->kvm, 64))
1749                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1750         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1751          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1752          */
1753         if (MACHINE_HAS_VX)
1754                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1755         else
1756                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1757
1758         if (kvm_is_ucontrol(vcpu->kvm))
1759                 return __kvm_ucontrol_vcpu_init(vcpu);
1760
1761         return 0;
1762 }
1763
1764 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1765 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1766 {
1767         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1768         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1769         vcpu->arch.cputm_start = get_tod_clock_fast();
1770         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1771 }
1772
1773 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1774 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1775 {
1776         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1777         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1778         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1779         vcpu->arch.cputm_start = 0;
1780         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1781 }
1782
1783 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1784 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1785 {
1786         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1787         vcpu->arch.cputm_enabled = true;
1788         __start_cpu_timer_accounting(vcpu);
1789 }
1790
1791 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1792 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1793 {
1794         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1795         __stop_cpu_timer_accounting(vcpu);
1796         vcpu->arch.cputm_enabled = false;
1797 }
1798
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
1805
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
1812
1813 /* set the cpu timer - may only be called from the VCPU thread itself */
1814 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1815 {
1816         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1817         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1818         if (vcpu->arch.cputm_enabled)
1819                 vcpu->arch.cputm_start = get_tod_clock_fast();
1820         vcpu->arch.sie_block->cputm = cputm;
1821         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1822         preempt_enable();
1823 }
1824
1825 /* update and get the cpu timer - can also be called from other VCPU threads */
1826 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1827 {
1828         unsigned int seq;
1829         __u64 value;
1830
1831         if (unlikely(!vcpu->arch.cputm_enabled))
1832                 return vcpu->arch.sie_block->cputm;
1833
1834         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1835         do {
1836                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1837                 /*
1838                  * If the writer would ever execute a read in the critical
1839                  * section, e.g. in irq context, we have a deadlock.
1840                  */
1841                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1842                 value = vcpu->arch.sie_block->cputm;
1843                 /* if cputm_start is 0, accounting is being started/stopped */
1844                 if (likely(vcpu->arch.cputm_start))
1845                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1846         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1847         preempt_enable();
1848         return value;
1849 }
1850
1851 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1852 {
1853
1854         gmap_enable(vcpu->arch.enabled_gmap);
1855         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1856         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1857                 __start_cpu_timer_accounting(vcpu);
1858         vcpu->cpu = cpu;
1859 }
1860
1861 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1862 {
1863         vcpu->cpu = -1;
1864         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1865                 __stop_cpu_timer_accounting(vcpu);
1866         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1867         vcpu->arch.enabled_gmap = gmap_get_enabled();
1868         gmap_disable(vcpu->arch.enabled_gmap);
1869
1870 }
1871
1872 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1873 {
1874         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1875         vcpu->arch.sie_block->gpsw.mask = 0UL;
1876         vcpu->arch.sie_block->gpsw.addr = 0UL;
1877         kvm_s390_set_prefix(vcpu, 0);
1878         kvm_s390_set_cpu_timer(vcpu, 0);
1879         vcpu->arch.sie_block->ckc       = 0UL;
1880         vcpu->arch.sie_block->todpr     = 0;
1881         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1882         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1883         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1884         /* make sure the new fpc will be lazily loaded */
1885         save_fpu_regs();
1886         current->thread.fpu.fpc = 0;
1887         vcpu->arch.sie_block->gbea = 1;
1888         vcpu->arch.sie_block->pp = 0;
1889         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1890         kvm_clear_async_pf_completion_queue(vcpu);
1891         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1892                 kvm_s390_vcpu_stop(vcpu);
1893         kvm_s390_clear_local_irqs(vcpu);
1894 }
1895
1896 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1897 {
1898         mutex_lock(&vcpu->kvm->lock);
1899         preempt_disable();
1900         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1901         preempt_enable();
1902         mutex_unlock(&vcpu->kvm->lock);
1903         if (!kvm_is_ucontrol(vcpu->kvm)) {
1904                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1905                 sca_add_vcpu(vcpu);
1906         }
1907         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1908                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1909         /* make vcpu_load load the right gmap on the first trigger */
1910         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1911 }
1912
1913 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1914 {
1915         if (!test_kvm_facility(vcpu->kvm, 76))
1916                 return;
1917
1918         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1919
1920         if (vcpu->kvm->arch.crypto.aes_kw)
1921                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1922         if (vcpu->kvm->arch.crypto.dea_kw)
1923                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1924
1925         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1926 }
1927
1928 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1929 {
1930         free_page(vcpu->arch.sie_block->cbrlo);
1931         vcpu->arch.sie_block->cbrlo = 0;
1932 }
1933
1934 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1935 {
1936         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1937         if (!vcpu->arch.sie_block->cbrlo)
1938                 return -ENOMEM;
1939
1940         vcpu->arch.sie_block->ecb2 |= 0x80;
1941         vcpu->arch.sie_block->ecb2 &= ~0x08;
1942         return 0;
1943 }
1944
1945 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1946 {
1947         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1948
1949         vcpu->arch.sie_block->ibc = model->ibc;
1950         if (test_kvm_facility(vcpu->kvm, 7))
1951                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1952 }
1953
1954 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1955 {
1956         int rc = 0;
1957
1958         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1959                                                     CPUSTAT_SM |
1960                                                     CPUSTAT_STOPPED);
1961
1962         if (test_kvm_facility(vcpu->kvm, 78))
1963                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1964         else if (test_kvm_facility(vcpu->kvm, 8))
1965                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1966
1967         kvm_s390_vcpu_setup_model(vcpu);
1968
1969         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1970         if (MACHINE_HAS_ESOP)
1971                 vcpu->arch.sie_block->ecb |= 0x02;
1972         if (test_kvm_facility(vcpu->kvm, 9))
1973                 vcpu->arch.sie_block->ecb |= 0x04;
1974         if (test_kvm_facility(vcpu->kvm, 73))
1975                 vcpu->arch.sie_block->ecb |= 0x10;
1976
1977         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1978                 vcpu->arch.sie_block->ecb2 |= 0x08;
1979         if (test_kvm_facility(vcpu->kvm, 130))
1980                 vcpu->arch.sie_block->ecb2 |= 0x20;
1981         vcpu->arch.sie_block->eca = 0x1002000U;
1982         if (sclp.has_cei)
1983                 vcpu->arch.sie_block->eca |= 0x80000000U;
1984         if (sclp.has_ib)
1985                 vcpu->arch.sie_block->eca |= 0x40000000U;
1986         if (sclp.has_siif)
1987                 vcpu->arch.sie_block->eca |= 1;
1988         if (sclp.has_sigpif)
1989                 vcpu->arch.sie_block->eca |= 0x10000000U;
1990         if (test_kvm_facility(vcpu->kvm, 129)) {
1991                 vcpu->arch.sie_block->eca |= 0x00020000;
1992                 vcpu->arch.sie_block->ecd |= 0x20000000;
1993         }
1994         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1995         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1996
1997         if (vcpu->kvm->arch.use_cmma) {
1998                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1999                 if (rc)
2000                         return rc;
2001         }
2002         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2003         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2004
2005         kvm_s390_vcpu_crypto_setup(vcpu);
2006
2007         return rc;
2008 }
2009
2010 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2011                                       unsigned int id)
2012 {
2013         struct kvm_vcpu *vcpu;
2014         struct sie_page *sie_page;
2015         int rc = -EINVAL;
2016
2017         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2018                 goto out;
2019
2020         rc = -ENOMEM;
2021
2022         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2023         if (!vcpu)
2024                 goto out;
2025
2026         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2027         if (!sie_page)
2028                 goto out_free_cpu;
2029
2030         vcpu->arch.sie_block = &sie_page->sie_block;
2031         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2032
2033         /* the real guest size will always be smaller than msl */
2034         vcpu->arch.sie_block->mso = 0;
2035         vcpu->arch.sie_block->msl = sclp.hamax;
2036
2037         vcpu->arch.sie_block->icpua = id;
2038         spin_lock_init(&vcpu->arch.local_int.lock);
2039         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2040         vcpu->arch.local_int.wq = &vcpu->wq;
2041         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2042         seqcount_init(&vcpu->arch.cputm_seqcount);
2043
2044         rc = kvm_vcpu_init(vcpu, kvm, id);
2045         if (rc)
2046                 goto out_free_sie_block;
2047         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2048                  vcpu->arch.sie_block);
2049         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2050
2051         return vcpu;
2052 out_free_sie_block:
2053         free_page((unsigned long)(vcpu->arch.sie_block));
2054 out_free_cpu:
2055         kmem_cache_free(kvm_vcpu_cache, vcpu);
2056 out:
2057         return ERR_PTR(rc);
2058 }
2059
/* Report the result of kvm_s390_vcpu_has_irq() for this vcpu. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
2064
2065 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2066 {
2067         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2068         exit_sie(vcpu);
2069 }
2070
2071 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2072 {
2073         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2074 }
2075
2076 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2077 {
2078         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2079         exit_sie(vcpu);
2080 }
2081
2082 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2083 {
2084         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2085 }
2086
2087 /*
2088  * Kick a guest cpu out of SIE and wait until SIE is not running.
2089  * If the CPU is not running (e.g. waiting as idle) the function will
2090  * return immediately. */
2091 void exit_sie(struct kvm_vcpu *vcpu)
2092 {
2093         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2094         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2095                 cpu_relax();
2096 }
2097
/*
 * Kick a guest cpu out of SIE to process a request synchronously
 *
 * The request is queued first, then the vcpu is forced out of SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);

	/* sets PROG_REQUEST and waits for SIE to be left */
	kvm_s390_vcpu_request(vcpu);
}
2104
2105 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2106                               unsigned long end)
2107 {
2108         struct kvm *kvm = gmap->private;
2109         struct kvm_vcpu *vcpu;
2110         unsigned long prefix;
2111         int i;
2112
2113         if (gmap_is_shadow(gmap))
2114                 return;
2115         if (start >= 1UL << 31)
2116                 /* We are only interested in prefix pages */
2117                 return;
2118         kvm_for_each_vcpu(i, vcpu, kvm) {
2119                 /* match against both prefix pages */
2120                 prefix = kvm_s390_get_prefix(vcpu);
2121                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2122                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2123                                    start, end);
2124                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2125                 }
2126         }
2127 }
2128
/*
 * Exists only to satisfy the reference from kvm common code; it is
 * never expected to be called and triggers BUG() if it is.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();

	return 0;
}
2135
2136 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2137                                            struct kvm_one_reg *reg)
2138 {
2139         int r = -EINVAL;
2140
2141         switch (reg->id) {
2142         case KVM_REG_S390_TODPR:
2143                 r = put_user(vcpu->arch.sie_block->todpr,
2144                              (u32 __user *)reg->addr);
2145                 break;
2146         case KVM_REG_S390_EPOCHDIFF:
2147                 r = put_user(vcpu->arch.sie_block->epoch,
2148                              (u64 __user *)reg->addr);
2149                 break;
2150         case KVM_REG_S390_CPU_TIMER:
2151                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2152                              (u64 __user *)reg->addr);
2153                 break;
2154         case KVM_REG_S390_CLOCK_COMP:
2155                 r = put_user(vcpu->arch.sie_block->ckc,
2156                              (u64 __user *)reg->addr);
2157                 break;
2158         case KVM_REG_S390_PFTOKEN:
2159                 r = put_user(vcpu->arch.pfault_token,
2160                              (u64 __user *)reg->addr);
2161                 break;
2162         case KVM_REG_S390_PFCOMPARE:
2163                 r = put_user(vcpu->arch.pfault_compare,
2164                              (u64 __user *)reg->addr);
2165                 break;
2166         case KVM_REG_S390_PFSELECT:
2167                 r = put_user(vcpu->arch.pfault_select,
2168                              (u64 __user *)reg->addr);
2169                 break;
2170         case KVM_REG_S390_PP:
2171                 r = put_user(vcpu->arch.sie_block->pp,
2172                              (u64 __user *)reg->addr);
2173                 break;
2174         case KVM_REG_S390_GBEA:
2175                 r = put_user(vcpu->arch.sie_block->gbea,
2176                              (u64 __user *)reg->addr);
2177                 break;
2178         default:
2179                 break;
2180         }
2181
2182         return r;
2183 }
2184
2185 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2186                                            struct kvm_one_reg *reg)
2187 {
2188         int r = -EINVAL;
2189         __u64 val;
2190
2191         switch (reg->id) {
2192         case KVM_REG_S390_TODPR:
2193                 r = get_user(vcpu->arch.sie_block->todpr,
2194                              (u32 __user *)reg->addr);
2195                 break;
2196         case KVM_REG_S390_EPOCHDIFF:
2197                 r = get_user(vcpu->arch.sie_block->epoch,
2198                              (u64 __user *)reg->addr);
2199                 break;
2200         case KVM_REG_S390_CPU_TIMER:
2201                 r = get_user(val, (u64 __user *)reg->addr);
2202                 if (!r)
2203                         kvm_s390_set_cpu_timer(vcpu, val);
2204                 break;
2205         case KVM_REG_S390_CLOCK_COMP:
2206                 r = get_user(vcpu->arch.sie_block->ckc,
2207                              (u64 __user *)reg->addr);
2208                 break;
2209         case KVM_REG_S390_PFTOKEN:
2210                 r = get_user(vcpu->arch.pfault_token,
2211                              (u64 __user *)reg->addr);
2212                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2213                         kvm_clear_async_pf_completion_queue(vcpu);
2214                 break;
2215         case KVM_REG_S390_PFCOMPARE:
2216                 r = get_user(vcpu->arch.pfault_compare,
2217                              (u64 __user *)reg->addr);
2218                 break;
2219         case KVM_REG_S390_PFSELECT:
2220                 r = get_user(vcpu->arch.pfault_select,
2221                              (u64 __user *)reg->addr);
2222                 break;
2223         case KVM_REG_S390_PP:
2224                 r = get_user(vcpu->arch.sie_block->pp,
2225                              (u64 __user *)reg->addr);
2226                 break;
2227         case KVM_REG_S390_GBEA:
2228                 r = get_user(vcpu->arch.sie_block->gbea,
2229                              (u64 __user *)reg->addr);
2230                 break;
2231         default:
2232                 break;
2233         }
2234
2235         return r;
2236 }
2237
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);

	return 0;
}
2243
2244 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2245 {
2246         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2247         return 0;
2248 }
2249
2250 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2251 {
2252         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2253         return 0;
2254 }
2255
2256 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2257                                   struct kvm_sregs *sregs)
2258 {
2259         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2260         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2261         return 0;
2262 }
2263
2264 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2265                                   struct kvm_sregs *sregs)
2266 {
2267         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2268         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2269         return 0;
2270 }
2271
2272 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2273 {
2274         if (test_fp_ctl(fpu->fpc))
2275                 return -EINVAL;
2276         vcpu->run->s.regs.fpc = fpu->fpc;
2277         if (MACHINE_HAS_VX)
2278                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2279                                  (freg_t *) fpu->fprs);
2280         else
2281                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2282         return 0;
2283 }
2284
2285 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2286 {
2287         /* make sure we have the latest values */
2288         save_fpu_regs();
2289         if (MACHINE_HAS_VX)
2290                 convert_vx_to_fp((freg_t *) fpu->fprs,
2291                                  (__vector128 *) vcpu->run->s.regs.vrs);
2292         else
2293                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2294         fpu->fpc = vcpu->run->s.regs.fpc;
2295         return 0;
2296 }
2297
2298 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2299 {
2300         int rc = 0;
2301
2302         if (!is_vcpu_stopped(vcpu))
2303                 rc = -EBUSY;
2304         else {
2305                 vcpu->run->psw_mask = psw.mask;
2306                 vcpu->run->psw_addr = psw.addr;
2307         }
2308         return rc;
2309 }
2310
2311 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2312                                   struct kvm_translation *tr)
2313 {
2314         return -EINVAL; /* not implemented yet */
2315 }
2316
2317 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2318                               KVM_GUESTDBG_USE_HW_BP | \
2319                               KVM_GUESTDBG_ENABLE)
2320
2321 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2322                                         struct kvm_guest_debug *dbg)
2323 {
2324         int rc = 0;
2325
2326         vcpu->guest_debug = 0;
2327         kvm_s390_clear_bp_data(vcpu);
2328
2329         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2330                 return -EINVAL;
2331         if (!sclp.has_gpere)
2332                 return -EINVAL;
2333
2334         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2335                 vcpu->guest_debug = dbg->control;
2336                 /* enforce guest PER */
2337                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2338
2339                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2340                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2341         } else {
2342                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2343                 vcpu->arch.guestdbg.last_bp = 0;
2344         }
2345
2346         if (rc) {
2347                 vcpu->guest_debug = 0;
2348                 kvm_s390_clear_bp_data(vcpu);
2349                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2350         }
2351
2352         return rc;
2353 }
2354
2355 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2356                                     struct kvm_mp_state *mp_state)
2357 {
2358         /* CHECK_STOP and LOAD are not supported yet */
2359         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2360                                        KVM_MP_STATE_OPERATING;
2361 }
2362
2363 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2364                                     struct kvm_mp_state *mp_state)
2365 {
2366         int rc = 0;
2367
2368         /* user space knows about this interface - let it control the state */
2369         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2370
2371         switch (mp_state->mp_state) {
2372         case KVM_MP_STATE_STOPPED:
2373                 kvm_s390_vcpu_stop(vcpu);
2374                 break;
2375         case KVM_MP_STATE_OPERATING:
2376                 kvm_s390_vcpu_start(vcpu);
2377                 break;
2378         case KVM_MP_STATE_LOAD:
2379         case KVM_MP_STATE_CHECK_STOP:
2380                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2381         default:
2382                 rc = -ENXIO;
2383         }
2384
2385         return rc;
2386 }
2387
2388 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2389 {
2390         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2391 }
2392
2393 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2394 {
2395 retry:
2396         kvm_s390_vcpu_request_handled(vcpu);
2397         if (!vcpu->requests)
2398                 return 0;
2399         /*
2400          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2401          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2402          * This ensures that the ipte instruction for this request has
2403          * already finished. We might race against a second unmapper that
2404          * wants to set the blocking bit. Lets just retry the request loop.
2405          */
2406         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2407                 int rc;
2408                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2409                                           kvm_s390_get_prefix(vcpu),
2410                                           PAGE_SIZE * 2, PROT_WRITE);
2411                 if (rc) {
2412                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2413                         return rc;
2414                 }
2415                 goto retry;
2416         }
2417
2418         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2419                 vcpu->arch.sie_block->ihcpu = 0xffff;
2420                 goto retry;
2421         }
2422
2423         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2424                 if (!ibs_enabled(vcpu)) {
2425                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2426                         atomic_or(CPUSTAT_IBS,
2427                                         &vcpu->arch.sie_block->cpuflags);
2428                 }
2429                 goto retry;
2430         }
2431
2432         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2433                 if (ibs_enabled(vcpu)) {
2434                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2435                         atomic_andnot(CPUSTAT_IBS,
2436                                           &vcpu->arch.sie_block->cpuflags);
2437                 }
2438                 goto retry;
2439         }
2440
2441         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2442                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2443                 goto retry;
2444         }
2445
2446         /* nothing to do, just clear the request */
2447         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2448
2449         return 0;
2450 }
2451
2452 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2453 {
2454         struct kvm_vcpu *vcpu;
2455         int i;
2456
2457         mutex_lock(&kvm->lock);
2458         preempt_disable();
2459         kvm->arch.epoch = tod - get_tod_clock();
2460         kvm_s390_vcpu_block_all(kvm);
2461         kvm_for_each_vcpu(i, vcpu, kvm)
2462                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2463         kvm_s390_vcpu_unblock_all(kvm);
2464         preempt_enable();
2465         mutex_unlock(&kvm->lock);
2466 }
2467
2468 /**
2469  * kvm_arch_fault_in_page - fault-in guest page if necessary
2470  * @vcpu: The corresponding virtual cpu
2471  * @gpa: Guest physical address
2472  * @writable: Whether the page should be writable or not
2473  *
2474  * Make sure that a guest page has been faulted-in on the host.
2475  *
2476  * Return: Zero on success, negative error code otherwise.
2477  */
2478 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2479 {
2480         return gmap_fault(vcpu->arch.gmap, gpa,
2481                           writable ? FAULT_FLAG_WRITE : 0);
2482 }
2483
2484 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2485                                       unsigned long token)
2486 {
2487         struct kvm_s390_interrupt inti;
2488         struct kvm_s390_irq irq;
2489
2490         if (start_token) {
2491                 irq.u.ext.ext_params2 = token;
2492                 irq.type = KVM_S390_INT_PFAULT_INIT;
2493                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2494         } else {
2495                 inti.type = KVM_S390_INT_PFAULT_DONE;
2496                 inti.parm64 = token;
2497                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2498         }
2499 }
2500
2501 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2502                                      struct kvm_async_pf *work)
2503 {
2504         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2505         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2506 }
2507
2508 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2509                                  struct kvm_async_pf *work)
2510 {
2511         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2512         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2513 }
2514
/*
 * Async-pf hook called when a page is ready. Intentionally empty:
 * s390 will always inject the page directly.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
2520
2521 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2522 {
2523         /*
2524          * s390 will always inject the page directly,
2525          * but we still want check_async_completion to cleanup
2526          */
2527         return true;
2528 }
2529
2530 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2531 {
2532         hva_t hva;
2533         struct kvm_arch_async_pf arch;
2534         int rc;
2535
2536         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2537                 return 0;
2538         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2539             vcpu->arch.pfault_compare)
2540                 return 0;
2541         if (psw_extint_disabled(vcpu))
2542                 return 0;
2543         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2544                 return 0;
2545         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2546                 return 0;
2547         if (!vcpu->arch.gmap->pfault_enabled)
2548                 return 0;
2549
2550         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2551         hva += current->thread.gmap_addr & ~PAGE_MASK;
2552         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2553                 return 0;
2554
2555         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2556         return rc;
2557 }
2558
2559 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2560 {
2561         int rc, cpuflags;
2562
2563         /*
2564          * On s390 notifications for arriving pages will be delivered directly
2565          * to the guest but the house keeping for completed pfaults is
2566          * handled outside the worker.
2567          */
2568         kvm_check_async_pf_completion(vcpu);
2569
2570         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2571         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2572
2573         if (need_resched())
2574                 schedule();
2575
2576         if (test_cpu_flag(CIF_MCCK_PENDING))
2577                 s390_handle_mcck();
2578
2579         if (!kvm_is_ucontrol(vcpu->kvm)) {
2580                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2581                 if (rc)
2582                         return rc;
2583         }
2584
2585         rc = kvm_s390_handle_requests(vcpu);
2586         if (rc)
2587                 return rc;
2588
2589         if (guestdbg_enabled(vcpu)) {
2590                 kvm_s390_backup_guest_per_regs(vcpu);
2591                 kvm_s390_patch_guest_per_regs(vcpu);
2592         }
2593
2594         vcpu->arch.sie_block->icptcode = 0;
2595         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2596         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2597         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2598
2599         return 0;
2600 }
2601
2602 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2603 {
2604         struct kvm_s390_pgm_info pgm_info = {
2605                 .code = PGM_ADDRESSING,
2606         };
2607         u8 opcode, ilen;
2608         int rc;
2609
2610         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2611         trace_kvm_s390_sie_fault(vcpu);
2612
2613         /*
2614          * We want to inject an addressing exception, which is defined as a
2615          * suppressing or terminating exception. However, since we came here
2616          * by a DAT access exception, the PSW still points to the faulting
2617          * instruction since DAT exceptions are nullifying. So we've got
2618          * to look up the current opcode to get the length of the instruction
2619          * to be able to forward the PSW.
2620          */
2621         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
2622         ilen = insn_length(opcode);
2623         if (rc < 0) {
2624                 return rc;
2625         } else if (rc) {
2626                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2627                  * Forward by arbitrary ilc, injection will take care of
2628                  * nullification if necessary.
2629                  */
2630                 pgm_info = vcpu->arch.pgm;
2631                 ilen = 4;
2632         }
2633         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2634         kvm_s390_forward_psw(vcpu, ilen);
2635         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2636 }
2637
2638 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2639 {
2640         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2641                    vcpu->arch.sie_block->icptcode);
2642         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2643
2644         if (guestdbg_enabled(vcpu))
2645                 kvm_s390_restore_guest_per_regs(vcpu);
2646
2647         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2648         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2649
2650         if (vcpu->arch.sie_block->icptcode > 0) {
2651                 int rc = kvm_handle_sie_intercept(vcpu);
2652
2653                 if (rc != -EOPNOTSUPP)
2654                         return rc;
2655                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2656                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2657                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2658                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2659                 return -EREMOTE;
2660         } else if (exit_reason != -EFAULT) {
2661                 vcpu->stat.exit_null++;
2662                 return 0;
2663         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2664                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2665                 vcpu->run->s390_ucontrol.trans_exc_code =
2666                                                 current->thread.gmap_addr;
2667                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2668                 return -EREMOTE;
2669         } else if (current->thread.gmap_pfault) {
2670                 trace_kvm_s390_major_guest_pfault(vcpu);
2671                 current->thread.gmap_pfault = 0;
2672                 if (kvm_arch_setup_async_pf(vcpu))
2673                         return 0;
2674                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2675         }
2676         return vcpu_post_run_fault_in_sie(vcpu);
2677 }
2678
2679 static int __vcpu_run(struct kvm_vcpu *vcpu)
2680 {
2681         int rc, exit_reason;
2682
2683         /*
2684          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2685          * ning the guest), so that memslots (and other stuff) are protected
2686          */
2687         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2688
2689         do {
2690                 rc = vcpu_pre_run(vcpu);
2691                 if (rc)
2692                         break;
2693
2694                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2695                 /*
2696                  * As PF_VCPU will be used in fault handler, between
2697                  * guest_enter and guest_exit should be no uaccess.
2698                  */
2699                 local_irq_disable();
2700                 guest_enter_irqoff();
2701                 __disable_cpu_timer_accounting(vcpu);
2702                 local_irq_enable();
2703                 exit_reason = sie64a(vcpu->arch.sie_block,
2704                                      vcpu->run->s.regs.gprs);
2705                 local_irq_disable();
2706                 __enable_cpu_timer_accounting(vcpu);
2707                 guest_exit_irqoff();
2708                 local_irq_enable();
2709                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2710
2711                 rc = vcpu_post_run(vcpu, exit_reason);
2712         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2713
2714         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2715         return rc;
2716 }
2717
2718 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2719 {
2720         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2721         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2722         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2723                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2724         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2725                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2726                 /* some control register changes require a tlb flush */
2727                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2728         }
2729         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2730                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2731                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2732                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2733                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2734                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2735         }
2736         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2737                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2738                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2739                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2740                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2741                         kvm_clear_async_pf_completion_queue(vcpu);
2742         }
2743         /*
2744          * If userspace sets the riccb (e.g. after migration) to a valid state,
2745          * we should enable RI here instead of doing the lazy enablement.
2746          */
2747         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2748             test_kvm_facility(vcpu->kvm, 64)) {
2749                 struct runtime_instr_cb *riccb =
2750                         (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2751
2752                 if (riccb->valid)
2753                         vcpu->arch.sie_block->ecb3 |= 0x01;
2754         }
2755         save_access_regs(vcpu->arch.host_acrs);
2756         restore_access_regs(vcpu->run->s.regs.acrs);
2757         /* save host (userspace) fprs/vrs */
2758         save_fpu_regs();
2759         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2760         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2761         if (MACHINE_HAS_VX)
2762                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2763         else
2764                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2765         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2766         if (test_fp_ctl(current->thread.fpu.fpc))
2767                 /* User space provided an invalid FPC, let's clear it */
2768                 current->thread.fpu.fpc = 0;
2769
2770         kvm_run->kvm_dirty_regs = 0;
2771 }
2772
2773 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2774 {
2775         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2776         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2777         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2778         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2779         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2780         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2781         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2782         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2783         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2784         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2785         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2786         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2787         save_access_regs(vcpu->run->s.regs.acrs);
2788         restore_access_regs(vcpu->arch.host_acrs);
2789         /* Save guest register state */
2790         save_fpu_regs();
2791         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2792         /* Restore will be done lazily at return */
2793         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2794         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2795
2796 }
2797
2798 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2799 {
2800         int rc;
2801         sigset_t sigsaved;
2802
2803         if (kvm_run->immediate_exit)
2804                 return -EINTR;
2805
2806         if (guestdbg_exit_pending(vcpu)) {
2807                 kvm_s390_prepare_debug_exit(vcpu);
2808                 return 0;
2809         }
2810
2811         if (vcpu->sigset_active)
2812                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2813
2814         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2815                 kvm_s390_vcpu_start(vcpu);
2816         } else if (is_vcpu_stopped(vcpu)) {
2817                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2818                                    vcpu->vcpu_id);
2819                 return -EINVAL;
2820         }
2821
2822         sync_regs(vcpu, kvm_run);
2823         enable_cpu_timer_accounting(vcpu);
2824
2825         might_fault();
2826         rc = __vcpu_run(vcpu);
2827
2828         if (signal_pending(current) && !rc) {
2829                 kvm_run->exit_reason = KVM_EXIT_INTR;
2830                 rc = -EINTR;
2831         }
2832
2833         if (guestdbg_exit_pending(vcpu) && !rc)  {
2834                 kvm_s390_prepare_debug_exit(vcpu);
2835                 rc = 0;
2836         }
2837
2838         if (rc == -EREMOTE) {
2839                 /* userspace support is needed, kvm_run has been prepared */
2840                 rc = 0;
2841         }
2842
2843         disable_cpu_timer_accounting(vcpu);
2844         store_regs(vcpu, kvm_run);
2845
2846         if (vcpu->sigset_active)
2847                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2848
2849         vcpu->stat.exit_userspace++;
2850         return rc;
2851 }
2852
2853 /*
2854  * store status at address
2855  * we use have two special cases:
2856  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2857  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2858  */
2859 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2860 {
2861         unsigned char archmode = 1;
2862         freg_t fprs[NUM_FPRS];
2863         unsigned int px;
2864         u64 clkcomp, cputm;
2865         int rc;
2866
2867         px = kvm_s390_get_prefix(vcpu);
2868         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2869                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2870                         return -EFAULT;
2871                 gpa = 0;
2872         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2873                 if (write_guest_real(vcpu, 163, &archmode, 1))
2874                         return -EFAULT;
2875                 gpa = px;
2876         } else
2877                 gpa -= __LC_FPREGS_SAVE_AREA;
2878
2879         /* manually convert vector registers if necessary */
2880         if (MACHINE_HAS_VX) {
2881                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2882                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2883                                      fprs, 128);
2884         } else {
2885                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2886                                      vcpu->run->s.regs.fprs, 128);
2887         }
2888         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2889                               vcpu->run->s.regs.gprs, 128);
2890         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2891                               &vcpu->arch.sie_block->gpsw, 16);
2892         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2893                               &px, 4);
2894         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2895                               &vcpu->run->s.regs.fpc, 4);
2896         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2897                               &vcpu->arch.sie_block->todpr, 4);
2898         cputm = kvm_s390_get_cpu_timer(vcpu);
2899         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2900                               &cputm, 8);
2901         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2902         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2903                               &clkcomp, 8);
2904         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2905                               &vcpu->run->s.regs.acrs, 64);
2906         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2907                               &vcpu->arch.sie_block->gcr, 128);
2908         return rc ? -EFAULT : 0;
2909 }
2910
2911 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2912 {
2913         /*
2914          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2915          * switch in the run ioctl. Let's update our copies before we save
2916          * it into the save area
2917          */
2918         save_fpu_regs();
2919         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2920         save_access_regs(vcpu->run->s.regs.acrs);
2921
2922         return kvm_s390_store_status_unloaded(vcpu, addr);
2923 }
2924
2925 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2926 {
2927         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2928         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2929 }
2930
2931 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2932 {
2933         unsigned int i;
2934         struct kvm_vcpu *vcpu;
2935
2936         kvm_for_each_vcpu(i, vcpu, kvm) {
2937                 __disable_ibs_on_vcpu(vcpu);
2938         }
2939 }
2940
2941 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2942 {
2943         if (!sclp.has_ibs)
2944                 return;
2945         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2946         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2947 }
2948
2949 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2950 {
2951         int i, online_vcpus, started_vcpus = 0;
2952
2953         if (!is_vcpu_stopped(vcpu))
2954                 return;
2955
2956         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2957         /* Only one cpu at a time may enter/leave the STOPPED state. */
2958         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2959         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2960
2961         for (i = 0; i < online_vcpus; i++) {
2962                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2963                         started_vcpus++;
2964         }
2965
2966         if (started_vcpus == 0) {
2967                 /* we're the only active VCPU -> speed it up */
2968                 __enable_ibs_on_vcpu(vcpu);
2969         } else if (started_vcpus == 1) {
2970                 /*
2971                  * As we are starting a second VCPU, we have to disable
2972                  * the IBS facility on all VCPUs to remove potentially
2973                  * oustanding ENABLE requests.
2974                  */
2975                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2976         }
2977
2978         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2979         /*
2980          * Another VCPU might have used IBS while we were offline.
2981          * Let's play safe and flush the VCPU at startup.
2982          */
2983         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2984         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2985         return;
2986 }
2987
2988 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2989 {
2990         int i, online_vcpus, started_vcpus = 0;
2991         struct kvm_vcpu *started_vcpu = NULL;
2992
2993         if (is_vcpu_stopped(vcpu))
2994                 return;
2995
2996         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2997         /* Only one cpu at a time may enter/leave the STOPPED state. */
2998         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2999         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3000
3001         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3002         kvm_s390_clear_stop_irq(vcpu);
3003
3004         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3005         __disable_ibs_on_vcpu(vcpu);
3006
3007         for (i = 0; i < online_vcpus; i++) {
3008                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3009                         started_vcpus++;
3010                         started_vcpu = vcpu->kvm->vcpus[i];
3011                 }
3012         }
3013
3014         if (started_vcpus == 1) {
3015                 /*
3016                  * As we only have one VCPU left, we want to enable the
3017                  * IBS facility for that VCPU to speed it up.
3018                  */
3019                 __enable_ibs_on_vcpu(started_vcpu);
3020         }
3021
3022         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3023         return;
3024 }
3025
3026 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3027                                      struct kvm_enable_cap *cap)
3028 {
3029         int r;
3030
3031         if (cap->flags)
3032                 return -EINVAL;
3033
3034         switch (cap->cap) {
3035         case KVM_CAP_S390_CSS_SUPPORT:
3036                 if (!vcpu->kvm->arch.css_support) {
3037                         vcpu->kvm->arch.css_support = 1;
3038                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3039                         trace_kvm_s390_enable_css(vcpu->kvm);
3040                 }
3041                 r = 0;
3042                 break;
3043         default:
3044                 r = -EINVAL;
3045                 break;
3046         }
3047         return r;
3048 }
3049
3050 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3051                                   struct kvm_s390_mem_op *mop)
3052 {
3053         void __user *uaddr = (void __user *)mop->buf;
3054         void *tmpbuf = NULL;
3055         int r, srcu_idx;
3056         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3057                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3058
3059         if (mop->flags & ~supported_flags)
3060                 return -EINVAL;
3061
3062         if (mop->size > MEM_OP_MAX_SIZE)
3063                 return -E2BIG;
3064
3065         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3066                 tmpbuf = vmalloc(mop->size);
3067                 if (!tmpbuf)
3068                         return -ENOMEM;
3069         }
3070
3071         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3072
3073         switch (mop->op) {
3074         case KVM_S390_MEMOP_LOGICAL_READ:
3075                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3076                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3077                                             mop->size, GACC_FETCH);
3078                         break;
3079                 }
3080                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3081                 if (r == 0) {
3082                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3083                                 r = -EFAULT;
3084                 }
3085                 break;
3086         case KVM_S390_MEMOP_LOGICAL_WRITE:
3087                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3088                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3089                                             mop->size, GACC_STORE);
3090                         break;
3091                 }
3092                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3093                         r = -EFAULT;
3094                         break;
3095                 }
3096                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3097                 break;
3098         default:
3099                 r = -EINVAL;
3100         }
3101
3102         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3103
3104         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3105                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3106
3107         vfree(tmpbuf);
3108         return r;
3109 }
3110
3111 long kvm_arch_vcpu_ioctl(struct file *filp,
3112                          unsigned int ioctl, unsigned long arg)
3113 {
3114         struct kvm_vcpu *vcpu = filp->private_data;
3115         void __user *argp = (void __user *)arg;
3116         int idx;
3117         long r;
3118
3119         switch (ioctl) {
3120         case KVM_S390_IRQ: {
3121                 struct kvm_s390_irq s390irq;
3122
3123                 r = -EFAULT;
3124                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3125                         break;
3126                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3127                 break;
3128         }
3129         case KVM_S390_INTERRUPT: {
3130                 struct kvm_s390_interrupt s390int;
3131                 struct kvm_s390_irq s390irq;
3132
3133                 r = -EFAULT;
3134                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3135                         break;
3136                 if (s390int_to_s390irq(&s390int, &s390irq))
3137                         return -EINVAL;
3138                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3139                 break;
3140         }
3141         case KVM_S390_STORE_STATUS:
3142                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3143                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3144                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3145                 break;
3146         case KVM_S390_SET_INITIAL_PSW: {
3147                 psw_t psw;
3148
3149                 r = -EFAULT;
3150                 if (copy_from_user(&psw, argp, sizeof(psw)))
3151                         break;
3152                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3153                 break;
3154         }
3155         case KVM_S390_INITIAL_RESET:
3156                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3157                 break;
3158         case KVM_SET_ONE_REG:
3159         case KVM_GET_ONE_REG: {
3160                 struct kvm_one_reg reg;
3161                 r = -EFAULT;
3162                 if (copy_from_user(&reg, argp, sizeof(reg)))
3163                         break;
3164                 if (ioctl == KVM_SET_ONE_REG)
3165                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3166                 else
3167                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3168                 break;
3169         }
3170 #ifdef CONFIG_KVM_S390_UCONTROL
3171         case KVM_S390_UCAS_MAP: {
3172                 struct kvm_s390_ucas_mapping ucasmap;
3173
3174                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3175                         r = -EFAULT;
3176                         break;
3177                 }
3178
3179                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3180                         r = -EINVAL;
3181                         break;
3182                 }
3183
3184                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3185                                      ucasmap.vcpu_addr, ucasmap.length);
3186                 break;
3187         }
3188         case KVM_S390_UCAS_UNMAP: {
3189                 struct kvm_s390_ucas_mapping ucasmap;
3190
3191                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3192                         r = -EFAULT;
3193                         break;
3194                 }
3195
3196                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3197                         r = -EINVAL;
3198                         break;
3199                 }
3200
3201                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3202                         ucasmap.length);
3203                 break;
3204         }
3205 #endif
3206         case KVM_S390_VCPU_FAULT: {
3207                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3208                 break;
3209         }
3210         case KVM_ENABLE_CAP:
3211         {
3212                 struct kvm_enable_cap cap;
3213                 r = -EFAULT;
3214                 if (copy_from_user(&cap, argp, sizeof(cap)))
3215                         break;
3216                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3217                 break;
3218         }
3219         case KVM_S390_MEM_OP: {
3220                 struct kvm_s390_mem_op mem_op;
3221
3222                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3223                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3224                 else
3225                         r = -EFAULT;
3226                 break;
3227         }
3228         case KVM_S390_SET_IRQ_STATE: {
3229                 struct kvm_s390_irq_state irq_state;
3230
3231                 r = -EFAULT;
3232                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3233                         break;
3234                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3235                     irq_state.len == 0 ||
3236                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3237                         r = -EINVAL;
3238                         break;
3239                 }
3240                 r = kvm_s390_set_irq_state(vcpu,
3241                                            (void __user *) irq_state.buf,
3242                                            irq_state.len);
3243                 break;
3244         }
3245         case KVM_S390_GET_IRQ_STATE: {
3246                 struct kvm_s390_irq_state irq_state;
3247
3248                 r = -EFAULT;
3249                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3250                         break;
3251                 if (irq_state.len == 0) {
3252                         r = -EINVAL;
3253                         break;
3254                 }
3255                 r = kvm_s390_get_irq_state(vcpu,
3256                                            (__u8 __user *)  irq_state.buf,
3257                                            irq_state.len);
3258                 break;
3259         }
3260         default:
3261                 r = -ENOTTY;
3262         }
3263         return r;
3264 }
3265
3266 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3267 {
3268 #ifdef CONFIG_KVM_S390_UCONTROL
3269         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3270                  && (kvm_is_ucontrol(vcpu->kvm))) {
3271                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3272                 get_page(vmf->page);
3273                 return 0;
3274         }
3275 #endif
3276         return VM_FAULT_SIGBUS;
3277 }
3278
/*
 * Arch hook for memslot creation.
 *
 * Nothing is done here: none of the arguments are used and the
 * function always reports success (0).
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3284
3285 /* Section: memory related */
3286 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3287                                    struct kvm_memory_slot *memslot,
3288                                    const struct kvm_userspace_memory_region *mem,
3289                                    enum kvm_mr_change change)
3290 {
3291         /* A few sanity checks. We can have memory slots which have to be
3292            located/ended at a segment boundary (1MB). The memory in userland is
3293            ok to be fragmented into various different vmas. It is okay to mmap()
3294            and munmap() stuff in this slot after doing this call at any time */
3295
3296         if (mem->userspace_addr & 0xffffful)
3297                 return -EINVAL;
3298
3299         if (mem->memory_size & 0xffffful)
3300                 return -EINVAL;
3301
3302         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3303                 return -EINVAL;
3304
3305         return 0;
3306 }
3307
3308 void kvm_arch_commit_memory_region(struct kvm *kvm,
3309                                 const struct kvm_userspace_memory_region *mem,
3310                                 const struct kvm_memory_slot *old,
3311                                 const struct kvm_memory_slot *new,
3312                                 enum kvm_mr_change change)
3313 {
3314         int rc;
3315
3316         /* If the basics of the memslot do not change, we do not want
3317          * to update the gmap. Every update causes several unnecessary
3318          * segment translation exceptions. This is usually handled just
3319          * fine by the normal fault handler + gmap, but it will also
3320          * cause faults on the prefix page of running guest CPUs.
3321          */
3322         if (old->userspace_addr == mem->userspace_addr &&
3323             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3324             old->npages * PAGE_SIZE == mem->memory_size)
3325                 return;
3326
3327         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3328                 mem->guest_phys_addr, mem->memory_size);
3329         if (rc)
3330                 pr_warn("failed to commit memory region\n");
3331         return;
3332 }
3333
3334 static inline unsigned long nonhyp_mask(int i)
3335 {
3336         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3337
3338         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3339 }
3340
3341 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3342 {
3343         vcpu->valid_wakeup = false;
3344 }
3345
3346 static int __init kvm_s390_init(void)
3347 {
3348         int i;
3349
3350         if (!sclp.has_sief2) {
3351                 pr_info("SIE not available\n");
3352                 return -ENODEV;
3353         }
3354
3355         for (i = 0; i < 16; i++)
3356                 kvm_s390_fac_list_mask[i] |=
3357                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3358
3359         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3360 }
3361
3362 static void __exit kvm_s390_exit(void)
3363 {
3364         kvm_exit();
3365 }
3366
3367 module_init(kvm_s390_init);
3368 module_exit(kvm_s390_exit);
3369
3370 /*
3371  * Enable autoloading of the kvm module.
3372  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3373  * since x86 takes a different approach.
3374  */
3375 #include <linux/miscdevice.h>
3376 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3377 MODULE_ALIAS("devname:kvm");