git.kernelconcepts.de Git - karo-tx-linux.git/blob - tools/kvm/kvm.c
kvm tools: Remove useless empty lines for a call series in kvm__reset_vcpu
[karo-tx-linux.git] / tools / kvm / kvm.c
1 #include "kvm/kvm.h"
2
3 #include "kvm/cpufeature.h"
4 #include "kvm/interrupt.h"
5 #include "kvm/e820.h"
6 #include "kvm/util.h"
7
8 #include <linux/kvm.h>
9
10 #include <asm/bootparam.h>
11
12 #include <sys/ioctl.h>
13 #include <inttypes.h>
14 #include <sys/mman.h>
15 #include <sys/stat.h>
16 #include <stdbool.h>
17 #include <assert.h>
18 #include <limits.h>
19 #include <signal.h>
20 #include <stdarg.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <stdio.h>
25 #include <fcntl.h>
26 #include <time.h>
27
/*
 * Compatibility code. Remove this when we move to tools/kvm.
 *
 * Older <linux/kvm.h> headers lack KVM_EXIT_INTERNAL_ERROR, so define
 * it ourselves when it is missing.
 */
#ifndef KVM_EXIT_INTERNAL_ERROR
# define KVM_EXIT_INTERNAL_ERROR	17
#endif
34
35 #define DEFINE_KVM_EXIT_REASON(reason) [reason] = #reason
36
37 const char *kvm_exit_reasons[] = {
38         DEFINE_KVM_EXIT_REASON(KVM_EXIT_UNKNOWN),
39         DEFINE_KVM_EXIT_REASON(KVM_EXIT_EXCEPTION),
40         DEFINE_KVM_EXIT_REASON(KVM_EXIT_IO),
41         DEFINE_KVM_EXIT_REASON(KVM_EXIT_HYPERCALL),
42         DEFINE_KVM_EXIT_REASON(KVM_EXIT_DEBUG),
43         DEFINE_KVM_EXIT_REASON(KVM_EXIT_HLT),
44         DEFINE_KVM_EXIT_REASON(KVM_EXIT_MMIO),
45         DEFINE_KVM_EXIT_REASON(KVM_EXIT_IRQ_WINDOW_OPEN),
46         DEFINE_KVM_EXIT_REASON(KVM_EXIT_SHUTDOWN),
47         DEFINE_KVM_EXIT_REASON(KVM_EXIT_FAIL_ENTRY),
48         DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTR),
49         DEFINE_KVM_EXIT_REASON(KVM_EXIT_SET_TPR),
50         DEFINE_KVM_EXIT_REASON(KVM_EXIT_TPR_ACCESS),
51         DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_SIEIC),
52         DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_RESET),
53         DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR),
54         DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI),
55         DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR),
56 };
57
58 #define DEFINE_KVM_EXT(ext)             \
59         .name = #ext,                   \
60         .code = ext
61
62 struct {
63         const char *name;
64         int code;
65 } kvm_req_ext[] = {
66         { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) },
67         { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) },
68         { DEFINE_KVM_EXT(KVM_CAP_PIT2) },
69         { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) },
70         { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) },
71         { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) },
72         { DEFINE_KVM_EXT(KVM_CAP_HLT) },
73         { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) },
74         { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) },
75 };
76
77 static bool kvm__supports_extension(struct kvm *self, unsigned int extension)
78 {
79         int ret;
80
81         ret = ioctl(self->sys_fd, KVM_CHECK_EXTENSION, extension);
82         if (ret < 0)
83                 return false;
84
85         return ret;
86 }
87
88 static int kvm__check_extensions(struct kvm *self)
89 {
90         unsigned int i;
91
92         for (i = 0; i < ARRAY_SIZE(kvm_req_ext); i++) {
93                 if (!kvm__supports_extension(self, kvm_req_ext[i].code)) {
94                         error("Unsuppored KVM extension detected: %s",
95                                 kvm_req_ext[i].name);
96                         return (int)-i;
97                 }
98         }
99
100         return 0;
101 }
102
103 static struct kvm *kvm__new(void)
104 {
105         struct kvm *self = calloc(1, sizeof *self);
106
107         if (!self)
108                 die("out of memory");
109
110         return self;
111 }
112
113 void kvm__delete(struct kvm *self)
114 {
115         free(self->ram_start);
116         free(self);
117 }
118
119 static bool kvm__cpu_supports_vm(void)
120 {
121         struct cpuid_regs regs;
122         uint32_t eax_base;
123         int feature;
124
125         regs    = (struct cpuid_regs) {
126                 .eax            = 0x00,
127         };
128         host_cpuid(&regs);
129
130         switch (regs.ebx) {
131         case CPUID_VENDOR_INTEL_1:
132                 eax_base        = 0x00;
133                 feature         = KVM__X86_FEATURE_VMX;
134                 break;
135
136         case CPUID_VENDOR_AMD_1:
137                 eax_base        = 0x80000000;
138                 feature         = KVM__X86_FEATURE_SVM;
139                 break;
140
141         default:
142                 return false;
143         }
144
145         regs    = (struct cpuid_regs) {
146                 .eax            = eax_base,
147         };
148         host_cpuid(&regs);
149
150         if (regs.eax < eax_base + 0x01)
151                 return false;
152
153         regs    = (struct cpuid_regs) {
154                 .eax            = eax_base + 0x01
155         };
156         host_cpuid(&regs);
157
158         return regs.ecx & (1 << feature);
159 }
160
161 struct kvm *kvm__init(const char *kvm_dev, unsigned long ram_size)
162 {
163         struct kvm_userspace_memory_region mem;
164         struct kvm_pit_config pit_config = { .flags = 0, };
165         struct kvm *self;
166         long page_size;
167         int mmap_size;
168         int ret;
169
170         if (!kvm__cpu_supports_vm())
171                 die("Your CPU does not support hardware virtualization");
172
173         self = kvm__new();
174
175         self->sys_fd = open(kvm_dev, O_RDWR);
176         if (self->sys_fd < 0) {
177                 if (errno == ENOENT)
178                         die("'%s' not found. Please make sure your kernel has CONFIG_KVM enabled and that the KVM modules are loaded.", kvm_dev);
179
180                 die_perror("open");
181         }
182
183         ret = ioctl(self->sys_fd, KVM_GET_API_VERSION, 0);
184         if (ret != KVM_API_VERSION)
185                 die_perror("KVM_API_VERSION ioctl");
186
187         self->vm_fd = ioctl(self->sys_fd, KVM_CREATE_VM, 0);
188         if (self->vm_fd < 0)
189                 die_perror("KVM_CREATE_VM ioctl");
190
191         if (kvm__check_extensions(self))
192                 die("A required KVM extention is not supported by OS");
193
194         ret = ioctl(self->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000);
195         if (ret < 0)
196                 die_perror("KVM_SET_TSS_ADDR ioctl");
197
198         ret = ioctl(self->vm_fd, KVM_CREATE_PIT2, &pit_config);
199         if (ret < 0)
200                 die_perror("KVM_CREATE_PIT2 ioctl");
201
202         self->ram_size          = ram_size;
203
204         page_size       = sysconf(_SC_PAGESIZE);
205         if (posix_memalign(&self->ram_start, page_size, self->ram_size) != 0)
206                 die("out of memory");
207
208         mem = (struct kvm_userspace_memory_region) {
209                 .slot                   = 0,
210                 .guest_phys_addr        = 0x0UL,
211                 .memory_size            = self->ram_size,
212                 .userspace_addr         = (unsigned long) self->ram_start,
213         };
214
215         ret = ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
216         if (ret < 0)
217                 die_perror("KVM_SET_USER_MEMORY_REGION ioctl");
218
219         ret = ioctl(self->vm_fd, KVM_CREATE_IRQCHIP);
220         if (ret < 0)
221                 die_perror("KVM_CREATE_IRQCHIP ioctl");
222
223         self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
224         if (self->vcpu_fd < 0)
225                 die_perror("KVM_CREATE_VCPU ioctl");
226
227         mmap_size = ioctl(self->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
228         if (mmap_size < 0)
229                 die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
230
231         self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
232         if (self->kvm_run == MAP_FAILED)
233                 die("unable to mmap vcpu fd");
234
235         return self;
236 }
237
238 void kvm__enable_singlestep(struct kvm *self)
239 {
240         struct kvm_guest_debug debug = {
241                 .control        = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
242         };
243
244         if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
245                 warning("KVM_SET_GUEST_DEBUG failed");
246 }
247
/* Real-mode load address of the boot code: BOOT_LOADER_SELECTOR:BOOT_LOADER_IP. */
#define BOOT_LOADER_SELECTOR	0x1000
#define BOOT_LOADER_IP		0x0000
#define BOOT_LOADER_SP		0x8000
/* Flat guest address where the kernel command line is placed. */
#define BOOT_CMDLINE_OFFSET	0x20000

/* Minimum x86 boot protocol version we accept (2.06). */
#define BOOT_PROTOCOL_REQUIRED	0x206
#define LOAD_HIGH		0x01
255
256 static int load_flat_binary(struct kvm *self, int fd)
257 {
258         void *p;
259         int nr;
260
261         if (lseek(fd, 0, SEEK_SET) < 0)
262                 die_perror("lseek");
263
264         p = guest_real_to_host(self, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
265
266         while ((nr = read(fd, p, 65536)) > 0)
267                 p += nr;
268
269         self->boot_selector     = BOOT_LOADER_SELECTOR;
270         self->boot_ip           = BOOT_LOADER_IP;
271         self->boot_sp           = BOOT_LOADER_SP;
272
273         return true;
274 }
275
/*
 * The protected mode kernel part of a modern bzImage is loaded at 1 MB by
 * default.
 */
#define BZ_KERNEL_START			0x100000UL
#define INITRD_START			0x1000000UL
/* Boot protocol: setup_sects == 0 means "4 sectors". */
#define BZ_DEFAULT_SETUP_SECTS		4
/* "HdrS" signature identifying a bzImage setup header. */
static const char *BZIMAGE_MAGIC	= "HdrS";
284
285 static bool load_bzimage(struct kvm *self, int fd_kernel,
286                         int fd_initrd, const char *kernel_cmdline)
287 {
288         struct boot_params *kern_boot;
289         unsigned long setup_sects;
290         struct boot_params boot;
291         size_t cmdline_size;
292         ssize_t setup_size;
293         void *p;
294         int nr;
295
296         /*
297          * See Documentation/x86/boot.txt for details no bzImage on-disk and
298          * memory layout.
299          */
300
301         if (lseek(fd_kernel, 0, SEEK_SET) < 0)
302                 die_perror("lseek");
303
304         if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot))
305                 return false;
306
307         if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC)))
308                 return false;
309
310         if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED) {
311                 die("Too old kernel");
312         }
313
314         if (lseek(fd_kernel, 0, SEEK_SET) < 0)
315                 die_perror("lseek");
316
317         if (!boot.hdr.setup_sects)
318                 boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS;
319         setup_sects = boot.hdr.setup_sects + 1;
320
321         setup_size = setup_sects << 9;
322         p = guest_real_to_host(self, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
323
324         /* copy setup.bin to mem*/
325         if (read(fd_kernel, p, setup_size) != setup_size)
326                 die_perror("read");
327
328         /* copy vmlinux.bin to BZ_KERNEL_START*/
329         p = guest_flat_to_host(self, BZ_KERNEL_START);
330
331         while ((nr = read(fd_kernel, p, 65536)) > 0)
332                 p += nr;
333
334         p = guest_flat_to_host(self, BOOT_CMDLINE_OFFSET);
335         if (kernel_cmdline) {
336                 cmdline_size = strlen(kernel_cmdline) + 1;
337                 if (cmdline_size > boot.hdr.cmdline_size)
338                         cmdline_size = boot.hdr.cmdline_size;
339
340                 memset(p, 0, boot.hdr.cmdline_size);
341                 memcpy(p, kernel_cmdline, cmdline_size - 1);
342         }
343
344         kern_boot       = guest_real_to_host(self, BOOT_LOADER_SELECTOR, 0x00);
345
346         kern_boot->hdr.cmd_line_ptr     = BOOT_CMDLINE_OFFSET;
347         kern_boot->hdr.type_of_loader   = 0xff;
348         kern_boot->hdr.heap_end_ptr     = 0xfe00;
349         kern_boot->hdr.loadflags        |= CAN_USE_HEAP;
350
351         /*
352          * Read initrd image into guest memory
353          */
354         if (fd_initrd >= 0) {
355                 struct stat initrd_stat;
356                 unsigned long addr;
357
358                 if (fstat(fd_initrd, &initrd_stat))
359                         die_perror("fstat");
360
361                 addr = boot.hdr.initrd_addr_max & ~0xfffff;
362                 for (;;) {
363                         if (addr < BZ_KERNEL_START)
364                                 die("Not enough memory for initrd");
365                         else if (addr < (self->ram_size - initrd_stat.st_size))
366                                 break;
367                         addr -= 0x100000;
368                 }
369
370                 p = guest_flat_to_host(self, addr);
371                 nr = read(fd_initrd, p, initrd_stat.st_size);
372                 if (nr != initrd_stat.st_size)
373                         die("Failed to read initrd");
374
375                 kern_boot->hdr.ramdisk_image    = addr;
376                 kern_boot->hdr.ramdisk_size     = initrd_stat.st_size;
377         }
378
379         self->boot_selector     = BOOT_LOADER_SELECTOR;
380         /*
381          * The real-mode setup code starts at offset 0x200 of a bzImage. See
382          * Documentation/x86/boot.txt for details.
383          */
384         self->boot_ip           = BOOT_LOADER_IP + 0x200;
385         self->boot_sp           = BOOT_LOADER_SP;
386
387         return true;
388 }
389
/*
 * Load a guest kernel: try bzImage first, then fall back to a flat
 * binary. Dies if neither loader succeeds or a file cannot be opened.
 *
 * Fix vs. the original: fd_kernel was never closed, leaking a file
 * descriptor on every call.
 */
bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename,
		const char *initrd_filename, const char *kernel_cmdline)
{
	bool ret;
	int fd_kernel = -1, fd_initrd = -1;

	fd_kernel = open(kernel_filename, O_RDONLY);
	if (fd_kernel < 0)
		die("Unable to open kernel %s", kernel_filename);

	if (initrd_filename) {
		fd_initrd = open(initrd_filename, O_RDONLY);
		if (fd_initrd < 0)
			die("Unable to open initrd %s", initrd_filename);
	}

	ret = load_bzimage(kvm, fd_kernel, fd_initrd, kernel_cmdline);

	if (initrd_filename)
		close(fd_initrd);

	if (ret)
		goto found_kernel;

	warning("%s is not a bzImage. Trying to load it as a flat binary...", kernel_filename);

	ret = load_flat_binary(kvm, fd_kernel);
	if (ret)
		goto found_kernel;

	die("%s is not a valid bzImage or flat binary", kernel_filename);

found_kernel:
	close(fd_kernel);
	return ret;
}
425
426 static inline uint64_t ip_flat_to_real(struct kvm *self, uint64_t ip)
427 {
428         uint64_t cs = self->sregs.cs.selector;
429
430         return ip - (cs << 4);
431 }
432
433 static inline bool is_in_protected_mode(struct kvm *self)
434 {
435         return self->sregs.cr0 & 0x01;
436 }
437
438 static inline uint64_t ip_to_flat(struct kvm *self, uint64_t ip)
439 {
440         uint64_t cs;
441
442         /*
443          * NOTE! We should take code segment base address into account here.
444          * Luckily it's usually zero because Linux uses flat memory model.
445          */
446         if (is_in_protected_mode(self))
447                 return ip;
448
449         cs = self->sregs.cs.selector;
450
451         return ip + (cs << 4);
452 }
453
/*
 * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
 */
static inline uint32_t selector_to_base(uint16_t selector)
{
	return (uint32_t) selector << 4;
}
461
462 static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
463 {
464         struct kvm_msrs *self = calloc(1, sizeof(*self) + (sizeof(struct kvm_msr_entry) * nmsrs));
465
466         if (!self)
467                 die("out of memory");
468
469         return self;
470 }
471
/* x86 MSR indices we initialize for the VCPU. */
#define MSR_IA32_TIME_STAMP_COUNTER	0x10

#define MSR_IA32_SYSENTER_CS		0x174
#define MSR_IA32_SYSENTER_ESP		0x175
#define MSR_IA32_SYSENTER_EIP		0x176

#define MSR_IA32_STAR			0xc0000081
#define MSR_IA32_LSTAR			0xc0000082
#define MSR_IA32_CSTAR			0xc0000083
#define MSR_IA32_FMASK			0xc0000084
#define MSR_IA32_KERNEL_GS_BASE		0xc0000102

/* Build a kvm_msr_entry compound literal for one index/value pair. */
#define KVM_MSR_ENTRY(_index, _data)	\
	(struct kvm_msr_entry) { .index = _index, .data = _data }
486
487 static void kvm__setup_msrs(struct kvm *self)
488 {
489         unsigned long ndx = 0;
490
491         self->msrs = kvm_msrs__new(100);
492
493         self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS,        0x0);
494         self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP,       0x0);
495         self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP,       0x0);
496 #ifdef CONFIG_X86_64
497         self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_STAR,               0x0);
498         self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_CSTAR,              0x0);
499         self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_KERNEL_GS_BASE,     0x0);
500         self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_FMASK,              0x0);
501         self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_LSTAR,              0x0);
502 #endif
503         self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TIME_STAMP_COUNTER, 0x0);
504
505         self->msrs->nmsrs       = ndx;
506
507         if (ioctl(self->vcpu_fd, KVM_SET_MSRS, self->msrs) < 0)
508                 die_perror("KVM_SET_MSRS failed");
509 }
510
511 static void kvm__setup_fpu(struct kvm *self)
512 {
513         self->fpu = (struct kvm_fpu) {
514                 .fcw            = 0x37f,
515                 .mxcsr          = 0x1f80,
516         };
517
518         if (ioctl(self->vcpu_fd, KVM_SET_FPU, &self->fpu) < 0)
519                 die_perror("KVM_SET_FPU failed");
520 }
521
522 static void kvm__setup_regs(struct kvm *self)
523 {
524         self->regs = (struct kvm_regs) {
525                 /* We start the guest in 16-bit real mode  */
526                 .rflags         = 0x0000000000000002ULL,
527
528                 .rip            = self->boot_ip,
529                 .rsp            = self->boot_sp,
530                 .rbp            = self->boot_sp,
531         };
532
533         if (self->regs.rip > USHRT_MAX)
534                 die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) self->regs.rip);
535
536         if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0)
537                 die_perror("KVM_SET_REGS failed");
538 }
539
540 static void kvm__setup_sregs(struct kvm *self)
541 {
542
543         if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
544                 die_perror("KVM_GET_SREGS failed");
545
546         self->sregs.cs.selector = self->boot_selector;
547         self->sregs.cs.base     = selector_to_base(self->boot_selector);
548         self->sregs.ss.selector = self->boot_selector;
549         self->sregs.ss.base     = selector_to_base(self->boot_selector);
550         self->sregs.ds.selector = self->boot_selector;
551         self->sregs.ds.base     = selector_to_base(self->boot_selector);
552         self->sregs.es.selector = self->boot_selector;
553         self->sregs.es.base     = selector_to_base(self->boot_selector);
554         self->sregs.fs.selector = self->boot_selector;
555         self->sregs.fs.base     = selector_to_base(self->boot_selector);
556         self->sregs.gs.selector = self->boot_selector;
557         self->sregs.gs.base     = selector_to_base(self->boot_selector);
558
559         if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0)
560                 die_perror("KVM_SET_SREGS failed");
561 }
562
/**
 * kvm__reset_vcpu - reset virtual CPU to a known state
 *
 * Re-initializes segment registers, general registers, the FPU and the
 * MSRs, in that order.
 */
void kvm__reset_vcpu(struct kvm *self)
{
	kvm__setup_sregs(self);
	kvm__setup_regs(self);
	kvm__setup_fpu(self);
	kvm__setup_msrs(self);
}
573
574 void kvm__setup_mem(struct kvm *self)
575 {
576         struct e820_entry *mem_map;
577         unsigned char *size;
578
579         size            = guest_flat_to_host(self, E820_MAP_SIZE);
580         mem_map         = guest_flat_to_host(self, E820_MAP_START);
581
582         *size           = 4;
583
584         mem_map[0]      = (struct e820_entry) {
585                 .addr           = REAL_MODE_IVT_BEGIN,
586                 .size           = EBDA_START - REAL_MODE_IVT_BEGIN,
587                 .type           = E820_MEM_USABLE,
588         };
589         mem_map[1]      = (struct e820_entry) {
590                 .addr           = EBDA_START,
591                 .size           = VGA_RAM_BEGIN - EBDA_START,
592                 .type           = E820_MEM_RESERVED,
593         };
594         mem_map[2]      = (struct e820_entry) {
595                 .addr           = MB_BIOS_BEGIN,
596                 .size           = MB_BIOS_END - MB_BIOS_BEGIN,
597                 .type           = E820_MEM_RESERVED,
598         };
599         mem_map[3]      = (struct e820_entry) {
600                 .addr           = BZ_KERNEL_START,
601                 .size           = self->ram_size - BZ_KERNEL_START,
602                 .type           = E820_MEM_USABLE,
603         };
604 }
605
#define TIMER_INTERVAL_NS 1000000	/* 1 msec */

/*
 * Deliberately empty SIGALRM handler: the signal's delivery alone is
 * what we need — presumably to interrupt the blocking KVM_RUN ioctl
 * (see kvm__start_timer / kvm__run); confirm.
 */
static void alarm_handler(int sig)
{
}
611
612 /*
613  * This function sets up a timer that's used to inject interrupts from the
614  * userspace hypervisor into the guest at periodical intervals. Please note
615  * that clock interrupt, for example, is not handled here.
616  */
617 void kvm__start_timer(struct kvm *self)
618 {
619         struct itimerspec its;
620         struct sigaction sa;
621         struct sigevent sev;
622
623         sigfillset(&sa.sa_mask);
624         sa.sa_flags                     = 0;
625         sa.sa_handler                   = alarm_handler;
626
627         sigaction(SIGALRM, &sa, NULL);
628
629         memset(&sev, 0, sizeof(struct sigevent));
630         sev.sigev_value.sival_int       = 0;
631         sev.sigev_notify                = SIGEV_SIGNAL;
632         sev.sigev_signo                 = SIGALRM;
633
634         if (timer_create(CLOCK_REALTIME, &sev, &self->timerid) < 0)
635                 die("timer_create()");
636
637         its.it_value.tv_sec             = TIMER_INTERVAL_NS / 1000000000;
638         its.it_value.tv_nsec            = TIMER_INTERVAL_NS % 1000000000;
639         its.it_interval.tv_sec          = its.it_value.tv_sec;
640         its.it_interval.tv_nsec         = its.it_value.tv_nsec;
641
642         if (timer_settime(self->timerid, 0, &its, NULL) < 0)
643                 die("timer_settime()");
644 }
645
646 void kvm__run(struct kvm *self)
647 {
648         int err;
649
650         err = ioctl(self->vcpu_fd, KVM_RUN, 0);
651         if (err && (errno != EINTR && errno != EAGAIN))
652                 die_perror("KVM_RUN failed");
653 }
654
655 void kvm__irq_line(struct kvm *self, int irq, int level)
656 {
657         struct kvm_irq_level irq_level;
658
659         irq_level       = (struct kvm_irq_level) {
660                 {
661                         .irq            = irq,
662                 },
663                 .level          = level,
664         };
665
666         if (ioctl(self->vm_fd, KVM_IRQ_LINE, &irq_level) < 0)
667                 die_perror("KVM_IRQ_LINE failed");
668 }
669
670 static void print_dtable(const char *name, struct kvm_dtable *dtable)
671 {
672         printf(" %s                 %016" PRIx64 "  %08" PRIx16 "\n",
673                 name, (uint64_t) dtable->base, (uint16_t) dtable->limit);
674 }
675
676 static void print_segment(const char *name, struct kvm_segment *seg)
677 {
678         printf(" %s       %04" PRIx16 "      %016" PRIx64 "  %08" PRIx32 "  %02" PRIx8 "    %x %x   %x  %x %x %x %x\n",
679                 name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit,
680                 (uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
681 }
682
683 void kvm__show_registers(struct kvm *self)
684 {
685         unsigned long cr0, cr2, cr3;
686         unsigned long cr4, cr8;
687         unsigned long rax, rbx, rcx;
688         unsigned long rdx, rsi, rdi;
689         unsigned long rbp,  r8,  r9;
690         unsigned long r10, r11, r12;
691         unsigned long r13, r14, r15;
692         unsigned long rip, rsp;
693         struct kvm_sregs sregs;
694         unsigned long rflags;
695         struct kvm_regs regs;
696         int i;
697
698         if (ioctl(self->vcpu_fd, KVM_GET_REGS, &regs) < 0)
699                 die("KVM_GET_REGS failed");
700
701         rflags = regs.rflags;
702
703         rip = regs.rip; rsp = regs.rsp;
704         rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
705         rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
706         rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
707         r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
708         r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
709
710         printf("Registers:\n");
711         printf(" rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
712         printf(" rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
713         printf(" rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
714         printf(" rbp: %016lx   r8:  %016lx   r9:  %016lx\n", rbp, r8,  r9);
715         printf(" r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
716         printf(" r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);
717
718         if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
719                 die("KVM_GET_REGS failed");
720
721         cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
722         cr4 = sregs.cr4; cr8 = sregs.cr8;
723
724         printf(" cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
725         printf(" cr4: %016lx   cr8: %016lx\n", cr4, cr8);
726         printf("Segment registers:\n");
727         printf(" register  selector  base              limit     type  p dpl db s l g avl\n");
728         print_segment("cs ", &sregs.cs);
729         print_segment("ss ", &sregs.ss);
730         print_segment("ds ", &sregs.ds);
731         print_segment("es ", &sregs.es);
732         print_segment("fs ", &sregs.fs);
733         print_segment("gs ", &sregs.gs);
734         print_segment("tr ", &sregs.tr);
735         print_segment("ldt", &sregs.ldt);
736         print_dtable("gdt", &sregs.gdt);
737         print_dtable("idt", &sregs.idt);
738         printf(" [ efer: %016" PRIx64 "  apic base: %016" PRIx64 "  nmi: %s ]\n",
739                 (uint64_t) sregs.efer, (uint64_t) sregs.apic_base,
740                 (self->nmi_disabled ? "disabled" : "enabled"));
741         printf("Interrupt bitmap:\n");
742         printf(" ");
743         for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
744                 printf("%016" PRIx64 " ", (uint64_t) sregs.interrupt_bitmap[i]);
745         printf("\n");
746 }
747
748 void kvm__show_code(struct kvm *self)
749 {
750         unsigned int code_bytes = 64;
751         unsigned int code_prologue = code_bytes * 43 / 64;
752         unsigned int code_len = code_bytes;
753         unsigned char c;
754         unsigned int i;
755         uint8_t *ip;
756
757         if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0)
758                 die("KVM_GET_REGS failed");
759
760         if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
761                 die("KVM_GET_SREGS failed");
762
763         ip = guest_flat_to_host(self, ip_to_flat(self, self->regs.rip) - code_prologue);
764
765         printf("Code: ");
766
767         for (i = 0; i < code_len; i++, ip++) {
768                 if (!host_ptr_in_ram(self, ip))
769                         break;
770
771                 c = *ip;
772
773                 if (ip == guest_flat_to_host(self, ip_to_flat(self, self->regs.rip)))
774                         printf("<%02x> ", c);
775                 else
776                         printf("%02x ", c);
777         }
778
779         printf("\n");
780
781         printf("Stack:\n");
782         kvm__dump_mem(self, self->regs.rsp, 32);
783 }
784
785 void kvm__show_page_tables(struct kvm *self)
786 {
787         uint64_t *pte1;
788         uint64_t *pte2;
789         uint64_t *pte3;
790         uint64_t *pte4;
791
792         if (!is_in_protected_mode(self))
793                 return;
794
795         if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
796                 die("KVM_GET_SREGS failed");
797
798         pte4    = guest_flat_to_host(self, self->sregs.cr3);
799         if (!host_ptr_in_ram(self, pte4))
800                 return;
801
802         pte3    = guest_flat_to_host(self, (*pte4 & ~0xfff));
803         if (!host_ptr_in_ram(self, pte3))
804                 return;
805
806         pte2    = guest_flat_to_host(self, (*pte3 & ~0xfff));
807         if (!host_ptr_in_ram(self, pte2))
808                 return;
809
810         pte1    = guest_flat_to_host(self, (*pte2 & ~0xfff));
811         if (!host_ptr_in_ram(self, pte1))
812                 return;
813
814         printf("Page Tables:\n");
815         if (*pte2 & (1 << 7))
816                 printf(" pte4: %016" PRIx64 "   pte3: %016" PRIx64
817                         "   pte2: %016" PRIx64 "\n",
818                         *pte4, *pte3, *pte2);
819         else
820                 printf(" pte4: %016" PRIx64 "   pte3: %016" PRIx64 "   pte2: %016"
821                         PRIx64 "   pte1: %016" PRIx64 "\n",
822                         *pte4, *pte3, *pte2, *pte1);
823 }
824
/*
 * Hex-dump @size bytes of guest memory starting at flat address @addr,
 * eight bytes per line. The size is rounded down to a multiple of 8 and
 * the dump stops at the end of guest RAM.
 */
void kvm__dump_mem(struct kvm *self, unsigned long addr, unsigned long size)
{
	unsigned long offset;
	unsigned char *p;

	size &= ~7;	/* round down to a multiple of 8 */
	if (!size)
		return;

	p = guest_flat_to_host(self, addr);

	for (offset = 0; offset < size; offset += 8) {
		if (!host_ptr_in_ram(self, p + offset))
			break;

		printf("  0x%08lx: %02x %02x %02x %02x  %02x %02x %02x %02x\n",
			addr + offset,
			p[offset + 0], p[offset + 1], p[offset + 2], p[offset + 3],
			p[offset + 4], p[offset + 5], p[offset + 6], p[offset + 7]);
	}
}