git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - tools/kvm/kvm.c
kvm tools: Prefix error() and friends helpers with pr_
[karo-tx-linux.git] / tools / kvm / kvm.c
index 7d1f3f38e524719be08bb3189f3b2c15227ed0d5..815cacfe30e682d6b8b65db37484e340139ecdd8 100644 (file)
@@ -1,30 +1,29 @@
 #include "kvm/kvm.h"
 
+#include "kvm/cpufeature.h"
+#include "kvm/interrupt.h"
+#include "kvm/boot-protocol.h"
+#include "kvm/util.h"
+#include "kvm/mptable.h"
+
 #include <linux/kvm.h>
 
 #include <asm/bootparam.h>
 
 #include <sys/ioctl.h>
-#include <inttypes.h>
 #include <sys/mman.h>
+#include <sys/stat.h>
 #include <stdbool.h>
+#include <assert.h>
 #include <limits.h>
+#include <signal.h>
 #include <stdarg.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include <stdio.h>
 #include <fcntl.h>
-
-#include "ivt.h"
-#include "util.h"
-
-/*
- * Compatibility code. Remove this when we move to tools/kvm.
- */
-#ifndef KVM_EXIT_INTERNAL_ERROR
-# define KVM_EXIT_INTERNAL_ERROR               17
-#endif
+#include <time.h>
 
 #define DEFINE_KVM_EXIT_REASON(reason) [reason] = #reason
 
@@ -49,123 +48,255 @@ const char *kvm_exit_reasons[] = {
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR),
 };
 
-static inline uint32_t segment_to_flat(uint16_t selector, uint16_t offset)
-{
-       return ((uint32_t)selector << 4) + (uint32_t) offset;
-}
+#define DEFINE_KVM_EXT(ext)            \
+       .name = #ext,                   \
+       .code = ext
+
+struct {
+       const char *name;
+       int code;
+} kvm_req_ext[] = {
+       { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) },
+       { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) },
+       { DEFINE_KVM_EXT(KVM_CAP_PIT2) },
+       { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) },
+       { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) },
+       { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) },
+       { DEFINE_KVM_EXT(KVM_CAP_HLT) },
+       { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) },
+       { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) },
+};
 
-static inline void *guest_flat_to_host(struct kvm *self, unsigned long offset)
+static bool kvm__supports_extension(struct kvm *kvm, unsigned int extension)
 {
-       return self->ram_start + offset;
-}
+       int ret;
 
-static inline void *guest_real_to_host(struct kvm *self, uint16_t selector, uint16_t offset)
-{
-       unsigned long flat = segment_to_flat(selector, offset);
+       ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, extension);
+       if (ret < 0)
+               return false;
 
-       return guest_flat_to_host(self, flat);
+       return ret;
 }
 
-static bool kvm__supports_extension(struct kvm *self, unsigned int extension)
+static int kvm__check_extensions(struct kvm *kvm)
 {
-       int ret;
+       unsigned int i;
 
-       ret = ioctl(self->sys_fd, KVM_CHECK_EXTENSION, extension);
-       if (ret < 0)
-               return false;
+       for (i = 0; i < ARRAY_SIZE(kvm_req_ext); i++) {
+               if (!kvm__supports_extension(kvm, kvm_req_ext[i].code)) {
+                       pr_error("Unsuppored KVM extension detected: %s",
+                               kvm_req_ext[i].name);
+                       return (int)-i;
+               }
+       }
 
-       return ret;
+       return 0;
 }
 
 static struct kvm *kvm__new(void)
 {
-       struct kvm *self = calloc(1, sizeof *self);
+       struct kvm *kvm = calloc(1, sizeof *kvm);
 
-       if (!self)
+       if (!kvm)
                die("out of memory");
 
-       return self;
+       return kvm;
 }
 
-struct kvm *kvm__init(void)
+void kvm__delete(struct kvm *kvm)
 {
-       struct kvm_userspace_memory_region mem;
-       struct kvm *self;
-       long page_size;
-       int mmap_size;
-       int ret;
+       kvm__stop_timer(kvm);
 
-       self = kvm__new();
+       munmap(kvm->ram_start, kvm->ram_size);
+       free(kvm);
+}
 
-       self->sys_fd = open("/dev/kvm", O_RDWR);
-       if (self->sys_fd < 0)
-               die_perror("open");
+static bool kvm__cpu_supports_vm(void)
+{
+       struct cpuid_regs regs;
+       u32 eax_base;
+       int feature;
 
-       ret = ioctl(self->sys_fd, KVM_GET_API_VERSION, 0);
-       if (ret != KVM_API_VERSION)
-               die_perror("KVM_API_VERSION ioctl");
+       regs    = (struct cpuid_regs) {
+               .eax            = 0x00,
+       };
+       host_cpuid(&regs);
 
-       self->vm_fd = ioctl(self->sys_fd, KVM_CREATE_VM, 0);
-       if (self->vm_fd < 0)
-               die_perror("KVM_CREATE_VM ioctl");
+       switch (regs.ebx) {
+       case CPUID_VENDOR_INTEL_1:
+               eax_base        = 0x00;
+               feature         = KVM__X86_FEATURE_VMX;
+               break;
 
-       if (!kvm__supports_extension(self, KVM_CAP_USER_MEMORY))
-               die("KVM_CAP_USER_MEMORY is not supported");
+       case CPUID_VENDOR_AMD_1:
+               eax_base        = 0x80000000;
+               feature         = KVM__X86_FEATURE_SVM;
+               break;
 
-       self->ram_size          = 64UL * 1024UL * 1024UL;
+       default:
+               return false;
+       }
 
-       page_size       = sysconf(_SC_PAGESIZE);
-       if (posix_memalign(&self->ram_start, page_size, self->ram_size) != 0)
-               die("out of memory");
+       regs    = (struct cpuid_regs) {
+               .eax            = eax_base,
+       };
+       host_cpuid(&regs);
+
+       if (regs.eax < eax_base + 0x01)
+               return false;
+
+       regs    = (struct cpuid_regs) {
+               .eax            = eax_base + 0x01
+       };
+       host_cpuid(&regs);
+
+       return regs.ecx & (1 << feature);
+}
+
+static void kvm_register_mem_slot(struct kvm *kvm, u32 slot, u64 guest_phys, u64 size, void *userspace_addr)
+{
+       struct kvm_userspace_memory_region mem;
+       int ret;
 
        mem = (struct kvm_userspace_memory_region) {
-               .slot                   = 0,
-               .guest_phys_addr        = 0x0UL,
-               .memory_size            = self->ram_size,
-               .userspace_addr         = (unsigned long) self->ram_start,
+               .slot                   = slot,
+               .guest_phys_addr        = guest_phys,
+               .memory_size            = size,
+               .userspace_addr         = (unsigned long)userspace_addr,
        };
 
-       ret = ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem, 1);
+       ret = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
        if (ret < 0)
                die_perror("KVM_SET_USER_MEMORY_REGION ioctl");
+}
 
-       if (!kvm__supports_extension(self, KVM_CAP_SET_TSS_ADDR))
-               die("KVM_CAP_SET_TSS_ADDR is not supported");
+/*
+ * Allocating RAM size bigger than 4GB requires us to leave a gap
+ * in the RAM which is used for PCI MMIO, hotplug, and unconfigured
+ * devices (see documentation of e820_setup_gap() for details).
+ *
+ * If we're required to initialize RAM bigger than 4GB, we will create
+ * a gap between 0xe0000000 and 0x100000000 in the guest physical address space.
+ */
 
-       ret = ioctl(self->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000);
-       if (ret < 0)
-               die_perror("KVM_SET_TSS_ADDR ioctl");
+void kvm__init_ram(struct kvm *kvm)
+{
+       u64     phys_start, phys_size;
+       void    *host_mem;
 
-       self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
-       if (self->vcpu_fd < 0)
-               die_perror("KVM_CREATE_VCPU ioctl");
+       if (kvm->ram_size < KVM_32BIT_GAP_START) {
+               /* Use a single block of RAM for 32bit RAM */
 
-       mmap_size = ioctl(self->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
-       if (mmap_size < 0)
-               die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
+               phys_start = 0;
+               phys_size  = kvm->ram_size;
+               host_mem   = kvm->ram_start;
 
-       self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
-       if (self->kvm_run == MAP_FAILED)
-               die("unable to mmap vcpu fd");
+               kvm_register_mem_slot(kvm, 0, 0, kvm->ram_size, kvm->ram_start);
+       } else {
+               /* First RAM range from zero to the PCI gap: */
 
-       return self;
+               phys_start = 0;
+               phys_size  = KVM_32BIT_GAP_START;
+               host_mem   = kvm->ram_start;
+
+               kvm_register_mem_slot(kvm, 0, phys_start, phys_size, host_mem);
+
+               /* Second RAM range from 4GB to the end of RAM: */
+
+               phys_start = 0x100000000ULL;
+               phys_size  = kvm->ram_size - phys_size;
+               host_mem   = kvm->ram_start + phys_start;
+
+               kvm_register_mem_slot(kvm, 1, phys_start, phys_size, host_mem);
+       }
 }
 
-void kvm__enable_singlestep(struct kvm *self)
+int kvm__max_cpus(struct kvm *kvm)
 {
-       struct kvm_guest_debug debug = {
-               .control        = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
-       };
+       int ret;
+
+       ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);
+       if (ret < 0)
+               die_perror("KVM_CAP_NR_VCPUS");
+
+       return ret;
+}
+
+struct kvm *kvm__init(const char *kvm_dev, unsigned long ram_size)
+{
+       struct kvm_pit_config pit_config = { .flags = 0, };
+       struct kvm *kvm;
+       int ret;
+
+       if (!kvm__cpu_supports_vm())
+               die("Your CPU does not support hardware virtualization");
+
+       kvm = kvm__new();
+
+       kvm->sys_fd = open(kvm_dev, O_RDWR);
+       if (kvm->sys_fd < 0) {
+               if (errno == ENOENT)
+                       die("'%s' not found. Please make sure your kernel has CONFIG_KVM enabled and that the KVM modules are loaded.", kvm_dev);
+               if (errno == ENODEV)
+                       die("'%s' KVM driver not available.\n  # (If the KVM module is loaded then 'dmesg' may offer further clues about the failure.)", kvm_dev);
+
+               fprintf(stderr, "  Fatal, could not open %s: ", kvm_dev);
+               perror(NULL);
+               exit(1);
+       }
+
+       ret = ioctl(kvm->sys_fd, KVM_GET_API_VERSION, 0);
+       if (ret != KVM_API_VERSION)
+               die_perror("KVM_API_VERSION ioctl");
+
+       kvm->vm_fd = ioctl(kvm->sys_fd, KVM_CREATE_VM, 0);
+       if (kvm->vm_fd < 0)
+               die_perror("KVM_CREATE_VM ioctl");
+
+       if (kvm__check_extensions(kvm))
+               die("A required KVM extention is not supported by OS");
+
+       ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000);
+       if (ret < 0)
+               die_perror("KVM_SET_TSS_ADDR ioctl");
+
+       ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config);
+       if (ret < 0)
+               die_perror("KVM_CREATE_PIT2 ioctl");
+
+       kvm->ram_size           = ram_size;
+
+       if (kvm->ram_size < KVM_32BIT_GAP_START) {
+               kvm->ram_start = mmap(NULL, ram_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
+       } else {
+               kvm->ram_start = mmap(NULL, ram_size + KVM_32BIT_GAP_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
+               if (kvm->ram_start != MAP_FAILED) {
+                       /*
+                        * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that
+                        * if we accidentally write to it, we will know.
+                        */
+                       mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE);
+               }
+       }
+       if (kvm->ram_start == MAP_FAILED)
+               die("out of memory");
+
+       ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
+       if (ret < 0)
+               die_perror("KVM_CREATE_IRQCHIP ioctl");
 
-       if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
-               warning("KVM_SET_GUEST_DEBUG failed");
+       return kvm;
 }
 
 #define BOOT_LOADER_SELECTOR   0x1000
 #define BOOT_LOADER_IP         0x0000
 #define BOOT_LOADER_SP         0x8000
+#define BOOT_CMDLINE_OFFSET    0x20000
 
-static int load_flat_binary(struct kvm *self, int fd)
+#define BOOT_PROTOCOL_REQUIRED 0x206
+#define LOAD_HIGH              0x01
+
+static int load_flat_binary(struct kvm *kvm, int fd)
 {
        void *p;
        int nr;
@@ -173,33 +304,27 @@ static int load_flat_binary(struct kvm *self, int fd)
        if (lseek(fd, 0, SEEK_SET) < 0)
                die_perror("lseek");
 
-       p = guest_real_to_host(self, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
+       p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
 
        while ((nr = read(fd, p, 65536)) > 0)
                p += nr;
 
-       self->boot_selector     = BOOT_LOADER_SELECTOR;
-       self->boot_ip           = BOOT_LOADER_IP;
-       self->boot_sp           = BOOT_LOADER_SP;
+       kvm->boot_selector      = BOOT_LOADER_SELECTOR;
+       kvm->boot_ip            = BOOT_LOADER_IP;
+       kvm->boot_sp            = BOOT_LOADER_SP;
 
        return true;
 }
 
-/*
- * The protected mode kernel part of a modern bzImage is loaded at 1 MB by
- * default.
- */
-#define BZ_KERNEL_START                        0x100000UL
-
 static const char *BZIMAGE_MAGIC       = "HdrS";
 
-#define BZ_DEFAULT_SETUP_SECTS         4
-
-static bool load_bzimage(struct kvm *self, int fd, const char *kernel_cmdline)
+static bool load_bzimage(struct kvm *kvm, int fd_kernel,
+                       int fd_initrd, const char *kernel_cmdline)
 {
+       struct boot_params *kern_boot;
        unsigned long setup_sects;
        struct boot_params boot;
-       struct ivt_entry real_mode_irq;
+       size_t cmdline_size;
        ssize_t setup_size;
        void *p;
        int nr;
@@ -209,15 +334,19 @@ static bool load_bzimage(struct kvm *self, int fd, const char *kernel_cmdline)
         * memory layout.
         */
 
-       if (lseek(fd, 0, SEEK_SET) < 0)
+       if (lseek(fd_kernel, 0, SEEK_SET) < 0)
                die_perror("lseek");
 
-       read(fd, &boot, sizeof(boot));
+       if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot))
+               return false;
 
-        if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC)) != 0)
+       if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC)))
                return false;
 
-       if (lseek(fd, 0, SEEK_SET) < 0)
+       if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED)
+               die("Too old kernel");
+
+       if (lseek(fd_kernel, 0, SEEK_SET) < 0)
                die_perror("lseek");
 
        if (!boot.hdr.setup_sects)
@@ -225,302 +354,202 @@ static bool load_bzimage(struct kvm *self, int fd, const char *kernel_cmdline)
        setup_sects = boot.hdr.setup_sects + 1;
 
        setup_size = setup_sects << 9;
-       p = guest_real_to_host(self, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
+       p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
 
-       if (read(fd, p, setup_size) != setup_size)
+       /* copy setup.bin to mem */
+       if (read(fd_kernel, p, setup_size) != setup_size)
                die_perror("read");
 
-       p = guest_flat_to_host(self, BZ_KERNEL_START);
+       /* copy vmlinux.bin to BZ_KERNEL_START */
+       p = guest_flat_to_host(kvm, BZ_KERNEL_START);
 
-       while ((nr = read(fd, p, 65536)) > 0)
+       while ((nr = read(fd_kernel, p, 65536)) > 0)
                p += nr;
 
-       self->boot_selector     = BOOT_LOADER_SELECTOR;
+       p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET);
+       if (kernel_cmdline) {
+               cmdline_size = strlen(kernel_cmdline) + 1;
+               if (cmdline_size > boot.hdr.cmdline_size)
+                       cmdline_size = boot.hdr.cmdline_size;
+
+               memset(p, 0, boot.hdr.cmdline_size);
+               memcpy(p, kernel_cmdline, cmdline_size - 1);
+       }
+
+       kern_boot       = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00);
+
+       kern_boot->hdr.cmd_line_ptr     = BOOT_CMDLINE_OFFSET;
+       kern_boot->hdr.type_of_loader   = 0xff;
+       kern_boot->hdr.heap_end_ptr     = 0xfe00;
+       kern_boot->hdr.loadflags        |= CAN_USE_HEAP;
+
        /*
-        * The real-mode setup code starts at offset 0x200 of a bzImage. See
-        * Documentation/x86/boot.txt for details.
+        * Read initrd image into guest memory
         */
-       self->boot_ip           = BOOT_LOADER_IP + 0x200;
-       self->boot_sp           = BOOT_LOADER_SP;
+       if (fd_initrd >= 0) {
+               struct stat initrd_stat;
+               unsigned long addr;
+
+               if (fstat(fd_initrd, &initrd_stat))
+                       die_perror("fstat");
+
+               addr = boot.hdr.initrd_addr_max & ~0xfffff;
+               for (;;) {
+                       if (addr < BZ_KERNEL_START)
+                               die("Not enough memory for initrd");
+                       else if (addr < (kvm->ram_size - initrd_stat.st_size))
+                               break;
+                       addr -= 0x100000;
+               }
+
+               p = guest_flat_to_host(kvm, addr);
+               nr = read(fd_initrd, p, initrd_stat.st_size);
+               if (nr != initrd_stat.st_size)
+                       die("Failed to read initrd");
+
+               kern_boot->hdr.ramdisk_image    = addr;
+               kern_boot->hdr.ramdisk_size     = initrd_stat.st_size;
+       }
 
+       kvm->boot_selector      = BOOT_LOADER_SELECTOR;
        /*
-        * Setup a *fake* real mode IVT, it has only one real
-        * hadler which does just iret
+        * The real-mode setup code starts at offset 0x200 of a bzImage. See
+        * Documentation/x86/boot.txt for details.
         */
-       real_mode_irq = (struct ivt_entry) {
-               .segment        = 0,
-               .offset         = 0x400 + 1,
-       };
-       ivt_set_all(real_mode_irq);
-       p = guest_flat_to_host(self, 0);
-       ivt_copy_table(p, IVT_VECTORS * sizeof(real_mode_irq));
-       p += IVT_VECTORS * sizeof(real_mode_irq) + 1;
-       *(char *)p = 0xcf; /* iret here */
+       kvm->boot_ip            = BOOT_LOADER_IP + 0x200;
+       kvm->boot_sp            = BOOT_LOADER_SP;
 
        return true;
 }
 
 bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename,
-                       const char *kernel_cmdline)
+               const char *initrd_filename, const char *kernel_cmdline)
 {
        bool ret;
-       int fd;
+       int fd_kernel = -1, fd_initrd = -1;
 
-       fd = open(kernel_filename, O_RDONLY);
-       if (fd < 0)
-               die("unable to open kernel");
+       fd_kernel = open(kernel_filename, O_RDONLY);
+       if (fd_kernel < 0)
+               die("Unable to open kernel %s", kernel_filename);
+
+       if (initrd_filename) {
+               fd_initrd = open(initrd_filename, O_RDONLY);
+               if (fd_initrd < 0)
+                       die("Unable to open initrd %s", initrd_filename);
+       }
+
+       ret = load_bzimage(kvm, fd_kernel, fd_initrd, kernel_cmdline);
+
+       if (initrd_filename)
+               close(fd_initrd);
 
-       ret = load_bzimage(kvm, fd, kernel_cmdline);
        if (ret)
                goto found_kernel;
 
-       ret = load_flat_binary(kvm, fd);
+       pr_warning("%s is not a bzImage. Trying to load it as a flat binary...", kernel_filename);
+
+       ret = load_flat_binary(kvm, fd_kernel);
        if (ret)
                goto found_kernel;
 
+       close(fd_kernel);
+
        die("%s is not a valid bzImage or flat binary", kernel_filename);
 
 found_kernel:
+       close(fd_kernel);
+
        return ret;
 }
 
-static inline uint64_t ip_flat_to_real(struct kvm *self, uint64_t ip)
+/**
+ * kvm__setup_bios - inject BIOS into guest system memory
+ * @kvm - guest system descriptor
+ *
+ * This function is the main routine where we poke guest memory
+ * and install the BIOS there.
+ */
+void kvm__setup_bios(struct kvm *kvm)
 {
-       uint64_t cs = self->sregs.cs.selector;
+       /* standard minimal configuration */
+       setup_bios(kvm);
 
-       return ip - (cs << 4);
-}
+       /* FIXME: SMP, ACPI and friends here */
 
-static inline uint64_t ip_real_to_flat(struct kvm *self, uint64_t ip)
-{
-       uint64_t cs = self->sregs.cs.selector;
-
-       return ip + (cs << 4);
+       /* MP table */
+       mptable_setup(kvm, kvm->nrcpus);
 }
 
-static inline uint32_t selector_to_base(uint16_t selector)
-{
-       /*
-        * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
-        */
-       return (uint32_t)selector * 16;
-}
+#define TIMER_INTERVAL_NS 1000000      /* 1 msec */
 
-void kvm__reset_vcpu(struct kvm *self)
+/*
+ * This function sets up a timer that's used to inject interrupts from the
+ * userspace hypervisor into the guest at periodic intervals. Please note
+ * that the clock interrupt, for example, is not handled here.
+ */
+void kvm__start_timer(struct kvm *kvm)
 {
-       self->sregs = (struct kvm_sregs) {
-               .cr0            = 0x60000010ULL,
-               .cs             = (struct kvm_segment) {
-                       .selector       = self->boot_selector,
-                       .base           = selector_to_base(self->boot_selector),
-                       .limit          = 0xffffU,
-                       .type           = 0x0bU,
-                       .present        = 1,
-                       .dpl            = 0x03,
-                       .s              = 1,
-               },
-               .ss             = (struct kvm_segment) {
-                       .selector       = self->boot_selector,
-                       .base           = selector_to_base(self->boot_selector),
-                       .limit          = 0xffffU,
-                       .type           = 0x03U,
-                       .present        = 1,
-                       .dpl            = 0x03,
-                       .s              = 1,
-               },
-               .ds             = (struct kvm_segment) {
-                       .selector       = self->boot_selector,
-                       .base           = selector_to_base(self->boot_selector),
-                       .limit          = 0xffffU,
-                       .type           = 0x03U,
-                       .present        = 1,
-                       .dpl            = 0x03,
-                       .s              = 1,
-               },
-               .es             = (struct kvm_segment) {
-                       .selector       = self->boot_selector,
-                       .base           = selector_to_base(self->boot_selector),
-                       .limit          = 0xffffU,
-                       .type           = 0x03U,
-                       .present        = 1,
-                       .dpl            = 0x03,
-                       .s              = 1,
-               },
-               .fs             = (struct kvm_segment) {
-                       .selector       = self->boot_selector,
-                       .base           = selector_to_base(self->boot_selector),
-                       .limit          = 0xffffU,
-                       .type           = 0x03U,
-                       .present        = 1,
-                       .dpl            = 0x03,
-                       .s              = 1,
-               },
-               .gs             = (struct kvm_segment) {
-                       .selector       = self->boot_selector,
-                       .base           = selector_to_base(self->boot_selector),
-                       .limit          = 0xffffU,
-                       .type           = 0x03U,
-                       .present        = 1,
-                       .dpl            = 0x03,
-                       .s              = 1,
-               },
-               .tr             = (struct kvm_segment) {
-                       .limit          = 0xffffU,
-                       .present        = 1,
-                       .type           = 0x03U,
-               },
-               .ldt            = (struct kvm_segment) {
-                       .limit          = 0xffffU,
-                       .present        = 1,
-                       .type           = 0x02U,
-               },
-               .gdt            = (struct kvm_dtable) {
-                       .limit          = 0xffffU,
-               },
-               .idt            = (struct kvm_dtable) {
-                       .limit          = 0xffffU,
-               },
-       };
+       struct itimerspec its;
+       struct sigevent sev;
 
-       if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0)
-               die_perror("KVM_SET_SREGS failed");
+       memset(&sev, 0, sizeof(struct sigevent));
+       sev.sigev_value.sival_int       = 0;
+       sev.sigev_notify                = SIGEV_SIGNAL;
+       sev.sigev_signo                 = SIGALRM;
 
-       self->regs = (struct kvm_regs) {
-               /* We start the guest in 16-bit real mode  */
-               .rflags         = 0x0000000000000002ULL,
+       if (timer_create(CLOCK_REALTIME, &sev, &kvm->timerid) < 0)
+               die("timer_create()");
 
-               .rip            = self->boot_ip,
-               .rsp            = self->boot_sp,
-               .rbp            = self->boot_sp,
-       };
-
-       if (self->regs.rip > USHRT_MAX)
-               die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) self->regs.rip);
-
-       if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0)
-               die_perror("KVM_SET_REGS failed");
+       its.it_value.tv_sec             = TIMER_INTERVAL_NS / 1000000000;
+       its.it_value.tv_nsec            = TIMER_INTERVAL_NS % 1000000000;
+       its.it_interval.tv_sec          = its.it_value.tv_sec;
+       its.it_interval.tv_nsec         = its.it_value.tv_nsec;
 
+       if (timer_settime(kvm->timerid, 0, &its, NULL) < 0)
+               die("timer_settime()");
 }
 
-void kvm__run(struct kvm *self)
+void kvm__stop_timer(struct kvm *kvm)
 {
-       if (ioctl(self->vcpu_fd, KVM_RUN, 0) < 0)
-               die_perror("KVM_RUN failed");
-}
+       if (kvm->timerid)
+               if (timer_delete(kvm->timerid) < 0)
+                       die("timer_delete()");
 
-static void kvm__emulate_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
-{
-       fprintf(stderr, "%s port=%x, size=%d, count=%" PRIu32 "\n", __func__, port, size, count);
+       kvm->timerid = 0;
 }
 
-static void kvm__emulate_io_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
+void kvm__irq_line(struct kvm *kvm, int irq, int level)
 {
-       fprintf(stderr, "%s port=%x, size=%d, count=%" PRIu32 "\n", __func__, port, size, count);
-}
-
-void kvm__emulate_io(struct kvm *self, uint16_t port, void *data, int direction, int size, uint32_t count)
-{
-       if (direction == KVM_EXIT_IO_IN)
-               kvm__emulate_io_in(self, port, data, size, count);
-       else
-               kvm__emulate_io_out(self, port, data, size, count);
-}
+       struct kvm_irq_level irq_level;
 
-static void print_segment(const char *name, struct kvm_segment *seg)
-{
-       printf(" %s       %04" PRIx16 "      %016" PRIx64 "  %08" PRIx32 "  %02" PRIx8 "    %x %x   %x  %x %x %x %x\n",
-               name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit,
-               (uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
-}
+       irq_level       = (struct kvm_irq_level) {
+               {
+                       .irq            = irq,
+               },
+               .level          = level,
+       };
 
-void kvm__show_registers(struct kvm *self)
-{
-       unsigned long cr0, cr2, cr3;
-       unsigned long cr4, cr8;
-       unsigned long rax, rbx, rcx;
-       unsigned long rdx, rsi, rdi;
-       unsigned long rbp,  r8,  r9;
-       unsigned long r10, r11, r12;
-       unsigned long r13, r14, r15;
-       unsigned long rip, rsp;
-       struct kvm_sregs sregs;
-       unsigned long rflags;
-       struct kvm_regs regs;
-       int i;
-
-       if (ioctl(self->vcpu_fd, KVM_GET_REGS, &regs) < 0)
-               die("KVM_GET_REGS failed");
-
-       rflags = regs.rflags;
-
-       rip = regs.rip; rsp = regs.rsp;
-       rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
-       rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
-       rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
-       r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
-       r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
-
-       printf("Registers:\n");
-       printf(" rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
-       printf(" rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
-       printf(" rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
-       printf(" rbp: %016lx   r8:  %016lx   r9:  %016lx\n", rbp, r8,  r9);
-       printf(" r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
-       printf(" r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);
-
-       if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
-               die("KVM_GET_REGS failed");
-
-       cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
-       cr4 = sregs.cr4; cr8 = sregs.cr8;
-
-       printf(" cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
-       printf(" cr4: %016lx   cr8: %016lx\n", cr4, cr8);
-       printf("Segment registers:\n");
-       printf(" register  selector  base              limit     type  p dpl db s l g avl\n");
-       print_segment("cs ", &sregs.cs);
-       print_segment("ss ", &sregs.ss);
-       print_segment("ds ", &sregs.ds);
-       print_segment("es ", &sregs.es);
-       print_segment("fs ", &sregs.fs);
-       print_segment("gs ", &sregs.gs);
-       print_segment("tr ", &sregs.tr);
-       print_segment("ldt", &sregs.ldt);
-       printf(" [ efer: %016lx  apic base: %016lx ]\n", (uint64_t) sregs.efer, (uint64_t) sregs.apic_base);
-       printf("Interrupt bitmap:\n");
-       printf(" ");
-       for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
-               printf("%016lx ", (uint64_t) sregs.interrupt_bitmap[i]);
-       printf("\n");
+       if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0)
+               die_perror("KVM_IRQ_LINE failed");
 }
 
-void kvm__show_code(struct kvm *self)
+void kvm__dump_mem(struct kvm *kvm, unsigned long addr, unsigned long size)
 {
-       unsigned int code_bytes = 64;
-       unsigned int code_prologue = code_bytes * 43 / 64;
-       unsigned int code_len = code_bytes;
-       unsigned char c;
-       unsigned int i;
-       uint8_t *ip;
-
-       if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0)
-               die("KVM_GET_REGS failed");
+       unsigned char *p;
+       unsigned long n;
 
-       if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
-               die("KVM_GET_SREGS failed");
+       size &= ~7; /* mod 8 */
+       if (!size)
+               return;
 
-       ip = guest_flat_to_host(self, ip_real_to_flat(self, self->regs.rip) - code_prologue);
+       p = guest_flat_to_host(kvm, addr);
 
-       printf("Code: ");
+       for (n = 0; n < size; n += 8) {
+               if (!host_ptr_in_ram(kvm, p + n))
+                       break;
 
-       for (i = 0; i < code_len; i++, ip++) {
-               c = *ip;
-
-               if (ip == guest_flat_to_host(self, ip_real_to_flat(self, self->regs.rip)))
-                       printf("<%02x> ", c);
-               else
-                       printf("%02x ", c);
+               printf("  0x%08lx: %02x %02x %02x %02x  %02x %02x %02x %02x\n",
+                       addr + n, p[n + 0], p[n + 1], p[n + 2], p[n + 3],
+                                 p[n + 4], p[n + 5], p[n + 6], p[n + 7]);
        }
-
-       printf("\n");
 }