5 #include <asm/bootparam.h>
22 * Compatibility code. Remove this when we move to tools/kvm.
/* Fallback definition used only when the included headers do not provide KVM_EXIT_INTERNAL_ERROR. */
24 #ifndef KVM_EXIT_INTERNAL_ERROR
25 # define KVM_EXIT_INTERNAL_ERROR 17
/*
 * Expands to a designated initializer that stores the stringified name of
 * an exit-reason constant at the index given by that constant's value.
 */
28 #define DEFINE_KVM_EXIT_REASON(reason) [reason] = #reason
/*
 * Table of human-readable exit-reason names, indexed by KVM_EXIT_* value.
 * Indices that are not listed below are left as NULL by the designated
 * initializers, so lookups must cope with NULL entries.
 */
30 const char *kvm_exit_reasons[] = {
31 DEFINE_KVM_EXIT_REASON(KVM_EXIT_UNKNOWN),
32 DEFINE_KVM_EXIT_REASON(KVM_EXIT_EXCEPTION),
33 DEFINE_KVM_EXIT_REASON(KVM_EXIT_IO),
34 DEFINE_KVM_EXIT_REASON(KVM_EXIT_HYPERCALL),
35 DEFINE_KVM_EXIT_REASON(KVM_EXIT_DEBUG),
36 DEFINE_KVM_EXIT_REASON(KVM_EXIT_HLT),
37 DEFINE_KVM_EXIT_REASON(KVM_EXIT_MMIO),
38 DEFINE_KVM_EXIT_REASON(KVM_EXIT_IRQ_WINDOW_OPEN),
39 DEFINE_KVM_EXIT_REASON(KVM_EXIT_SHUTDOWN),
40 DEFINE_KVM_EXIT_REASON(KVM_EXIT_FAIL_ENTRY),
41 DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTR),
42 DEFINE_KVM_EXIT_REASON(KVM_EXIT_SET_TPR),
43 DEFINE_KVM_EXIT_REASON(KVM_EXIT_TPR_ACCESS),
44 DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_SIEIC),
45 DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_RESET),
46 DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR),
47 DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI),
48 DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR),
/*
 * Translate a real-mode selector:offset pair into a flat address.
 *
 * The selector is widened to 32 bits and shifted left by four before the
 * offset is added, so the largest result is 0x10ffef.
 */
static inline uint32_t segment_to_flat(uint16_t selector, uint16_t offset)
{
	uint32_t segment_base = (uint32_t)selector << 4;

	return segment_base + (uint32_t)offset;
}
56 static inline void *guest_flat_to_host(struct kvm *self, unsigned long offset)
58 return self->ram_start + offset;
/*
 * Translate a real-mode selector:offset pair into a host pointer by first
 * flattening it with segment_to_flat() and then mapping the flat address
 * with guest_flat_to_host().
 */
static inline void *guest_real_to_host(struct kvm *self, uint16_t selector, uint16_t offset)
{
	return guest_flat_to_host(self, segment_to_flat(selector, offset));
}
/*
 * Query the KVM system file descriptor (self->sys_fd) for a capability
 * using the KVM_CHECK_EXTENSION ioctl.
 */
68 static bool kvm__supports_extension(struct kvm *self, unsigned int extension)
/* NOTE(review): how 'ret' is converted into the bool result is not visible in this excerpt. */
72 ret = ioctl(self->sys_fd, KVM_CHECK_EXTENSION, extension);
/* Allocate a zero-initialized struct kvm. */
79 static struct kvm *kvm__new(void)
/* calloc() zero-fills the structure, so every field starts out as 0/NULL. */
/* NOTE(review): allocation-failure handling is not visible in this excerpt. */
81 struct kvm *self = calloc(1, sizeof *self);
/*
 * Create and set up a virtual machine: open /dev/kvm, check the API
 * version, create the VM, allocate and register 64 MiB of guest RAM at
 * guest physical address 0, set the TSS address, create vcpu 0 and map its
 * kvm_run area. Every failure shown here terminates the process through
 * die() or die_perror().
 */
89 struct kvm *kvm__init(void)
91 struct kvm_userspace_memory_region mem;
/* Open the KVM control device; its fd is used for system-level ioctls. */
99 self->sys_fd = open("/dev/kvm", O_RDWR);
100 if (self->sys_fd < 0)
/* Refuse to run against a kernel whose API version differs from the headers. */
103 ret = ioctl(self->sys_fd, KVM_GET_API_VERSION, 0);
104 if (ret != KVM_API_VERSION)
/*
 * NOTE(review): a version mismatch is not an errno-reporting failure, so the
 * errno text appended by die_perror() may be unrelated - confirm.
 */
105 die_perror("KVM_API_VERSION ioctl");
/* Create the VM; VM-level ioctls go through vm_fd from here on. */
107 self->vm_fd = ioctl(self->sys_fd, KVM_CREATE_VM, 0);
109 die_perror("KVM_CREATE_VM ioctl");
/* User-allocated guest memory is mandatory for the setup below. */
111 if (!kvm__supports_extension(self, KVM_CAP_USER_MEMORY))
112 die("KVM_CAP_USER_MEMORY is not supported");
/* Guest RAM size is fixed at 64 MiB. */
114 self->ram_size = 64UL * 1024UL * 1024UL;
/* Allocate the guest RAM aligned to the host page size. */
116 page_size = sysconf(_SC_PAGESIZE);
117 if (posix_memalign(&self->ram_start, page_size, self->ram_size) != 0)
118 die("out of memory");
/* Describe the allocation as guest physical memory starting at address 0. */
120 mem = (struct kvm_userspace_memory_region) {
122 .guest_phys_addr = 0x0UL,
123 .memory_size = self->ram_size,
124 .userspace_addr = (unsigned long) self->ram_start,
/*
 * NOTE(review): the trailing '1' is passed as an extra variadic argument;
 * this ioctl is expected to take only the pointer - confirm and drop it.
 */
127 ret = ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem, 1);
129 die_perror("KVM_SET_USER_MEMORY_REGION ioctl");
/* The TSS address can only be set when the capability is present. */
131 if (!kvm__supports_extension(self, KVM_CAP_SET_TSS_ADDR))
132 die("KVM_CAP_SET_TSS_ADDR is not supported");
134 ret = ioctl(self->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000);
136 die_perror("KVM_SET_TSS_ADDR ioctl");
/* Create the single virtual CPU (id 0). */
138 self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
139 if (self->vcpu_fd < 0)
140 die_perror("KVM_CREATE_VCPU ioctl");
/* The size of the shared kvm_run area is queried on the system fd ... */
142 mmap_size = ioctl(self->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
144 die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
/* ... and the area itself is mapped from the vcpu fd. */
146 self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
147 if (self->kvm_run == MAP_FAILED)
148 die("unable to mmap vcpu fd");
153 void kvm__enable_singlestep(struct kvm *self)
155 struct kvm_guest_debug debug = {
156 .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
159 if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
160 warning("KVM_SET_GUEST_DEBUG failed");
/*
 * Real-mode selector:offset at which kernel images are placed in guest
 * memory (see the guest_real_to_host() calls in the loaders), and the
 * initial stack pointer value the loaders store in boot_sp.
 */
163 #define BOOT_LOADER_SELECTOR 0x0100
164 #define BOOT_LOADER_IP 0x0000
165 #define BOOT_LOADER_SP 0x8000
/*
 * Load 'fd' as a flat binary: the file is read from its beginning into
 * guest memory at BOOT_LOADER_SELECTOR:BOOT_LOADER_IP, and the boot
 * selector, instruction pointer and stack pointer are recorded in 'self'.
 */
167 static int load_flat_binary(struct kvm *self, int fd)
/* Rewind, since an earlier loader attempt may have moved the file offset. */
172 if (lseek(fd, 0, SEEK_SET) < 0)
175 p = guest_real_to_host(self, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
/*
 * Read in chunks of up to 64 KiB until EOF or an error.
 * NOTE(review): nothing visible here bounds the total against
 * self->ram_size - confirm an oversized file cannot overrun guest RAM.
 */
177 while ((nr = read(fd, p, 65536)) > 0)
/* Entry state that kvm__reset_vcpu() later loads into the vcpu registers. */
180 self->boot_selector = BOOT_LOADER_SELECTOR;
181 self->boot_ip = BOOT_LOADER_IP;
182 self->boot_sp = BOOT_LOADER_SP;
188 * The protected mode kernel part of a modern bzImage is loaded at 1 MB by
/* Flat guest address (1 MiB) where load_bzimage() places the protected-mode kernel. */
191 #define BZ_KERNEL_START 0x100000UL
/* Signature that load_bzimage() compares against boot.hdr.header. */
193 static const char *BZIMAGE_MAGIC = "HdrS";
/* Setup sector count substituted by load_bzimage() for a zero setup_sects. */
195 #define BZ_DEFAULT_SETUP_SECTS 4
197 static bool load_bzimage(struct kvm *self, int fd)
199 unsigned long setup_sects;
200 struct boot_params boot;
206 * See Documentation/x86/boot.txt for details no bzImage on-disk and
210 if (lseek(fd, 0, SEEK_SET) < 0)
213 read(fd, &boot, sizeof(boot));
215 if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC)) != 0)
218 if (lseek(fd, 0, SEEK_SET) < 0)
221 setup_sects = boot.hdr.setup_sects + 1;
222 if (setup_sects == 0)
223 setup_sects = BZ_DEFAULT_SETUP_SECTS;
225 setup_size = setup_sects << 9;
226 p = guest_real_to_host(self, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
228 if (read(fd, p, setup_size) != setup_size)
231 p = guest_flat_to_host(self, BZ_KERNEL_START);
233 while ((nr = read(fd, p, 65536)) > 0)
236 self->boot_selector = BOOT_LOADER_SELECTOR;
238 * The real-mode setup code starts at offset 0x200 of a bzImage. See
239 * Documentation/x86/boot.txt for details.
241 self->boot_ip = BOOT_LOADER_IP + 0x200;
242 self->boot_sp = BOOT_LOADER_SP;
/*
 * Load the kernel image named by 'kernel_filename' into guest memory.
 * The file is tried as a bzImage and as a flat binary; if it cannot be
 * opened or is not accepted as either format the process terminates via
 * die().
 */
247 bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename)
252 fd = open(kernel_filename, O_RDONLY);
254 die("unable to open kernel");
/* The bzImage loader is attempted first. */
256 ret = load_bzimage(kvm, fd);
/* NOTE(review): presumably reached only when load_bzimage() returned false - the guarding condition is not visible here. */
260 ret = load_flat_binary(kvm, fd);
/* NOTE(review): no close(fd) is visible in this excerpt - confirm the descriptor is not leaked. */
264 die("%s is not a valid bzImage or flat binary", kernel_filename);
270 static inline uint64_t ip_flat_to_real(struct kvm *self, uint64_t ip)
272 uint64_t cs = self->sregs.cs.selector;
274 return ip - (cs << 4);
277 static inline uint64_t ip_real_to_flat(struct kvm *self, uint64_t ip)
279 uint64_t cs = self->sregs.cs.selector;
281 return ip + (cs << 4);
/*
 * Compute the segment base that goes with a real-mode selector.
 */
static inline uint32_t selector_to_base(uint16_t selector)
{
	/*
	 * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
	 */
	uint32_t base = (uint32_t)selector << 4;

	return base;
}
/*
 * Program the vcpu with its initial state: special registers first
 * (control register 0, segment registers, descriptor tables), then the
 * general-purpose registers with the entry point and stack chosen by the
 * kernel loader. Any ioctl failure, or an entry point that does not fit in
 * 16 bits, terminates the process.
 */
292 void kvm__reset_vcpu(struct kvm *self)
294 self->sregs = (struct kvm_sregs) {
/* Bit 0 (protection enable) is clear in this value. */
295 .cr0 = 0x60000010ULL,
/* All six segment registers use the boot selector with the matching base. */
296 .cs = (struct kvm_segment) {
297 .selector = self->boot_selector,
298 .base = selector_to_base(self->boot_selector),
305 .ss = (struct kvm_segment) {
306 .selector = self->boot_selector,
307 .base = selector_to_base(self->boot_selector),
314 .ds = (struct kvm_segment) {
315 .selector = self->boot_selector,
316 .base = selector_to_base(self->boot_selector),
323 .es = (struct kvm_segment) {
324 .selector = self->boot_selector,
325 .base = selector_to_base(self->boot_selector),
332 .fs = (struct kvm_segment) {
333 .selector = self->boot_selector,
334 .base = selector_to_base(self->boot_selector),
341 .gs = (struct kvm_segment) {
342 .selector = self->boot_selector,
343 .base = selector_to_base(self->boot_selector),
/* NOTE(review): the field values for tr/ldt/gdt/idt are not visible in this excerpt. */
350 .tr = (struct kvm_segment) {
355 .ldt = (struct kvm_segment) {
360 .gdt = (struct kvm_dtable) {
363 .idt = (struct kvm_dtable) {
/* Push the special registers to the vcpu. */
368 if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0)
369 die_perror("KVM_SET_SREGS failed");
371 self->regs = (struct kvm_regs) {
372 /* We start the guest in 16-bit real mode */
373 .rflags = 0x0000000000000002ULL,
/* Entry point and stack come from the loader; rbp starts equal to rsp. */
375 .rip = self->boot_ip,
376 .rsp = self->boot_sp,
377 .rbp = self->boot_sp,
/* The instruction pointer must fit in 16 bits. */
380 if (self->regs.rip > USHRT_MAX)
381 die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) self->regs.rip);
/* Push the general-purpose registers to the vcpu. */
383 if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0)
384 die_perror("KVM_SET_REGS failed");
388 void kvm__run(struct kvm *self)
390 if (ioctl(self->vcpu_fd, KVM_RUN, 0) < 0)
391 die_perror("KVM_RUN failed");
/*
 * Handle a guest port write. Nothing is emulated: the access is only
 * logged to stderr ('self' and 'data' are unused).
 */
static void kvm__emulate_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
{
	fprintf(stderr,
		"%s port=%x, size=%d, count=%" PRIu32 "\n",
		__func__,
		port,
		size,
		count);
}
/*
 * Handle a guest port read. Nothing is emulated and 'data' is left
 * untouched: the access is only logged to stderr ('self' is unused).
 */
static void kvm__emulate_io_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
{
	fprintf(stderr,
		"%s port=%x, size=%d, count=%" PRIu32 "\n",
		__func__,
		port,
		size,
		count);
}
404 void kvm__emulate_io(struct kvm *self, uint16_t port, void *data, int direction, int size, uint32_t count)
406 if (direction == KVM_EXIT_IO_IN)
407 kvm__emulate_io_in(self, port, data, size, count);
409 kvm__emulate_io_out(self, port, data, size, count);
412 static void print_segment(const char *name, struct kvm_segment *seg)
414 printf(" %s %04" PRIx16 " %016" PRIx64 " %08" PRIx32 " %02" PRIx8 " %x %x %x %x %x %x %x\n",
415 name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit,
416 (uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
419 void kvm__show_registers(struct kvm *self)
421 unsigned long cr0, cr2, cr3;
422 unsigned long cr4, cr8;
423 unsigned long rax, rbx, rcx;
424 unsigned long rdx, rsi, rdi;
425 unsigned long rbp, r8, r9;
426 unsigned long r10, r11, r12;
427 unsigned long r13, r14, r15;
428 unsigned long rip, rsp;
429 struct kvm_sregs sregs;
430 unsigned long rflags;
431 struct kvm_regs regs;
434 if (ioctl(self->vcpu_fd, KVM_GET_REGS, ®s) < 0)
435 die("KVM_GET_REGS failed");
437 rflags = regs.rflags;
439 rip = regs.rip; rsp = regs.rsp;
440 rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
441 rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
442 rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9;
443 r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
444 r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
446 printf("Registers:\n");
447 printf(" rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
448 printf(" rax: %016lx ebx: %016lx ecx: %016lx\n", rax, rbx, rcx);
449 printf(" rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi);
450 printf(" rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9);
451 printf(" r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12);
452 printf(" r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15);
454 if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
455 die("KVM_GET_REGS failed");
457 cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
458 cr4 = sregs.cr4; cr8 = sregs.cr8;
460 printf(" cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3);
461 printf(" cr4: %016lx cr8: %016lx\n", cr4, cr8);
462 printf("Segment registers:\n");
463 printf(" register selector base limit type p dpl db s l g avl\n");
464 print_segment("cs ", &sregs.cs);
465 print_segment("ss ", &sregs.ss);
466 print_segment("ds ", &sregs.ds);
467 print_segment("es ", &sregs.es);
468 print_segment("fs ", &sregs.fs);
469 print_segment("gs ", &sregs.gs);
470 print_segment("tr ", &sregs.tr);
471 print_segment("ldt", &sregs.ldt);
472 printf(" [ efer: %016lx apic base: %016lx ]\n", (uint64_t) sregs.efer, (uint64_t) sregs.apic_base);
473 printf("Interrupt bitmap:\n");
475 for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
476 printf("%016lx ", (uint64_t) sregs.interrupt_bitmap[i]);
480 void kvm__show_code(struct kvm *self)
482 unsigned int code_bytes = 64;
483 unsigned int code_prologue = code_bytes * 43 / 64;
484 unsigned int code_len = code_bytes;
489 if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0)
490 die("KVM_GET_REGS failed");
492 if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
493 die("KVM_GET_SREGS failed");
495 ip = guest_flat_to_host(self, ip_real_to_flat(self, self->regs.rip) - code_prologue);
499 for (i = 0; i < code_len; i++, ip++) {
502 if (ip == guest_flat_to_host(self, ip_real_to_flat(self, self->regs.rip)))
503 printf("<%02x> ", c);