]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - tools/kvm/kvm.c
kvm: Setup stack for the kernel
[karo-tx-linux.git] / tools / kvm / kvm.c
1 #include "kvm/kvm.h"
2
3 #include <linux/kvm.h>
4
5 #include <asm/bootparam.h>
6
7 #include <sys/ioctl.h>
8 #include <inttypes.h>
9 #include <sys/mman.h>
10 #include <stdbool.h>
11 #include <limits.h>
12 #include <stdarg.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <unistd.h>
16 #include <stdio.h>
17 #include <fcntl.h>
18
19 #include "util.h"
20
21 /*
22  * Compatibility code. Remove this when we move to tools/kvm.
23  */
24 #ifndef KVM_EXIT_INTERNAL_ERROR
25 # define KVM_EXIT_INTERNAL_ERROR                17
26 #endif
27
28 #define DEFINE_KVM_EXIT_REASON(reason) [reason] = #reason
29
30 const char *kvm_exit_reasons[] = {
31         DEFINE_KVM_EXIT_REASON(KVM_EXIT_UNKNOWN),
32         DEFINE_KVM_EXIT_REASON(KVM_EXIT_EXCEPTION),
33         DEFINE_KVM_EXIT_REASON(KVM_EXIT_IO),
34         DEFINE_KVM_EXIT_REASON(KVM_EXIT_HYPERCALL),
35         DEFINE_KVM_EXIT_REASON(KVM_EXIT_DEBUG),
36         DEFINE_KVM_EXIT_REASON(KVM_EXIT_HLT),
37         DEFINE_KVM_EXIT_REASON(KVM_EXIT_MMIO),
38         DEFINE_KVM_EXIT_REASON(KVM_EXIT_IRQ_WINDOW_OPEN),
39         DEFINE_KVM_EXIT_REASON(KVM_EXIT_SHUTDOWN),
40         DEFINE_KVM_EXIT_REASON(KVM_EXIT_FAIL_ENTRY),
41         DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTR),
42         DEFINE_KVM_EXIT_REASON(KVM_EXIT_SET_TPR),
43         DEFINE_KVM_EXIT_REASON(KVM_EXIT_TPR_ACCESS),
44         DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_SIEIC),
45         DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_RESET),
46         DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR),
47         DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI),
48         DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR),
49 };
50
51 static inline void *guest_addr_to_host(struct kvm *self, unsigned long offset)
52 {
53         return self->ram_start + offset;
54 }
55
56 static bool kvm__supports_extension(struct kvm *self, unsigned int extension)
57 {
58         int ret;
59
60         ret = ioctl(self->sys_fd, KVM_CHECK_EXTENSION, extension);
61         if (ret < 0)
62                 return false;
63
64         return ret;
65 }
66
67 static struct kvm *kvm__new(void)
68 {
69         struct kvm *self = calloc(1, sizeof *self);
70
71         if (!self)
72                 die("out of memory");
73
74         return self;
75 }
76
77 struct kvm *kvm__init(void)
78 {
79         struct kvm_userspace_memory_region mem;
80         struct kvm *self;
81         long page_size;
82         int mmap_size;
83         int ret;
84
85         self = kvm__new();
86
87         self->sys_fd = open("/dev/kvm", O_RDWR);
88         if (self->sys_fd < 0)
89                 die_perror("open");
90
91         ret = ioctl(self->sys_fd, KVM_GET_API_VERSION, 0);
92         if (ret != KVM_API_VERSION)
93                 die_perror("KVM_API_VERSION ioctl");
94
95         self->vm_fd = ioctl(self->sys_fd, KVM_CREATE_VM, 0);
96         if (self->vm_fd < 0)
97                 die_perror("KVM_CREATE_VM ioctl");
98
99         if (!kvm__supports_extension(self, KVM_CAP_USER_MEMORY))
100                 die("KVM_CAP_USER_MEMORY is not supported");
101
102         self->ram_size          = 64UL * 1024UL * 1024UL;
103
104         page_size       = sysconf(_SC_PAGESIZE);
105         if (posix_memalign(&self->ram_start, page_size, self->ram_size) != 0)
106                 die("out of memory");
107
108         mem = (struct kvm_userspace_memory_region) {
109                 .slot                   = 0,
110                 .guest_phys_addr        = 0x0UL,
111                 .memory_size            = self->ram_size,
112                 .userspace_addr         = (unsigned long) self->ram_start,
113         };
114
115         ret = ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem, 1);
116         if (ret < 0)
117                 die_perror("KVM_SET_USER_MEMORY_REGION ioctl");
118
119         if (!kvm__supports_extension(self, KVM_CAP_SET_TSS_ADDR))
120                 die("KVM_CAP_SET_TSS_ADDR is not supported");
121
122         ret = ioctl(self->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000);
123         if (ret < 0)
124                 die_perror("KVM_SET_TSS_ADDR ioctl");
125
126         self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
127         if (self->vcpu_fd < 0)
128                 die_perror("KVM_CREATE_VCPU ioctl");
129
130         mmap_size = ioctl(self->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
131         if (mmap_size < 0)
132                 die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
133
134         self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
135         if (self->kvm_run == MAP_FAILED)
136                 die("unable to mmap vcpu fd");
137
138         return self;
139 }
140
141 void kvm__enable_singlestep(struct kvm *self)
142 {
143         struct kvm_guest_debug debug = {
144                 .control        = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
145         };
146
147         if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
148                 warning("KVM_SET_GUEST_DEBUG failed");
149 }
150
/*
 * Convert a real-mode selector:offset pair into a flat address, i.e.
 * selector * 16 + offset.
 */
static inline uint32_t segment_to_flat(uint16_t selector, uint16_t offset)
{
        uint32_t segment_base = (uint32_t) selector * 16u;

        return segment_base + offset;
}
155
156 #define BOOT_LOADER_SELECTOR    0x0100
157 #define BOOT_LOADER_IP          0x0000
158 #define BOOT_LOADER_SP          0x8000
159
160 static int load_flat_binary(struct kvm *self, int fd)
161 {
162         void *p;
163         int nr;
164
165         if (lseek(fd, 0, SEEK_SET) < 0)
166                 die_perror("lseek");
167
168         p = guest_addr_to_host(self, segment_to_flat(BOOT_LOADER_SELECTOR, BOOT_LOADER_IP));
169
170         while ((nr = read(fd, p, 65536)) > 0)
171                 p += nr;
172
173         self->boot_selector     = BOOT_LOADER_SELECTOR;
174         self->boot_ip           = BOOT_LOADER_IP;
175         self->boot_sp           = BOOT_LOADER_SP;
176
177         return true;
178 }
179
180 /*
181  * The protected mode kernel part of a modern bzImage is loaded at 1 MB by
182  * default.
183  */
184 #define BZ_KERNEL_START                 0x100000UL
185
186 static const char *BZIMAGE_MAGIC        = "HdrS";
187
188 #define BZ_DEFAULT_SETUP_SECTS          4
189
190 static bool load_bzimage(struct kvm *self, int fd)
191 {
192         unsigned long setup_sects;
193         struct boot_params boot;
194         ssize_t setup_size;
195         void *p;
196         int nr;
197
198         /*
199          * See Documentation/x86/boot.txt for details no bzImage on-disk and
200          * memory layout.
201          */
202
203         if (lseek(fd, 0, SEEK_SET) < 0)
204                 die_perror("lseek");
205
206         read(fd, &boot, sizeof(boot));
207
208         if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC)) != 0)
209                 return false;
210
211         if (lseek(fd, 0, SEEK_SET) < 0)
212                 die_perror("lseek");
213
214         setup_sects = boot.hdr.setup_sects + 1;
215         if (setup_sects == 0)
216                 setup_sects      = BZ_DEFAULT_SETUP_SECTS;
217
218         setup_size = setup_sects << 9;
219         p = guest_addr_to_host(self, segment_to_flat(BOOT_LOADER_SELECTOR, BOOT_LOADER_IP));
220
221         if (read(fd, p, setup_size) != setup_size)
222                 die_perror("read");
223
224         p = guest_addr_to_host(self, BZ_KERNEL_START);
225
226         while ((nr = read(fd, p, 65536)) > 0)
227                 p += nr;
228
229         self->boot_selector     = BOOT_LOADER_SELECTOR;
230         /*
231          * The real-mode setup code starts at offset 0x200 of a bzImage. See
232          * Documentation/x86/boot.txt for details.
233          */
234         self->boot_ip           = BOOT_LOADER_IP + 0x200;
235         self->boot_sp           = BOOT_LOADER_SP;
236
237         return true;
238 }
239
/*
 * Load the kernel image @kernel_filename into the guest RAM of @kvm.
 *
 * The file is tried as a bzImage first and as a flat binary second.
 *
 * Returns true on success. A file that cannot be opened, or that neither
 * loader accepts, is fatal.
 */
bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename)
{
        bool ret;
        int fd;

        fd = open(kernel_filename, O_RDONLY);
        if (fd < 0)
                die("unable to open kernel");

        ret = load_bzimage(kvm, fd) || load_flat_binary(kvm, fd);

        /* The image now lives in guest RAM; do not leak the descriptor. */
        close(fd);

        if (!ret)
                die("%s is not a valid bzImage or flat binary", kernel_filename);

        return ret;
}
262
263 static inline uint64_t ip_flat_to_real(struct kvm *self, uint64_t ip)
264 {
265         uint64_t cs = self->sregs.cs.selector;
266
267         return ip - (cs << 4);
268 }
269
270 static inline uint64_t ip_real_to_flat(struct kvm *self, uint64_t ip)
271 {
272         uint64_t cs = self->sregs.cs.selector;
273
274         return ip + (cs << 4);
275 }
276
/*
 * Compute the segment base that belongs to a real-mode selector.
 */
static inline uint32_t selector_to_base(uint16_t selector)
{
        /*
         * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
         */
        uint32_t base = (uint32_t) selector << 4;

        return base;
}
284
285 void kvm__reset_vcpu(struct kvm *self)
286 {
287         self->sregs = (struct kvm_sregs) {
288                 .cr0            = 0x60000010ULL,
289                 .cs             = (struct kvm_segment) {
290                         .selector       = self->boot_selector,
291                         .base           = selector_to_base(self->boot_selector),
292                         .limit          = 0xffffU,
293                         .type           = 0x0bU,
294                         .present        = 1,
295                         .dpl            = 0x03,
296                         .s              = 1,
297                 },
298                 .ss             = (struct kvm_segment) {
299                         .selector       = self->boot_selector,
300                         .base           = selector_to_base(self->boot_selector),
301                         .limit          = 0xffffU,
302                         .type           = 0x03U,
303                         .present        = 1,
304                         .dpl            = 0x03,
305                         .s              = 1,
306                 },
307                 .ds             = (struct kvm_segment) {
308                         .selector       = self->boot_selector,
309                         .base           = selector_to_base(self->boot_selector),
310                         .limit          = 0xffffU,
311                         .type           = 0x03U,
312                         .present        = 1,
313                         .dpl            = 0x03,
314                         .s              = 1,
315                 },
316                 .es             = (struct kvm_segment) {
317                         .selector       = self->boot_selector,
318                         .base           = selector_to_base(self->boot_selector),
319                         .limit          = 0xffffU,
320                         .type           = 0x03U,
321                         .present        = 1,
322                         .dpl            = 0x03,
323                         .s              = 1,
324                 },
325                 .fs             = (struct kvm_segment) {
326                         .selector       = self->boot_selector,
327                         .base           = selector_to_base(self->boot_selector),
328                         .limit          = 0xffffU,
329                         .type           = 0x03U,
330                         .present        = 1,
331                         .dpl            = 0x03,
332                         .s              = 1,
333                 },
334                 .gs             = (struct kvm_segment) {
335                         .selector       = self->boot_selector,
336                         .base           = selector_to_base(self->boot_selector),
337                         .limit          = 0xffffU,
338                         .type           = 0x03U,
339                         .present        = 1,
340                         .dpl            = 0x03,
341                         .s              = 1,
342                 },
343                 .tr             = (struct kvm_segment) {
344                         .limit          = 0xffffU,
345                         .present        = 1,
346                         .type           = 0x03U,
347                 },
348                 .ldt            = (struct kvm_segment) {
349                         .limit          = 0xffffU,
350                         .present        = 1,
351                         .type           = 0x02U,
352                 },
353                 .gdt            = (struct kvm_dtable) {
354                         .limit          = 0xffffU,
355                 },
356                 .idt            = (struct kvm_dtable) {
357                         .limit          = 0xffffU,
358                 },
359         };
360
361         if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0)
362                 die_perror("KVM_SET_SREGS failed");
363
364         self->regs = (struct kvm_regs) {
365                 /* We start the guest in 16-bit real mode  */
366                 .rflags         = 0x0000000000000002ULL,
367
368                 .rip            = self->boot_ip,
369                 .rsp            = self->boot_sp,
370                 .rbp            = self->boot_sp,
371         };
372
373         if (self->regs.rip > USHRT_MAX)
374                 die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) self->regs.rip);
375
376         if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0)
377                 die_perror("KVM_SET_REGS failed");
378
379 }
380
381 void kvm__run(struct kvm *self)
382 {
383         if (ioctl(self->vcpu_fd, KVM_RUN, 0) < 0)
384                 die_perror("KVM_RUN failed");
385 }
386
/*
 * Placeholder for port output emulation: only logs the access on stderr.
 * @self and @data are currently unused.
 */
static void kvm__emulate_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
{
        FILE *log = stderr;

        fprintf(log, "%s port=%x, size=%d, count=%" PRIu32 "\n", __func__, port, size, count);
}
391
/*
 * Placeholder for port input emulation: only logs the access on stderr.
 * @self and @data are currently unused; nothing is written to @data.
 */
static void kvm__emulate_io_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
{
        FILE *log = stderr;

        fprintf(log, "%s port=%x, size=%d, count=%" PRIu32 "\n", __func__, port, size, count);
}
396
397 void kvm__emulate_io(struct kvm *self, uint16_t port, void *data, int direction, int size, uint32_t count)
398 {
399         if (direction == KVM_EXIT_IO_IN)
400                 kvm__emulate_io_in(self, port, data, size, count);
401         else
402                 kvm__emulate_io_out(self, port, data, size, count);
403 }
404
405 static void print_segment(const char *name, struct kvm_segment *seg)
406 {
407         printf(" %s       %04" PRIx16 "      %016" PRIx64 "  %08" PRIx32 "  %02" PRIx8 "    %x %x   %x  %x %x %x %x\n",
408                 name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit,
409                 (uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
410 }
411
412 void kvm__show_registers(struct kvm *self)
413 {
414         unsigned long cr0, cr2, cr3;
415         unsigned long cr4, cr8;
416         unsigned long rax, rbx, rcx;
417         unsigned long rdx, rsi, rdi;
418         unsigned long rbp,  r8,  r9;
419         unsigned long r10, r11, r12;
420         unsigned long r13, r14, r15;
421         unsigned long rip, rsp;
422         struct kvm_sregs sregs;
423         unsigned long rflags;
424         struct kvm_regs regs;
425         int i;
426
427         if (ioctl(self->vcpu_fd, KVM_GET_REGS, &regs) < 0)
428                 die("KVM_GET_REGS failed");
429
430         rflags = regs.rflags;
431
432         rip = regs.rip; rsp = regs.rsp;
433         rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
434         rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
435         rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
436         r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
437         r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
438
439         printf("Registers:\n");
440         printf(" rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
441         printf(" rax: %016lx   ebx: %016lx   ecx: %016lx\n", rax, rbx, rcx);
442         printf(" rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
443         printf(" rbp: %016lx   r8:  %016lx   r9:  %016lx\n", rbp, r8,  r9);
444         printf(" r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
445         printf(" r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);
446
447         if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
448                 die("KVM_GET_REGS failed");
449
450         cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
451         cr4 = sregs.cr4; cr8 = sregs.cr8;
452
453         printf(" cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
454         printf(" cr4: %016lx   cr8: %016lx\n", cr4, cr8);
455         printf("Segment registers:\n");
456         printf(" register  selector  base              limit     type  p dpl db s l g avl\n");
457         print_segment("cs ", &sregs.cs);
458         print_segment("ss ", &sregs.ss);
459         print_segment("ds ", &sregs.ds);
460         print_segment("es ", &sregs.es);
461         print_segment("fs ", &sregs.fs);
462         print_segment("gs ", &sregs.gs);
463         print_segment("tr ", &sregs.tr);
464         print_segment("ldt", &sregs.ldt);
465         printf(" [ efer: %016lx  apic base: %016lx ]\n", (uint64_t) sregs.efer, (uint64_t) sregs.apic_base);
466         printf("Interrupt bitmap:\n");
467         printf(" ");
468         for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
469                 printf("%016lx ", (uint64_t) sregs.interrupt_bitmap[i]);
470         printf("\n");
471 }
472
473 void kvm__show_code(struct kvm *self)
474 {
475         unsigned int code_bytes = 64;
476         unsigned int code_prologue = code_bytes * 43 / 64;
477         unsigned int code_len = code_bytes;
478         unsigned char c;
479         unsigned int i;
480         uint8_t *ip;
481
482         if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0)
483                 die("KVM_GET_REGS failed");
484
485         if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
486                 die("KVM_GET_SREGS failed");
487
488         ip = guest_addr_to_host(self, ip_real_to_flat(self, self->regs.rip) - code_prologue);
489
490         printf("Code: ");
491
492         for (i = 0; i < code_len; i++, ip++) {
493                 c = *ip;
494
495                 if (ip == guest_addr_to_host(self, ip_real_to_flat(self, self->regs.rip)))
496                         printf("<%02x> ", c);
497                 else
498                         printf("%02x ", c);
499         }
500
501         printf("\n");
502 }