]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - tools/kvm/kvm.c
kvm tools: Abolishment of uint*_t types
[karo-tx-linux.git] / tools / kvm / kvm.c
1 #include "kvm/kvm.h"
2
3 #include "kvm/cpufeature.h"
4 #include "kvm/interrupt.h"
5 #include "kvm/boot-protocol.h"
6 #include "kvm/util.h"
7 #include "kvm/mptable.h"
8
9 #include <linux/kvm.h>
10
11 #include <asm/bootparam.h>
12
13 #include <sys/ioctl.h>
14 #include <sys/mman.h>
15 #include <sys/stat.h>
16 #include <stdbool.h>
17 #include <assert.h>
18 #include <limits.h>
19 #include <signal.h>
20 #include <stdarg.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <stdio.h>
25 #include <fcntl.h>
26 #include <time.h>
27
28 #define DEFINE_KVM_EXIT_REASON(reason) [reason] = #reason
29
30 const char *kvm_exit_reasons[] = {
31         DEFINE_KVM_EXIT_REASON(KVM_EXIT_UNKNOWN),
32         DEFINE_KVM_EXIT_REASON(KVM_EXIT_EXCEPTION),
33         DEFINE_KVM_EXIT_REASON(KVM_EXIT_IO),
34         DEFINE_KVM_EXIT_REASON(KVM_EXIT_HYPERCALL),
35         DEFINE_KVM_EXIT_REASON(KVM_EXIT_DEBUG),
36         DEFINE_KVM_EXIT_REASON(KVM_EXIT_HLT),
37         DEFINE_KVM_EXIT_REASON(KVM_EXIT_MMIO),
38         DEFINE_KVM_EXIT_REASON(KVM_EXIT_IRQ_WINDOW_OPEN),
39         DEFINE_KVM_EXIT_REASON(KVM_EXIT_SHUTDOWN),
40         DEFINE_KVM_EXIT_REASON(KVM_EXIT_FAIL_ENTRY),
41         DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTR),
42         DEFINE_KVM_EXIT_REASON(KVM_EXIT_SET_TPR),
43         DEFINE_KVM_EXIT_REASON(KVM_EXIT_TPR_ACCESS),
44         DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_SIEIC),
45         DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_RESET),
46         DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR),
47         DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI),
48         DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR),
49 };
50
51 #define DEFINE_KVM_EXT(ext)             \
52         .name = #ext,                   \
53         .code = ext
54
55 struct {
56         const char *name;
57         int code;
58 } kvm_req_ext[] = {
59         { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) },
60         { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) },
61         { DEFINE_KVM_EXT(KVM_CAP_PIT2) },
62         { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) },
63         { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) },
64         { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) },
65         { DEFINE_KVM_EXT(KVM_CAP_HLT) },
66         { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) },
67         { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) },
68 };
69
70 static bool kvm__supports_extension(struct kvm *self, unsigned int extension)
71 {
72         int ret;
73
74         ret = ioctl(self->sys_fd, KVM_CHECK_EXTENSION, extension);
75         if (ret < 0)
76                 return false;
77
78         return ret;
79 }
80
81 static int kvm__check_extensions(struct kvm *self)
82 {
83         unsigned int i;
84
85         for (i = 0; i < ARRAY_SIZE(kvm_req_ext); i++) {
86                 if (!kvm__supports_extension(self, kvm_req_ext[i].code)) {
87                         error("Unsuppored KVM extension detected: %s",
88                                 kvm_req_ext[i].name);
89                         return (int)-i;
90                 }
91         }
92
93         return 0;
94 }
95
96 static struct kvm *kvm__new(void)
97 {
98         struct kvm *self = calloc(1, sizeof *self);
99
100         if (!self)
101                 die("out of memory");
102
103         return self;
104 }
105
106 void kvm__delete(struct kvm *self)
107 {
108         kvm__stop_timer(self);
109
110         munmap(self->ram_start, self->ram_size);
111         free(self);
112 }
113
114 static bool kvm__cpu_supports_vm(void)
115 {
116         struct cpuid_regs regs;
117         u32 eax_base;
118         int feature;
119
120         regs    = (struct cpuid_regs) {
121                 .eax            = 0x00,
122         };
123         host_cpuid(&regs);
124
125         switch (regs.ebx) {
126         case CPUID_VENDOR_INTEL_1:
127                 eax_base        = 0x00;
128                 feature         = KVM__X86_FEATURE_VMX;
129                 break;
130
131         case CPUID_VENDOR_AMD_1:
132                 eax_base        = 0x80000000;
133                 feature         = KVM__X86_FEATURE_SVM;
134                 break;
135
136         default:
137                 return false;
138         }
139
140         regs    = (struct cpuid_regs) {
141                 .eax            = eax_base,
142         };
143         host_cpuid(&regs);
144
145         if (regs.eax < eax_base + 0x01)
146                 return false;
147
148         regs    = (struct cpuid_regs) {
149                 .eax            = eax_base + 0x01
150         };
151         host_cpuid(&regs);
152
153         return regs.ecx & (1 << feature);
154 }
155
156 void kvm__init_ram(struct kvm *self)
157 {
158         struct kvm_userspace_memory_region mem;
159         int ret;
160
161         mem = (struct kvm_userspace_memory_region) {
162                 .slot                   = 0,
163                 .guest_phys_addr        = 0x0UL,
164                 .memory_size            = self->ram_size,
165                 .userspace_addr         = (unsigned long) self->ram_start,
166         };
167
168         ret = ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
169         if (ret < 0)
170                 die_perror("KVM_SET_USER_MEMORY_REGION ioctl");
171 }
172
173 struct kvm *kvm__init(const char *kvm_dev, unsigned long ram_size)
174 {
175         struct kvm_pit_config pit_config = { .flags = 0, };
176         struct kvm *self;
177         int ret;
178
179         if (!kvm__cpu_supports_vm())
180                 die("Your CPU does not support hardware virtualization");
181
182         self = kvm__new();
183
184         self->sys_fd = open(kvm_dev, O_RDWR);
185         if (self->sys_fd < 0) {
186                 if (errno == ENOENT)
187                         die("'%s' not found. Please make sure your kernel has CONFIG_KVM enabled and that the KVM modules are loaded.", kvm_dev);
188                 if (errno == ENODEV)
189                         die("'%s' KVM driver not available.\n  # (If the KVM module is loaded then 'dmesg' may offer further clues about the failure.)", kvm_dev);
190
191                 fprintf(stderr, "  Fatal, could not open %s: ", kvm_dev);
192                 perror(NULL);
193                 exit(1);
194         }
195
196         ret = ioctl(self->sys_fd, KVM_GET_API_VERSION, 0);
197         if (ret != KVM_API_VERSION)
198                 die_perror("KVM_API_VERSION ioctl");
199
200         self->vm_fd = ioctl(self->sys_fd, KVM_CREATE_VM, 0);
201         if (self->vm_fd < 0)
202                 die_perror("KVM_CREATE_VM ioctl");
203
204         if (kvm__check_extensions(self))
205                 die("A required KVM extention is not supported by OS");
206
207         ret = ioctl(self->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000);
208         if (ret < 0)
209                 die_perror("KVM_SET_TSS_ADDR ioctl");
210
211         ret = ioctl(self->vm_fd, KVM_CREATE_PIT2, &pit_config);
212         if (ret < 0)
213                 die_perror("KVM_CREATE_PIT2 ioctl");
214
215         self->ram_size          = ram_size;
216
217         self->ram_start = mmap(NULL, ram_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
218         if (self->ram_start == MAP_FAILED)
219                 die("out of memory");
220
221         ret = ioctl(self->vm_fd, KVM_CREATE_IRQCHIP);
222         if (ret < 0)
223                 die_perror("KVM_CREATE_IRQCHIP ioctl");
224
225         return self;
226 }
227
/* Guest memory layout and boot protocol constants used by the loaders. */
enum {
	/* Real-mode segment:offset at which the kernel image is placed. */
	BOOT_LOADER_SELECTOR	= 0x1000,
	BOOT_LOADER_IP		= 0x0000,
	/* Initial stack pointer handed to the guest. */
	BOOT_LOADER_SP		= 0x8000,
	/* Flat guest address at which the kernel command line is stored. */
	BOOT_CMDLINE_OFFSET	= 0x20000,

	/* Oldest boot protocol version (hdr.version) that is accepted. */
	BOOT_PROTOCOL_REQUIRED	= 0x206,
	/* Not referenced in the visible part of this file. */
	LOAD_HIGH		= 0x01,
};
235
236 static int load_flat_binary(struct kvm *self, int fd)
237 {
238         void *p;
239         int nr;
240
241         if (lseek(fd, 0, SEEK_SET) < 0)
242                 die_perror("lseek");
243
244         p = guest_real_to_host(self, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
245
246         while ((nr = read(fd, p, 65536)) > 0)
247                 p += nr;
248
249         self->boot_selector     = BOOT_LOADER_SELECTOR;
250         self->boot_ip           = BOOT_LOADER_IP;
251         self->boot_sp           = BOOT_LOADER_SP;
252
253         return true;
254 }
255
256 static const char *BZIMAGE_MAGIC        = "HdrS";
257
258 static bool load_bzimage(struct kvm *self, int fd_kernel,
259                         int fd_initrd, const char *kernel_cmdline)
260 {
261         struct boot_params *kern_boot;
262         unsigned long setup_sects;
263         struct boot_params boot;
264         size_t cmdline_size;
265         ssize_t setup_size;
266         void *p;
267         int nr;
268
269         /*
270          * See Documentation/x86/boot.txt for details no bzImage on-disk and
271          * memory layout.
272          */
273
274         if (lseek(fd_kernel, 0, SEEK_SET) < 0)
275                 die_perror("lseek");
276
277         if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot))
278                 return false;
279
280         if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC)))
281                 return false;
282
283         if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED)
284                 die("Too old kernel");
285
286         if (lseek(fd_kernel, 0, SEEK_SET) < 0)
287                 die_perror("lseek");
288
289         if (!boot.hdr.setup_sects)
290                 boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS;
291         setup_sects = boot.hdr.setup_sects + 1;
292
293         setup_size = setup_sects << 9;
294         p = guest_real_to_host(self, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
295
296         /* copy setup.bin to mem*/
297         if (read(fd_kernel, p, setup_size) != setup_size)
298                 die_perror("read");
299
300         /* copy vmlinux.bin to BZ_KERNEL_START*/
301         p = guest_flat_to_host(self, BZ_KERNEL_START);
302
303         while ((nr = read(fd_kernel, p, 65536)) > 0)
304                 p += nr;
305
306         p = guest_flat_to_host(self, BOOT_CMDLINE_OFFSET);
307         if (kernel_cmdline) {
308                 cmdline_size = strlen(kernel_cmdline) + 1;
309                 if (cmdline_size > boot.hdr.cmdline_size)
310                         cmdline_size = boot.hdr.cmdline_size;
311
312                 memset(p, 0, boot.hdr.cmdline_size);
313                 memcpy(p, kernel_cmdline, cmdline_size - 1);
314         }
315
316         kern_boot       = guest_real_to_host(self, BOOT_LOADER_SELECTOR, 0x00);
317
318         kern_boot->hdr.cmd_line_ptr     = BOOT_CMDLINE_OFFSET;
319         kern_boot->hdr.type_of_loader   = 0xff;
320         kern_boot->hdr.heap_end_ptr     = 0xfe00;
321         kern_boot->hdr.loadflags        |= CAN_USE_HEAP;
322
323         /*
324          * Read initrd image into guest memory
325          */
326         if (fd_initrd >= 0) {
327                 struct stat initrd_stat;
328                 unsigned long addr;
329
330                 if (fstat(fd_initrd, &initrd_stat))
331                         die_perror("fstat");
332
333                 addr = boot.hdr.initrd_addr_max & ~0xfffff;
334                 for (;;) {
335                         if (addr < BZ_KERNEL_START)
336                                 die("Not enough memory for initrd");
337                         else if (addr < (self->ram_size - initrd_stat.st_size))
338                                 break;
339                         addr -= 0x100000;
340                 }
341
342                 p = guest_flat_to_host(self, addr);
343                 nr = read(fd_initrd, p, initrd_stat.st_size);
344                 if (nr != initrd_stat.st_size)
345                         die("Failed to read initrd");
346
347                 kern_boot->hdr.ramdisk_image    = addr;
348                 kern_boot->hdr.ramdisk_size     = initrd_stat.st_size;
349         }
350
351         self->boot_selector     = BOOT_LOADER_SELECTOR;
352         /*
353          * The real-mode setup code starts at offset 0x200 of a bzImage. See
354          * Documentation/x86/boot.txt for details.
355          */
356         self->boot_ip           = BOOT_LOADER_IP + 0x200;
357         self->boot_sp           = BOOT_LOADER_SP;
358
359         return true;
360 }
361
/*
 * Open @kernel_filename (and @initrd_filename, when given) and load the
 * kernel into guest memory.
 *
 * The image is first tried as a bzImage; if that loader rejects it, a
 * warning is printed and the file is loaded as a flat binary instead.
 * Failure to open either file, or to load the kernel in any format, is
 * fatal. Both descriptors are closed before returning.
 */
bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename,
		const char *initrd_filename, const char *kernel_cmdline)
{
	int kernel_fd;
	int initrd_fd = -1;
	bool loaded;

	kernel_fd = open(kernel_filename, O_RDONLY);
	if (kernel_fd < 0)
		die("Unable to open kernel %s", kernel_filename);

	if (initrd_filename) {
		initrd_fd = open(initrd_filename, O_RDONLY);
		if (initrd_fd < 0)
			die("Unable to open initrd %s", initrd_filename);
	}

	loaded = load_bzimage(kvm, kernel_fd, initrd_fd, kernel_cmdline);

	/* The initrd is only consumed by the bzImage loader. */
	if (initrd_filename)
		close(initrd_fd);

	if (!loaded) {
		warning("%s is not a bzImage. Trying to load it as a flat binary...", kernel_filename);

		loaded = load_flat_binary(kvm, kernel_fd);
		if (!loaded) {
			close(kernel_fd);

			die("%s is not a valid bzImage or flat binary", kernel_filename);
		}
	}

	close(kernel_fd);

	return loaded;
}
401
402 /**
403  * kvm__setup_bios - inject BIOS into guest system memory
404  * @self - guest system descriptor
405  *
406  * This function is a main routine where we poke guest memory
407  * and install BIOS there.
408  */
409 void kvm__setup_bios(struct kvm *self)
410 {
411         /* standart minimal configuration */
412         setup_bios(self);
413
414         /* FIXME: SMP, ACPI and friends here */
415
416         /* MP table */
417         mptable_setup(self, self->nrcpus);
418 }
419
420 #define TIMER_INTERVAL_NS 1000000       /* 1 msec */
421
422 /*
423  * This function sets up a timer that's used to inject interrupts from the
424  * userspace hypervisor into the guest at periodical intervals. Please note
425  * that clock interrupt, for example, is not handled here.
426  */
427 void kvm__start_timer(struct kvm *self)
428 {
429         struct itimerspec its;
430         struct sigevent sev;
431
432         memset(&sev, 0, sizeof(struct sigevent));
433         sev.sigev_value.sival_int       = 0;
434         sev.sigev_notify                = SIGEV_SIGNAL;
435         sev.sigev_signo                 = SIGALRM;
436
437         if (timer_create(CLOCK_REALTIME, &sev, &self->timerid) < 0)
438                 die("timer_create()");
439
440         its.it_value.tv_sec             = TIMER_INTERVAL_NS / 1000000000;
441         its.it_value.tv_nsec            = TIMER_INTERVAL_NS % 1000000000;
442         its.it_interval.tv_sec          = its.it_value.tv_sec;
443         its.it_interval.tv_nsec         = its.it_value.tv_nsec;
444
445         if (timer_settime(self->timerid, 0, &its, NULL) < 0)
446                 die("timer_settime()");
447 }
448
449 void kvm__stop_timer(struct kvm *self)
450 {
451         if (self->timerid)
452                 if (timer_delete(self->timerid) < 0)
453                         die("timer_delete()");
454
455         self->timerid = 0;
456 }
457
458 void kvm__irq_line(struct kvm *self, int irq, int level)
459 {
460         struct kvm_irq_level irq_level;
461
462         irq_level       = (struct kvm_irq_level) {
463                 {
464                         .irq            = irq,
465                 },
466                 .level          = level,
467         };
468
469         if (ioctl(self->vm_fd, KVM_IRQ_LINE, &irq_level) < 0)
470                 die_perror("KVM_IRQ_LINE failed");
471 }
472
/*
 * Hex-dump guest memory to stdout, eight bytes per row.
 *
 * @addr: flat guest address of the first byte to dump
 * @size: number of bytes requested; rounded down to a multiple of 8, so
 *        fewer than 8 bytes prints nothing
 *
 * The dump stops early at the first row that is not entirely inside guest
 * RAM.
 */
void kvm__dump_mem(struct kvm *self, unsigned long addr, unsigned long size)
{
	unsigned char *p;
	unsigned long n;

	size &= ~7UL; /* round down to whole 8-byte rows */
	if (!size)
		return;

	p = guest_flat_to_host(self, addr);

	for (n = 0; n < size; n += 8) {
		/*
		 * Check the last byte of the row as well as the first: with an
		 * @addr that is not 8-byte aligned a row can begin inside RAM
		 * and end beyond it.
		 * NOTE(review): assumes host_ptr_in_ram() tests a single
		 * pointer against the RAM mapping - confirm in kvm.h.
		 */
		if (!host_ptr_in_ram(self, p + n) ||
		    !host_ptr_in_ram(self, p + n + 7))
			break;

		printf("  0x%08lx: %02x %02x %02x %02x  %02x %02x %02x %02x\n",
			addr + n, p[n + 0], p[n + 1], p[n + 2], p[n + 3],
				  p[n + 4], p[n + 5], p[n + 6], p[n + 7]);
	}
}