]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - tools/kvm/kvm.c
kvm tools: Introduce KVM VCPU data structure
[karo-tx-linux.git] / tools / kvm / kvm.c
1 #include "kvm/kvm.h"
2
3 #include "kvm/cpufeature.h"
4 #include "kvm/interrupt.h"
5 #include "kvm/boot-protocol.h"
6 #include "kvm/util.h"
7
8 #include <linux/kvm.h>
9
10 #include <asm/bootparam.h>
11
12 #include <sys/ioctl.h>
13 #include <inttypes.h>
14 #include <sys/mman.h>
15 #include <sys/stat.h>
16 #include <stdbool.h>
17 #include <assert.h>
18 #include <limits.h>
19 #include <signal.h>
20 #include <stdarg.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <stdio.h>
25 #include <fcntl.h>
26 #include <time.h>
27
/*
 * Builds one table entry: the slot whose index is the numeric value of
 * @reason holds the identifier of @reason spelled out as a string.
 */
#define DEFINE_KVM_EXIT_REASON(reason) [reason] = #reason

/*
 * Names of the KVM_EXIT_* codes, indexed by the exit code itself.
 * Any index below the highest listed code that has no entry here is NULL.
 */
const char *kvm_exit_reasons[] = {
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_UNKNOWN),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_EXCEPTION),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_IO),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_HYPERCALL),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_DEBUG),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_HLT),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_MMIO),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_IRQ_WINDOW_OPEN),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_SHUTDOWN),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_FAIL_ENTRY),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTR),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_SET_TPR),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_TPR_ACCESS),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_SIEIC),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_RESET),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR),
};
50
/*
 * Fills in both members of a kvm_req_ext entry from a single KVM_CAP_*
 * identifier: its spelling as a string and its numeric value.
 */
#define DEFINE_KVM_EXT(ext)             \
        .name = #ext,                   \
        .code = ext

/*
 * KVM capabilities that kvm__check_extensions() requires.
 * Each entry pairs the capability code with its name for error reporting.
 */
struct {
        const char *name;       /* capability identifier as text */
        int code;               /* value passed to KVM_CHECK_EXTENSION */
} kvm_req_ext[] = {
        { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) },
        { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) },
        { DEFINE_KVM_EXT(KVM_CAP_PIT2) },
        { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) },
        { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) },
        { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) },
        { DEFINE_KVM_EXT(KVM_CAP_HLT) },
        { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) },
        { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) },
};
69
70 static bool kvm__supports_extension(struct kvm *self, unsigned int extension)
71 {
72         int ret;
73
74         ret = ioctl(self->sys_fd, KVM_CHECK_EXTENSION, extension);
75         if (ret < 0)
76                 return false;
77
78         return ret;
79 }
80
81 static int kvm__check_extensions(struct kvm *self)
82 {
83         unsigned int i;
84
85         for (i = 0; i < ARRAY_SIZE(kvm_req_ext); i++) {
86                 if (!kvm__supports_extension(self, kvm_req_ext[i].code)) {
87                         error("Unsuppored KVM extension detected: %s",
88                                 kvm_req_ext[i].name);
89                         return (int)-i;
90                 }
91         }
92
93         return 0;
94 }
95
96 static struct kvm *kvm__new(void)
97 {
98         struct kvm *self = calloc(1, sizeof *self);
99
100         if (!self)
101                 die("out of memory");
102
103         return self;
104 }
105
106 void kvm__delete(struct kvm *self)
107 {
108         kvm__stop_timer(self);
109
110         free(self->ram_start);
111         free(self);
112 }
113
114 static bool kvm__cpu_supports_vm(void)
115 {
116         struct cpuid_regs regs;
117         uint32_t eax_base;
118         int feature;
119
120         regs    = (struct cpuid_regs) {
121                 .eax            = 0x00,
122         };
123         host_cpuid(&regs);
124
125         switch (regs.ebx) {
126         case CPUID_VENDOR_INTEL_1:
127                 eax_base        = 0x00;
128                 feature         = KVM__X86_FEATURE_VMX;
129                 break;
130
131         case CPUID_VENDOR_AMD_1:
132                 eax_base        = 0x80000000;
133                 feature         = KVM__X86_FEATURE_SVM;
134                 break;
135
136         default:
137                 return false;
138         }
139
140         regs    = (struct cpuid_regs) {
141                 .eax            = eax_base,
142         };
143         host_cpuid(&regs);
144
145         if (regs.eax < eax_base + 0x01)
146                 return false;
147
148         regs    = (struct cpuid_regs) {
149                 .eax            = eax_base + 0x01
150         };
151         host_cpuid(&regs);
152
153         return regs.ecx & (1 << feature);
154 }
155
156 struct kvm *kvm__init(const char *kvm_dev, unsigned long ram_size)
157 {
158         struct kvm_userspace_memory_region mem;
159         struct kvm_pit_config pit_config = { .flags = 0, };
160         struct kvm *self;
161         long page_size;
162         int ret;
163
164         if (!kvm__cpu_supports_vm())
165                 die("Your CPU does not support hardware virtualization");
166
167         self = kvm__new();
168
169         self->sys_fd = open(kvm_dev, O_RDWR);
170         if (self->sys_fd < 0) {
171                 if (errno == ENOENT)
172                         die("'%s' not found. Please make sure your kernel has CONFIG_KVM enabled and that the KVM modules are loaded.", kvm_dev);
173                 if (errno == ENODEV)
174                         die("'%s' KVM driver not available.\n  # (If the KVM module is loaded then 'dmesg' may offer further clues about the failure.)", kvm_dev);
175
176                 fprintf(stderr, "  Fatal, could not open %s: ", kvm_dev);
177                 perror(NULL);
178                 exit(1);
179         }
180
181         ret = ioctl(self->sys_fd, KVM_GET_API_VERSION, 0);
182         if (ret != KVM_API_VERSION)
183                 die_perror("KVM_API_VERSION ioctl");
184
185         self->vm_fd = ioctl(self->sys_fd, KVM_CREATE_VM, 0);
186         if (self->vm_fd < 0)
187                 die_perror("KVM_CREATE_VM ioctl");
188
189         if (kvm__check_extensions(self))
190                 die("A required KVM extention is not supported by OS");
191
192         ret = ioctl(self->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000);
193         if (ret < 0)
194                 die_perror("KVM_SET_TSS_ADDR ioctl");
195
196         ret = ioctl(self->vm_fd, KVM_CREATE_PIT2, &pit_config);
197         if (ret < 0)
198                 die_perror("KVM_CREATE_PIT2 ioctl");
199
200         self->ram_size          = ram_size;
201
202         page_size       = sysconf(_SC_PAGESIZE);
203         if (posix_memalign(&self->ram_start, page_size, self->ram_size) != 0)
204                 die("out of memory");
205
206         mem = (struct kvm_userspace_memory_region) {
207                 .slot                   = 0,
208                 .guest_phys_addr        = 0x0UL,
209                 .memory_size            = self->ram_size,
210                 .userspace_addr         = (unsigned long) self->ram_start,
211         };
212
213         ret = ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
214         if (ret < 0)
215                 die_perror("KVM_SET_USER_MEMORY_REGION ioctl");
216
217         ret = ioctl(self->vm_fd, KVM_CREATE_IRQCHIP);
218         if (ret < 0)
219                 die_perror("KVM_CREATE_IRQCHIP ioctl");
220
221         return self;
222 }
223
/*
 * Real-mode segment:offset where the kernel image (or its setup part) is
 * loaded, and the values stored into boot_selector / boot_ip / boot_sp of
 * the guest descriptor by the loaders in this file.
 */
#define BOOT_LOADER_SELECTOR    0x1000
#define BOOT_LOADER_IP          0x0000
#define BOOT_LOADER_SP          0x8000
/* Flat guest address the kernel command line is copied to. */
#define BOOT_CMDLINE_OFFSET     0x20000

/* Lowest boot.hdr.version accepted by load_bzimage(). */
#define BOOT_PROTOCOL_REQUIRED  0x206
/* NOTE(review): not referenced in the visible part of this file. */
#define LOAD_HIGH               0x01
231
232 static int load_flat_binary(struct kvm *self, int fd)
233 {
234         void *p;
235         int nr;
236
237         if (lseek(fd, 0, SEEK_SET) < 0)
238                 die_perror("lseek");
239
240         p = guest_real_to_host(self, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
241
242         while ((nr = read(fd, p, 65536)) > 0)
243                 p += nr;
244
245         self->boot_selector     = BOOT_LOADER_SELECTOR;
246         self->boot_ip           = BOOT_LOADER_IP;
247         self->boot_sp           = BOOT_LOADER_SP;
248
249         return true;
250 }
251
252 static const char *BZIMAGE_MAGIC        = "HdrS";
253
254 static bool load_bzimage(struct kvm *self, int fd_kernel,
255                         int fd_initrd, const char *kernel_cmdline)
256 {
257         struct boot_params *kern_boot;
258         unsigned long setup_sects;
259         struct boot_params boot;
260         size_t cmdline_size;
261         ssize_t setup_size;
262         void *p;
263         int nr;
264
265         /*
266          * See Documentation/x86/boot.txt for details no bzImage on-disk and
267          * memory layout.
268          */
269
270         if (lseek(fd_kernel, 0, SEEK_SET) < 0)
271                 die_perror("lseek");
272
273         if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot))
274                 return false;
275
276         if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC)))
277                 return false;
278
279         if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED)
280                 die("Too old kernel");
281
282         if (lseek(fd_kernel, 0, SEEK_SET) < 0)
283                 die_perror("lseek");
284
285         if (!boot.hdr.setup_sects)
286                 boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS;
287         setup_sects = boot.hdr.setup_sects + 1;
288
289         setup_size = setup_sects << 9;
290         p = guest_real_to_host(self, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
291
292         /* copy setup.bin to mem*/
293         if (read(fd_kernel, p, setup_size) != setup_size)
294                 die_perror("read");
295
296         /* copy vmlinux.bin to BZ_KERNEL_START*/
297         p = guest_flat_to_host(self, BZ_KERNEL_START);
298
299         while ((nr = read(fd_kernel, p, 65536)) > 0)
300                 p += nr;
301
302         p = guest_flat_to_host(self, BOOT_CMDLINE_OFFSET);
303         if (kernel_cmdline) {
304                 cmdline_size = strlen(kernel_cmdline) + 1;
305                 if (cmdline_size > boot.hdr.cmdline_size)
306                         cmdline_size = boot.hdr.cmdline_size;
307
308                 memset(p, 0, boot.hdr.cmdline_size);
309                 memcpy(p, kernel_cmdline, cmdline_size - 1);
310         }
311
312         kern_boot       = guest_real_to_host(self, BOOT_LOADER_SELECTOR, 0x00);
313
314         kern_boot->hdr.cmd_line_ptr     = BOOT_CMDLINE_OFFSET;
315         kern_boot->hdr.type_of_loader   = 0xff;
316         kern_boot->hdr.heap_end_ptr     = 0xfe00;
317         kern_boot->hdr.loadflags        |= CAN_USE_HEAP;
318
319         /*
320          * Read initrd image into guest memory
321          */
322         if (fd_initrd >= 0) {
323                 struct stat initrd_stat;
324                 unsigned long addr;
325
326                 if (fstat(fd_initrd, &initrd_stat))
327                         die_perror("fstat");
328
329                 addr = boot.hdr.initrd_addr_max & ~0xfffff;
330                 for (;;) {
331                         if (addr < BZ_KERNEL_START)
332                                 die("Not enough memory for initrd");
333                         else if (addr < (self->ram_size - initrd_stat.st_size))
334                                 break;
335                         addr -= 0x100000;
336                 }
337
338                 p = guest_flat_to_host(self, addr);
339                 nr = read(fd_initrd, p, initrd_stat.st_size);
340                 if (nr != initrd_stat.st_size)
341                         die("Failed to read initrd");
342
343                 kern_boot->hdr.ramdisk_image    = addr;
344                 kern_boot->hdr.ramdisk_size     = initrd_stat.st_size;
345         }
346
347         self->boot_selector     = BOOT_LOADER_SELECTOR;
348         /*
349          * The real-mode setup code starts at offset 0x200 of a bzImage. See
350          * Documentation/x86/boot.txt for details.
351          */
352         self->boot_ip           = BOOT_LOADER_IP + 0x200;
353         self->boot_sp           = BOOT_LOADER_SP;
354
355         return true;
356 }
357
/*
 * Load a kernel image, and optionally an initrd, into guest memory.
 *
 * @kvm:             guest descriptor
 * @kernel_filename: path of the kernel image
 * @initrd_filename: path of the initrd, or NULL for none
 * @kernel_cmdline:  kernel command line handed to the bzImage loader
 *
 * The image is first tried as a bzImage; if it is not one, a warning is
 * printed and it is loaded as a flat binary. Both files are closed again
 * before returning. Failure to open either file ends in die().
 *
 * Returns the result of the loader that accepted the image.
 */
bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename,
                const char *initrd_filename, const char *kernel_cmdline)
{
        bool ret;
        int fd_kernel = -1, fd_initrd = -1;

        fd_kernel = open(kernel_filename, O_RDONLY);
        if (fd_kernel < 0)
                die("Unable to open kernel %s", kernel_filename);

        if (initrd_filename) {
                fd_initrd = open(initrd_filename, O_RDONLY);
                if (fd_initrd < 0)
                        die("Unable to open initrd %s", initrd_filename);
        }

        ret = load_bzimage(kvm, fd_kernel, fd_initrd, kernel_cmdline);

        /* Only the bzImage loader consumes the initrd. */
        if (initrd_filename)
                close(fd_initrd);

        if (ret)
                goto found_kernel;

        warning("%s is not a bzImage. Trying to load it as a flat binary...", kernel_filename);

        ret = load_flat_binary(kvm, fd_kernel);
        if (ret)
                goto found_kernel;

        die("%s is not a valid bzImage or flat binary", kernel_filename);

found_kernel:
        /* The image now lives in guest memory; the descriptor is not needed. */
        close(fd_kernel);

        return ret;
}
393
/**
 * kvm__setup_bios - install the BIOS into guest system memory
 * @self - guest system descriptor
 *
 * Entry point for BIOS setup. At present it only delegates to
 * setup_bios().
 */
void kvm__setup_bios(struct kvm *self)
{
        /* standard minimal configuration */
        setup_bios(self);

        /* FIXME: SMP, ACPI and friends here */
}
408
/* Period of the timer armed by kvm__start_timer(), in nanoseconds. */
#define TIMER_INTERVAL_NS 1000000       /* 1 msec */

/*
 * SIGALRM handler installed by kvm__start_timer().
 * It does nothing itself; @sig is ignored.
 */
static void alarm_handler(int sig)
{
        (void)sig;
}
414
415 /*
416  * This function sets up a timer that's used to inject interrupts from the
417  * userspace hypervisor into the guest at periodical intervals. Please note
418  * that clock interrupt, for example, is not handled here.
419  */
420 void kvm__start_timer(struct kvm *self)
421 {
422         struct itimerspec its;
423         struct sigaction sa;
424         struct sigevent sev;
425
426         sigfillset(&sa.sa_mask);
427         sa.sa_flags                     = 0;
428         sa.sa_handler                   = alarm_handler;
429
430         sigaction(SIGALRM, &sa, NULL);
431
432         memset(&sev, 0, sizeof(struct sigevent));
433         sev.sigev_value.sival_int       = 0;
434         sev.sigev_notify                = SIGEV_SIGNAL;
435         sev.sigev_signo                 = SIGALRM;
436
437         if (timer_create(CLOCK_REALTIME, &sev, &self->timerid) < 0)
438                 die("timer_create()");
439
440         its.it_value.tv_sec             = TIMER_INTERVAL_NS / 1000000000;
441         its.it_value.tv_nsec            = TIMER_INTERVAL_NS % 1000000000;
442         its.it_interval.tv_sec          = its.it_value.tv_sec;
443         its.it_interval.tv_nsec         = its.it_value.tv_nsec;
444
445         if (timer_settime(self->timerid, 0, &its, NULL) < 0)
446                 die("timer_settime()");
447 }
448
449 void kvm__stop_timer(struct kvm *self)
450 {
451         if (self->timerid)
452                 if (timer_delete(self->timerid) < 0)
453                         die("timer_delete()");
454
455         self->timerid = 0;
456 }
457
458 void kvm__irq_line(struct kvm *self, int irq, int level)
459 {
460         struct kvm_irq_level irq_level;
461
462         irq_level       = (struct kvm_irq_level) {
463                 {
464                         .irq            = irq,
465                 },
466                 .level          = level,
467         };
468
469         if (ioctl(self->vm_fd, KVM_IRQ_LINE, &irq_level) < 0)
470                 die_perror("KVM_IRQ_LINE failed");
471 }
472
/*
 * Print a hex dump of guest memory to stdout, eight bytes per line.
 *
 * @self: guest descriptor
 * @addr: flat guest address to start at
 * @size: number of bytes to dump; rounded down to a multiple of 8
 *
 * Nothing is printed when the rounded size is 0. The dump stops early at
 * the first row whose start host_ptr_in_ram() rejects.
 */
void kvm__dump_mem(struct kvm *self, unsigned long addr, unsigned long size)
{
        unsigned char *base;
        unsigned long off;

        size &= ~7; /* mod 8 */
        if (size == 0)
                return;

        base = guest_flat_to_host(self, addr);

        for (off = 0; off < size; off += 8) {
                unsigned char *row = base + off;

                if (!host_ptr_in_ram(self, row))
                        break;

                printf("  0x%08lx: %02x %02x %02x %02x  %02x %02x %02x %02x\n",
                        addr + off, row[0], row[1], row[2], row[3],
                                    row[4], row[5], row[6], row[7]);
        }
}