/*
 * tools/kvm/kvm.c
 *
 * Source: karo-tx-linux.git (git.kernelconcepts.de), blob as of the commit
 * "kvm tools: Rename 'self' variables".
 */
1 #include "kvm/kvm.h"
2
3 #include "kvm/cpufeature.h"
4 #include "kvm/interrupt.h"
5 #include "kvm/boot-protocol.h"
6 #include "kvm/util.h"
7 #include "kvm/mptable.h"
8
9 #include <linux/kvm.h>
10
11 #include <asm/bootparam.h>
12
13 #include <sys/ioctl.h>
14 #include <sys/mman.h>
15 #include <sys/stat.h>
16 #include <stdbool.h>
17 #include <assert.h>
18 #include <limits.h>
19 #include <signal.h>
20 #include <stdarg.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <stdio.h>
25 #include <fcntl.h>
26 #include <time.h>
27
/* Expands to a designated initializer: slot [reason] holds the string "reason". */
#define DEFINE_KVM_EXIT_REASON(reason) [reason] = #reason

/*
 * Printable names of KVM exit reasons, indexed by the KVM_EXIT_* value.
 * Any index not listed below is left as a NULL pointer by the designated
 * initializers, so users of this table must bounds-check and NULL-check.
 */
const char *kvm_exit_reasons[] = {
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_UNKNOWN),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_EXCEPTION),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_IO),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_HYPERCALL),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_DEBUG),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_HLT),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_MMIO),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_IRQ_WINDOW_OPEN),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_SHUTDOWN),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_FAIL_ENTRY),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTR),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_SET_TPR),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_TPR_ACCESS),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_SIEIC),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_RESET),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI),
        DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR),
};
50
51 #define DEFINE_KVM_EXT(ext)             \
52         .name = #ext,                   \
53         .code = ext
54
55 struct {
56         const char *name;
57         int code;
58 } kvm_req_ext[] = {
59         { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) },
60         { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) },
61         { DEFINE_KVM_EXT(KVM_CAP_PIT2) },
62         { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) },
63         { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) },
64         { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) },
65         { DEFINE_KVM_EXT(KVM_CAP_HLT) },
66         { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) },
67         { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) },
68 };
69
70 static bool kvm__supports_extension(struct kvm *kvm, unsigned int extension)
71 {
72         int ret;
73
74         ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, extension);
75         if (ret < 0)
76                 return false;
77
78         return ret;
79 }
80
81 static int kvm__check_extensions(struct kvm *kvm)
82 {
83         unsigned int i;
84
85         for (i = 0; i < ARRAY_SIZE(kvm_req_ext); i++) {
86                 if (!kvm__supports_extension(kvm, kvm_req_ext[i].code)) {
87                         error("Unsuppored KVM extension detected: %s",
88                                 kvm_req_ext[i].name);
89                         return (int)-i;
90                 }
91         }
92
93         return 0;
94 }
95
96 static struct kvm *kvm__new(void)
97 {
98         struct kvm *kvm = calloc(1, sizeof *kvm);
99
100         if (!kvm)
101                 die("out of memory");
102
103         return kvm;
104 }
105
106 void kvm__delete(struct kvm *kvm)
107 {
108         kvm__stop_timer(kvm);
109
110         munmap(kvm->ram_start, kvm->ram_size);
111         free(kvm);
112 }
113
114 static bool kvm__cpu_supports_vm(void)
115 {
116         struct cpuid_regs regs;
117         u32 eax_base;
118         int feature;
119
120         regs    = (struct cpuid_regs) {
121                 .eax            = 0x00,
122         };
123         host_cpuid(&regs);
124
125         switch (regs.ebx) {
126         case CPUID_VENDOR_INTEL_1:
127                 eax_base        = 0x00;
128                 feature         = KVM__X86_FEATURE_VMX;
129                 break;
130
131         case CPUID_VENDOR_AMD_1:
132                 eax_base        = 0x80000000;
133                 feature         = KVM__X86_FEATURE_SVM;
134                 break;
135
136         default:
137                 return false;
138         }
139
140         regs    = (struct cpuid_regs) {
141                 .eax            = eax_base,
142         };
143         host_cpuid(&regs);
144
145         if (regs.eax < eax_base + 0x01)
146                 return false;
147
148         regs    = (struct cpuid_regs) {
149                 .eax            = eax_base + 0x01
150         };
151         host_cpuid(&regs);
152
153         return regs.ecx & (1 << feature);
154 }
155
156 static void kvm_register_mem_slot(struct kvm *kvm, u32 slot, u64 guest_phys, u64 size, void *userspace_addr)
157 {
158         struct kvm_userspace_memory_region mem;
159         int ret;
160
161         mem = (struct kvm_userspace_memory_region) {
162                 .slot                   = slot,
163                 .guest_phys_addr        = guest_phys,
164                 .memory_size            = size,
165                 .userspace_addr         = (u64)userspace_addr,
166         };
167
168         ret = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
169         if (ret < 0)
170                 die_perror("KVM_SET_USER_MEMORY_REGION ioctl");
171 }
172
173 /*
174  * Allocating RAM size bigger than 4GB requires us to leave a gap
175  * in the RAM which is used for PCI MMIO, hotplug, and unconfigured
176  * devices (see documentation of e820_setup_gap() for details).
177  *
178  * If we're required to initialize RAM bigger than 4GB, we will create
179  * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space.
180  */
181
182 void kvm__init_ram(struct kvm *kvm)
183 {
184         u64     phys_start, phys_size;
185         void    *host_mem;
186
187         if (kvm->ram_size < KVM_32BIT_GAP_START) {
188                 /* Use a single block of RAM for 32bit RAM */
189
190                 phys_start = 0;
191                 phys_size  = kvm->ram_size;
192                 host_mem   = kvm->ram_start;
193
194                 kvm_register_mem_slot(kvm, 0, 0, kvm->ram_size, kvm->ram_start);
195         } else {
196                 /* First RAM range from zero to the PCI gap: */
197
198                 phys_start = 0;
199                 phys_size  = KVM_32BIT_GAP_START;
200                 host_mem   = kvm->ram_start;
201
202                 kvm_register_mem_slot(kvm, 0, phys_start, phys_size, host_mem);
203
204                 /* Second RAM range from 4GB to the end of RAM: */
205
206                 phys_start = 0x100000000ULL;
207                 phys_size  = kvm->ram_size - phys_size;
208                 host_mem   = kvm->ram_start + phys_start;
209
210                 kvm_register_mem_slot(kvm, 1, phys_start, phys_size, host_mem);
211         }
212 }
213
214 int kvm__max_cpus(struct kvm *kvm)
215 {
216         int ret;
217
218         ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);
219         if (ret < 0)
220                 die_perror("KVM_CAP_NR_VCPUS");
221
222         return ret;
223 }
224
225 struct kvm *kvm__init(const char *kvm_dev, unsigned long ram_size)
226 {
227         struct kvm_pit_config pit_config = { .flags = 0, };
228         struct kvm *kvm;
229         int ret;
230
231         if (!kvm__cpu_supports_vm())
232                 die("Your CPU does not support hardware virtualization");
233
234         kvm = kvm__new();
235
236         kvm->sys_fd = open(kvm_dev, O_RDWR);
237         if (kvm->sys_fd < 0) {
238                 if (errno == ENOENT)
239                         die("'%s' not found. Please make sure your kernel has CONFIG_KVM enabled and that the KVM modules are loaded.", kvm_dev);
240                 if (errno == ENODEV)
241                         die("'%s' KVM driver not available.\n  # (If the KVM module is loaded then 'dmesg' may offer further clues about the failure.)", kvm_dev);
242
243                 fprintf(stderr, "  Fatal, could not open %s: ", kvm_dev);
244                 perror(NULL);
245                 exit(1);
246         }
247
248         ret = ioctl(kvm->sys_fd, KVM_GET_API_VERSION, 0);
249         if (ret != KVM_API_VERSION)
250                 die_perror("KVM_API_VERSION ioctl");
251
252         kvm->vm_fd = ioctl(kvm->sys_fd, KVM_CREATE_VM, 0);
253         if (kvm->vm_fd < 0)
254                 die_perror("KVM_CREATE_VM ioctl");
255
256         if (kvm__check_extensions(kvm))
257                 die("A required KVM extention is not supported by OS");
258
259         ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000);
260         if (ret < 0)
261                 die_perror("KVM_SET_TSS_ADDR ioctl");
262
263         ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config);
264         if (ret < 0)
265                 die_perror("KVM_CREATE_PIT2 ioctl");
266
267         kvm->ram_size           = ram_size;
268
269         if (kvm->ram_size < KVM_32BIT_GAP_START) {
270                 kvm->ram_start = mmap(NULL, ram_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
271         } else {
272                 kvm->ram_start = mmap(NULL, ram_size + KVM_32BIT_GAP_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
273                 if (kvm->ram_start != MAP_FAILED) {
274                         /*
275                          * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that
276                          * if we accidently write to it, we will know.
277                          */
278                         mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE);
279                 }
280         }
281         if (kvm->ram_start == MAP_FAILED)
282                 die("out of memory");
283
284         ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
285         if (ret < 0)
286                 die_perror("KVM_CREATE_IRQCHIP ioctl");
287
288         return kvm;
289 }
290
/*
 * Real-mode segment selector and offset at which the kernel image (setup
 * code or flat binary) is placed in guest memory; also stored as the
 * guest's boot selector / instruction pointer base.
 */
#define BOOT_LOADER_SELECTOR    0x1000
#define BOOT_LOADER_IP          0x0000
/* Initial value stored in kvm->boot_sp by the loaders below. */
#define BOOT_LOADER_SP          0x8000
/* Flat guest address the kernel command line is copied to. */
#define BOOT_CMDLINE_OFFSET     0x20000

/* Minimum boot.hdr.version that load_bzimage() accepts. */
#define BOOT_PROTOCOL_REQUIRED  0x206
/* NOTE(review): not referenced in the visible part of this file -- confirm whether it is still needed. */
#define LOAD_HIGH               0x01
298
299 static int load_flat_binary(struct kvm *kvm, int fd)
300 {
301         void *p;
302         int nr;
303
304         if (lseek(fd, 0, SEEK_SET) < 0)
305                 die_perror("lseek");
306
307         p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
308
309         while ((nr = read(fd, p, 65536)) > 0)
310                 p += nr;
311
312         kvm->boot_selector      = BOOT_LOADER_SELECTOR;
313         kvm->boot_ip            = BOOT_LOADER_IP;
314         kvm->boot_sp            = BOOT_LOADER_SP;
315
316         return true;
317 }
318
319 static const char *BZIMAGE_MAGIC        = "HdrS";
320
321 static bool load_bzimage(struct kvm *kvm, int fd_kernel,
322                         int fd_initrd, const char *kernel_cmdline)
323 {
324         struct boot_params *kern_boot;
325         unsigned long setup_sects;
326         struct boot_params boot;
327         size_t cmdline_size;
328         ssize_t setup_size;
329         void *p;
330         int nr;
331
332         /*
333          * See Documentation/x86/boot.txt for details no bzImage on-disk and
334          * memory layout.
335          */
336
337         if (lseek(fd_kernel, 0, SEEK_SET) < 0)
338                 die_perror("lseek");
339
340         if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot))
341                 return false;
342
343         if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC)))
344                 return false;
345
346         if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED)
347                 die("Too old kernel");
348
349         if (lseek(fd_kernel, 0, SEEK_SET) < 0)
350                 die_perror("lseek");
351
352         if (!boot.hdr.setup_sects)
353                 boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS;
354         setup_sects = boot.hdr.setup_sects + 1;
355
356         setup_size = setup_sects << 9;
357         p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
358
359         /* copy setup.bin to mem*/
360         if (read(fd_kernel, p, setup_size) != setup_size)
361                 die_perror("read");
362
363         /* copy vmlinux.bin to BZ_KERNEL_START*/
364         p = guest_flat_to_host(kvm, BZ_KERNEL_START);
365
366         while ((nr = read(fd_kernel, p, 65536)) > 0)
367                 p += nr;
368
369         p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET);
370         if (kernel_cmdline) {
371                 cmdline_size = strlen(kernel_cmdline) + 1;
372                 if (cmdline_size > boot.hdr.cmdline_size)
373                         cmdline_size = boot.hdr.cmdline_size;
374
375                 memset(p, 0, boot.hdr.cmdline_size);
376                 memcpy(p, kernel_cmdline, cmdline_size - 1);
377         }
378
379         kern_boot       = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00);
380
381         kern_boot->hdr.cmd_line_ptr     = BOOT_CMDLINE_OFFSET;
382         kern_boot->hdr.type_of_loader   = 0xff;
383         kern_boot->hdr.heap_end_ptr     = 0xfe00;
384         kern_boot->hdr.loadflags        |= CAN_USE_HEAP;
385
386         /*
387          * Read initrd image into guest memory
388          */
389         if (fd_initrd >= 0) {
390                 struct stat initrd_stat;
391                 unsigned long addr;
392
393                 if (fstat(fd_initrd, &initrd_stat))
394                         die_perror("fstat");
395
396                 addr = boot.hdr.initrd_addr_max & ~0xfffff;
397                 for (;;) {
398                         if (addr < BZ_KERNEL_START)
399                                 die("Not enough memory for initrd");
400                         else if (addr < (kvm->ram_size - initrd_stat.st_size))
401                                 break;
402                         addr -= 0x100000;
403                 }
404
405                 p = guest_flat_to_host(kvm, addr);
406                 nr = read(fd_initrd, p, initrd_stat.st_size);
407                 if (nr != initrd_stat.st_size)
408                         die("Failed to read initrd");
409
410                 kern_boot->hdr.ramdisk_image    = addr;
411                 kern_boot->hdr.ramdisk_size     = initrd_stat.st_size;
412         }
413
414         kvm->boot_selector      = BOOT_LOADER_SELECTOR;
415         /*
416          * The real-mode setup code starts at offset 0x200 of a bzImage. See
417          * Documentation/x86/boot.txt for details.
418          */
419         kvm->boot_ip            = BOOT_LOADER_IP + 0x200;
420         kvm->boot_sp            = BOOT_LOADER_SP;
421
422         return true;
423 }
424
/*
 * Load a kernel image, and optionally an initrd, into the guest.
 *
 * @kernel_filename: path of the kernel image
 * @initrd_filename: path of the initrd, or NULL for none
 * @kernel_cmdline:  kernel command line handed to the bzImage loader
 *
 * The image is first tried as a bzImage; if that loader rejects it, a
 * warning is printed and the file is loaded as a flat binary instead.
 * Returns the result of the loader that succeeded. Files that cannot be
 * opened, or an image no loader accepts, terminate via die().
 */
bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename,
                const char *initrd_filename, const char *kernel_cmdline)
{
        int kernel_fd, initrd_fd = -1;
        bool loaded;

        kernel_fd = open(kernel_filename, O_RDONLY);
        if (kernel_fd < 0)
                die("Unable to open kernel %s", kernel_filename);

        if (initrd_filename) {
                initrd_fd = open(initrd_filename, O_RDONLY);
                if (initrd_fd < 0)
                        die("Unable to open initrd %s", initrd_filename);
        }

        loaded = load_bzimage(kvm, kernel_fd, initrd_fd, kernel_cmdline);

        /* Only the bzImage loader consumes the initrd. */
        if (initrd_filename)
                close(initrd_fd);

        if (!loaded) {
                warning("%s is not a bzImage. Trying to load it as a flat binary...", kernel_filename);

                loaded = load_flat_binary(kvm, kernel_fd);
        }

        close(kernel_fd);

        if (!loaded)
                die("%s is not a valid bzImage or flat binary", kernel_filename);

        return loaded;
}
464
/**
 * kvm__setup_bios - inject BIOS into guest system memory
 * @kvm - guest system descriptor
 *
 * This function is the main routine where we poke guest memory
 * and install the BIOS there. It then sets up the MP table for
 * kvm->nrcpus CPUs.
 */
void kvm__setup_bios(struct kvm *kvm)
{
        /* standard minimal configuration */
        setup_bios(kvm);

        /* FIXME: SMP, ACPI and friends here */

        /* MP table */
        mptable_setup(kvm, kvm->nrcpus);
}
482
483 #define TIMER_INTERVAL_NS 1000000       /* 1 msec */
484
485 /*
486  * This function sets up a timer that's used to inject interrupts from the
487  * userspace hypervisor into the guest at periodical intervals. Please note
488  * that clock interrupt, for example, is not handled here.
489  */
490 void kvm__start_timer(struct kvm *kvm)
491 {
492         struct itimerspec its;
493         struct sigevent sev;
494
495         memset(&sev, 0, sizeof(struct sigevent));
496         sev.sigev_value.sival_int       = 0;
497         sev.sigev_notify                = SIGEV_SIGNAL;
498         sev.sigev_signo                 = SIGALRM;
499
500         if (timer_create(CLOCK_REALTIME, &sev, &kvm->timerid) < 0)
501                 die("timer_create()");
502
503         its.it_value.tv_sec             = TIMER_INTERVAL_NS / 1000000000;
504         its.it_value.tv_nsec            = TIMER_INTERVAL_NS % 1000000000;
505         its.it_interval.tv_sec          = its.it_value.tv_sec;
506         its.it_interval.tv_nsec         = its.it_value.tv_nsec;
507
508         if (timer_settime(kvm->timerid, 0, &its, NULL) < 0)
509                 die("timer_settime()");
510 }
511
512 void kvm__stop_timer(struct kvm *kvm)
513 {
514         if (kvm->timerid)
515                 if (timer_delete(kvm->timerid) < 0)
516                         die("timer_delete()");
517
518         kvm->timerid = 0;
519 }
520
521 void kvm__irq_line(struct kvm *kvm, int irq, int level)
522 {
523         struct kvm_irq_level irq_level;
524
525         irq_level       = (struct kvm_irq_level) {
526                 {
527                         .irq            = irq,
528                 },
529                 .level          = level,
530         };
531
532         if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0)
533                 die_perror("KVM_IRQ_LINE failed");
534 }
535
/*
 * Hex-dump guest memory to stdout, eight bytes per line.
 *
 * @addr: flat guest address to start at
 * @size: number of bytes to dump; rounded down to a multiple of 8
 *
 * Each line is prefixed with the guest address of its first byte. The dump
 * stops early as soon as the start of a line falls outside guest RAM.
 */
void kvm__dump_mem(struct kvm *kvm, unsigned long addr, unsigned long size)
{
        unsigned long len = size & ~7UL;
        unsigned long offset;
        unsigned char *base;

        if (len == 0)
                return;

        base = guest_flat_to_host(kvm, addr);

        for (offset = 0; offset < len; offset += 8) {
                unsigned char *row = base + offset;

                if (!host_ptr_in_ram(kvm, row))
                        break;

                printf("  0x%08lx: %02x %02x %02x %02x  %02x %02x %02x %02x\n",
                        addr + offset, row[0], row[1], row[2], row[3],
                                       row[4], row[5], row[6], row[7]);
        }
}
555 }