Merge tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 15 Jul 2017 17:18:16 +0000 (10:18 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 15 Jul 2017 17:18:16 +0000 (10:18 -0700)
Pull more KVM updates from Radim Krčmář:
 "Second batch of KVM updates for v4.13

  Common:
   - add uevents for VM creation/destruction
   - annotate and properly access RCU-protected objects

  s390:
   - rename IOCTL added in the first v4.13 merge

  x86:
   - emulate VMLOAD VMSAVE feature in SVM
   - support paravirtual asynchronous page fault while nested
   - add Hyper-V userspace interfaces for better migration
   - improve master clock corner cases
   - extend internal error reporting after EPT misconfig
   - correct single-stepping of emulated instructions in SVM
   - handle MCE during VM entry
   - fix nVMX VM entry checks and nVMX VMCS shadowing"

* tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
  kvm: x86: hyperv: make VP_INDEX managed by userspace
  KVM: async_pf: Let guest support delivery of async_pf from guest mode
  KVM: async_pf: Force a nested vmexit if the injected #PF is async_pf
  KVM: async_pf: Add L1 guest async_pf #PF vmexit handler
  KVM: x86: Simplify kvm_x86_ops->queue_exception parameter list
  kvm: x86: hyperv: add KVM_CAP_HYPERV_SYNIC2
  KVM: x86: make backwards_tsc_observed a per-VM variable
  KVM: trigger uevents when creating or destroying a VM
  KVM: SVM: Enable Virtual VMLOAD VMSAVE feature
  KVM: SVM: Add Virtual VMLOAD VMSAVE feature definition
  KVM: SVM: Rename lbr_ctl field in the vmcb control area
  KVM: SVM: Prepare for new bit definition in lbr_ctl
  KVM: SVM: handle singlestep exception when skipping emulated instructions
  KVM: x86: take slots_lock in kvm_free_pit
  KVM: s390: Fix KVM_S390_GET_CMMA_BITS ioctl definition
  kvm: vmx: Properly handle machine check during VM-entry
  KVM: x86: update master clock before computing kvmclock_offset
  kvm: nVMX: Shadow "high" parts of shadowed 64-bit VMCS fields
  kvm: nVMX: Fix nested_vmx_check_msr_bitmap_controls
  kvm: nVMX: Validate the I/O bitmaps on nested VM-entry
  ...

21 files changed:
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/msr.txt
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/kvm_emulate.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/svm.h
arch/x86/include/uapi/asm/kvm_para.h
arch/x86/kernel/kvm.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/hyperv.h
arch/x86/kvm/i8254.c
arch/x86/kvm/mmu.c
arch/x86/kvm/mmu.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
include/linux/kvm_host.h
include/uapi/linux/kvm.h
virt/kvm/eventfd.c
virt/kvm/irqchip.c
virt/kvm/kvm_main.c

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 3a9831b72945a812b69cd37fe3c9157352ef7e6f..e63a35fafef0e153c30023e92622111006d1dd7c 100644
@@ -4329,3 +4329,21 @@ Querying this capability returns a bitmap indicating the possible
 virtual SMT modes that can be set using KVM_CAP_PPC_SMT.  If bit N
 (counting from the right) is set, then a virtual SMT mode of 2^N is
 available.
+
+8.11 KVM_CAP_HYPERV_SYNIC2
+
+Architectures: x86
+
+This capability enables a newer version of Hyper-V Synthetic interrupt
+controller (SynIC).  The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
+doesn't clear SynIC message and event flags pages when they are enabled by
+writing to the respective MSRs.
+
+8.12 KVM_CAP_HYPERV_VP_INDEX
+
+Architectures: x86
+
+This capability indicates that userspace can load HV_X64_MSR_VP_INDEX msr.  Its
+value is used to denote the target vcpu for a SynIC interrupt.  For
+compatibility, KVM initializes this msr to KVM's internal vcpu index.  When this
+capability is absent, userspace can still query this msr's value.
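
A minimal sketch of how userspace might enable the new capability via the
standard KVM_ENABLE_CAP vcpu ioctl (vcpu_fd from KVM_CREATE_VCPU is assumed;
error handling elided):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int enable_synic2(int vcpu_fd)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_HYPERV_SYNIC2,
			/* args[0] must be zero, see the x86.c hunk below */
		};

		return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
	}
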
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 0a9ea515512a208a4b239813028c136e1d2943af..1ebecc115dc6efdbbfa76eb4ab329f5b1d8b765e 100644
@@ -166,10 +166,11 @@ MSR_KVM_SYSTEM_TIME: 0x12
 MSR_KVM_ASYNC_PF_EN: 0x4b564d02
        data: Bits 63-6 hold 64-byte aligned physical address of a
        64 byte memory area which must be in guest RAM and must be
-       zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
+       zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
        when asynchronous page faults are enabled on the vcpu 0 when
        disabled. Bit 1 is 1 if asynchronous page faults can be injected
-       when vcpu is in cpl == 0.
+       when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
+       are delivered to L1 as #PF vmexits.
 
        First 4 byte of 64 byte memory location will be written to by
        the hypervisor at the time of asynchronous page fault (APF)
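
For reference, a guest enabling all three bits would write something like the
following (a sketch mirroring the layout above; apf_area stands for a
hypothetical 64-byte aligned, zeroed buffer in guest RAM):

	u64 pa = __pa(apf_area);			/* bits 63-6: area address */

	pa |= KVM_ASYNC_PF_ENABLED;			/* bit 0 */
	pa |= KVM_ASYNC_PF_SEND_ALWAYS;			/* bit 1 */
	pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;	/* bit 2 */
	wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);

Because older hypervisors reject bit 2 as reserved, the arch/x86/kernel/kvm.c
hunk below probes it with wrmsr_safe() and falls back to a plain wrmsrl()
without it.
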
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2701e5f8145bd250c3a35dc12cab5839e16ff30d..ca3c48c0872f4beba6b03ac92c50eb6a352b3006 100644
 #define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
 #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
 #define X86_FEATURE_AVIC       (15*32+13) /* Virtual Interrupt Controller */
+#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
 #define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 722d0e56886342a3a9f65d7f419d0e240a9cc6ab..fde36f189836db83af655652f44eec92820072b8 100644
@@ -23,6 +23,7 @@ struct x86_exception {
        u16 error_code;
        bool nested_page_fault;
        u64 address; /* cr2 or nested page fault gpa */
+       u8 async_page_fault;
 };
 
 /*
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1588e9e3dc01f9b807afafc35136974e5870977e..87ac4fba6d8e12f07e8a9f191bdb028a1c3e6234 100644
@@ -462,10 +462,12 @@ struct kvm_vcpu_hv_synic {
        DECLARE_BITMAP(auto_eoi_bitmap, 256);
        DECLARE_BITMAP(vec_bitmap, 256);
        bool active;
+       bool dont_zero_synic_pages;
 };
 
 /* Hyper-V per vcpu emulation context */
 struct kvm_vcpu_hv {
+       u32 vp_index;
        u64 hv_vapic;
        s64 runtime_offset;
        struct kvm_vcpu_hv_synic synic;
@@ -549,6 +551,7 @@ struct kvm_vcpu_arch {
                bool reinject;
                u8 nr;
                u32 error_code;
+               u8 nested_apf;
        } exception;
 
        struct kvm_queued_interrupt {
@@ -649,6 +652,9 @@ struct kvm_vcpu_arch {
                u64 msr_val;
                u32 id;
                bool send_user_only;
+               u32 host_apf_reason;
+               unsigned long nested_apf_token;
+               bool delivery_as_pf_vmexit;
        } apf;
 
        /* OSVW MSRs (AMD only) */
@@ -803,6 +809,7 @@ struct kvm_arch {
        int audit_point;
        #endif
 
+       bool backwards_tsc_observed;
        bool boot_vcpu_runs_old_kvmclock;
        u32 bsp_vcpu_id;
 
@@ -952,9 +959,7 @@ struct kvm_x86_ops {
                                unsigned char *hypercall_addr);
        void (*set_irq)(struct kvm_vcpu *vcpu);
        void (*set_nmi)(struct kvm_vcpu *vcpu);
-       void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
-                               bool has_error_code, u32 error_code,
-                               bool reinject);
+       void (*queue_exception)(struct kvm_vcpu *vcpu);
        void (*cancel_injection)(struct kvm_vcpu *vcpu);
        int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
        int (*nmi_allowed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 14824fc78f7e7160fa014979424f111a8a2e40f0..58fffe79e417e61cbd4f7041c9a7753a84b1331c 100644
@@ -83,7 +83,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
        u32 event_inj;
        u32 event_inj_err;
        u64 nested_cr3;
-       u64 lbr_ctl;
+       u64 virt_ext;
        u32 clean;
        u32 reserved_5;
        u64 next_rip;
@@ -119,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define AVIC_ENABLE_SHIFT 31
 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
 
+#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
+#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
+
 #define SVM_INTERRUPT_SHADOW_MASK 1
 
 #define SVM_IOIO_STR_SHIFT 2
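
With the rename, one VMCB field now carries both controls, so the svm.c hunks
below switch from a 0/1 assignment to mask operations on virt_ext, e.g.:

	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
	svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
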
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index cff0bb6556f8809ec39d08c61036a86d7c8adde0..a965e5b0d32804fa19f9434600054b39014dd310 100644
@@ -67,6 +67,7 @@ struct kvm_clock_pairing {
 
 #define KVM_ASYNC_PF_ENABLED                   (1 << 0)
 #define KVM_ASYNC_PF_SEND_ALWAYS               (1 << 1)
+#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT     (1 << 2)
 
 /* Operations for KVM_HC_MMU_OP */
 #define KVM_MMU_OP_WRITE_PTE            1
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 43e10d6fdbeda3002dcd9e2063a723ded4ca5972..71c17a5be983524ea277718b759ff1511a9a3214 100644
@@ -330,7 +330,12 @@ static void kvm_guest_cpu_init(void)
 #ifdef CONFIG_PREEMPT
                pa |= KVM_ASYNC_PF_SEND_ALWAYS;
 #endif
-               wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
+               pa |= KVM_ASYNC_PF_ENABLED;
+
+               /* Async page fault support for L1 hypervisor is optional */
+               if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
+                       (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
+                       wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
                __this_cpu_write(apf_reason.enabled, 1);
                printk(KERN_INFO"KVM setup async PF for cpu %d\n",
                       smp_processor_id());
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index ebae57ac59024a6759ff9e2bb403de60f6bce759..2695a34fa1c5190f98b9fdd092e9cc2d3e44b2a0 100644
@@ -106,14 +106,27 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
        return 0;
 }
 
-static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id)
+static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
+{
+       struct kvm_vcpu *vcpu = NULL;
+       int i;
+
+       if (vpidx < KVM_MAX_VCPUS)
+               vcpu = kvm_get_vcpu(kvm, vpidx);
+       if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
+               return vcpu;
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
+                       return vcpu;
+       return NULL;
+}
+
+static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
 {
        struct kvm_vcpu *vcpu;
        struct kvm_vcpu_hv_synic *synic;
 
-       if (vcpu_id >= atomic_read(&kvm->online_vcpus))
-               return NULL;
-       vcpu = kvm_get_vcpu(kvm, vcpu_id);
+       vcpu = get_vcpu_by_vpidx(kvm, vpidx);
        if (!vcpu)
                return NULL;
        synic = vcpu_to_synic(vcpu);
@@ -221,7 +234,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
                synic->version = data;
                break;
        case HV_X64_MSR_SIEFP:
-               if (data & HV_SYNIC_SIEFP_ENABLE)
+               if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
+                   !synic->dont_zero_synic_pages)
                        if (kvm_clear_guest(vcpu->kvm,
                                            data & PAGE_MASK, PAGE_SIZE)) {
                                ret = 1;
@@ -232,7 +246,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
                        synic_exit(synic, msr);
                break;
        case HV_X64_MSR_SIMP:
-               if (data & HV_SYNIC_SIMP_ENABLE)
+               if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
+                   !synic->dont_zero_synic_pages)
                        if (kvm_clear_guest(vcpu->kvm,
                                            data & PAGE_MASK, PAGE_SIZE)) {
                                ret = 1;
@@ -318,11 +333,11 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
        return ret;
 }
 
-int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint)
+int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
 {
        struct kvm_vcpu_hv_synic *synic;
 
-       synic = synic_get(kvm, vcpu_id);
+       synic = synic_get(kvm, vpidx);
        if (!synic)
                return -EINVAL;
 
@@ -341,11 +356,11 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
                        kvm_hv_notify_acked_sint(vcpu, i);
 }
 
-static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi)
+static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
 {
        struct kvm_vcpu_hv_synic *synic;
 
-       synic = synic_get(kvm, vcpu_id);
+       synic = synic_get(kvm, vpidx);
        if (!synic)
                return -EINVAL;
 
@@ -687,14 +702,24 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
                stimer_init(&hv_vcpu->stimer[i], i);
 }
 
-int kvm_hv_activate_synic(struct kvm_vcpu *vcpu)
+void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+       struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
+
+       hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
+}
+
+int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
 {
+       struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
+
        /*
         * Hyper-V SynIC auto EOI SINT's are
         * not compatible with APICV, so deactivate APICV
         */
        kvm_vcpu_deactivate_apicv(vcpu);
-       vcpu_to_synic(vcpu)->active = true;
+       synic->active = true;
+       synic->dont_zero_synic_pages = dont_zero_synic_pages;
        return 0;
 }
 
@@ -978,6 +1003,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
        struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
 
        switch (msr) {
+       case HV_X64_MSR_VP_INDEX:
+               if (!host)
+                       return 1;
+               hv->vp_index = (u32)data;
+               break;
        case HV_X64_MSR_APIC_ASSIST_PAGE: {
                u64 gfn;
                unsigned long addr;
@@ -1089,18 +1119,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
 
        switch (msr) {
-       case HV_X64_MSR_VP_INDEX: {
-               int r;
-               struct kvm_vcpu *v;
-
-               kvm_for_each_vcpu(r, v, vcpu->kvm) {
-                       if (v == vcpu) {
-                               data = r;
-                               break;
-                       }
-               }
+       case HV_X64_MSR_VP_INDEX:
+               data = hv->vp_index;
                break;
-       }
        case HV_X64_MSR_EOI:
                return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
        case HV_X64_MSR_ICR:
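
With VP_INDEX writable only from the host side (the !host check above),
migration userspace can transplant the saved value through KVM_SET_MSRS.
A sketch, assuming a vcpu fd and a saved_vp_index from the source; 0x40000002
is HV_X64_MSR_VP_INDEX in the Hyper-V definitions:

	struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entry;
	} msrs = {};

	msrs.hdr.nmsrs = 1;
	msrs.entry.index = 0x40000002;	/* HV_X64_MSR_VP_INDEX */
	msrs.entry.data = saved_vp_index;
	ioctl(vcpu_fd, KVM_SET_MSRS, &msrs);
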
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index cd1119538add9185f6966f2e3ea5a9a3f7fab630..e637631a9574feb5c5c5e0053a46d1b3f29d8646 100644
@@ -56,9 +56,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
 void kvm_hv_irq_routing_update(struct kvm *kvm);
 int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
 void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
-int kvm_hv_activate_synic(struct kvm_vcpu *vcpu);
+int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
 
 void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index a78b445ce4116b3dbcd48da97efdb6c89394abb5..af192895b1fc633e9b2922c587862d1cbb41efd7 100644
@@ -724,8 +724,10 @@ void kvm_free_pit(struct kvm *kvm)
        struct kvm_pit *pit = kvm->arch.vpit;
 
        if (pit) {
+               mutex_lock(&kvm->slots_lock);
                kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
                kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev);
+               mutex_unlock(&kvm->slots_lock);
                kvm_pit_set_reinject(pit, false);
                hrtimer_cancel(&pit->pit_state.timer);
                kthread_destroy_worker(pit->worker);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index aafd399cf8c6f3d3e219ec636a73b19ee1e9d20d..9b1dd114956a8bcb9e724bd4df792460f68a0943 100644
@@ -46,6 +46,7 @@
 #include <asm/io.h>
 #include <asm/vmx.h>
 #include <asm/kvm_page_track.h>
+#include "trace.h"
 
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
@@ -3748,7 +3749,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
                     kvm_event_needs_reinjection(vcpu)))
                return false;
 
-       if (is_guest_mode(vcpu))
+       if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
                return false;
 
        return kvm_x86_ops->interrupt_allowed(vcpu);
@@ -3780,6 +3781,38 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
        return false;
 }
 
+int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+                               u64 fault_address, char *insn, int insn_len,
+                               bool need_unprotect)
+{
+       int r = 1;
+
+       switch (vcpu->arch.apf.host_apf_reason) {
+       default:
+               trace_kvm_page_fault(fault_address, error_code);
+
+               if (need_unprotect && kvm_event_needs_reinjection(vcpu))
+                       kvm_mmu_unprotect_page_virt(vcpu, fault_address);
+               r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
+                               insn_len);
+               break;
+       case KVM_PV_REASON_PAGE_NOT_PRESENT:
+               vcpu->arch.apf.host_apf_reason = 0;
+               local_irq_disable();
+               kvm_async_pf_task_wait(fault_address);
+               local_irq_enable();
+               break;
+       case KVM_PV_REASON_PAGE_READY:
+               vcpu->arch.apf.host_apf_reason = 0;
+               local_irq_disable();
+               kvm_async_pf_task_wake(fault_address);
+               local_irq_enable();
+               break;
+       }
+       return r;
+}
+EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
+
 static bool
 check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
 {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index a276834950c14a15681c9d125ddde8e9b9dc6af8..d7d248a000dd6772681f3f5541e344f9677a2d1d 100644
@@ -77,6 +77,9 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
                             bool accessed_dirty);
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
+int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+                               u64 fault_address, char *insn, int insn_len,
+                               bool need_unprotect);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 905ea6052517fef7a09bf82f396222ade76be2a5..4d8141e533c369711df245d0a783683598ad4559 100644
@@ -194,7 +194,6 @@ struct vcpu_svm {
 
        unsigned int3_injected;
        unsigned long int3_rip;
-       u32 apf_reason;
 
        /* cached guest cpuid flags for faster access */
        bool nrips_enabled      : 1;
@@ -277,6 +276,10 @@ static int avic;
 module_param(avic, int, S_IRUGO);
 #endif
 
+/* enable/disable Virtual VMLOAD VMSAVE */
+static int vls = true;
+module_param(vls, int, 0444);
+
 /* AVIC VM ID bit masks and lock */
 static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
 static DEFINE_SPINLOCK(avic_vm_id_lock);
@@ -633,11 +636,13 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
        svm_set_interrupt_shadow(vcpu, 0);
 }
 
-static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
-                               bool has_error_code, u32 error_code,
-                               bool reinject)
+static void svm_queue_exception(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
+       unsigned nr = vcpu->arch.exception.nr;
+       bool has_error_code = vcpu->arch.exception.has_error_code;
+       bool reinject = vcpu->arch.exception.reinject;
+       u32 error_code = vcpu->arch.exception.error_code;
 
        /*
         * If we are within a nested VM we'd better #VMEXIT and let the guest
@@ -947,7 +952,7 @@ static void svm_enable_lbrv(struct vcpu_svm *svm)
 {
        u32 *msrpm = svm->msrpm;
 
-       svm->vmcb->control.lbr_ctl = 1;
+       svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
        set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
        set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
        set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
@@ -958,7 +963,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
 {
        u32 *msrpm = svm->msrpm;
 
-       svm->vmcb->control.lbr_ctl = 0;
+       svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
        set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
        set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
        set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
@@ -1093,6 +1098,16 @@ static __init int svm_hardware_setup(void)
                }
        }
 
+       if (vls) {
+               if (!npt_enabled ||
+                   !boot_cpu_has(X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE) ||
+                   !IS_ENABLED(CONFIG_X86_64)) {
+                       vls = false;
+               } else {
+                       pr_info("Virtual VMLOAD VMSAVE supported\n");
+               }
+       }
+
        return 0;
 
 err:
@@ -1280,6 +1295,16 @@ static void init_vmcb(struct vcpu_svm *svm)
        if (avic)
                avic_init_vmcb(svm);
 
+       /*
+        * If hardware supports Virtual VMLOAD VMSAVE then enable it
+        * in VMCB and clear intercepts to avoid #VMEXIT.
+        */
+       if (vls) {
+               clr_intercept(svm, INTERCEPT_VMLOAD);
+               clr_intercept(svm, INTERCEPT_VMSAVE);
+               svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+       }
+
        mark_all_dirty(svm->vmcb);
 
        enable_gif(svm);
@@ -2096,34 +2121,11 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 static int pf_interception(struct vcpu_svm *svm)
 {
        u64 fault_address = svm->vmcb->control.exit_info_2;
-       u64 error_code;
-       int r = 1;
+       u64 error_code = svm->vmcb->control.exit_info_1;
 
-       switch (svm->apf_reason) {
-       default:
-               error_code = svm->vmcb->control.exit_info_1;
-
-               trace_kvm_page_fault(fault_address, error_code);
-               if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
-                       kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
-               r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
+       return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
                        svm->vmcb->control.insn_bytes,
-                       svm->vmcb->control.insn_len);
-               break;
-       case KVM_PV_REASON_PAGE_NOT_PRESENT:
-               svm->apf_reason = 0;
-               local_irq_disable();
-               kvm_async_pf_task_wait(fault_address);
-               local_irq_enable();
-               break;
-       case KVM_PV_REASON_PAGE_READY:
-               svm->apf_reason = 0;
-               local_irq_disable();
-               kvm_async_pf_task_wake(fault_address);
-               local_irq_enable();
-               break;
-       }
-       return r;
+                       svm->vmcb->control.insn_len, !npt_enabled);
 }
 
 static int db_interception(struct vcpu_svm *svm)
@@ -2267,7 +2269,7 @@ static int io_interception(struct vcpu_svm *svm)
 {
        struct kvm_vcpu *vcpu = &svm->vcpu;
        u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
-       int size, in, string;
+       int size, in, string, ret;
        unsigned port;
 
        ++svm->vcpu.stat.io_exits;
@@ -2279,10 +2281,16 @@ static int io_interception(struct vcpu_svm *svm)
        port = io_info >> 16;
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
        svm->next_rip = svm->vmcb->control.exit_info_2;
-       skip_emulated_instruction(&svm->vcpu);
+       ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
-       return in ? kvm_fast_pio_in(vcpu, size, port)
-                 : kvm_fast_pio_out(vcpu, size, port);
+       /*
+        * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
+        * KVM_EXIT_DEBUG here.
+        */
+       if (in)
+               return kvm_fast_pio_in(vcpu, size, port) && ret;
+       else
+               return kvm_fast_pio_out(vcpu, size, port) && ret;
 }
 
 static int nmi_interception(struct vcpu_svm *svm)
@@ -2415,15 +2423,19 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
        if (!is_guest_mode(&svm->vcpu))
                return 0;
 
+       vmexit = nested_svm_intercept(svm);
+       if (vmexit != NESTED_EXIT_DONE)
+               return 0;
+
        svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
        svm->vmcb->control.exit_code_hi = 0;
        svm->vmcb->control.exit_info_1 = error_code;
-       svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
-
-       vmexit = nested_svm_intercept(svm);
-       if (vmexit == NESTED_EXIT_DONE)
-               svm->nested.exit_required = true;
+       if (svm->vcpu.arch.exception.nested_apf)
+               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+       else
+               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
 
+       svm->nested.exit_required = true;
        return vmexit;
 }
 
@@ -2598,7 +2610,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
                break;
        case SVM_EXIT_EXCP_BASE + PF_VECTOR:
                /* When we're shadowing, trap PFs, but not async PF */
-               if (!npt_enabled && svm->apf_reason == 0)
+               if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
                        return NESTED_EXIT_HOST;
                break;
        default:
@@ -2645,7 +2657,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
                }
                /* async page fault always cause vmexit */
                else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
-                        svm->apf_reason != 0)
+                        svm->vcpu.arch.exception.nested_apf != 0)
                        vmexit = NESTED_EXIT_DONE;
                break;
        }
@@ -2702,7 +2714,7 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr
        dst->event_inj            = from->event_inj;
        dst->event_inj_err        = from->event_inj_err;
        dst->nested_cr3           = from->nested_cr3;
-       dst->lbr_ctl              = from->lbr_ctl;
+       dst->virt_ext              = from->virt_ext;
 }
 
 static int nested_svm_vmexit(struct vcpu_svm *svm)
@@ -3008,7 +3020,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
        /* We don't want to see VMMCALLs from a nested guest */
        clr_intercept(svm, INTERCEPT_VMMCALL);
 
-       svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
+       svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
        svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
        svm->vmcb->control.int_state = nested_vmcb->control.int_state;
        svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
@@ -3055,6 +3067,7 @@ static int vmload_interception(struct vcpu_svm *svm)
 {
        struct vmcb *nested_vmcb;
        struct page *page;
+       int ret;
 
        if (nested_svm_check_permissions(svm))
                return 1;
@@ -3064,18 +3077,19 @@ static int vmload_interception(struct vcpu_svm *svm)
                return 1;
 
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-       skip_emulated_instruction(&svm->vcpu);
+       ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
        nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
        nested_svm_unmap(page);
 
-       return 1;
+       return ret;
 }
 
 static int vmsave_interception(struct vcpu_svm *svm)
 {
        struct vmcb *nested_vmcb;
        struct page *page;
+       int ret;
 
        if (nested_svm_check_permissions(svm))
                return 1;
@@ -3085,12 +3099,12 @@ static int vmsave_interception(struct vcpu_svm *svm)
                return 1;
 
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-       skip_emulated_instruction(&svm->vcpu);
+       ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
        nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
        nested_svm_unmap(page);
 
-       return 1;
+       return ret;
 }
 
 static int vmrun_interception(struct vcpu_svm *svm)
@@ -3123,25 +3137,29 @@ failed:
 
 static int stgi_interception(struct vcpu_svm *svm)
 {
+       int ret;
+
        if (nested_svm_check_permissions(svm))
                return 1;
 
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-       skip_emulated_instruction(&svm->vcpu);
+       ret = kvm_skip_emulated_instruction(&svm->vcpu);
        kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
 
        enable_gif(svm);
 
-       return 1;
+       return ret;
 }
 
 static int clgi_interception(struct vcpu_svm *svm)
 {
+       int ret;
+
        if (nested_svm_check_permissions(svm))
                return 1;
 
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-       skip_emulated_instruction(&svm->vcpu);
+       ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
        disable_gif(svm);
 
@@ -3152,7 +3170,7 @@ static int clgi_interception(struct vcpu_svm *svm)
                mark_dirty(svm->vmcb, VMCB_INTR);
        }
 
-       return 1;
+       return ret;
 }
 
 static int invlpga_interception(struct vcpu_svm *svm)
@@ -3166,8 +3184,7 @@ static int invlpga_interception(struct vcpu_svm *svm)
        kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-       skip_emulated_instruction(&svm->vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int skinit_interception(struct vcpu_svm *svm)
@@ -3190,7 +3207,7 @@ static int xsetbv_interception(struct vcpu_svm *svm)
 
        if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
                svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-               skip_emulated_instruction(&svm->vcpu);
+               return kvm_skip_emulated_instruction(&svm->vcpu);
        }
 
        return 1;
@@ -3286,8 +3303,7 @@ static int invlpg_interception(struct vcpu_svm *svm)
                return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 
        kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
-       skip_emulated_instruction(&svm->vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int emulate_on_interception(struct vcpu_svm *svm)
@@ -3437,9 +3453,7 @@ static int dr_interception(struct vcpu_svm *svm)
                kvm_register_write(&svm->vcpu, reg, val);
        }
 
-       skip_emulated_instruction(&svm->vcpu);
-
-       return 1;
+       return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int cr8_write_interception(struct vcpu_svm *svm)
@@ -3562,6 +3576,7 @@ static int rdmsr_interception(struct vcpu_svm *svm)
        if (svm_get_msr(&svm->vcpu, &msr_info)) {
                trace_kvm_msr_read_ex(ecx);
                kvm_inject_gp(&svm->vcpu, 0);
+               return 1;
        } else {
                trace_kvm_msr_read(ecx, msr_info.data);
 
@@ -3570,9 +3585,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
                kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
                                   msr_info.data >> 32);
                svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-               skip_emulated_instruction(&svm->vcpu);
+               return kvm_skip_emulated_instruction(&svm->vcpu);
        }
-       return 1;
 }
 
 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
@@ -3698,11 +3712,11 @@ static int wrmsr_interception(struct vcpu_svm *svm)
        if (kvm_set_msr(&svm->vcpu, &msr)) {
                trace_kvm_msr_write_ex(ecx, data);
                kvm_inject_gp(&svm->vcpu, 0);
+               return 1;
        } else {
                trace_kvm_msr_write(ecx, data);
-               skip_emulated_instruction(&svm->vcpu);
+               return kvm_skip_emulated_instruction(&svm->vcpu);
        }
-       return 1;
 }
 
 static int msr_interception(struct vcpu_svm *svm)
@@ -3731,8 +3745,7 @@ static int pause_interception(struct vcpu_svm *svm)
 
 static int nop_interception(struct vcpu_svm *svm)
 {
-       skip_emulated_instruction(&(svm->vcpu));
-       return 1;
+       return kvm_skip_emulated_instruction(&(svm->vcpu));
 }
 
 static int monitor_interception(struct vcpu_svm *svm)
@@ -4117,7 +4130,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
        pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
        pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
        pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
-       pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
+       pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
        pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
        pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
        pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
@@ -4965,7 +4978,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
        /* if exit due to PF check for async PF */
        if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
-               svm->apf_reason = kvm_read_and_reset_pf_reason();
+               svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
 
        if (npt_enabled) {
                vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
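
The pattern behind the many skip_emulated_instruction() conversions above:
kvm_skip_emulated_instruction() returns 0 when a KVM_GUESTDBG_SINGLESTEP #DB
has to be forwarded to userspace as KVM_EXIT_DEBUG, so intercept handlers
must propagate its return value instead of returning 1 unconditionally.
Schematically (not a real handler; insn_len stands in for the instruction
length):

	static int some_interception(struct vcpu_svm *svm)
	{
		int ret;

		svm->next_rip = kvm_rip_read(&svm->vcpu) + insn_len;
		ret = kvm_skip_emulated_instruction(&svm->vcpu);

		/* ... perform the intercepted operation ... */

		return ret;	/* 0 => exit to userspace with KVM_EXIT_DEBUG */
	}
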
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f76efad248aba0dc02bce77a4cd984343d181d79..84e62acf2dd861023b17382e61f9c98c8df82f68 100644
@@ -2422,28 +2422,41 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
  */
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
 {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+       unsigned int nr = vcpu->arch.exception.nr;
 
-       if (!(vmcs12->exception_bitmap & (1u << nr)))
+       if (!((vmcs12->exception_bitmap & (1u << nr)) ||
+               (nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
                return 0;
 
+       if (vcpu->arch.exception.nested_apf) {
+               vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code);
+               nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+                       PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
+                       INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
+                       vcpu->arch.apf.nested_apf_token);
+               return 1;
+       }
+
        nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
                          vmcs_read32(VM_EXIT_INTR_INFO),
                          vmcs_readl(EXIT_QUALIFICATION));
        return 1;
 }
 
-static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
-                               bool has_error_code, u32 error_code,
-                               bool reinject)
+static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       unsigned nr = vcpu->arch.exception.nr;
+       bool has_error_code = vcpu->arch.exception.has_error_code;
+       bool reinject = vcpu->arch.exception.reinject;
+       u32 error_code = vcpu->arch.exception.error_code;
        u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
        if (!reinject && is_guest_mode(vcpu) &&
-           nested_vmx_check_exception(vcpu, nr))
+           nested_vmx_check_exception(vcpu))
                return;
 
        if (has_error_code) {
@@ -3764,6 +3777,25 @@ static void free_kvm_area(void)
        }
 }
 
+enum vmcs_field_type {
+       VMCS_FIELD_TYPE_U16 = 0,
+       VMCS_FIELD_TYPE_U64 = 1,
+       VMCS_FIELD_TYPE_U32 = 2,
+       VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
+};
+
+static inline int vmcs_field_type(unsigned long field)
+{
+       if (0x1 & field)        /* the *_HIGH fields are all 32 bit */
+               return VMCS_FIELD_TYPE_U32;
+       return (field >> 13) & 0x3 ;
+}
+
+static inline int vmcs_field_readonly(unsigned long field)
+{
+       return (((field >> 10) & 0x3) == 1);
+}
+
 static void init_vmcs_shadow_fields(void)
 {
        int i, j;
@@ -3789,14 +3821,22 @@ static void init_vmcs_shadow_fields(void)
 
        /* shadowed fields guest access without vmexit */
        for (i = 0; i < max_shadow_read_write_fields; i++) {
-               clear_bit(shadow_read_write_fields[i],
-                         vmx_vmwrite_bitmap);
-               clear_bit(shadow_read_write_fields[i],
-                         vmx_vmread_bitmap);
+               unsigned long field = shadow_read_write_fields[i];
+
+               clear_bit(field, vmx_vmwrite_bitmap);
+               clear_bit(field, vmx_vmread_bitmap);
+               if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
+                       clear_bit(field + 1, vmx_vmwrite_bitmap);
+                       clear_bit(field + 1, vmx_vmread_bitmap);
+               }
+       }
+       for (i = 0; i < max_shadow_read_only_fields; i++) {
+               unsigned long field = shadow_read_only_fields[i];
+
+               clear_bit(field, vmx_vmread_bitmap);
+               if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
+                       clear_bit(field + 1, vmx_vmread_bitmap);
        }
-       for (i = 0; i < max_shadow_read_only_fields; i++)
-               clear_bit(shadow_read_only_fields[i],
-                         vmx_vmread_bitmap);
 }
 
 static __init int alloc_kvm_area(void)
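
Why clearing field + 1 matters: every 64-bit VMCS field has a companion
"high" field at encoding field + 1, and shadowing only the low half lets the
two copies of the field drift apart when the shadow VMCS is synced back to
vmcs12. Worked through the helper above, with GUEST_PHYSICAL_ADDRESS (0x2400)
as the illustration:

	vmcs_field_type(0x2400);	/* (0x2400 >> 13) & 3 == 1 -> VMCS_FIELD_TYPE_U64 */
	vmcs_field_type(0x2401);	/* bit 0 set: the "high" half, 32-bit access */
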
@@ -4634,6 +4674,11 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
        return true;
 }
 
+static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+       return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
+}
+
 static int init_rmode_tss(struct kvm *kvm)
 {
        gfn_t fn;
@@ -5664,14 +5709,11 @@ static int handle_exception(struct kvm_vcpu *vcpu)
        }
 
        if (is_page_fault(intr_info)) {
-               /* EPT won't cause page fault directly */
-               BUG_ON(enable_ept);
                cr2 = vmcs_readl(EXIT_QUALIFICATION);
-               trace_kvm_page_fault(cr2, error_code);
-
-               if (kvm_event_needs_reinjection(vcpu))
-                       kvm_mmu_unprotect_page_virt(vcpu, cr2);
-               return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
+               /* EPT won't cause page fault directly */
+               WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
+               return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0,
+                               true);
        }
 
        ex_no = intr_info & INTR_INFO_VECTOR_MASK;
@@ -7214,25 +7256,6 @@ static int handle_vmresume(struct kvm_vcpu *vcpu)
        return nested_vmx_run(vcpu, false);
 }
 
-enum vmcs_field_type {
-       VMCS_FIELD_TYPE_U16 = 0,
-       VMCS_FIELD_TYPE_U64 = 1,
-       VMCS_FIELD_TYPE_U32 = 2,
-       VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
-};
-
-static inline int vmcs_field_type(unsigned long field)
-{
-       if (0x1 & field)        /* the *_HIGH fields are all 32 bit */
-               return VMCS_FIELD_TYPE_U32;
-       return (field >> 13) & 0x3 ;
-}
-
-static inline int vmcs_field_readonly(unsigned long field)
-{
-       return (((field >> 10) & 0x3) == 1);
-}
-
 /*
  * Read a vmcs12 field. Since these can have varying lengths and we return
  * one type, we chose the biggest type (u64) and zero-extend the return value
@@ -8014,7 +8037,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                if (is_nmi(intr_info))
                        return false;
                else if (is_page_fault(intr_info))
-                       return enable_ept;
+                       return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
                else if (is_no_device(intr_info) &&
                         !(vmcs12->guest_cr0 & X86_CR0_TS))
                        return false;
@@ -8418,9 +8441,15 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
                        exit_reason != EXIT_REASON_TASK_SWITCH)) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
-               vcpu->run->internal.ndata = 2;
+               vcpu->run->internal.ndata = 3;
                vcpu->run->internal.data[0] = vectoring_info;
                vcpu->run->internal.data[1] = exit_reason;
+               vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
+               if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
+                       vcpu->run->internal.ndata++;
+                       vcpu->run->internal.data[3] =
+                               vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+               }
                return 0;
        }
 
@@ -8611,17 +8640,24 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 
 static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 {
-       u32 exit_intr_info;
+       u32 exit_intr_info = 0;
+       u16 basic_exit_reason = (u16)vmx->exit_reason;
 
-       if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
-             || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
+       if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
+             || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
                return;
 
-       vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-       exit_intr_info = vmx->exit_intr_info;
+       if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+               exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+       vmx->exit_intr_info = exit_intr_info;
+
+       /* if exit due to PF check for async PF */
+       if (is_page_fault(exit_intr_info))
+               vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
 
        /* Handle machine checks before interrupts are enabled */
-       if (is_machine_check(exit_intr_info))
+       if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
+           is_machine_check(exit_intr_info))
                kvm_machine_check();
 
        /* We need to handle NMIs before interrupts are enabled */
@@ -9589,23 +9625,26 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
                      ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
 }
 
+static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
+                                              struct vmcs12 *vmcs12)
+{
+       if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+               return 0;
+
+       if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) ||
+           !page_address_valid(vcpu, vmcs12->io_bitmap_b))
+               return -EINVAL;
+
+       return 0;
+}
+
 static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
                                                struct vmcs12 *vmcs12)
 {
-       int maxphyaddr;
-       u64 addr;
-
        if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
                return 0;
 
-       if (vmcs12_read_any(vcpu, MSR_BITMAP, &addr)) {
-               WARN_ON(1);
-               return -EINVAL;
-       }
-       maxphyaddr = cpuid_maxphyaddr(vcpu);
-
-       if (!PAGE_ALIGNED(vmcs12->msr_bitmap) ||
-          ((addr + PAGE_SIZE) >> maxphyaddr))
+       if (!page_address_valid(vcpu, vmcs12->msr_bitmap))
                return -EINVAL;
 
        return 0;
@@ -10293,6 +10332,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
            vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
                return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+       if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12))
+               return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
        if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
                return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
@@ -10429,8 +10471,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
                return 1;
        }
 
-       vmcs12->launch_state = 1;
-
        /*
         * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
         * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@@ -10804,6 +10844,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
        vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 
        if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+               vmcs12->launch_state = 1;
+
                /* vm_entry_intr_info_field is cleared on exit. Emulate this
                 * instead of reading the real value. */
                vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
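
With the extra payload, userspace that hits KVM_EXIT_INTERNAL_ERROR can now
report the exit qualification and, for EPT misconfig, the guest physical
address. A sketch of consuming it (run being the mmap'ed struct kvm_run):

	if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR &&
	    run->internal.suberror == KVM_INTERNAL_ERROR_DELIVERY_EV) {
		fprintf(stderr, "vect_info=%llx exit_reason=%llx exit_qual=%llx\n",
			run->internal.data[0], run->internal.data[1],
			run->internal.data[2]);
		if (run->internal.ndata >= 4)	/* EPT misconfig appends the GPA */
			fprintf(stderr, "gpa=%llx\n", run->internal.data[3]);
	}
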
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6c7266f7766dcb6ec02b13b9b1439c9f9d547071..5b8f07889f6a591f4e23ac69c6bb9656e7bc0a31 100644
@@ -134,8 +134,6 @@ module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
 static bool __read_mostly vector_hashing = true;
 module_param(vector_hashing, bool, S_IRUGO);
 
-static bool __read_mostly backwards_tsc_observed = false;
-
 #define KVM_NR_SHARED_MSRS 16
 
 struct kvm_shared_msrs_global {
@@ -452,7 +450,12 @@ EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 {
        ++vcpu->stat.pf_guest;
-       vcpu->arch.cr2 = fault->address;
+       vcpu->arch.exception.nested_apf =
+               is_guest_mode(vcpu) && fault->async_page_fault;
+       if (vcpu->arch.exception.nested_apf)
+               vcpu->arch.apf.nested_apf_token = fault->address;
+       else
+               vcpu->arch.cr2 = fault->address;
        kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
 }
 EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
@@ -1719,7 +1722,7 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
                                        &ka->master_cycle_now);
 
        ka->use_master_clock = host_tsc_clocksource && vcpus_matched
-                               && !backwards_tsc_observed
+                               && !ka->backwards_tsc_observed
                                && !ka->boot_vcpu_runs_old_kvmclock;
 
        if (ka->use_master_clock)
@@ -2060,8 +2063,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 {
        gpa_t gpa = data & ~0x3f;
 
-       /* Bits 2:5 are reserved, Should be zero */
-       if (data & 0x3c)
+       /* Bits 3:5 are reserved, Should be zero */
+       if (data & 0x38)
                return 1;
 
        vcpu->arch.apf.msr_val = data;
@@ -2077,6 +2080,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
                return 1;
 
        vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
+       vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
        kvm_async_pf_wakeup_all(vcpu);
        return 0;
 }
@@ -2661,6 +2665,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_HYPERV_VAPIC:
        case KVM_CAP_HYPERV_SPIN:
        case KVM_CAP_HYPERV_SYNIC:
+       case KVM_CAP_HYPERV_SYNIC2:
+       case KVM_CAP_HYPERV_VP_INDEX:
        case KVM_CAP_PCI_SEGMENT:
        case KVM_CAP_DEBUGREGS:
        case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -3384,10 +3390,14 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                return -EINVAL;
 
        switch (cap->cap) {
+       case KVM_CAP_HYPERV_SYNIC2:
+               if (cap->args[0])
+                       return -EINVAL;
        case KVM_CAP_HYPERV_SYNIC:
                if (!irqchip_in_kernel(vcpu->kvm))
                        return -EINVAL;
-               return kvm_hv_activate_synic(vcpu);
+               return kvm_hv_activate_synic(vcpu, cap->cap ==
+                                            KVM_CAP_HYPERV_SYNIC2);
        default:
                return -EINVAL;
        }
@@ -4188,9 +4198,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
 
                r = 0;
+               /*
+                * TODO: userspace has to take care of races with VCPU_RUN, so
+                * kvm_gen_update_masterclock() can be cut down to locked
+                * pvclock_update_vm_gtod_copy().
+                */
+               kvm_gen_update_masterclock(kvm);
                now_ns = get_kvmclock_ns(kvm);
                kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
-               kvm_gen_update_masterclock(kvm);
+               kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
                break;
        }
        case KVM_GET_CLOCK: {
@@ -6347,10 +6363,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
                        kvm_update_dr7(vcpu);
                }
 
-               kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
-                                         vcpu->arch.exception.has_error_code,
-                                         vcpu->arch.exception.error_code,
-                                         vcpu->arch.exception.reinject);
+               kvm_x86_ops->queue_exception(vcpu);
                return 0;
        }
 
@@ -7676,6 +7689,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
        struct msr_data msr;
        struct kvm *kvm = vcpu->kvm;
 
+       kvm_hv_vcpu_postcreate(vcpu);
+
        if (vcpu_load(vcpu))
                return;
        msr.data = 0x0;
@@ -7829,8 +7844,8 @@ int kvm_arch_hardware_enable(void)
         */
        if (backwards_tsc) {
                u64 delta_cyc = max_tsc - local_tsc;
-               backwards_tsc_observed = true;
                list_for_each_entry(kvm, &vm_list, vm_list) {
+                       kvm->arch.backwards_tsc_observed = true;
                        kvm_for_each_vcpu(i, vcpu, kvm) {
                                vcpu->arch.tsc_offset_adjustment += delta_cyc;
                                vcpu->arch.last_host_tsc = local_tsc;
@@ -8576,6 +8591,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
                fault.error_code = 0;
                fault.nested_page_fault = false;
                fault.address = work->arch.token;
+               fault.async_page_fault = true;
                kvm_inject_page_fault(vcpu, &fault);
        }
 }
@@ -8598,6 +8614,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
                fault.error_code = 0;
                fault.nested_page_fault = false;
                fault.address = work->arch.token;
+               fault.async_page_fault = true;
                kvm_inject_page_fault(vcpu, &fault);
        }
        vcpu->arch.apf.halted = false;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0b50e7b35ed4135f81a3332331132aa0b1fc1c18..648b34cabb38214e6bb957aeecbcf61e03a26d4c 100644
@@ -234,7 +234,7 @@ struct kvm_vcpu {
 
        int guest_fpu_loaded, guest_xcr0_loaded;
        struct swait_queue_head wq;
-       struct pid *pid;
+       struct pid __rcu *pid;
        int sigset_active;
        sigset_t sigset;
        struct kvm_vcpu_stat stat;
@@ -390,7 +390,7 @@ struct kvm {
        spinlock_t mmu_lock;
        struct mutex slots_lock;
        struct mm_struct *mm; /* userspace tied to this vm */
-       struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
+       struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
        struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 
        /*
@@ -404,7 +404,7 @@ struct kvm {
        int last_boosted_vcpu;
        struct list_head vm_list;
        struct mutex lock;
-       struct kvm_io_bus *buses[KVM_NR_BUSES];
+       struct kvm_io_bus __rcu *buses[KVM_NR_BUSES];
 #ifdef CONFIG_HAVE_KVM_EVENTFD
        struct {
                spinlock_t        lock;
@@ -473,6 +473,12 @@ struct kvm {
 #define vcpu_err(vcpu, fmt, ...)                                       \
        kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 
+static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
+{
+       return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
+                                     lockdep_is_held(&kvm->slots_lock));
+}
+
 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 {
        /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case
@@ -562,9 +568,8 @@ void kvm_put_kvm(struct kvm *kvm);
 
 static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
 {
-       return rcu_dereference_check(kvm->memslots[as_id],
-                       srcu_read_lock_held(&kvm->srcu)
-                       || lockdep_is_held(&kvm->slots_lock));
+       return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
+                       lockdep_is_held(&kvm->slots_lock));
 }
 
 static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index c0b6dfec5f87241cd96f386643eeee159c0da1b2..6cd63c18708ae1d23dbc280ed49aed55f817a2f5 100644
@@ -927,6 +927,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_CMMA_MIGRATION 145
 #define KVM_CAP_PPC_FWNMI 146
 #define KVM_CAP_PPC_SMT_POSSIBLE 147
+#define KVM_CAP_HYPERV_SYNIC2 148
+#define KVM_CAP_HYPERV_VP_INDEX 149
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1351,7 +1353,7 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_X86_SMM */
 #define KVM_SMI                   _IO(KVMIO,   0xb7)
 /* Available with KVM_CAP_S390_CMMA_MIGRATION */
-#define KVM_S390_GET_CMMA_BITS      _IOW(KVMIO, 0xb8, struct kvm_s390_cmma_log)
+#define KVM_S390_GET_CMMA_BITS      _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
 #define KVM_S390_SET_CMMA_BITS      _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 9120edf3c94bfccd1e34a625d163385abbd3cbc8..f2ac53ab82438f0b473ecd8ed91b1e2548af7ca2 100644
@@ -825,7 +825,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
        if (ret < 0)
                goto unlock_fail;
 
-       kvm->buses[bus_idx]->ioeventfd_count++;
+       kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
        list_add_tail(&p->list, &kvm->ioeventfds);
 
        mutex_unlock(&kvm->slots_lock);
@@ -848,6 +848,7 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
 {
        struct _ioeventfd        *p, *tmp;
        struct eventfd_ctx       *eventfd;
+       struct kvm_io_bus        *bus;
        int                       ret = -ENOENT;
 
        eventfd = eventfd_ctx_fdget(args->fd);
@@ -870,8 +871,9 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
                        continue;
 
                kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
-               if (kvm->buses[bus_idx])
-                       kvm->buses[bus_idx]->ioeventfd_count--;
+               bus = kvm_get_bus(kvm, bus_idx);
+               if (bus)
+                       bus->ioeventfd_count--;
                ioeventfd_release(p);
                ret = 0;
                break;
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 31e40c9e81df41de6f8cadb076515677ac5cae9c..b1286c4e0712259fac5d66b9bbc3aaf388d3d3f9 100644
@@ -230,7 +230,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
        }
 
        mutex_lock(&kvm->irq_lock);
-       old = kvm->irq_routing;
+       old = rcu_dereference_protected(kvm->irq_routing, 1);
        rcu_assign_pointer(kvm->irq_routing, new);
        kvm_irq_routing_update(kvm);
        kvm_arch_irq_routing_update(kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 19f0ecb9b93e23501af3f5c21e2c971cebfd3c6a..82987d457b8bb4e7ec4c1159e37857fd85a11c18 100644
@@ -130,6 +130,12 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
 
 static bool largepages_enabled = true;
 
+#define KVM_EVENT_CREATE_VM 0
+#define KVM_EVENT_DESTROY_VM 1
+static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
+static unsigned long long kvm_createvm_count;
+static unsigned long long kvm_active_vms;
+
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
 {
        if (pfn_valid(pfn))
@@ -187,12 +193,23 @@ static void ack_flush(void *_completed)
 {
 }
 
+static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
+{
+       if (unlikely(!cpus))
+               cpus = cpu_online_mask;
+
+       if (cpumask_empty(cpus))
+               return false;
+
+       smp_call_function_many(cpus, ack_flush, NULL, wait);
+       return true;
+}
+
 bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
        int i, cpu, me;
        cpumask_var_t cpus;
-       bool called = true;
-       bool wait = req & KVM_REQUEST_WAIT;
+       bool called;
        struct kvm_vcpu *vcpu;
 
        zalloc_cpumask_var(&cpus, GFP_ATOMIC);
@@ -207,14 +224,9 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
                if (cpus != NULL && cpu != -1 && cpu != me &&
                    kvm_request_needs_ipi(vcpu, req))
-                       cpumask_set_cpu(cpu, cpus);
+                       __cpumask_set_cpu(cpu, cpus);
        }
-       if (unlikely(cpus == NULL))
-               smp_call_function_many(cpu_online_mask, ack_flush, NULL, wait);
-       else if (!cpumask_empty(cpus))
-               smp_call_function_many(cpus, ack_flush, NULL, wait);
-       else
-               called = false;
+       called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
        put_cpu();
        free_cpumask_var(cpus);
        return called;
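
Two things changed above: the NULL-mask and empty-mask special cases moved into kvm_kick_many_cpus(), and the per-vCPU bit set became __cpumask_set_cpu(). The double-underscore variant is the non-atomic one; it is safe because cpus is a freshly zeroed, function-local mask that no other CPU can observe while it is being filled, so the locked read-modify-write of the atomic variant would be wasted work. An illustrative sketch of the distinction:

#include <linux/cpumask.h>

/* @mask is private to the caller while it is filled, so the plain
 * (non-atomic) bit set is sufficient. */
static void mark_cpu_private(struct cpumask *mask, int cpu)
{
	__cpumask_set_cpu(cpu, mask);	/* plain __set_bit() */
	/* cpumask_set_cpu(cpu, mask) would use atomic set_bit(),
	 * needed only when the mask is shared between CPUs. */
}
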
@@ -293,7 +305,12 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init);
 
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
-       put_pid(vcpu->pid);
+       /*
+        * no need for rcu_read_lock as VCPU_RUN is the only place that
+        * will change the vcpu->pid pointer and on uninit all file
+        * descriptors are already gone.
+        */
+       put_pid(rcu_dereference_protected(vcpu->pid, 1));
        kvm_arch_vcpu_uninit(vcpu);
        free_page((unsigned long)vcpu->run);
 }
@@ -674,8 +691,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
        if (init_srcu_struct(&kvm->irq_srcu))
                goto out_err_no_irq_srcu;
        for (i = 0; i < KVM_NR_BUSES; i++) {
-               kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
-                                       GFP_KERNEL);
+               rcu_assign_pointer(kvm->buses[i],
+                       kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL));
                if (!kvm->buses[i])
                        goto out_err;
        }
@@ -700,9 +717,10 @@ out_err_no_srcu:
        hardware_disable_all();
 out_err_no_disable:
        for (i = 0; i < KVM_NR_BUSES; i++)
-               kfree(kvm->buses[i]);
+               kfree(rcu_access_pointer(kvm->buses[i]));
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-               kvm_free_memslots(kvm, kvm->memslots[i]);
+               kvm_free_memslots(kvm,
+                       rcu_dereference_protected(kvm->memslots[i], 1));
        kvm_arch_free_vm(kvm);
        mmdrop(current->mm);
        return ERR_PTR(r);
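
The creation path publishes each bus with rcu_assign_pointer(), which orders the kzalloc initialization before the pointer store, while the error path frees with kfree(rcu_access_pointer(...)): the VM has not been exposed to userspace yet, so no reader can hold the pointer, and the address-only accessor (which gives no dereference guarantees) is sufficient. A generalized sketch of that error-path idiom, with hypothetical names:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo { int x; };

/* The object in @slot was never published to any reader, so fetching
 * its address with rcu_access_pointer() is legal here: the value is
 * only handed to kfree(), never dereferenced. */
static void free_unpublished(struct foo __rcu **slot)
{
	kfree(rcu_access_pointer(*slot));
	RCU_INIT_POINTER(*slot, NULL);
}
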
@@ -728,6 +746,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
        int i;
        struct mm_struct *mm = kvm->mm;
 
+       kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
        kvm_destroy_vm_debugfs(kvm);
        kvm_arch_sync_events(kvm);
        spin_lock(&kvm_lock);
@@ -735,8 +754,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
        spin_unlock(&kvm_lock);
        kvm_free_irq_routing(kvm);
        for (i = 0; i < KVM_NR_BUSES; i++) {
-               if (kvm->buses[i])
-                       kvm_io_bus_destroy(kvm->buses[i]);
+               struct kvm_io_bus *bus;
+
+               bus = rcu_dereference_protected(kvm->buses[i], 1);
+               if (bus)
+                       kvm_io_bus_destroy(bus);
                kvm->buses[i] = NULL;
        }
        kvm_coalesced_mmio_free(kvm);
@@ -748,7 +770,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
        kvm_arch_destroy_vm(kvm);
        kvm_destroy_devices(kvm);
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-               kvm_free_memslots(kvm, kvm->memslots[i]);
+               kvm_free_memslots(kvm,
+                       rcu_dereference_protected(kvm->memslots[i], 1));
        cleanup_srcu_struct(&kvm->irq_srcu);
        cleanup_srcu_struct(&kvm->srcu);
        kvm_arch_free_vm(kvm);
@@ -2551,13 +2574,14 @@ static long kvm_vcpu_ioctl(struct file *filp,
        if (r)
                return r;
        switch (ioctl) {
-       case KVM_RUN:
+       case KVM_RUN: {
+               struct pid *oldpid;
                r = -EINVAL;
                if (arg)
                        goto out;
-               if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
+               oldpid = rcu_access_pointer(vcpu->pid);
+               if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) {
                        /* The thread running this VCPU changed. */
-                       struct pid *oldpid = vcpu->pid;
                        struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
 
                        rcu_assign_pointer(vcpu->pid, newpid);
@@ -2568,6 +2592,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
                r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
                trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
                break;
+       }
        case KVM_GET_REGS: {
                struct kvm_regs *kvm_regs;
 
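
KVM_RUN only compares the stored pid against the current task's pid on the fast path, and rcu_access_pointer() is enough for a comparison that never dereferences the value; the slow path then publishes the new pid with rcu_assign_pointer(), and the lines elided after the hunk presumably synchronize and put the old pid. A comparison-only sketch, assuming this kernel's task_struct layout:

#include <linux/kvm_host.h>
#include <linux/sched.h>

/* Comparison-only read of an RCU pointer: no rcu_read_lock() and no
 * rcu_dereference() needed, because the value is never dereferenced,
 * only compared against the current task's pid. */
static bool vcpu_thread_changed(struct kvm_vcpu *vcpu)
{
	return rcu_access_pointer(vcpu->pid) !=
	       current->pids[PIDTYPE_PID].pid;
}
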
@@ -3202,6 +3227,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
                fput(file);
                return -ENOMEM;
        }
+       kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm);
 
        fd_install(r, file);
        return r;
@@ -3563,7 +3589,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 {
        struct kvm_io_bus *new_bus, *bus;
 
-       bus = kvm->buses[bus_idx];
+       bus = kvm_get_bus(kvm, bus_idx);
        if (!bus)
                return -ENOMEM;
 
@@ -3592,7 +3618,7 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
        int i;
        struct kvm_io_bus *new_bus, *bus;
 
-       bus = kvm->buses[bus_idx];
+       bus = kvm_get_bus(kvm, bus_idx);
        if (!bus)
                return;
 
@@ -3854,6 +3880,67 @@ static const struct file_operations *stat_fops[] = {
        [KVM_STAT_VM]   = &vm_stat_fops,
 };
 
+static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
+{
+       struct kobj_uevent_env *env;
+       char *tmp, *pathbuf = NULL;
+       unsigned long long created, active;
+
+       if (!kvm_dev.this_device || !kvm)
+               return;
+
+       spin_lock(&kvm_lock);
+       if (type == KVM_EVENT_CREATE_VM) {
+               kvm_createvm_count++;
+               kvm_active_vms++;
+       } else if (type == KVM_EVENT_DESTROY_VM) {
+               kvm_active_vms--;
+       }
+       created = kvm_createvm_count;
+       active = kvm_active_vms;
+       spin_unlock(&kvm_lock);
+
+       env = kzalloc(sizeof(*env), GFP_KERNEL);
+       if (!env)
+               return;
+
+       add_uevent_var(env, "CREATED=%llu", created);
+       add_uevent_var(env, "COUNT=%llu", active);
+
+       if (type == KVM_EVENT_CREATE_VM)
+               add_uevent_var(env, "EVENT=create");
+       else if (type == KVM_EVENT_DESTROY_VM)
+               add_uevent_var(env, "EVENT=destroy");
+
+       if (kvm->debugfs_dentry) {
+               char p[ITOA_MAX_LEN];
+
+               snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name);
+               tmp = strchrnul(p + 1, '-');
+               *tmp = '\0';
+               add_uevent_var(env, "PID=%s", p);
+               pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
+               if (pathbuf) {
+                       /* sizeof counts the final '\0' */
+                       int len = sizeof("STATS_PATH=") - 1;
+                       const char *pvar = "STATS_PATH=";
+
+                       tmp = dentry_path_raw(kvm->debugfs_dentry,
+                                             pathbuf + len,
+                                             PATH_MAX - len);
+                       if (!IS_ERR(tmp)) {
+                               memcpy(tmp - len, pvar, len);
+                               env->envp[env->envp_idx++] = tmp - len;
+                       }
+               }
+       }
+       /* no need for checks, since we are adding at most only 5 keys */
+       env->envp[env->envp_idx++] = NULL;
+       kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp);
+       kfree(env);
+       kfree(pathbuf);
+}
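
kvm_uevent_notify_change() folds the lifetime counters, the event type, the creating PID and the debugfs stats path into a single KOBJ_CHANGE uevent on the kvm device. Userspace can watch these with udevadm monitor --property, or directly on the kobject-uevent netlink socket; a minimal listener sketch (not part of the commit, error handling trimmed):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>

int main(void)
{
	struct sockaddr_nl addr = {
		.nl_family = AF_NETLINK,
		.nl_groups = 1,		/* kernel uevent broadcast group */
	};
	char buf[4096];
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_KOBJECT_UEVENT);

	if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	for (;;) {
		ssize_t len = recv(fd, buf, sizeof(buf) - 1, 0);
		ssize_t i = 0;

		if (len <= 0)
			break;
		buf[len] = '\0';
		/* A uevent is a block of NUL-separated KEY=VALUE strings,
		 * e.g. EVENT=create, COUNT=..., CREATED=..., PID=... */
		while (i < len) {
			puts(buf + i);
			i += strlen(buf + i) + 1;
		}
	}
	return 0;
}
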
+
 static int kvm_init_debug(void)
 {
        int r = -EEXIST;