Merge tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 15 Jul 2017 17:18:16 +0000 (10:18 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 15 Jul 2017 17:18:16 +0000 (10:18 -0700)
Pull more KVM updates from Radim Krčmář:
 "Second batch of KVM updates for v4.13

  Common:
   - add uevents for VM creation/destruction
   - annotate and properly access RCU-protected objects

  s390:
   - rename IOCTL added in the first v4.13 merge

  x86:
   - emulate VMLOAD VMSAVE feature in SVM
   - support paravirtual asynchronous page fault while nested
   - add Hyper-V userspace interfaces for better migration
   - improve master clock corner cases
   - extend internal error reporting after EPT misconfig
   - correct single-stepping of emulated instructions in SVM
   - handle MCE during VM entry
   - fix nVMX VM entry checks and nVMX VMCS shadowing"

* tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
  kvm: x86: hyperv: make VP_INDEX managed by userspace
  KVM: async_pf: Let guest support delivery of async_pf from guest mode
  KVM: async_pf: Force a nested vmexit if the injected #PF is async_pf
  KVM: async_pf: Add L1 guest async_pf #PF vmexit handler
  KVM: x86: Simplify kvm_x86_ops->queue_exception parameter list
  kvm: x86: hyperv: add KVM_CAP_HYPERV_SYNIC2
  KVM: x86: make backwards_tsc_observed a per-VM variable
  KVM: trigger uevents when creating or destroying a VM
  KVM: SVM: Enable Virtual VMLOAD VMSAVE feature
  KVM: SVM: Add Virtual VMLOAD VMSAVE feature definition
  KVM: SVM: Rename lbr_ctl field in the vmcb control area
  KVM: SVM: Prepare for new bit definition in lbr_ctl
  KVM: SVM: handle singlestep exception when skipping emulated instructions
  KVM: x86: take slots_lock in kvm_free_pit
  KVM: s390: Fix KVM_S390_GET_CMMA_BITS ioctl definition
  kvm: vmx: Properly handle machine check during VM-entry
  KVM: x86: update master clock before computing kvmclock_offset
  kvm: nVMX: Shadow "high" parts of shadowed 64-bit VMCS fields
  kvm: nVMX: Fix nested_vmx_check_msr_bitmap_controls
  kvm: nVMX: Validate the I/O bitmaps on nested VM-entry
  ...

21 files changed:
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/msr.txt
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/kvm_emulate.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/svm.h
arch/x86/include/uapi/asm/kvm_para.h
arch/x86/kernel/kvm.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/hyperv.h
arch/x86/kvm/i8254.c
arch/x86/kvm/mmu.c
arch/x86/kvm/mmu.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
include/linux/kvm_host.h
include/uapi/linux/kvm.h
virt/kvm/eventfd.c
virt/kvm/irqchip.c
virt/kvm/kvm_main.c

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 3a9831b72945a812b69cd37fe3c9157352ef7e6f..e63a35fafef0e153c30023e92622111006d1dd7c 100644
@@ -4329,3 +4329,21 @@ Querying this capability returns a bitmap indicating the possible
 virtual SMT modes that can be set using KVM_CAP_PPC_SMT.  If bit N
 (counting from the right) is set, then a virtual SMT mode of 2^N is
 available.
+
+8.11 KVM_CAP_HYPERV_SYNIC2
+
+Architectures: x86
+
+This capability enables a newer version of Hyper-V Synthetic interrupt
+controller (SynIC).  The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
+doesn't clear SynIC message and event flags pages when they are enabled by
+writing to the respective MSRs.
+
+8.12 KVM_CAP_HYPERV_VP_INDEX
+
+Architectures: x86
+
+This capability indicates that userspace can load HV_X64_MSR_VP_INDEX msr.  Its
+value is used to denote the target vcpu for a SynIC interrupt.  For
+compatibility, KVM initializes this msr to KVM's internal vcpu index.  When this
+capability is absent, userspace can still query this msr's value.
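
A minimal sketch of how userspace might enable the new capability via the
standard KVM_ENABLE_CAP vcpu ioctl (vcpu_fd from KVM_CREATE_VCPU is assumed;
error handling elided):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int enable_synic2(int vcpu_fd)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_HYPERV_SYNIC2,
			/* args[0] must be zero, see the x86.c hunk below */
		};

		return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
	}
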
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 0a9ea515512a208a4b239813028c136e1d2943af..1ebecc115dc6efdbbfa76eb4ab329f5b1d8b765e 100644
@@ -166,10 +166,11 @@ MSR_KVM_SYSTEM_TIME: 0x12
 MSR_KVM_ASYNC_PF_EN: 0x4b564d02
        data: Bits 63-6 hold 64-byte aligned physical address of a
        64 byte memory area which must be in guest RAM and must be
-       zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
+       zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
        when asynchronous page faults are enabled on the vcpu 0 when
        disabled. Bit 1 is 1 if asynchronous page faults can be injected
-       when vcpu is in cpl == 0.
+       when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
+       are delivered to L1 as #PF vmexits.
 
        First 4 byte of 64 byte memory location will be written to by
        the hypervisor at the time of asynchronous page fault (APF)
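
For reference, a guest enabling all three bits would write something like the
following (a sketch mirroring the layout above; apf_area stands for a
hypothetical 64-byte aligned, zeroed buffer in guest RAM):

	u64 pa = __pa(apf_area);			/* bits 63-6: area address */

	pa |= KVM_ASYNC_PF_ENABLED;			/* bit 0 */
	pa |= KVM_ASYNC_PF_SEND_ALWAYS;			/* bit 1 */
	pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;	/* bit 2 */
	wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);

Because older hypervisors reject bit 2 as reserved, the arch/x86/kernel/kvm.c
hunk below probes it with wrmsr_safe() and falls back to a plain wrmsrl()
without it.
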
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2701e5f8145bd250c3a35dc12cab5839e16ff30d..ca3c48c0872f4beba6b03ac92c50eb6a352b3006 100644
 #define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
 #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
 #define X86_FEATURE_AVIC       (15*32+13) /* Virtual Interrupt Controller */
+#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
 #define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 722d0e56886342a3a9f65d7f419d0e240a9cc6ab..fde36f189836db83af655652f44eec92820072b8 100644
@@ -23,6 +23,7 @@ struct x86_exception {
        u16 error_code;
        bool nested_page_fault;
        u64 address; /* cr2 or nested page fault gpa */
+       u8 async_page_fault;
 };
 
 /*
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1588e9e3dc01f9b807afafc35136974e5870977e..87ac4fba6d8e12f07e8a9f191bdb028a1c3e6234 100644
@@ -462,10 +462,12 @@ struct kvm_vcpu_hv_synic {
        DECLARE_BITMAP(auto_eoi_bitmap, 256);
        DECLARE_BITMAP(vec_bitmap, 256);
        bool active;
+       bool dont_zero_synic_pages;
 };
 
 /* Hyper-V per vcpu emulation context */
 struct kvm_vcpu_hv {
+       u32 vp_index;
        u64 hv_vapic;
        s64 runtime_offset;
        struct kvm_vcpu_hv_synic synic;
@@ -549,6 +551,7 @@ struct kvm_vcpu_arch {
                bool reinject;
                u8 nr;
                u32 error_code;
+               u8 nested_apf;
        } exception;
 
        struct kvm_queued_interrupt {
@@ -649,6 +652,9 @@ struct kvm_vcpu_arch {
                u64 msr_val;
                u32 id;
                bool send_user_only;
+               u32 host_apf_reason;
+               unsigned long nested_apf_token;
+               bool delivery_as_pf_vmexit;
        } apf;
 
        /* OSVW MSRs (AMD only) */
@@ -803,6 +809,7 @@ struct kvm_arch {
        int audit_point;
        #endif
 
+       bool backwards_tsc_observed;
        bool boot_vcpu_runs_old_kvmclock;
        u32 bsp_vcpu_id;
 
@@ -952,9 +959,7 @@ struct kvm_x86_ops {
                                unsigned char *hypercall_addr);
        void (*set_irq)(struct kvm_vcpu *vcpu);
        void (*set_nmi)(struct kvm_vcpu *vcpu);
-       void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
-                               bool has_error_code, u32 error_code,
-                               bool reinject);
+       void (*queue_exception)(struct kvm_vcpu *vcpu);
        void (*cancel_injection)(struct kvm_vcpu *vcpu);
        int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
        int (*nmi_allowed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 14824fc78f7e7160fa014979424f111a8a2e40f0..58fffe79e417e61cbd4f7041c9a7753a84b1331c 100644
@@ -83,7 +83,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
        u32 event_inj;
        u32 event_inj_err;
        u64 nested_cr3;
-       u64 lbr_ctl;
+       u64 virt_ext;
        u32 clean;
        u32 reserved_5;
        u64 next_rip;
@@ -119,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define AVIC_ENABLE_SHIFT 31
 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
 
+#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
+#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
+
 #define SVM_INTERRUPT_SHADOW_MASK 1
 
 #define SVM_IOIO_STR_SHIFT 2
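
With the rename, one VMCB field now carries both controls, so the svm.c hunks
below switch from a 0/1 assignment to mask operations on virt_ext, e.g.:

	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
	svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
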
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index cff0bb6556f8809ec39d08c61036a86d7c8adde0..a965e5b0d32804fa19f9434600054b39014dd310 100644
@@ -67,6 +67,7 @@ struct kvm_clock_pairing {
 
 #define KVM_ASYNC_PF_ENABLED                   (1 << 0)
 #define KVM_ASYNC_PF_SEND_ALWAYS               (1 << 1)
+#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT     (1 << 2)
 
 /* Operations for KVM_HC_MMU_OP */
 #define KVM_MMU_OP_WRITE_PTE            1
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 43e10d6fdbeda3002dcd9e2063a723ded4ca5972..71c17a5be983524ea277718b759ff1511a9a3214 100644
@@ -330,7 +330,12 @@ static void kvm_guest_cpu_init(void)
 #ifdef CONFIG_PREEMPT
                pa |= KVM_ASYNC_PF_SEND_ALWAYS;
 #endif
-               wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
+               pa |= KVM_ASYNC_PF_ENABLED;
+
+               /* Async page fault support for L1 hypervisor is optional */
+               if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
+                       (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
+                       wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
                __this_cpu_write(apf_reason.enabled, 1);
                printk(KERN_INFO"KVM setup async PF for cpu %d\n",
                       smp_processor_id());
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index ebae57ac59024a6759ff9e2bb403de60f6bce759..2695a34fa1c5190f98b9fdd092e9cc2d3e44b2a0 100644
@@ -106,14 +106,27 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
        return 0;
 }
 
-static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id)
+static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
+{
+       struct kvm_vcpu *vcpu = NULL;
+       int i;
+
+       if (vpidx < KVM_MAX_VCPUS)
+               vcpu = kvm_get_vcpu(kvm, vpidx);
+       if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
+               return vcpu;
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
+                       return vcpu;
+       return NULL;
+}
+
+static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
 {
        struct kvm_vcpu *vcpu;
        struct kvm_vcpu_hv_synic *synic;
 
-       if (vcpu_id >= atomic_read(&kvm->online_vcpus))
-               return NULL;
-       vcpu = kvm_get_vcpu(kvm, vcpu_id);
+       vcpu = get_vcpu_by_vpidx(kvm, vpidx);
        if (!vcpu)
                return NULL;
        synic = vcpu_to_synic(vcpu);
@@ -221,7 +234,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
                synic->version = data;
                break;
        case HV_X64_MSR_SIEFP:
-               if (data & HV_SYNIC_SIEFP_ENABLE)
+               if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
+                   !synic->dont_zero_synic_pages)
                        if (kvm_clear_guest(vcpu->kvm,
                                            data & PAGE_MASK, PAGE_SIZE)) {
                                ret = 1;
@@ -232,7 +246,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
                        synic_exit(synic, msr);
                break;
        case HV_X64_MSR_SIMP:
-               if (data & HV_SYNIC_SIMP_ENABLE)
+               if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
+                   !synic->dont_zero_synic_pages)
                        if (kvm_clear_guest(vcpu->kvm,
                                            data & PAGE_MASK, PAGE_SIZE)) {
                                ret = 1;
@@ -318,11 +333,11 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
        return ret;
 }
 
-int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint)
+int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
 {
        struct kvm_vcpu_hv_synic *synic;
 
-       synic = synic_get(kvm, vcpu_id);
+       synic = synic_get(kvm, vpidx);
        if (!synic)
                return -EINVAL;
 
@@ -341,11 +356,11 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
                        kvm_hv_notify_acked_sint(vcpu, i);
 }
 
-static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi)
+static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
 {
        struct kvm_vcpu_hv_synic *synic;
 
-       synic = synic_get(kvm, vcpu_id);
+       synic = synic_get(kvm, vpidx);
        if (!synic)
                return -EINVAL;
 
@@ -687,14 +702,24 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
                stimer_init(&hv_vcpu->stimer[i], i);
 }
 
-int kvm_hv_activate_synic(struct kvm_vcpu *vcpu)
+void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+       struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
+
+       hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
+}
+
+int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
 {
+       struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
+
        /*
         * Hyper-V SynIC auto EOI SINT's are
         * not compatible with APICV, so deactivate APICV
         */
        kvm_vcpu_deactivate_apicv(vcpu);
-       vcpu_to_synic(vcpu)->active = true;
+       synic->active = true;
+       synic->dont_zero_synic_pages = dont_zero_synic_pages;
        return 0;
 }
 
@@ -978,6 +1003,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
        struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
 
        switch (msr) {
+       case HV_X64_MSR_VP_INDEX:
+               if (!host)
+                       return 1;
+               hv->vp_index = (u32)data;
+               break;
        case HV_X64_MSR_APIC_ASSIST_PAGE: {
                u64 gfn;
                unsigned long addr;
@@ -1089,18 +1119,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
 
        switch (msr) {
-       case HV_X64_MSR_VP_INDEX: {
-               int r;
-               struct kvm_vcpu *v;
-
-               kvm_for_each_vcpu(r, v, vcpu->kvm) {
-                       if (v == vcpu) {
-                               data = r;
-                               break;
-                       }
-               }
+       case HV_X64_MSR_VP_INDEX:
+               data = hv->vp_index;
                break;
-       }
        case HV_X64_MSR_EOI:
                return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
        case HV_X64_MSR_ICR:
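
With VP_INDEX writable only from the host side (the !host check above),
migration userspace can transplant the saved value through KVM_SET_MSRS.
A sketch, assuming a vcpu fd and a saved_vp_index from the source; 0x40000002
is HV_X64_MSR_VP_INDEX in the Hyper-V definitions:

	struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entry;
	} msrs = {};

	msrs.hdr.nmsrs = 1;
	msrs.entry.index = 0x40000002;	/* HV_X64_MSR_VP_INDEX */
	msrs.entry.data = saved_vp_index;
	ioctl(vcpu_fd, KVM_SET_MSRS, &msrs);
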
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index cd1119538add9185f6966f2e3ea5a9a3f7fab630..e637631a9574feb5c5c5e0053a46d1b3f29d8646 100644
@@ -56,9 +56,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
 void kvm_hv_irq_routing_update(struct kvm *kvm);
 int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
 void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
-int kvm_hv_activate_synic(struct kvm_vcpu *vcpu);
+int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
 
 void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index a78b445ce4116b3dbcd48da97efdb6c89394abb5..af192895b1fc633e9b2922c587862d1cbb41efd7 100644
@@ -724,8 +724,10 @@ void kvm_free_pit(struct kvm *kvm)
        struct kvm_pit *pit = kvm->arch.vpit;
 
        if (pit) {
+               mutex_lock(&kvm->slots_lock);
                kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
                kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev);
+               mutex_unlock(&kvm->slots_lock);
                kvm_pit_set_reinject(pit, false);
                hrtimer_cancel(&pit->pit_state.timer);
                kthread_destroy_worker(pit->worker);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index aafd399cf8c6f3d3e219ec636a73b19ee1e9d20d..9b1dd114956a8bcb9e724bd4df792460f68a0943 100644
@@ -46,6 +46,7 @@
 #include <asm/io.h>
 #include <asm/vmx.h>
 #include <asm/kvm_page_track.h>
+#include "trace.h"
 
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
@@ -3748,7 +3749,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
                     kvm_event_needs_reinjection(vcpu)))
                return false;
 
-       if (is_guest_mode(vcpu))
+       if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
                return false;
 
        return kvm_x86_ops->interrupt_allowed(vcpu);
@@ -3780,6 +3781,38 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
        return false;
 }
 
+int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+                               u64 fault_address, char *insn, int insn_len,
+                               bool need_unprotect)
+{
+       int r = 1;
+
+       switch (vcpu->arch.apf.host_apf_reason) {
+       default:
+               trace_kvm_page_fault(fault_address, error_code);
+
+               if (need_unprotect && kvm_event_needs_reinjection(vcpu))
+                       kvm_mmu_unprotect_page_virt(vcpu, fault_address);
+               r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
+                               insn_len);
+               break;
+       case KVM_PV_REASON_PAGE_NOT_PRESENT:
+               vcpu->arch.apf.host_apf_reason = 0;
+               local_irq_disable();
+               kvm_async_pf_task_wait(fault_address);
+               local_irq_enable();
+               break;
+       case KVM_PV_REASON_PAGE_READY:
+               vcpu->arch.apf.host_apf_reason = 0;
+               local_irq_disable();
+               kvm_async_pf_task_wake(fault_address);
+               local_irq_enable();
+               break;
+       }
+       return r;
+}
+EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
+
 static bool
 check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
 {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index a276834950c14a15681c9d125ddde8e9b9dc6af8..d7d248a000dd6772681f3f5541e344f9677a2d1d 100644
@@ -77,6 +77,9 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
                             bool accessed_dirty);
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
+int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+                               u64 fault_address, char *insn, int insn_len,
+                               bool need_unprotect);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 905ea6052517fef7a09bf82f396222ade76be2a5..4d8141e533c369711df245d0a783683598ad4559 100644
@@ -194,7 +194,6 @@ struct vcpu_svm {
 
        unsigned int3_injected;
        unsigned long int3_rip;
-       u32 apf_reason;
 
        /* cached guest cpuid flags for faster access */
        bool nrips_enabled      : 1;
@@ -277,6 +276,10 @@ static int avic;
 module_param(avic, int, S_IRUGO);
 #endif
 
+/* enable/disable Virtual VMLOAD VMSAVE */
+static int vls = true;
+module_param(vls, int, 0444);
+
 /* AVIC VM ID bit masks and lock */
 static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
 static DEFINE_SPINLOCK(avic_vm_id_lock);
@@ -633,11 +636,13 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
        svm_set_interrupt_shadow(vcpu, 0);
 }
 
-static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
-                               bool has_error_code, u32 error_code,
-                               bool reinject)
+static void svm_queue_exception(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
+       unsigned nr = vcpu->arch.exception.nr;
+       bool has_error_code = vcpu->arch.exception.has_error_code;
+       bool reinject = vcpu->arch.exception.reinject;
+       u32 error_code = vcpu->arch.exception.error_code;
 
        /*
         * If we are within a nested VM we'd better #VMEXIT and let the guest
@@ -947,7 +952,7 @@ static void svm_enable_lbrv(struct vcpu_svm *svm)
 {
        u32 *msrpm = svm->msrpm;
 
-       svm->vmcb->control.lbr_ctl = 1;
+       svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
        set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
        set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
        set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
@@ -958,7 +963,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
 {
        u32 *msrpm = svm->msrpm;
 
-       svm->vmcb->control.lbr_ctl = 0;
+       svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
        set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
        set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
        set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
@@ -1093,6 +1098,16 @@ static __init int svm_hardware_setup(void)
                }
        }
 
+       if (vls) {
+               if (!npt_enabled ||
+                   !boot_cpu_has(X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE) ||
+                   !IS_ENABLED(CONFIG_X86_64)) {
+                       vls = false;
+               } else {
+                       pr_info("Virtual VMLOAD VMSAVE supported\n");
+               }
+       }
+
        return 0;
 
 err:
@@ -1280,6 +1295,16 @@ static void init_vmcb(struct vcpu_svm *svm)
        if (avic)
                avic_init_vmcb(svm);
 
+       /*
+        * If hardware supports Virtual VMLOAD VMSAVE then enable it
+        * in VMCB and clear intercepts to avoid #VMEXIT.
+        */
+       if (vls) {
+               clr_intercept(svm, INTERCEPT_VMLOAD);
+               clr_intercept(svm, INTERCEPT_VMSAVE);
+               svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+       }
+
        mark_all_dirty(svm->vmcb);
 
        enable_gif(svm);
@@ -2096,34 +2121,11 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 static int pf_interception(struct vcpu_svm *svm)
 {
        u64 fault_address = svm->vmcb->control.exit_info_2;
-       u64 error_code;
-       int r = 1;
+       u64 error_code = svm->vmcb->control.exit_info_1;
 
-       switch (svm->apf_reason) {
-       default:
-               error_code = svm->vmcb->control.exit_info_1;
-
-               trace_kvm_page_fault(fault_address, error_code);
-               if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
-                       kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
-               r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
+       return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
                        svm->vmcb->control.insn_bytes,
-                       svm->vmcb->control.insn_len);
-               break;
-       case KVM_PV_REASON_PAGE_NOT_PRESENT:
-               svm->apf_reason = 0;
-               local_irq_disable();
-               kvm_async_pf_task_wait(fault_address);
-               local_irq_enable();
-               break;
-       case KVM_PV_REASON_PAGE_READY:
-               svm->apf_reason = 0;
-               local_irq_disable();
-               kvm_async_pf_task_wake(fault_address);
-               local_irq_enable();
-               break;
-       }
-       return r;
+                       svm->vmcb->control.insn_len, !npt_enabled);
 }
 
 static int db_interception(struct vcpu_svm *svm)
@@ -2267,7 +2269,7 @@ static int io_interception(struct vcpu_svm *svm)
 {
        struct kvm_vcpu *vcpu = &svm->vcpu;
        u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
-       int size, in, string;
+       int size, in, string, ret;
        unsigned port;
 
        ++svm->vcpu.stat.io_exits;
@@ -2279,10 +2281,16 @@ static int io_interception(struct vcpu_svm *svm)
        port = io_info >> 16;
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
        svm->next_rip = svm->vmcb->control.exit_info_2;
-       skip_emulated_instruction(&svm->vcpu);
+       ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
-       return in ? kvm_fast_pio_in(vcpu, size, port)
-                 : kvm_fast_pio_out(vcpu, size, port);
+       /*
+        * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
+        * KVM_EXIT_DEBUG here.
+        */
+       if (in)
+               return kvm_fast_pio_in(vcpu, size, port) && ret;
+       else
+               return kvm_fast_pio_out(vcpu, size, port) && ret;
 }
 
 static int nmi_interception(struct vcpu_svm *svm)
@@ -2415,15 +2423,19 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
        if (!is_guest_mode(&svm->vcpu))
                return 0;
 
+       vmexit = nested_svm_intercept(svm);
+       if (vmexit != NESTED_EXIT_DONE)
+               return 0;
+
        svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
        svm->vmcb->control.exit_code_hi = 0;
        svm->vmcb->control.exit_info_1 = error_code;
-       svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
-
-       vmexit = nested_svm_intercept(svm);
-       if (vmexit == NESTED_EXIT_DONE)
-               svm->nested.exit_required = true;
+       if (svm->vcpu.arch.exception.nested_apf)
+               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+       else
+               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
 
+       svm->nested.exit_required = true;
        return vmexit;
 }
 
@@ -2598,7 +2610,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
                break;
        case SVM_EXIT_EXCP_BASE + PF_VECTOR:
                /* When we're shadowing, trap PFs, but not async PF */
-               if (!npt_enabled && svm->apf_reason == 0)
+               if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
                        return NESTED_EXIT_HOST;
                break;
        default:
@@ -2645,7 +2657,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
                }
                /* async page fault always cause vmexit */
                else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
-                        svm->apf_reason != 0)
+                        svm->vcpu.arch.exception.nested_apf != 0)
                        vmexit = NESTED_EXIT_DONE;
                break;
        }
@@ -2702,7 +2714,7 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr
        dst->event_inj            = from->event_inj;
        dst->event_inj_err        = from->event_inj_err;
        dst->nested_cr3           = from->nested_cr3;
-       dst->lbr_ctl              = from->lbr_ctl;
+       dst->virt_ext              = from->virt_ext;
 }
 
 static int nested_svm_vmexit(struct vcpu_svm *svm)
@@ -3008,7 +3020,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
        /* We don't want to see VMMCALLs from a nested guest */
        clr_intercept(svm, INTERCEPT_VMMCALL);
 
-       svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
+       svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
        svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
        svm->vmcb->control.int_state = nested_vmcb->control.int_state;
        svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
@@ -3055,6 +3067,7 @@ static int vmload_interception(struct vcpu_svm *svm)
 {
        struct vmcb *nested_vmcb;
        struct page *page;
+       int ret;
 
        if (nested_svm_check_permissions(svm))
                return 1;
@@ -3064,18 +3077,19 @@ static int vmload_interception(struct vcpu_svm *svm)
                return 1;
 
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-       skip_emulated_instruction(&svm->vcpu);
+       ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
        nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
        nested_svm_unmap(page);
 
-       return 1;
+       return ret;
 }
 
 static int vmsave_interception(struct vcpu_svm *svm)
 {
        struct vmcb *nested_vmcb;
        struct page *page;
+       int ret;
 
        if (nested_svm_check_permissions(svm))
                return 1;
@@ -3085,12 +3099,12 @@ static int vmsave_interception(struct vcpu_svm *svm)
                return 1;
 
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-       skip_emulated_instruction(&svm->vcpu);
+       ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
        nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
        nested_svm_unmap(page);
 
-       return 1;
+       return ret;
 }
 
 static int vmrun_interception(struct vcpu_svm *svm)
@@ -3123,25 +3137,29 @@ failed:
 
 static int stgi_interception(struct vcpu_svm *svm)
 {
+       int ret;
+
        if (nested_svm_check_permissions(svm))
                return 1;
 
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-       skip_emulated_instruction(&svm->vcpu);
+       ret = kvm_skip_emulated_instruction(&svm->vcpu);
        kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
 
        enable_gif(svm);
 
-       return 1;
+       return ret;
 }
 
 static int clgi_interception(struct vcpu_svm *svm)
 {
+       int ret;
+
        if (nested_svm_check_permissions(svm))
                return 1;
 
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-       skip_emulated_instruction(&svm->vcpu);
+       ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
        disable_gif(svm);
 
@@ -3152,7 +3170,7 @@ static int clgi_interception(struct vcpu_svm *svm)
                mark_dirty(svm->vmcb, VMCB_INTR);
        }
 
-       return 1;
+       return ret;
 }
 
 static int invlpga_interception(struct vcpu_svm *svm)
@@ -3166,8 +3184,7 @@ static int invlpga_interception(struct vcpu_svm *svm)
        kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-       skip_emulated_instruction(&svm->vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int skinit_interception(struct vcpu_svm *svm)
@@ -3190,7 +3207,7 @@ static int xsetbv_interception(struct vcpu_svm *svm)
 
        if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
                svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-               skip_emulated_instruction(&svm->vcpu);
+               return kvm_skip_emulated_instruction(&svm->vcpu);
        }
 
        return 1;
@@ -3286,8 +3303,7 @@ static int invlpg_interception(struct vcpu_svm *svm)
                return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 
        kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
-       skip_emulated_instruction(&svm->vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int emulate_on_interception(struct vcpu_svm *svm)
@@ -3437,9 +3453,7 @@ static int dr_interception(struct vcpu_svm *svm)
                kvm_register_write(&svm->vcpu, reg, val);
        }
 
-       skip_emulated_instruction(&svm->vcpu);
-
-       return 1;
+       return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int cr8_write_interception(struct vcpu_svm *svm)
@@ -3562,6 +3576,7 @@ static int rdmsr_interception(struct vcpu_svm *svm)
        if (svm_get_msr(&svm->vcpu, &msr_info)) {
                trace_kvm_msr_read_ex(ecx);
                kvm_inject_gp(&svm->vcpu, 0);
+               return 1;
        } else {
                trace_kvm_msr_read(ecx, msr_info.data);
 
@@ -3570,9 +3585,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
                kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
                                   msr_info.data >> 32);
                svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-               skip_emulated_instruction(&svm->vcpu);
+               return kvm_skip_emulated_instruction(&svm->vcpu);
        }
-       return 1;
 }
 
 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
@@ -3698,11 +3712,11 @@ static int wrmsr_interception(struct vcpu_svm *svm)
        if (kvm_set_msr(&svm->vcpu, &msr)) {
                trace_kvm_msr_write_ex(ecx, data);
                kvm_inject_gp(&svm->vcpu, 0);
+               return 1;
        } else {
                trace_kvm_msr_write(ecx, data);
-               skip_emulated_instruction(&svm->vcpu);
+               return kvm_skip_emulated_instruction(&svm->vcpu);
        }
-       return 1;
 }
 
 static int msr_interception(struct vcpu_svm *svm)
@@ -3731,8 +3745,7 @@ static int pause_interception(struct vcpu_svm *svm)
 
 static int nop_interception(struct vcpu_svm *svm)
 {
-       skip_emulated_instruction(&(svm->vcpu));
-       return 1;
+       return kvm_skip_emulated_instruction(&(svm->vcpu));
 }
 
 static int monitor_interception(struct vcpu_svm *svm)
@@ -4117,7 +4130,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
        pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
        pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
        pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
-       pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
+       pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
        pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
        pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
        pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
@@ -4965,7 +4978,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
        /* if exit due to PF check for async PF */
        if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
-               svm->apf_reason = kvm_read_and_reset_pf_reason();
+               svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
 
        if (npt_enabled) {
                vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
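
The pattern behind the many skip_emulated_instruction() conversions above:
kvm_skip_emulated_instruction() returns 0 when a KVM_GUESTDBG_SINGLESTEP #DB
has to be forwarded to userspace as KVM_EXIT_DEBUG, so intercept handlers
must propagate its return value instead of returning 1 unconditionally.
Schematically (not a real handler; insn_len stands in for the instruction
length):

	static int some_interception(struct vcpu_svm *svm)
	{
		int ret;

		svm->next_rip = kvm_rip_read(&svm->vcpu) + insn_len;
		ret = kvm_skip_emulated_instruction(&svm->vcpu);

		/* ... perform the intercepted operation ... */

		return ret;	/* 0 => exit to userspace with KVM_EXIT_DEBUG */
	}
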
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f76efad248aba0dc02bce77a4cd984343d181d79..84e62acf2dd861023b17382e61f9c98c8df82f68 100644
@@ -2422,28 +2422,41 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
  */
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
 {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+       unsigned int nr = vcpu->arch.exception.nr;
 
-       if (!(vmcs12->exception_bitmap & (1u << nr)))
+       if (!((vmcs12->exception_bitmap & (1u << nr)) ||
+               (nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
                return 0;
 
+       if (vcpu->arch.exception.nested_apf) {
+               vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code);
+               nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+                       PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
+                       INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
+                       vcpu->arch.apf.nested_apf_token);
+               return 1;
+       }
+
        nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
                          vmcs_read32(VM_EXIT_INTR_INFO),
                          vmcs_readl(EXIT_QUALIFICATION));
        return 1;
 }
 
-static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
-                               bool has_error_code, u32 error_code,
-                               bool reinject)
+static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       unsigned nr = vcpu->arch.exception.nr;
+       bool has_error_code = vcpu->arch.exception.has_error_code;
+       bool reinject = vcpu->arch.exception.reinject;
+       u32 error_code = vcpu->arch.exception.error_code;
        u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
        if (!reinject && is_guest_mode(vcpu) &&
-           nested_vmx_check_exception(vcpu, nr))
+           nested_vmx_check_exception(vcpu))
                return;
 
        if (has_error_code) {
@@ -3764,6 +3777,25 @@ static void free_kvm_area(void)
        }
 }
 
+enum vmcs_field_type {
+       VMCS_FIELD_TYPE_U16 = 0,
+       VMCS_FIELD_TYPE_U64 = 1,
+       VMCS_FIELD_TYPE_U32 = 2,
+       VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
+};
+
+static inline int vmcs_field_type(unsigned long field)
+{
+       if (0x1 & field)        /* the *_HIGH fields are all 32 bit */
+               return VMCS_FIELD_TYPE_U32;
+       return (field >> 13) & 0x3 ;
+}
+
+static inline int vmcs_field_readonly(unsigned long field)
+{
+       return (((field >> 10) & 0x3) == 1);
+}
+
 static void init_vmcs_shadow_fields(void)
 {
        int i, j;
@@ -3789,14 +3821,22 @@ static void init_vmcs_shadow_fields(void)
 
        /* shadowed fields guest access without vmexit */
        for (i = 0; i < max_shadow_read_write_fields; i++) {
-               clear_bit(shadow_read_write_fields[i],
-                         vmx_vmwrite_bitmap);
-               clear_bit(shadow_read_write_fields[i],
-                         vmx_vmread_bitmap);
+               unsigned long field = shadow_read_write_fields[i];
+
+               clear_bit(field, vmx_vmwrite_bitmap);
+               clear_bit(field, vmx_vmread_bitmap);
+               if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
+                       clear_bit(field + 1, vmx_vmwrite_bitmap);
+                       clear_bit(field + 1, vmx_vmread_bitmap);
+               }
+       }
+       for (i = 0; i < max_shadow_read_only_fields; i++) {
+               unsigned long field = shadow_read_only_fields[i];
+
+               clear_bit(field, vmx_vmread_bitmap);
+               if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
+                       clear_bit(field + 1, vmx_vmread_bitmap);
        }
-       for (i = 0; i < max_shadow_read_only_fields; i++)
-               clear_bit(shadow_read_only_fields[i],
-                         vmx_vmread_bitmap);
 }
 
 static __init int alloc_kvm_area(void)
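
Why clearing field + 1 matters: every 64-bit VMCS field has a companion
"high" field at encoding field + 1, and shadowing only the low half lets the
two copies of the field drift apart when the shadow VMCS is synced back to
vmcs12. Worked through the helper above, with GUEST_PHYSICAL_ADDRESS (0x2400)
as the illustration:

	vmcs_field_type(0x2400);	/* (0x2400 >> 13) & 3 == 1 -> VMCS_FIELD_TYPE_U64 */
	vmcs_field_type(0x2401);	/* bit 0 set: the "high" half, 32-bit access */
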
@@ -4634,6 +4674,11 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
        return true;
 }
 
+static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+       return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
+}
+
 static int init_rmode_tss(struct kvm *kvm)
 {
        gfn_t fn;
@@ -5664,14 +5709,11 @@ static int handle_exception(struct kvm_vcpu *vcpu)
        }
 
        if (is_page_fault(intr_info)) {
-               /* EPT won't cause page fault directly */
-               BUG_ON(enable_ept);
                cr2 = vmcs_readl(EXIT_QUALIFICATION);
-               trace_kvm_page_fault(cr2, error_code);
-
-               if (kvm_event_needs_reinjection(vcpu))
-                       kvm_mmu_unprotect_page_virt(vcpu, cr2);
-               return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
+               /* EPT won't cause page fault directly */
+               WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
+               return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0,
+                               true);
        }
 
        ex_no = intr_info & INTR_INFO_VECTOR_MASK;
@@ -7214,25 +7256,6 @@ static int handle_vmresume(struct kvm_vcpu *vcpu)
        return nested_vmx_run(vcpu, false);
 }
 
-enum vmcs_field_type {
-       VMCS_FIELD_TYPE_U16 = 0,
-       VMCS_FIELD_TYPE_U64 = 1,
-       VMCS_FIELD_TYPE_U32 = 2,
-       VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
-};
-
-static inline int vmcs_field_type(unsigned long field)
-{
-       if (0x1 & field)        /* the *_HIGH fields are all 32 bit */
-               return VMCS_FIELD_TYPE_U32;
-       return (field >> 13) & 0x3 ;
-}
-
-static inline int vmcs_field_readonly(unsigned long field)
-{
-       return (((field >> 10) & 0x3) == 1);
-}
-
 /*
  * Read a vmcs12 field. Since these can have varying lengths and we return
  * one type, we chose the biggest type (u64) and zero-extend the return value
@@ -8014,7 +8037,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                if (is_nmi(intr_info))
                        return false;
                else if (is_page_fault(intr_info))
-                       return enable_ept;
+                       return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
                else if (is_no_device(intr_info) &&
                         !(vmcs12->guest_cr0 & X86_CR0_TS))
                        return false;
@@ -8418,9 +8441,15 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
                        exit_reason != EXIT_REASON_TASK_SWITCH)) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
-               vcpu->run->internal.ndata = 2;
+               vcpu->run->internal.ndata = 3;
                vcpu->run->internal.data[0] = vectoring_info;
                vcpu->run->internal.data[1] = exit_reason;
+               vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
+               if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
+                       vcpu->run->internal.ndata++;
+                       vcpu->run->internal.data[3] =
+                               vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+               }
                return 0;
        }
 
@@ -8611,17 +8640,24 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 
 static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 {
-       u32 exit_intr_info;
+       u32 exit_intr_info = 0;
+       u16 basic_exit_reason = (u16)vmx->exit_reason;
 
-       if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
-             || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
+       if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
+             || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
                return;
 
-       vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-       exit_intr_info = vmx->exit_intr_info;
+       if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+               exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+       vmx->exit_intr_info = exit_intr_info;
+
+       /* if exit due to PF check for async PF */
+       if (is_page_fault(exit_intr_info))
+               vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
 
        /* Handle machine checks before interrupts are enabled */
-       if (is_machine_check(exit_intr_info))
+       if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
+           is_machine_check(exit_intr_info))
                kvm_machine_check();
 
        /* We need to handle NMIs before interrupts are enabled */
@@ -9589,23 +9625,26 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
                      ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
 }
 
+static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
+                                              struct vmcs12 *vmcs12)
+{
+       if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+               return 0;
+
+       if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) ||
+           !page_address_valid(vcpu, vmcs12->io_bitmap_b))
+               return -EINVAL;
+
+       return 0;
+}
+
 static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
                                                struct vmcs12 *vmcs12)
 {
-       int maxphyaddr;
-       u64 addr;
-
        if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
                return 0;
 
-       if (vmcs12_read_any(vcpu, MSR_BITMAP, &addr)) {
-               WARN_ON(1);
-               return -EINVAL;
-       }
-       maxphyaddr = cpuid_maxphyaddr(vcpu);
-
-       if (!PAGE_ALIGNED(vmcs12->msr_bitmap) ||
-          ((addr + PAGE_SIZE) >> maxphyaddr))
+       if (!page_address_valid(vcpu, vmcs12->msr_bitmap))
                return -EINVAL;
 
        return 0;
@@ -10293,6 +10332,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
            vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
                return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+       if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12))
+               return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
        if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
                return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
@@ -10429,8 +10471,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
                return 1;
        }
 
-       vmcs12->launch_state = 1;
-
        /*
         * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
         * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@@ -10804,6 +10844,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
        vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 
        if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+               vmcs12->launch_state = 1;
+
                /* vm_entry_intr_info_field is cleared on exit. Emulate this
                 * instead of reading the real value. */
                vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
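
With the extra payload, userspace that hits KVM_EXIT_INTERNAL_ERROR can now
report the exit qualification and, for EPT misconfig, the guest physical
address. A sketch of consuming it (run being the mmap'ed struct kvm_run):

	if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR &&
	    run->internal.suberror == KVM_INTERNAL_ERROR_DELIVERY_EV) {
		fprintf(stderr, "vect_info=%llx exit_reason=%llx exit_qual=%llx\n",
			run->internal.data[0], run->internal.data[1],
			run->internal.data[2]);
		if (run->internal.ndata >= 4)	/* EPT misconfig appends the GPA */
			fprintf(stderr, "gpa=%llx\n", run->internal.data[3]);
	}
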
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6c7266f7766dcb6ec02b13b9b1439c9f9d547071..5b8f07889f6a591f4e23ac69c6bb9656e7bc0a31 100644
@@ -134,8 +134,6 @@ module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
 static bool __read_mostly vector_hashing = true;
 module_param(vector_hashing, bool, S_IRUGO);
 
-static bool __read_mostly backwards_tsc_observed = false;
-
 #define KVM_NR_SHARED_MSRS 16
 
 struct kvm_shared_msrs_global {
@@ -452,7 +450,12 @@ EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 {
        ++vcpu->stat.pf_guest;
-       vcpu->arch.cr2 = fault->address;
+       vcpu->arch.exception.nested_apf =
+               is_guest_mode(vcpu) && fault->async_page_fault;
+       if (vcpu->arch.exception.nested_apf)
+               vcpu->arch.apf.nested_apf_token = fault->address;
+       else
+               vcpu->arch.cr2 = fault->address;
        kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
 }
 EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
@@ -1719,7 +1722,7 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
                                        &ka->master_cycle_now);
 
        ka->use_master_clock = host_tsc_clocksource && vcpus_matched
-                               && !backwards_tsc_observed
+                               && !ka->backwards_tsc_observed
                                && !ka->boot_vcpu_runs_old_kvmclock;
 
        if (ka->use_master_clock)
@@ -2060,8 +2063,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 {
        gpa_t gpa = data & ~0x3f;
 
-       /* Bits 2:5 are reserved, Should be zero */
-       if (data & 0x3c)
+       /* Bits 3:5 are reserved, Should be zero */
+       if (data & 0x38)
                return 1;
 
        vcpu->arch.apf.msr_val = data;
@@ -2077,6 +2080,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
                return 1;
 
        vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
+       vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
        kvm_async_pf_wakeup_all(vcpu);
        return 0;
 }
@@ -2661,6 +2665,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_HYPERV_VAPIC:
        case KVM_CAP_HYPERV_SPIN:
        case KVM_CAP_HYPERV_SYNIC:
+       case KVM_CAP_HYPERV_SYNIC2:
+       case KVM_CAP_HYPERV_VP_INDEX:
        case KVM_CAP_PCI_SEGMENT:
        case KVM_CAP_DEBUGREGS:
        case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -3384,10 +3390,14 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                return -EINVAL;
 
        switch (cap->cap) {
+       case KVM_CAP_HYPERV_SYNIC2:
+               if (cap->args[0])
+                       return -EINVAL;
        case KVM_CAP_HYPERV_SYNIC:
                if (!irqchip_in_kernel(vcpu->kvm))
                        return -EINVAL;
-               return kvm_hv_activate_synic(vcpu);
+               return kvm_hv_activate_synic(vcpu, cap->cap ==
+                                            KVM_CAP_HYPERV_SYNIC2);
        default:
                return -EINVAL;
        }
@@ -4188,9 +4198,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
 
                r = 0;
+               /*
+                * TODO: userspace has to take care of races with VCPU_RUN, so
+                * kvm_gen_update_masterclock() can be cut down to locked
+                * pvclock_update_vm_gtod_copy().
+                */
+               kvm_gen_update_masterclock(kvm);
                now_ns = get_kvmclock_ns(kvm);
                kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
-               kvm_gen_update_masterclock(kvm);
+               kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
                break;
        }
        case KVM_GET_CLOCK: {
@@ -6347,10 +6363,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
                        kvm_update_dr7(vcpu);
                }
 
-               kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
-                                         vcpu->arch.exception.has_error_code,
-                                         vcpu->arch.exception.error_code,
-                                         vcpu->arch.exception.reinject);
+               kvm_x86_ops->queue_exception(vcpu);
                return 0;
        }
 
@@ -7676,6 +7689,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
        struct msr_data msr;
        struct kvm *kvm = vcpu->kvm;
 
+       kvm_hv_vcpu_postcreate(vcpu);
+
        if (vcpu_load(vcpu))
                return;
        msr.data = 0x0;
@@ -7829,8 +7844,8 @@ int kvm_arch_hardware_enable(void)
         */
        if (backwards_tsc) {
                u64 delta_cyc = max_tsc - local_tsc;
-               backwards_tsc_observed = true;
                list_for_each_entry(kvm, &vm_list, vm_list) {
+                       kvm->arch.backwards_tsc_observed = true;
                        kvm_for_each_vcpu(i, vcpu, kvm) {
                                vcpu->arch.tsc_offset_adjustment += delta_cyc;
                                vcpu->arch.last_host_tsc = local_tsc;
@@ -8576,6 +8591,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
                fault.error_code = 0;
                fault.nested_page_fault = false;
                fault.address = work->arch.token;
+               fault.async_page_fault = true;
                kvm_inject_page_fault(vcpu, &fault);
        }
 }
@@ -8598,6 +8614,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
                fault.error_code = 0;
                fault.nested_page_fault = false;
                fault.address = work->arch.token;
+               fault.async_page_fault = true;
                kvm_inject_page_fault(vcpu, &fault);
        }
        vcpu->arch.apf.halted = false;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0b50e7b35ed4135f81a3332331132aa0b1fc1c18..648b34cabb38214e6bb957aeecbcf61e03a26d4c 100644
@@ -234,7 +234,7 @@ struct kvm_vcpu {
 
        int guest_fpu_loaded, guest_xcr0_loaded;
        struct swait_queue_head wq;
-       struct pid *pid;
+       struct pid __rcu *pid;
        int sigset_active;
        sigset_t sigset;
        struct kvm_vcpu_stat stat;
@@ -390,7 +390,7 @@ struct kvm {
        spinlock_t mmu_lock;
        struct mutex slots_lock;
        struct mm_struct *mm; /* userspace tied to this vm */
-       struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
+       struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
        struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 
        /*
@@ -404,7 +404,7 @@ struct kvm {
        int last_boosted_vcpu;
        struct list_head vm_list;
        struct mutex lock;
-       struct kvm_io_bus *buses[KVM_NR_BUSES];
+       struct kvm_io_bus __rcu *buses[KVM_NR_BUSES];
 #ifdef CONFIG_HAVE_KVM_EVENTFD
        struct {
                spinlock_t        lock;
@@ -473,6 +473,12 @@ struct kvm {
 #define vcpu_err(vcpu, fmt, ...)                                       \
        kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 
+static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
+{
+       return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
+                                     lockdep_is_held(&kvm->slots_lock));
+}
+
 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 {
        /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case
@@ -562,9 +568,8 @@ void kvm_put_kvm(struct kvm *kvm);
 
 static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
 {
-       return rcu_dereference_check(kvm->memslots[as_id],
-                       srcu_read_lock_held(&kvm->srcu)
-                       || lockdep_is_held(&kvm->slots_lock));
+       return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
+                       lockdep_is_held(&kvm->slots_lock));
 }
 
 static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index c0b6dfec5f87241cd96f386643eeee159c0da1b2..6cd63c18708ae1d23dbc280ed49aed55f817a2f5 100644
@@ -927,6 +927,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_CMMA_MIGRATION 145
 #define KVM_CAP_PPC_FWNMI 146
 #define KVM_CAP_PPC_SMT_POSSIBLE 147
+#define KVM_CAP_HYPERV_SYNIC2 148
+#define KVM_CAP_HYPERV_VP_INDEX 149
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1351,7 +1353,7 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_X86_SMM */
 #define KVM_SMI                   _IO(KVMIO,   0xb7)
 /* Available with KVM_CAP_S390_CMMA_MIGRATION */
-#define KVM_S390_GET_CMMA_BITS      _IOW(KVMIO, 0xb8, struct kvm_s390_cmma_log)
+#define KVM_S390_GET_CMMA_BITS      _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
 #define KVM_S390_SET_CMMA_BITS      _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 9120edf3c94bfccd1e34a625d163385abbd3cbc8..f2ac53ab82438f0b473ecd8ed91b1e2548af7ca2 100644
@@ -825,7 +825,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
        if (ret < 0)
                goto unlock_fail;
 
-       kvm->buses[bus_idx]->ioeventfd_count++;
+       kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
        list_add_tail(&p->list, &kvm->ioeventfds);
 
        mutex_unlock(&kvm->slots_lock);
@@ -848,6 +848,7 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
 {
        struct _ioeventfd        *p, *tmp;
        struct eventfd_ctx       *eventfd;
+       struct kvm_io_bus        *bus;
        int                       ret = -ENOENT;
 
        eventfd = eventfd_ctx_fdget(args->fd);
@@ -870,8 +871,9 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
                        continue;
 
                kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
-               if (kvm->buses[bus_idx])
-                       kvm->buses[bus_idx]->ioeventfd_count--;
+               bus = kvm_get_bus(kvm, bus_idx);
+               if (bus)
+                       bus->ioeventfd_count--;
                ioeventfd_release(p);
                ret = 0;
                break;
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 31e40c9e81df41de6f8cadb076515677ac5cae9c..b1286c4e0712259fac5d66b9bbc3aaf388d3d3f9 100644
@@ -230,7 +230,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
        }
 
        mutex_lock(&kvm->irq_lock);
-       old = kvm->irq_routing;
+       old = rcu_dereference_protected(kvm->irq_routing, 1);
        rcu_assign_pointer(kvm->irq_routing, new);
        kvm_irq_routing_update(kvm);
        kvm_arch_irq_routing_update(kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 19f0ecb9b93e23501af3f5c21e2c971cebfd3c6a..82987d457b8bb4e7ec4c1159e37857fd85a11c18 100644
@@ -130,6 +130,12 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
 
 static bool largepages_enabled = true;
 
+#define KVM_EVENT_CREATE_VM 0
+#define KVM_EVENT_DESTROY_VM 1
+static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
+static unsigned long long kvm_createvm_count;
+static unsigned long long kvm_active_vms;
+
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
 {
        if (pfn_valid(pfn))
@@ -187,12 +193,23 @@ static void ack_flush(void *_completed)
 {
 }
 
+static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
+{
+       if (unlikely(!cpus))
+               cpus = cpu_online_mask;
+
+       if (cpumask_empty(cpus))
+               return false;
+
+       smp_call_function_many(cpus, ack_flush, NULL, wait);
+       return true;
+}
+
 bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
        int i, cpu, me;
        cpumask_var_t cpus;
-       bool called = true;
-       bool wait = req & KVM_REQUEST_WAIT;
+       bool called;
        struct kvm_vcpu *vcpu;
 
        zalloc_cpumask_var(&cpus, GFP_ATOMIC);
@@ -207,14 +224,9 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
                if (cpus != NULL && cpu != -1 && cpu != me &&
                    kvm_request_needs_ipi(vcpu, req))
-                       cpumask_set_cpu(cpu, cpus);
+                       __cpumask_set_cpu(cpu, cpus);
        }
-       if (unlikely(cpus == NULL))
-               smp_call_function_many(cpu_online_mask, ack_flush, NULL, wait);
-       else if (!cpumask_empty(cpus))
-               smp_call_function_many(cpus, ack_flush, NULL, wait);
-       else
-               called = false;
+       called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
        put_cpu();
        free_cpumask_var(cpus);
        return called;
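
Two things changed above: the NULL-mask and empty-mask special cases moved into kvm_kick_many_cpus(), and the per-vCPU bit set became __cpumask_set_cpu(). The double-underscore variant is the non-atomic one; it is safe because cpus is a freshly zeroed, function-local mask that no other CPU can observe while it is being filled, so the locked read-modify-write of the atomic variant would be wasted work. An illustrative sketch of the distinction:

#include <linux/cpumask.h>

/* @mask is private to the caller while it is filled, so the plain
 * (non-atomic) bit set is sufficient. */
static void mark_cpu_private(struct cpumask *mask, int cpu)
{
	__cpumask_set_cpu(cpu, mask);	/* plain __set_bit() */
	/* cpumask_set_cpu(cpu, mask) would use atomic set_bit(),
	 * needed only when the mask is shared between CPUs. */
}
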
@@ -293,7 +305,12 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init);
 
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
-       put_pid(vcpu->pid);
+       /*
+        * no need for rcu_read_lock as VCPU_RUN is the only place that
+        * will change the vcpu->pid pointer and on uninit all file
+        * descriptors are already gone.
+        */
+       put_pid(rcu_dereference_protected(vcpu->pid, 1));
        kvm_arch_vcpu_uninit(vcpu);
        free_page((unsigned long)vcpu->run);
 }
@@ -674,8 +691,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
        if (init_srcu_struct(&kvm->irq_srcu))
                goto out_err_no_irq_srcu;
        for (i = 0; i < KVM_NR_BUSES; i++) {
-               kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
-                                       GFP_KERNEL);
+               rcu_assign_pointer(kvm->buses[i],
+                       kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL));
                if (!kvm->buses[i])
                        goto out_err;
        }
@@ -700,9 +717,10 @@ out_err_no_srcu:
        hardware_disable_all();
 out_err_no_disable:
        for (i = 0; i < KVM_NR_BUSES; i++)
-               kfree(kvm->buses[i]);
+               kfree(rcu_access_pointer(kvm->buses[i]));
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-               kvm_free_memslots(kvm, kvm->memslots[i]);
+               kvm_free_memslots(kvm,
+                       rcu_dereference_protected(kvm->memslots[i], 1));
        kvm_arch_free_vm(kvm);
        mmdrop(current->mm);
        return ERR_PTR(r);
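
The creation path publishes each bus with rcu_assign_pointer(), which orders the kzalloc initialization before the pointer store, while the error path frees with kfree(rcu_access_pointer(...)): the VM has not been exposed to userspace yet, so no reader can hold the pointer, and the address-only accessor (which gives no dereference guarantees) is sufficient. A generalized sketch of that error-path idiom, with hypothetical names:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo { int x; };

/* The object in @slot was never published to any reader, so fetching
 * its address with rcu_access_pointer() is legal here: the value is
 * only handed to kfree(), never dereferenced. */
static void free_unpublished(struct foo __rcu **slot)
{
	kfree(rcu_access_pointer(*slot));
	RCU_INIT_POINTER(*slot, NULL);
}
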
@@ -728,6 +746,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
        int i;
        struct mm_struct *mm = kvm->mm;
 
+       kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
        kvm_destroy_vm_debugfs(kvm);
        kvm_arch_sync_events(kvm);
        spin_lock(&kvm_lock);
@@ -735,8 +754,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
        spin_unlock(&kvm_lock);
        kvm_free_irq_routing(kvm);
        for (i = 0; i < KVM_NR_BUSES; i++) {
-               if (kvm->buses[i])
-                       kvm_io_bus_destroy(kvm->buses[i]);
+               struct kvm_io_bus *bus;
+
+               bus = rcu_dereference_protected(kvm->buses[i], 1);
+               if (bus)
+                       kvm_io_bus_destroy(bus);
                kvm->buses[i] = NULL;
        }
        kvm_coalesced_mmio_free(kvm);
@@ -748,7 +770,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
        kvm_arch_destroy_vm(kvm);
        kvm_destroy_devices(kvm);
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-               kvm_free_memslots(kvm, kvm->memslots[i]);
+               kvm_free_memslots(kvm,
+                       rcu_dereference_protected(kvm->memslots[i], 1));
        cleanup_srcu_struct(&kvm->irq_srcu);
        cleanup_srcu_struct(&kvm->srcu);
        kvm_arch_free_vm(kvm);
@@ -2551,13 +2574,14 @@ static long kvm_vcpu_ioctl(struct file *filp,
        if (r)
                return r;
        switch (ioctl) {
-       case KVM_RUN:
+       case KVM_RUN: {
+               struct pid *oldpid;
                r = -EINVAL;
                if (arg)
                        goto out;
-               if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
+               oldpid = rcu_access_pointer(vcpu->pid);
+               if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) {
                        /* The thread running this VCPU changed. */
-                       struct pid *oldpid = vcpu->pid;
                        struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
 
                        rcu_assign_pointer(vcpu->pid, newpid);
@@ -2568,6 +2592,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
                r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
                trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
                break;
+       }
        case KVM_GET_REGS: {
                struct kvm_regs *kvm_regs;
 
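
KVM_RUN only compares the stored pid against the current task's pid on the fast path, and rcu_access_pointer() is enough for a comparison that never dereferences the value; the slow path then publishes the new pid with rcu_assign_pointer(), and the lines elided after the hunk presumably synchronize and put the old pid. A comparison-only sketch, assuming this kernel's task_struct layout:

#include <linux/kvm_host.h>
#include <linux/sched.h>

/* Comparison-only read of an RCU pointer: no rcu_read_lock() and no
 * rcu_dereference() needed, because the value is never dereferenced,
 * only compared against the current task's pid. */
static bool vcpu_thread_changed(struct kvm_vcpu *vcpu)
{
	return rcu_access_pointer(vcpu->pid) !=
	       current->pids[PIDTYPE_PID].pid;
}
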
@@ -3202,6 +3227,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
                fput(file);
                return -ENOMEM;
        }
+       kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm);
 
        fd_install(r, file);
        return r;
@@ -3563,7 +3589,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 {
        struct kvm_io_bus *new_bus, *bus;
 
-       bus = kvm->buses[bus_idx];
+       bus = kvm_get_bus(kvm, bus_idx);
        if (!bus)
                return -ENOMEM;
 
@@ -3592,7 +3618,7 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
        int i;
        struct kvm_io_bus *new_bus, *bus;
 
-       bus = kvm->buses[bus_idx];
+       bus = kvm_get_bus(kvm, bus_idx);
        if (!bus)
                return;
 
@@ -3854,6 +3880,67 @@ static const struct file_operations *stat_fops[] = {
        [KVM_STAT_VM]   = &vm_stat_fops,
 };
 
+static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
+{
+       struct kobj_uevent_env *env;
+       char *tmp, *pathbuf = NULL;
+       unsigned long long created, active;
+
+       if (!kvm_dev.this_device || !kvm)
+               return;
+
+       spin_lock(&kvm_lock);
+       if (type == KVM_EVENT_CREATE_VM) {
+               kvm_createvm_count++;
+               kvm_active_vms++;
+       } else if (type == KVM_EVENT_DESTROY_VM) {
+               kvm_active_vms--;
+       }
+       created = kvm_createvm_count;
+       active = kvm_active_vms;
+       spin_unlock(&kvm_lock);
+
+       env = kzalloc(sizeof(*env), GFP_KERNEL);
+       if (!env)
+               return;
+
+       add_uevent_var(env, "CREATED=%llu", created);
+       add_uevent_var(env, "COUNT=%llu", active);
+
+       if (type == KVM_EVENT_CREATE_VM)
+               add_uevent_var(env, "EVENT=create");
+       else if (type == KVM_EVENT_DESTROY_VM)
+               add_uevent_var(env, "EVENT=destroy");
+
+       if (kvm->debugfs_dentry) {
+               char p[ITOA_MAX_LEN];
+
+               snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name);
+               tmp = strchrnul(p + 1, '-');
+               *tmp = '\0';
+               add_uevent_var(env, "PID=%s", p);
+               pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
+               if (pathbuf) {
+                       /* sizeof counts the final '\0' */
+                       int len = sizeof("STATS_PATH=") - 1;
+                       const char *pvar = "STATS_PATH=";
+
+                       tmp = dentry_path_raw(kvm->debugfs_dentry,
+                                             pathbuf + len,
+                                             PATH_MAX - len);
+                       if (!IS_ERR(tmp)) {
+                               memcpy(tmp - len, pvar, len);
+                               env->envp[env->envp_idx++] = tmp - len;
+                       }
+               }
+       }
+       /* no need for checks, since we are adding at most only 5 keys */
+       env->envp[env->envp_idx++] = NULL;
+       kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp);
+       kfree(env);
+       kfree(pathbuf);
+}
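
kvm_uevent_notify_change() folds the lifetime counters, the event type, the creating PID and the debugfs stats path into a single KOBJ_CHANGE uevent on the kvm device. Userspace can watch these with udevadm monitor --property, or directly on the kobject-uevent netlink socket; a minimal listener sketch (not part of the commit, error handling trimmed):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>

int main(void)
{
	struct sockaddr_nl addr = {
		.nl_family = AF_NETLINK,
		.nl_groups = 1,		/* kernel uevent broadcast group */
	};
	char buf[4096];
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_KOBJECT_UEVENT);

	if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	for (;;) {
		ssize_t len = recv(fd, buf, sizeof(buf) - 1, 0);
		ssize_t i = 0;

		if (len <= 0)
			break;
		buf[len] = '\0';
		/* A uevent is a block of NUL-separated KEY=VALUE strings,
		 * e.g. EVENT=create, COUNT=..., CREATED=..., PID=... */
		while (i < len) {
			puts(buf + i);
			i += strlen(buf + i) + 1;
		}
	}
	return 0;
}
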
+
 static int kvm_init_debug(void)
 {
        int r = -EEXIST;