Merge tag 'kvm-arm-fixes-4.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Marcelo Tosatti <mtosatt@redhat.com>

Mon, 16 Mar 2015 23:08:56 +0000 (20:08 -0300)

committer Marcelo Tosatti <mtosatti@redhat.com>

Mon, 16 Mar 2015 23:08:56 +0000 (20:08 -0300)
author Marcelo Tosatti <mtosatt@redhat.com>
Mon, 16 Mar 2015 23:08:56 +0000 (20:08 -0300)
committer Marcelo Tosatti <mtosatti@redhat.com>
Mon, 16 Mar 2015 23:08:56 +0000 (20:08 -0300)
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h

index bf0fe99e8ca927e8b3894dabaf1e1fca9f079c3d..4cf48c3aca13e9afe7b852660afa4d5945994a93 100644 (file)
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -149,29 +149,28 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
         (__boundary - 1 < (end) - 1)? __boundary: (end);                \
  })
  
+#define kvm_pgd_index(addr)                    pgd_index(addr)
+
  static inline bool kvm_page_empty(void *ptr)
  {
         struct page *ptr_page = virt_to_page(ptr);
         return page_count(ptr_page) == 1;
  }
  
-
  #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
  #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
  #define kvm_pud_table_empty(kvm, pudp) (0)
  
  #define KVM_PREALLOC_LEVEL     0
  
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
  {
-       return 0;
+       return kvm->arch.pgd;
  }
  
-static inline void kvm_free_hwpgd(struct kvm *kvm) { }
-
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
  {
-       return kvm->arch.pgd;
+       return PTRS_PER_S2_PGD * sizeof(pgd_t);
  }
  
  struct kvm;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c

index 3e6859bc3e1170fc83489be9ba51a15c97b148a5..5656d79c5a44f4d2ca816e15b647abf29a114e0b 100644 (file)
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
         phys_addr_t addr = start, end = start + size;
         phys_addr_t next;
  
-       pgd = pgdp + pgd_index(addr);
+       pgd = pgdp + kvm_pgd_index(addr);
         do {
                 next = kvm_pgd_addr_end(addr, end);
                 if (!pgd_none(*pgd))
@@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
         phys_addr_t next;
         pgd_t *pgd;
  
-       pgd = kvm->arch.pgd + pgd_index(addr);
+       pgd = kvm->arch.pgd + kvm_pgd_index(addr);
         do {
                 next = kvm_pgd_addr_end(addr, end);
                 stage2_flush_puds(kvm, pgd, addr, next);
@@ -632,6 +632,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
                                      __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
  }
  
+/* Free the HW pgd, one page at a time */
+static void kvm_free_hwpgd(void *hwpgd)
+{
+       free_pages_exact(hwpgd, kvm_get_hwpgd_size());
+}
+
+/* Allocate the HW PGD, making sure that each page gets its own refcount */
+static void *kvm_alloc_hwpgd(void)
+{
+       unsigned int size = kvm_get_hwpgd_size();
+
+       return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+}
+
  /**
   * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
   * @kvm:       The KVM struct pointer for the VM.
@@ -645,15 +659,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
   */
  int kvm_alloc_stage2_pgd(struct kvm *kvm)
  {
-       int ret;
         pgd_t *pgd;
+       void *hwpgd;
  
         if (kvm->arch.pgd != NULL) {
                 kvm_err("kvm_arch already initialized?\n");
                 return -EINVAL;
         }
  
+       hwpgd = kvm_alloc_hwpgd();
+       if (!hwpgd)
+               return -ENOMEM;
+
+       /* When the kernel uses more levels of page tables than the
+        * guest, we allocate a fake PGD and pre-populate it to point
+        * to the next-level page table, which will be the real
+        * initial page table pointed to by the VTTBR.
+        *
+        * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
+        * the PMD and the kernel will use folded pud.
+        * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
+        * pages.
+        */
         if (KVM_PREALLOC_LEVEL > 0) {
+               int i;
+
                 /*
                  * Allocate fake pgd for the page table manipulation macros to
                  * work.  This is not used by the hardware and we have no
@@ -661,30 +691,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
                  */
                 pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
                                        GFP_KERNEL | __GFP_ZERO);
+
+               if (!pgd) {
+                       kvm_free_hwpgd(hwpgd);
+                       return -ENOMEM;
+               }
+
+               /* Plug the HW PGD into the fake one. */
+               for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+                       if (KVM_PREALLOC_LEVEL == 1)
+                               pgd_populate(NULL, pgd + i,
+                                            (pud_t *)hwpgd + i * PTRS_PER_PUD);
+                       else if (KVM_PREALLOC_LEVEL == 2)
+                               pud_populate(NULL, pud_offset(pgd, 0) + i,
+                                            (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+               }
         } else {
                 /*
                  * Allocate actual first-level Stage-2 page table used by the
                  * hardware for Stage-2 page table walks.
                  */
-               pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+               pgd = (pgd_t *)hwpgd;
         }
  
-       if (!pgd)
-               return -ENOMEM;
-
-       ret = kvm_prealloc_hwpgd(kvm, pgd);
-       if (ret)
-               goto out_err;
-
         kvm_clean_pgd(pgd);
         kvm->arch.pgd = pgd;
         return 0;
-out_err:
-       if (KVM_PREALLOC_LEVEL > 0)
-               kfree(pgd);
-       else
-               free_pages((unsigned long)pgd, S2_PGD_ORDER);
-       return ret;
  }
  
  /**
@@ -785,11 +817,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
                 return;
  
         unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-       kvm_free_hwpgd(kvm);
+       kvm_free_hwpgd(kvm_get_hwpgd(kvm));
         if (KVM_PREALLOC_LEVEL > 0)
                 kfree(kvm->arch.pgd);
-       else
-               free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+
         kvm->arch.pgd = NULL;
  }
  
@@ -799,7 +830,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
         pgd_t *pgd;
         pud_t *pud;
  
-       pgd = kvm->arch.pgd + pgd_index(addr);
+       pgd = kvm->arch.pgd + kvm_pgd_index(addr);
         if (WARN_ON(pgd_none(*pgd))) {
                 if (!cache)
                         return NULL;
@@ -1089,7 +1120,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
         pgd_t *pgd;
         phys_addr_t next;
  
-       pgd = kvm->arch.pgd + pgd_index(addr);
+       pgd = kvm->arch.pgd + kvm_pgd_index(addr);
         do {
                 /*
                  * Release kvm_mmu_lock periodically if the memory region is
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h

index 94674eb7e7bb3cebaf671ef28ec7de05145bb0c9..54bb4ba974417e269656d50adb524654851fbbd2 100644 (file)
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -129,6 +129,9 @@
   * 40 bits wide (T0SZ = 24).  Systems with a PARange smaller than 40 bits are
   * not known to exist and will break with this configuration.
   *
+ * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
+ * (see hyp-init.S).
+ *
   * Note that when using 4K pages, we concatenate two first level page tables
   * together.
   *
@@ -138,7 +141,6 @@
  #ifdef CONFIG_ARM64_64K_PAGES
  /*
   * Stage2 translation configuration:
- * 40bits output (PS = 2)
   * 40bits input  (T0SZ = 24)
   * 64kB pages (TG0 = 1)
   * 2 level page tables (SL = 1)
@@ -150,7 +152,6 @@
  #else
  /*
   * Stage2 translation configuration:
- * 40bits output (PS = 2)
   * 40bits input  (T0SZ = 24)
   * 4kB pages (TG0 = 0)
   * 3 level page tables (SL = 1)
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h

index 6458b53731421343e7640a922a202a2b4c9682be..bbfb600fa82295a8a81c85603254946422b70992 100644 (file)
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -158,6 +158,8 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
  #define PTRS_PER_S2_PGD                (1 << PTRS_PER_S2_PGD_SHIFT)
  #define S2_PGD_ORDER           get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
  
+#define kvm_pgd_index(addr)    (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+
  /*
   * If we are concatenating first level stage-2 page tables, we would have less
   * than or equal to 16 pointers in the fake PGD, because that's what the
@@ -171,43 +173,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
  #define KVM_PREALLOC_LEVEL     (0)
  #endif
  
-/**
- * kvm_prealloc_hwpgd - allocate inital table for VTTBR
- * @kvm:       The KVM struct pointer for the VM.
- * @pgd:       The kernel pseudo pgd
- *
- * When the kernel uses more levels of page tables than the guest, we allocate
- * a fake PGD and pre-populate it to point to the next-level page table, which
- * will be the real initial page table pointed to by the VTTBR.
- *
- * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
- * the kernel will use folded pud.  When KVM_PREALLOC_LEVEL==1, we
- * allocate 2 consecutive PUD pages.
- */
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
-{
-       unsigned int i;
-       unsigned long hwpgd;
-
-       if (KVM_PREALLOC_LEVEL == 0)
-               return 0;
-
-       hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
-       if (!hwpgd)
-               return -ENOMEM;
-
-       for (i = 0; i < PTRS_PER_S2_PGD; i++) {
-               if (KVM_PREALLOC_LEVEL == 1)
-                       pgd_populate(NULL, pgd + i,
-                                    (pud_t *)hwpgd + i * PTRS_PER_PUD);
-               else if (KVM_PREALLOC_LEVEL == 2)
-                       pud_populate(NULL, pud_offset(pgd, 0) + i,
-                                    (pmd_t *)hwpgd + i * PTRS_PER_PMD);
-       }
-
-       return 0;
-}
-
  static inline void *kvm_get_hwpgd(struct kvm *kvm)
  {
         pgd_t *pgd = kvm->arch.pgd;
@@ -224,12 +189,11 @@ static inline void *kvm_get_hwpgd(struct kvm *kvm)
         return pmd_offset(pud, 0);
  }
  
-static inline void kvm_free_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
  {
-       if (KVM_PREALLOC_LEVEL > 0) {
-               unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
-               free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
-       }
+       if (KVM_PREALLOC_LEVEL > 0)
+               return PTRS_PER_S2_PGD * PAGE_SIZE;
+       return PTRS_PER_S2_PGD * sizeof(pgd_t);
  }
  
  static inline bool kvm_page_empty(void *ptr)
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h

index 7c55dd5dd2c9faf2202692ca5f669f064c173ed4..66203b268984ebedd72d5bd1b2f54440e56011bc 100644 (file)
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -114,6 +114,7 @@ struct vgic_ops {
         void    (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
         u64     (*get_elrsr)(const struct kvm_vcpu *vcpu);
         u64     (*get_eisr)(const struct kvm_vcpu *vcpu);
+       void    (*clear_eisr)(struct kvm_vcpu *vcpu);
         u32     (*get_interrupt_status)(const struct kvm_vcpu *vcpu);
         void    (*enable_underflow)(struct kvm_vcpu *vcpu);
         void    (*disable_underflow)(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c

index a0a7b5d1a0703a00f81c9421856c0fdf1b1f6aaa..f9b9c7c5137214cb56bcc0cf7455d2d712dc6848 100644 (file)
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -72,6 +72,8 @@ static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
  {
         if (!(lr_desc.state & LR_STATE_MASK))
                 vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
+       else
+               vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr);
  }
  
  static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
@@ -84,6 +86,11 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
         return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
  }
  
+static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0;
+}
+
  static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
  {
         u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
@@ -148,6 +155,7 @@ static const struct vgic_ops vgic_v2_ops = {
         .sync_lr_elrsr          = vgic_v2_sync_lr_elrsr,
         .get_elrsr              = vgic_v2_get_elrsr,
         .get_eisr               = vgic_v2_get_eisr,
+       .clear_eisr             = vgic_v2_clear_eisr,
         .get_interrupt_status   = vgic_v2_get_interrupt_status,
         .enable_underflow       = vgic_v2_enable_underflow,
         .disable_underflow      = vgic_v2_disable_underflow,
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c

index 3a62d8a9a2c6fce0cc2f94a6e558115785c03811..dff06021e74855a2d6cb9b8830fd30818a63c927 100644 (file)
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -104,6 +104,8 @@ static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
  {
         if (!(lr_desc.state & LR_STATE_MASK))
                 vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
+       else
+               vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr);
  }
  
  static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
@@ -116,6 +118,11 @@ static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
         return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
  }
  
+static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0;
+}
+
  static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
  {
         u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
@@ -192,6 +199,7 @@ static const struct vgic_ops vgic_v3_ops = {
         .sync_lr_elrsr          = vgic_v3_sync_lr_elrsr,
         .get_elrsr              = vgic_v3_get_elrsr,
         .get_eisr               = vgic_v3_get_eisr,
+       .clear_eisr             = vgic_v3_clear_eisr,
         .get_interrupt_status   = vgic_v3_get_interrupt_status,
         .enable_underflow       = vgic_v3_enable_underflow,
         .disable_underflow      = vgic_v3_disable_underflow,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c

index 0cc6ab6005a07bb2e9453106a672e9efd4e237a6..c9f60f52458802f4a66a3912732d8665bc4a3e32 100644 (file)
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -883,6 +883,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
         return vgic_ops->get_eisr(vcpu);
  }
  
+static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu)
+{
+       vgic_ops->clear_eisr(vcpu);
+}
+
  static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
  {
         return vgic_ops->get_interrupt_status(vcpu);
@@ -922,6 +927,7 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
         vgic_set_lr(vcpu, lr_nr, vlr);
         clear_bit(lr_nr, vgic_cpu->lr_used);
         vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+       vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
  }
  
  /*
@@ -978,6 +984,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
                         BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
                         vlr.state |= LR_STATE_PENDING;
                         vgic_set_lr(vcpu, lr, vlr);
+                       vgic_sync_lr_elrsr(vcpu, lr, vlr);
                         return true;
                 }
         }
@@ -999,6 +1006,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
                 vlr.state |= LR_EOI_INT;
  
         vgic_set_lr(vcpu, lr, vlr);
+       vgic_sync_lr_elrsr(vcpu, lr, vlr);
  
         return true;
  }
@@ -1136,6 +1144,14 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
         if (status & INT_STATUS_UNDERFLOW)
                 vgic_disable_underflow(vcpu);
  
+       /*
+        * In the next iterations of the vcpu loop, if we sync the vgic state
+        * after flushing it, but before entering the guest (this happens for
+        * pending signals and vmid rollovers), then make sure we don't pick
+        * up any old maintenance interrupts here.
+        */
+       vgic_clear_eisr(vcpu);
+
         return level_pending;
  }
  
@@ -1583,8 +1599,10 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
          * emulation. So check this here again. KVM_CREATE_DEVICE does
          * the proper checks already.
          */
-       if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2)
-               return -ENODEV;
+       if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) {
+               ret = -ENODEV;
+               goto out;
+       }
  
         /*
          * Any time a vcpu is run, vcpu_load is called which tries to grab the
author	Marcelo Tosatti <mtosatt@redhat.com>
	Mon, 16 Mar 2015 23:08:56 +0000 (20:08 -0300)
committer	Marcelo Tosatti <mtosatti@redhat.com>
	Mon, 16 Mar 2015 23:08:56 +0000 (20:08 -0300)
arch/arm/include/asm/kvm_mmu.h		patch | blob | history
arch/arm/kvm/mmu.c		patch | blob | history
arch/arm64/include/asm/kvm_arm.h		patch | blob | history
arch/arm64/include/asm/kvm_mmu.h		patch | blob | history
include/kvm/arm_vgic.h		patch | blob | history
virt/kvm/arm/vgic-v2.c		patch | blob | history
virt/kvm/arm/vgic-v3.c		patch | blob | history
virt/kvm/arm/vgic.c		patch | blob | history