Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

[karo-tx-linux.git] / arch / arm / kvm / mmu.c
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c

index 6034697ede3f3ac5e7a1060ce3026ba5d9f26714..3e6859bc3e1170fc83489be9ba51a15c97b148a5 100644 (file)
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -78,6 +78,26 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
                 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
  }
  
+/*
+ * D-Cache management functions. They take the page table entries by
+ * value, as they are flushing the cache using the kernel mapping (or
+ * kmap on 32bit).
+ */
+static void kvm_flush_dcache_pte(pte_t pte)
+{
+       __kvm_flush_dcache_pte(pte);
+}
+
+static void kvm_flush_dcache_pmd(pmd_t pmd)
+{
+       __kvm_flush_dcache_pmd(pmd);
+}
+
+static void kvm_flush_dcache_pud(pud_t pud)
+{
+       __kvm_flush_dcache_pud(pud);
+}
+
  /**
   * stage2_dissolve_pmd() - clear and flush huge PMD entry
   * @kvm:       pointer to kvm structure.
@@ -158,6 +178,26 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
         put_page(virt_to_page(pmd));
  }
  
+/*
+ * Unmapping vs dcache management:
+ *
+ * If a guest maps certain memory pages as uncached, all writes will
+ * bypass the data cache and go directly to RAM.  However, the CPUs
+ * can still speculate reads (not writes) and fill cache lines with
+ * data.
+ *
+ * Those cache lines will be *clean* cache lines though, so a
+ * clean+invalidate operation is equivalent to an invalidate
+ * operation, because no cache lines are marked dirty.
+ *
+ * Those clean cache lines could be filled prior to an uncached write
+ * by the guest, and the cache coherent IO subsystem would therefore
+ * end up writing old data to disk.
+ *
+ * This is why right after unmapping a page/section and invalidating
+ * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
+ * the IO subsystem will never hit in the cache.
+ */
  static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
                        phys_addr_t addr, phys_addr_t end)
  {
@@ -167,9 +207,16 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
         start_pte = pte = pte_offset_kernel(pmd, addr);
         do {
                 if (!pte_none(*pte)) {
+                       pte_t old_pte = *pte;
+
                         kvm_set_pte(pte, __pte(0));
-                       put_page(virt_to_page(pte));
                         kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+                       /* No need to invalidate the cache for device mappings */
+                       if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+                               kvm_flush_dcache_pte(old_pte);
+
+                       put_page(virt_to_page(pte));
                 }
         } while (pte++, addr += PAGE_SIZE, addr != end);
  
@@ -188,8 +235,13 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
                 next = kvm_pmd_addr_end(addr, end);
                 if (!pmd_none(*pmd)) {
                         if (kvm_pmd_huge(*pmd)) {
+                               pmd_t old_pmd = *pmd;
+
                                 pmd_clear(pmd);
                                 kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+                               kvm_flush_dcache_pmd(old_pmd);
+
                                 put_page(virt_to_page(pmd));
                         } else {
                                 unmap_ptes(kvm, pmd, addr, next);
@@ -212,8 +264,13 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
                 next = kvm_pud_addr_end(addr, end);
                 if (!pud_none(*pud)) {
                         if (pud_huge(*pud)) {
+                               pud_t old_pud = *pud;
+
                                 pud_clear(pud);
                                 kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+                               kvm_flush_dcache_pud(old_pud);
+
                                 put_page(virt_to_page(pud));
                         } else {
                                 unmap_pmds(kvm, pud, addr, next);
@@ -248,10 +305,9 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
  
         pte = pte_offset_kernel(pmd, addr);
         do {
-               if (!pte_none(*pte)) {
-                       hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-                       kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
-               }
+               if (!pte_none(*pte) &&
+                   (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+                       kvm_flush_dcache_pte(*pte);
         } while (pte++, addr += PAGE_SIZE, addr != end);
  }
  
@@ -265,12 +321,10 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
         do {
                 next = kvm_pmd_addr_end(addr, end);
                 if (!pmd_none(*pmd)) {
-                       if (kvm_pmd_huge(*pmd)) {
-                               hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-                               kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
-                       } else {
+                       if (kvm_pmd_huge(*pmd))
+                               kvm_flush_dcache_pmd(*pmd);
+                       else
                                 stage2_flush_ptes(kvm, pmd, addr, next);
-                       }
                 }
         } while (pmd++, addr = next, addr != end);
  }
@@ -285,12 +339,10 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
         do {
                 next = kvm_pud_addr_end(addr, end);
                 if (!pud_none(*pud)) {
-                       if (pud_huge(*pud)) {
-                               hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-                               kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
-                       } else {
+                       if (pud_huge(*pud))
+                               kvm_flush_dcache_pud(*pud);
+                       else
                                 stage2_flush_pmds(kvm, pud, addr, next);
-                       }
                 }
         } while (pud++, addr = next, addr != end);
  }
@@ -317,7 +369,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
   * Go through the stage 2 page tables and invalidate any cache lines
   * backing memory already mapped to the VM.
   */
-void stage2_flush_vm(struct kvm *kvm)
+static void stage2_flush_vm(struct kvm *kvm)
  {
         struct kvm_memslots *slots;
         struct kvm_memory_slot *memslot;
@@ -1116,6 +1168,12 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
         kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
  }
  
+static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+                                     unsigned long size, bool uncached)
+{
+       __coherent_cache_guest_page(vcpu, pfn, size, uncached);
+}
+
  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                           struct kvm_memory_slot *memslot, unsigned long hva,
                           unsigned long fault_status)
@@ -1225,8 +1283,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                         kvm_set_s2pmd_writable(&new_pmd);
                         kvm_set_pfn_dirty(pfn);
                 }
-               coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
-                                         fault_ipa_uncached);
+               coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
                 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
         } else {
                 pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -1236,8 +1293,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                         kvm_set_pfn_dirty(pfn);
                         mark_page_dirty(kvm, gfn);
                 }
-               coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
-                                         fault_ipa_uncached);
+               coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
                 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
         }
  
@@ -1664,3 +1720,71 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
         unmap_stage2_range(kvm, gpa, size);
         spin_unlock(&kvm->mmu_lock);
  }
+
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ *
+ * Main problems:
+ * - S/W ops are local to a CPU (not broadcast)
+ * - We have line migration behind our back (speculation)
+ * - System caches don't support S/W at all (damn!)
+ *
+ * In the face of the above, the best we can do is to try and convert
+ * S/W ops to VA ops. Because the guest is not allowed to infer the
+ * S/W to PA mapping, it can only use S/W to nuke the whole cache,
+ * which is a rather good thing for us.
+ *
+ * Also, S/W ops are only expected to be used when turning caches
+ * on/off ("The expected usage of the cache maintenance instructions
+ * that operate by set/way is associated with the cache maintenance
+ * instructions associated with the powerdown and powerup of caches,
+ * if this is required by the implementation.").
+ *
+ * We use the following policy:
+ *
+ * - If we trap a S/W operation, we enable VM trapping to detect
+ *   caches being turned on/off, and do a full clean.
+ *
+ * - We flush the caches both when they are turned on and turned off.
+ *
+ * - Once the caches are enabled, we stop trapping VM ops.
+ */
+void kvm_set_way_flush(struct kvm_vcpu *vcpu)
+{
+       unsigned long hcr = vcpu_get_hcr(vcpu);
+
+       /*
+        * If this is the first time we do a S/W operation
+        * (i.e. HCR_TVM not set), flush all memory mapped to the VM
+        * and enable VM trapping.
+        *
+        * Otherwise, rely on the VM trapping to wait for the MMU +
+        * Caches to be turned off. At that point, we'll be able to
+        * clean the caches again.
+        */
+       if (!(hcr & HCR_TVM)) {
+               trace_kvm_set_way_flush(*vcpu_pc(vcpu),
+                                       vcpu_has_cache_enabled(vcpu));
+               stage2_flush_vm(vcpu->kvm);
+               vcpu_set_hcr(vcpu, hcr | HCR_TVM);
+       }
+}
+
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
+{
+       bool now_enabled = vcpu_has_cache_enabled(vcpu);
+
+       /*
+        * If switching the MMU+caches on, need to invalidate the caches.
+        * If switching them off, need to clean the caches.
+        * Clean + invalidate always does the trick.
+        */
+       if (now_enabled != was_enabled)
+               stage2_flush_vm(vcpu->kvm);
+
+       /* Caches are now on, stop trapping VM ops (until a S/W op) */
+       if (now_enabled)
+               vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);
+
+       trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
+}