KVM: MMU: introduce is_shadow_zero_bits_set()
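In brief (a hedged reading of the hunks below, not taken from a changelog): the reserved-bits bookkeeping moves out of struct kvm_mmu into a struct rsvd_bits_validate, kept in two instances per MMU context: guest_rsvd_check for the guest's own page tables and shadow_zero_check for the shadow/TDP/EPT page tables KVM builds. __is_rsvd_bits_set() does the table-driven check against either instance, is_rsvd_bits_set() and the new is_shadow_zero_bits_set() select the instance, the shadow_zero_check tables are filled by three new reset_*_shadow_zero_bits_mask() helpers at MMU setup, and the old check_direct_spte_mmio_pf() fast path for direct MMIO faults is dropped (a local kvm_is_mmio_pfn() also replaces kvm_is_reserved_pfn() for the get_mt_mask() call). A minimal standalone sketch of the data the check operates on, assuming the usual rsvd_bits() helper from arch/x86/kvm/mmu.h and simplifying the struct to the fields the hunks use:

	#include <linux/types.h>		/* u64 */

	/* Sketch only: the two fields that __is_rsvd_bits_set() consults. */
	struct rsvd_bits_validate {
		u64 rsvd_bits_mask[2][4];	/* indexed by [pte bit 7][level - 1] */
		u64 bad_mt_xwr;			/* EPT memtype/XWR values that fault */
	};

	/* Mask with bits s..e set, like rsvd_bits() in arch/x86/kvm/mmu.h. */
	static inline u64 rsvd_bits(int s, int e)
	{
		return ((1ULL << (e - s + 1)) - 1) << s;
	}

	/*
	 * Example: with NX enabled in the guest and maxphyaddr == 36, a
	 * 4-level large-page PDE (bit 7 set, level 2) must keep bits 36..51
	 * and 13..20 clear, i.e. rsvd_bits_mask[1][1] ends up as
	 * rsvd_bits(36, 51) | rsvd_bits(13, 20), matching the PT64_ROOT_LEVEL
	 * case of __reset_rsvds_bits_mask() in the diff below.
	 */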
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f807496b62c2cc76e82a60cd58ee187f0cdc77c2..823e3bbbbfddab54c01775034711d4937d559ad7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -357,12 +357,6 @@ static u64 __get_spte_lockless(u64 *sptep)
 {
        return ACCESS_ONCE(*sptep);
 }
-
-static bool __check_direct_spte_mmio_pf(u64 spte)
-{
-       /* It is valid if the spte is zapped. */
-       return spte == 0ull;
-}
 #else
 union split_spte {
        struct {
@@ -478,23 +472,6 @@ retry:
 
        return spte.spte;
 }
-
-static bool __check_direct_spte_mmio_pf(u64 spte)
-{
-       union split_spte sspte = (union split_spte)spte;
-       u32 high_mmio_mask = shadow_mmio_mask >> 32;
-
-       /* It is valid if the spte is zapped. */
-       if (spte == 0ull)
-               return true;
-
-       /* It is valid if the spte is being zapped. */
-       if (sspte.spte_low == 0ull &&
-           (sspte.spte_high & high_mmio_mask) == high_mmio_mask)
-               return true;
-
-       return false;
-}
 #endif
 
 static bool spte_is_locklessly_modifiable(u64 spte)
@@ -2479,6 +2456,14 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
        return 0;
 }
 
+static bool kvm_is_mmio_pfn(pfn_t pfn)
+{
+       if (pfn_valid(pfn))
+               return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
+
+       return true;
+}
+
 static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                    unsigned pte_access, int level,
                    gfn_t gfn, pfn_t pfn, bool speculative,
@@ -2506,7 +2491,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                spte |= PT_PAGE_SIZE_MASK;
        if (tdp_enabled)
                spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
-                       kvm_is_reserved_pfn(pfn));
+                       kvm_is_mmio_pfn(pfn));
 
        if (host_writable)
                spte |= SPTE_HOST_WRITEABLE;
@@ -3283,27 +3268,31 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
        return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
 }
 
-static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+static bool
+__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level)
 {
-       if (direct)
-               return vcpu_match_mmio_gpa(vcpu, addr);
+       int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f;
 
-       return vcpu_match_mmio_gva(vcpu, addr);
+       return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) |
+               ((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0);
 }
 
+static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
+{
+       return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level);
+}
 
-/*
- * On direct hosts, the last spte is only allows two states
- * for mmio page fault:
- *   - It is the mmio spte
- *   - It is zapped or it is being zapped.
- *
- * This function completely checks the spte when the last spte
- * is not the mmio spte.
- */
-static bool check_direct_spte_mmio_pf(u64 spte)
+static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
 {
-       return __check_direct_spte_mmio_pf(spte);
+       return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level);
+}
+
+static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+{
+       if (direct)
+               return vcpu_match_mmio_gpa(vcpu, addr);
+
+       return vcpu_match_mmio_gva(vcpu, addr);
 }
 
 static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
@@ -3347,13 +3336,6 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
                return RET_MMIO_PF_EMULATE;
        }
 
-       /*
-        * It's ok if the gva is remapped by other cpus on shadow guest,
-        * it's a BUG if the gfn is not a mmio page.
-        */
-       if (direct && !check_direct_spte_mmio_pf(spte))
-               return RET_MMIO_PF_BUG;
-
        /*
         * If the page table is zapped by other cpus, let CPU fault again on
         * the address.
@@ -3596,19 +3578,21 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp
 #include "paging_tmpl.h"
 #undef PTTYPE
 
-static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
-                                 struct kvm_mmu *context)
+static void
+__reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+                       struct rsvd_bits_validate *rsvd_check,
+                       int maxphyaddr, int level, bool nx, bool gbpages,
+                       bool pse)
 {
-       int maxphyaddr = cpuid_maxphyaddr(vcpu);
        u64 exb_bit_rsvd = 0;
        u64 gbpages_bit_rsvd = 0;
        u64 nonleaf_bit8_rsvd = 0;
 
-       context->bad_mt_xwr = 0;
+       rsvd_check->bad_mt_xwr = 0;
 
-       if (!context->nx)
+       if (!nx)
                exb_bit_rsvd = rsvd_bits(63, 63);
-       if (!guest_cpuid_has_gbpages(vcpu))
+       if (!gbpages)
                gbpages_bit_rsvd = rsvd_bits(7, 7);
 
        /*
@@ -3618,80 +3602,95 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
        if (guest_cpuid_is_amd(vcpu))
                nonleaf_bit8_rsvd = rsvd_bits(8, 8);
 
-       switch (context->root_level) {
+       switch (level) {
        case PT32_ROOT_LEVEL:
                /* no rsvd bits for 2 level 4K page table entries */
-               context->rsvd_bits_mask[0][1] = 0;
-               context->rsvd_bits_mask[0][0] = 0;
-               context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+               rsvd_check->rsvd_bits_mask[0][1] = 0;
+               rsvd_check->rsvd_bits_mask[0][0] = 0;
+               rsvd_check->rsvd_bits_mask[1][0] =
+                       rsvd_check->rsvd_bits_mask[0][0];
 
-               if (!is_pse(vcpu)) {
-                       context->rsvd_bits_mask[1][1] = 0;
+               if (!pse) {
+                       rsvd_check->rsvd_bits_mask[1][1] = 0;
                        break;
                }
 
                if (is_cpuid_PSE36())
                        /* 36bits PSE 4MB page */
-                       context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+                       rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
                else
                        /* 32 bits PSE 4MB page */
-                       context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+                       rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
                break;
        case PT32E_ROOT_LEVEL:
-               context->rsvd_bits_mask[0][2] =
+               rsvd_check->rsvd_bits_mask[0][2] =
                        rsvd_bits(maxphyaddr, 63) |
                        rsvd_bits(5, 8) | rsvd_bits(1, 2);      /* PDPTE */
-               context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+               rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
                        rsvd_bits(maxphyaddr, 62);      /* PDE */
-               context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+               rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
                        rsvd_bits(maxphyaddr, 62);      /* PTE */
-               context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+               rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
                        rsvd_bits(maxphyaddr, 62) |
                        rsvd_bits(13, 20);              /* large page */
-               context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+               rsvd_check->rsvd_bits_mask[1][0] =
+                       rsvd_check->rsvd_bits_mask[0][0];
                break;
        case PT64_ROOT_LEVEL:
-               context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
-                       nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51);
-               context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
-                       nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
-               context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+               rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+                       nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
+                       rsvd_bits(maxphyaddr, 51);
+               rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+                       nonleaf_bit8_rsvd | gbpages_bit_rsvd |
+                       rsvd_bits(maxphyaddr, 51);
+               rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
                        rsvd_bits(maxphyaddr, 51);
-               context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+               rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
                        rsvd_bits(maxphyaddr, 51);
-               context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
-               context->rsvd_bits_mask[1][2] = exb_bit_rsvd |
+               rsvd_check->rsvd_bits_mask[1][3] =
+                       rsvd_check->rsvd_bits_mask[0][3];
+               rsvd_check->rsvd_bits_mask[1][2] = exb_bit_rsvd |
                        gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) |
                        rsvd_bits(13, 29);
-               context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+               rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
                        rsvd_bits(maxphyaddr, 51) |
                        rsvd_bits(13, 20);              /* large page */
-               context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+               rsvd_check->rsvd_bits_mask[1][0] =
+                       rsvd_check->rsvd_bits_mask[0][0];
                break;
        }
 }
 
-static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
-               struct kvm_mmu *context, bool execonly)
+static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+                                 struct kvm_mmu *context)
+{
+       __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
+                               cpuid_maxphyaddr(vcpu), context->root_level,
+                               context->nx, guest_cpuid_has_gbpages(vcpu),
+                               is_pse(vcpu));
+}
+
+static void
+__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
+                           int maxphyaddr, bool execonly)
 {
-       int maxphyaddr = cpuid_maxphyaddr(vcpu);
        int pte;
 
-       context->rsvd_bits_mask[0][3] =
+       rsvd_check->rsvd_bits_mask[0][3] =
                rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
-       context->rsvd_bits_mask[0][2] =
+       rsvd_check->rsvd_bits_mask[0][2] =
                rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
-       context->rsvd_bits_mask[0][1] =
+       rsvd_check->rsvd_bits_mask[0][1] =
                rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
-       context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+       rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
 
        /* large page */
-       context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
-       context->rsvd_bits_mask[1][2] =
+       rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
+       rsvd_check->rsvd_bits_mask[1][2] =
                rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
-       context->rsvd_bits_mask[1][1] =
+       rsvd_check->rsvd_bits_mask[1][1] =
                rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
-       context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+       rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
 
        for (pte = 0; pte < 64; pte++) {
                int rwx_bits = pte & 7;
@@ -3699,10 +3698,64 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
                if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
                                rwx_bits == 0x2 || rwx_bits == 0x6 ||
                                (rwx_bits == 0x4 && !execonly))
-                       context->bad_mt_xwr |= (1ull << pte);
+                       rsvd_check->bad_mt_xwr |= (1ull << pte);
        }
 }
 
+static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
+               struct kvm_mmu *context, bool execonly)
+{
+       __reset_rsvds_bits_mask_ept(&context->guest_rsvd_check,
+                                   cpuid_maxphyaddr(vcpu), execonly);
+}
+
+/*
+ * The page table used on the host is the shadow page table for the page
+ * table in the guest or AMD nested guest; its MMU features completely
+ * follow the features used in the guest.
+ */
+void
+reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+{
+       __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+                               boot_cpu_data.x86_phys_bits,
+                               context->shadow_root_level, context->nx,
+                               guest_cpuid_has_gbpages(vcpu), is_pse(vcpu));
+}
+EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
+
+/*
+ * For the direct page table on the host, use as many MMU features as
+ * possible; however, KVM currently does not do execution-protection.
+ */
+static void
+reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+                               struct kvm_mmu *context)
+{
+       if (guest_cpuid_is_amd(vcpu))
+               __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+                                       boot_cpu_data.x86_phys_bits,
+                                       context->shadow_root_level, false,
+                                       cpu_has_gbpages, true);
+       else
+               __reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+                                           boot_cpu_data.x86_phys_bits,
+                                           false);
+
+}
+
+/*
+ * Same as the comment above reset_shadow_zero_bits_mask(), except this
+ * is the shadow page table for an Intel nested guest.
+ */
+static void
+reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+                               struct kvm_mmu *context, bool execonly)
+{
+       __reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+                                   boot_cpu_data.x86_phys_bits, execonly);
+}
+
 static void update_permission_bitmask(struct kvm_vcpu *vcpu,
                                      struct kvm_mmu *mmu, bool ept)
 {
@@ -3881,6 +3934,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
        update_permission_bitmask(vcpu, context, false);
        update_last_pte_bitmap(vcpu, context);
+       reset_tdp_shadow_zero_bits_mask(vcpu, context);
 }
 
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
@@ -3908,6 +3962,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
        context->base_role.smap_andnot_wp
                = smap && !is_write_protection(vcpu);
        context->base_role.smm = is_smm(vcpu);
+       reset_shadow_zero_bits_mask(vcpu, context);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
@@ -3931,6 +3986,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
 
        update_permission_bitmask(vcpu, context, true);
        reset_rsvds_bits_mask_ept(vcpu, context, execonly);
+       reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
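
A closing note, again read off the hunks above: the new shadow_zero_check is recomputed whenever an MMU context is initialized, via reset_tdp_shadow_zero_bits_mask() in init_kvm_tdp_mmu(), reset_shadow_zero_bits_mask() in kvm_init_shadow_mmu(), and reset_ept_shadow_zero_bits_mask() in kvm_init_shadow_ept_mmu(). is_shadow_zero_bits_set() itself has no caller in the hunks shown; the fragment below is only a hypothetical illustration (the surrounding walker and the RET_MMIO_PF_BUG return are assumptions, not part of this diff) of how a shadow-page-table walker could consult it:

	/*
	 * Hypothetical use: a collected spte that sets bits the shadow page
	 * tables are guaranteed to keep zero indicates a KVM bug.
	 */
	if (is_shadow_zero_bits_set(&vcpu->arch.mmu, spte, level))
		return RET_MMIO_PF_BUG;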