arch/arm/kvm/mmu.c

   1 /*
   2  * Copyright (C) 2012 - Virtual Open Systems and Columbia University
   3  * Author: Christoffer Dall <c.dall@virtualopensystems.com>
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License, version 2, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the Free Software
  16  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  17  */
  18
  19 #include <linux/mman.h>
  20 #include <linux/kvm_host.h>
  21 #include <linux/io.h>
  22 #include <trace/events/kvm.h>
  23 #include <asm/pgalloc.h>
  24 #include <asm/cacheflush.h>
  25 #include <asm/kvm_arm.h>
  26 #include <asm/kvm_mmu.h>
  27 #include <asm/kvm_mmio.h>
  28 #include <asm/kvm_asm.h>
  29 #include <asm/kvm_emulate.h>
  30
  31 #include "trace.h"
  32
  33 extern char  __hyp_idmap_text_start[], __hyp_idmap_text_end[];
  34
  35 static pgd_t *boot_hyp_pgd;
  36 static pgd_t *hyp_pgd;
  37 static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
  38
  39 static void *init_bounce_page;
  40 static unsigned long hyp_idmap_start;
  41 static unsigned long hyp_idmap_end;
  42 static phys_addr_t hyp_idmap_vector;
  43
  44 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
  45 {
  46         /*
  47          * This function also gets called when dealing with HYP page
  48          * tables. As HYP doesn't have an associated struct kvm (and
  49          * the HYP page tables are fairly static), we don't do
  50          * anything there.
  51          */
  52         if (kvm)
  53                 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
  54 }
  55
  56 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
  57                                   int min, int max)
  58 {
  59         void *page;
  60
  61         BUG_ON(max > KVM_NR_MEM_OBJS);
  62         if (cache->nobjs >= min)
  63                 return 0;
  64         while (cache->nobjs < max) {
  65                 page = (void *)__get_free_page(PGALLOC_GFP);
  66                 if (!page)
  67                         return -ENOMEM;
  68                 cache->objects[cache->nobjs++] = page;
  69         }
  70         return 0;
  71 }
  72
  73 static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
  74 {
  75         while (mc->nobjs)
  76                 free_page((unsigned long)mc->objects[--mc->nobjs]);
  77 }
  78
  79 static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
  80 {
  81         void *p;
  82
  83         BUG_ON(!mc || !mc->nobjs);
  84         p = mc->objects[--mc->nobjs];
  85         return p;
  86 }
  87
  88 static bool page_empty(void *ptr)
  89 {
  90         struct page *ptr_page = virt_to_page(ptr);
  91         return page_count(ptr_page) == 1;
  92 }
  93
  94 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
  95 {
  96         pmd_t *pmd_table = pmd_offset(pud, 0);
  97         pud_clear(pud);
  98         kvm_tlb_flush_vmid_ipa(kvm, addr);
  99         pmd_free(NULL, pmd_table);
 100         put_page(virt_to_page(pud));
 101 }
 102
 103 static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 104 {
 105         pte_t *pte_table = pte_offset_kernel(pmd, 0);
 106         pmd_clear(pmd);
 107         kvm_tlb_flush_vmid_ipa(kvm, addr);
 108         pte_free_kernel(NULL, pte_table);
 109         put_page(virt_to_page(pmd));
 110 }
 111
 112 static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
 113 {
 114         if (pte_present(*pte)) {
 115                 kvm_set_pte(pte, __pte(0));
 116                 put_page(virt_to_page(pte));
 117                 kvm_tlb_flush_vmid_ipa(kvm, addr);
 118         }
 119 }
 120
 121 static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 122                         unsigned long long start, u64 size)
 123 {
 124         pgd_t *pgd;
 125         pud_t *pud;
 126         pmd_t *pmd;
 127         pte_t *pte;
 128         unsigned long long addr = start, end = start + size;
 129         u64 next;
 130
 131         while (addr < end) {
 132                 pgd = pgdp + pgd_index(addr);
 133                 pud = pud_offset(pgd, addr);
 134                 if (pud_none(*pud)) {
 135                         addr = pud_addr_end(addr, end);
 136                         continue;
 137                 }
 138
 139                 pmd = pmd_offset(pud, addr);
 140                 if (pmd_none(*pmd)) {
 141                         addr = pmd_addr_end(addr, end);
 142                         continue;
 143                 }
 144
 145                 pte = pte_offset_kernel(pmd, addr);
 146                 clear_pte_entry(kvm, pte, addr);
 147                 next = addr + PAGE_SIZE;
 148
 149                 /* If we emptied the pte, walk back up the ladder */
 150                 if (page_empty(pte)) {
 151                         clear_pmd_entry(kvm, pmd, addr);
 152                         next = pmd_addr_end(addr, end);
 153                         if (page_empty(pmd) && !page_empty(pud)) {
 154                                 clear_pud_entry(kvm, pud, addr);
 155                                 next = pud_addr_end(addr, end);
 156                         }
 157                 }
 158
 159                 addr = next;
 160         }
 161 }
 162
 163 /**
 164  * free_boot_hyp_pgd - free HYP boot page tables
 165  *
 166  * Free the HYP boot page tables. The bounce page is also freed.
 167  */
 168 void free_boot_hyp_pgd(void)
 169 {
 170         mutex_lock(&kvm_hyp_pgd_mutex);
 171
 172         if (boot_hyp_pgd) {
 173                 unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
 174                 unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 175                 kfree(boot_hyp_pgd);
 176                 boot_hyp_pgd = NULL;
 177         }
 178
 179         if (hyp_pgd)
 180                 unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 181
 182         kfree(init_bounce_page);
 183         init_bounce_page = NULL;
 184
 185         mutex_unlock(&kvm_hyp_pgd_mutex);
 186 }
 187
 188 /**
 189  * free_hyp_pgds - free Hyp-mode page tables
 190  *
 191  * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
 192  * therefore contains either mappings in the kernel memory area (above
 193  * PAGE_OFFSET), or device mappings in the vmalloc range (from
 194  * VMALLOC_START to VMALLOC_END).
 195  *
 196  * boot_hyp_pgd should only map two pages for the init code.
 197  */
 198 void free_hyp_pgds(void)
 199 {
 200         unsigned long addr;
 201
 202         free_boot_hyp_pgd();
 203
 204         mutex_lock(&kvm_hyp_pgd_mutex);
 205
 206         if (hyp_pgd) {
 207                 for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
 208                         unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 209                 for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
 210                         unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 211
 212                 kfree(hyp_pgd);
 213                 hyp_pgd = NULL;
 214         }
 215
 216         mutex_unlock(&kvm_hyp_pgd_mutex);
 217 }
 218
 219 static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
 220                                     unsigned long end, unsigned long pfn,
 221                                     pgprot_t prot)
 222 {
 223         pte_t *pte;
 224         unsigned long addr;
 225
 226         addr = start;
 227         do {
 228                 pte = pte_offset_kernel(pmd, addr);
 229                 kvm_set_pte(pte, pfn_pte(pfn, prot));
 230                 get_page(virt_to_page(pte));
 231                 kvm_flush_dcache_to_poc(pte, sizeof(*pte));
 232                 pfn++;
 233         } while (addr += PAGE_SIZE, addr != end);
 234 }
 235
 236 static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 237                                    unsigned long end, unsigned long pfn,
 238                                    pgprot_t prot)
 239 {
 240         pmd_t *pmd;
 241         pte_t *pte;
 242         unsigned long addr, next;
 243
 244         addr = start;
 245         do {
 246                 pmd = pmd_offset(pud, addr);
 247
 248                 BUG_ON(pmd_sect(*pmd));
 249
 250                 if (pmd_none(*pmd)) {
 251                         pte = pte_alloc_one_kernel(NULL, addr);
 252                         if (!pte) {
 253                                 kvm_err("Cannot allocate Hyp pte\n");
 254                                 return -ENOMEM;
 255                         }
 256                         pmd_populate_kernel(NULL, pmd, pte);
 257                         get_page(virt_to_page(pmd));
 258                         kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
 259                 }
 260
 261                 next = pmd_addr_end(addr, end);
 262
 263                 create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
 264                 pfn += (next - addr) >> PAGE_SHIFT;
 265         } while (addr = next, addr != end);
 266
 267         return 0;
 268 }
 269
 270 static int __create_hyp_mappings(pgd_t *pgdp,
 271                                  unsigned long start, unsigned long end,
 272                                  unsigned long pfn, pgprot_t prot)
 273 {
 274         pgd_t *pgd;
 275         pud_t *pud;
 276         pmd_t *pmd;
 277         unsigned long addr, next;
 278         int err = 0;
 279
 280         mutex_lock(&kvm_hyp_pgd_mutex);
 281         addr = start & PAGE_MASK;
 282         end = PAGE_ALIGN(end);
 283         do {
 284                 pgd = pgdp + pgd_index(addr);
 285                 pud = pud_offset(pgd, addr);
 286
 287                 if (pud_none_or_clear_bad(pud)) {
 288                         pmd = pmd_alloc_one(NULL, addr);
 289                         if (!pmd) {
 290                                 kvm_err("Cannot allocate Hyp pmd\n");
 291                                 err = -ENOMEM;
 292                                 goto out;
 293                         }
 294                         pud_populate(NULL, pud, pmd);
 295                         get_page(virt_to_page(pud));
 296                         kvm_flush_dcache_to_poc(pud, sizeof(*pud));
 297                 }
 298
 299                 next = pgd_addr_end(addr, end);
 300                 err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
 301                 if (err)
 302                         goto out;
 303                 pfn += (next - addr) >> PAGE_SHIFT;
 304         } while (addr = next, addr != end);
 305 out:
 306         mutex_unlock(&kvm_hyp_pgd_mutex);
 307         return err;
 308 }
 309
 310 /**
 311  * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
 312  * @from:       The virtual kernel start address of the range
 313  * @to:         The virtual kernel end address of the range (exclusive)
 314  *
 315  * The same virtual address as the kernel virtual address is also used
 316  * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
 317  * physical pages.
 318  */
 319 int create_hyp_mappings(void *from, void *to)
 320 {
 321         unsigned long phys_addr = virt_to_phys(from);
 322         unsigned long start = KERN_TO_HYP((unsigned long)from);
 323         unsigned long end = KERN_TO_HYP((unsigned long)to);
 324
 325         /* Check for a valid kernel memory mapping */
 326         if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
 327                 return -EINVAL;
 328
 329         return __create_hyp_mappings(hyp_pgd, start, end,
 330                                      __phys_to_pfn(phys_addr), PAGE_HYP);
 331 }
 332
 333 /**
 334  * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
 335  * @from:       The kernel start VA of the range
 336  * @to:         The kernel end VA of the range (exclusive)
 337  * @phys_addr:  The physical start address which gets mapped
 338  *
 339  * The resulting HYP VA is the same as the kernel VA, modulo
 340  * HYP_PAGE_OFFSET.
 341  */
 342 int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 343 {
 344         unsigned long start = KERN_TO_HYP((unsigned long)from);
 345         unsigned long end = KERN_TO_HYP((unsigned long)to);
 346
 347         /* Check for a valid kernel IO mapping */
 348         if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
 349                 return -EINVAL;
 350
 351         return __create_hyp_mappings(hyp_pgd, start, end,
 352                                      __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 353 }
 354
 355 /**
 356  * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 357  * @kvm:        The KVM struct pointer for the VM.
 358  *
 359  * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
 360  * support either full 40-bit input addresses or limited to 32-bit input
 361  * addresses). Clears the allocated pages.
 362  *
 363  * Note we don't need locking here as this is only called when the VM is
 364  * created, which can only be done once.
 365  */
 366 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 367 {
 368         pgd_t *pgd;
 369
 370         if (kvm->arch.pgd != NULL) {
 371                 kvm_err("kvm_arch already initialized?\n");
 372                 return -EINVAL;
 373         }
 374
 375         pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
 376         if (!pgd)
 377                 return -ENOMEM;
 378
 379         memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
 380         kvm_clean_pgd(pgd);
 381         kvm->arch.pgd = pgd;
 382
 383         return 0;
 384 }
 385
 386 /**
 387  * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
 388  * @kvm:   The VM pointer
 389  * @start: The intermediate physical base address of the range to unmap
 390  * @size:  The size of the area to unmap
 391  *
 392  * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
 393  * be called while holding mmu_lock (unless for freeing the stage2 pgd before
 394  * destroying the VM), otherwise another faulting VCPU may come in and mess
 395  * with things behind our backs.
 396  */
 397 static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 398 {
 399         unmap_range(kvm, kvm->arch.pgd, start, size);
 400 }
 401
 402 /**
 403  * kvm_free_stage2_pgd - free all stage-2 tables
 404  * @kvm:        The KVM struct pointer for the VM.
 405  *
 406  * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
 407  * underlying level-2 and level-3 tables before freeing the actual level-1 table
 408  * and setting the struct pointer to NULL.
 409  *
 410  * Note we don't need locking here as this is only called when the VM is
 411  * destroyed, which can only be done once.
 412  */
 413 void kvm_free_stage2_pgd(struct kvm *kvm)
 414 {
 415         if (kvm->arch.pgd == NULL)
 416                 return;
 417
 418         unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
 419         free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
 420         kvm->arch.pgd = NULL;
 421 }
 422
 423
 424 static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 425                           phys_addr_t addr, const pte_t *new_pte, bool iomap)
 426 {
 427         pgd_t *pgd;
 428         pud_t *pud;
 429         pmd_t *pmd;
 430         pte_t *pte, old_pte;
 431
 432         /* Create 2nd stage page table mapping - Level 1 */
 433         pgd = kvm->arch.pgd + pgd_index(addr);
 434         pud = pud_offset(pgd, addr);
 435         if (pud_none(*pud)) {
 436                 if (!cache)
 437                         return 0; /* ignore calls from kvm_set_spte_hva */
 438                 pmd = mmu_memory_cache_alloc(cache);
 439                 pud_populate(NULL, pud, pmd);
 440                 get_page(virt_to_page(pud));
 441         }
 442
 443         pmd = pmd_offset(pud, addr);
 444
 445         /* Create 2nd stage page table mapping - Level 2 */
 446         if (pmd_none(*pmd)) {
 447                 if (!cache)
 448                         return 0; /* ignore calls from kvm_set_spte_hva */
 449                 pte = mmu_memory_cache_alloc(cache);
 450                 kvm_clean_pte(pte);
 451                 pmd_populate_kernel(NULL, pmd, pte);
 452                 get_page(virt_to_page(pmd));
 453         }
 454
 455         pte = pte_offset_kernel(pmd, addr);
 456
 457         if (iomap && pte_present(*pte))
 458                 return -EFAULT;
 459
 460         /* Create 2nd stage page table mapping - Level 3 */
 461         old_pte = *pte;
 462         kvm_set_pte(pte, *new_pte);
 463         if (pte_present(old_pte))
 464                 kvm_tlb_flush_vmid_ipa(kvm, addr);
 465         else
 466                 get_page(virt_to_page(pte));
 467
 468         return 0;
 469 }
 470
 471 /**
 472  * kvm_phys_addr_ioremap - map a device range to guest IPA
 473  *
 474  * @kvm:        The KVM pointer
 475  * @guest_ipa:  The IPA at which to insert the mapping
 476  * @pa:         The physical address of the device
 477  * @size:       The size of the mapping
 478  */
 479 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 480                           phys_addr_t pa, unsigned long size)
 481 {
 482         phys_addr_t addr, end;
 483         int ret = 0;
 484         unsigned long pfn;
 485         struct kvm_mmu_memory_cache cache = { 0, };
 486
 487         end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
 488         pfn = __phys_to_pfn(pa);
 489
 490         for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
 491                 pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
 492
 493                 ret = mmu_topup_memory_cache(&cache, 2, 2);
 494                 if (ret)
 495                         goto out;
 496                 spin_lock(&kvm->mmu_lock);
 497                 ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
 498                 spin_unlock(&kvm->mmu_lock);
 499                 if (ret)
 500                         goto out;
 501
 502                 pfn++;
 503         }
 504
 505 out:
 506         mmu_free_memory_cache(&cache);
 507         return ret;
 508 }
 509
 510 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 511                           gfn_t gfn, struct kvm_memory_slot *memslot,
 512                           unsigned long fault_status)
 513 {
 514         pte_t new_pte;
 515         pfn_t pfn;
 516         int ret;
 517         bool write_fault, writable;
 518         unsigned long mmu_seq;
 519         struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 520
 521         write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
 522         if (fault_status == FSC_PERM && !write_fault) {
 523                 kvm_err("Unexpected L2 read permission error\n");
 524                 return -EFAULT;
 525         }
 526
 527         /* We need minimum second+third level pages */
 528         ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
 529         if (ret)
 530                 return ret;
 531
 532         mmu_seq = vcpu->kvm->mmu_notifier_seq;
 533         /*
 534          * Ensure the read of mmu_notifier_seq happens before we call
 535          * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
 536          * the page we just got a reference to gets unmapped before we have a
 537          * chance to grab the mmu_lock, which ensure that if the page gets
 538          * unmapped afterwards, the call to kvm_unmap_hva will take it away
 539          * from us again properly. This smp_rmb() interacts with the smp_wmb()
 540          * in kvm_mmu_notifier_invalidate_<page|range_end>.
 541          */
 542         smp_rmb();
 543
 544         pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
 545         if (is_error_pfn(pfn))
 546                 return -EFAULT;
 547
 548         new_pte = pfn_pte(pfn, PAGE_S2);
 549         coherent_icache_guest_page(vcpu->kvm, gfn);
 550
 551         spin_lock(&vcpu->kvm->mmu_lock);
 552         if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 553                 goto out_unlock;
 554         if (writable) {
 555                 kvm_set_s2pte_writable(&new_pte);
 556                 kvm_set_pfn_dirty(pfn);
 557         }
 558         stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
 559
 560 out_unlock:
 561         spin_unlock(&vcpu->kvm->mmu_lock);
 562         kvm_release_pfn_clean(pfn);
 563         return 0;
 564 }
 565
 566 /**
 567  * kvm_handle_guest_abort - handles all 2nd stage aborts
 568  * @vcpu:       the VCPU pointer
 569  * @run:        the kvm_run structure
 570  *
 571  * Any abort that gets to the host is almost guaranteed to be caused by a
 572  * missing second stage translation table entry, which can mean that either the
 573  * guest simply needs more memory and we must allocate an appropriate page or it
 574  * can mean that the guest tried to access I/O memory, which is emulated by user
 575  * space. The distinction is based on the IPA causing the fault and whether this
 576  * memory region has been registered as standard RAM by user space.
 577  */
 578 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 579 {
 580         unsigned long fault_status;
 581         phys_addr_t fault_ipa;
 582         struct kvm_memory_slot *memslot;
 583         bool is_iabt;
 584         gfn_t gfn;
 585         int ret, idx;
 586
 587         is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
 588         fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 589
 590         trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
 591                               kvm_vcpu_get_hfar(vcpu), fault_ipa);
 592
 593         /* Check the stage-2 fault is trans. fault or write fault */
 594         fault_status = kvm_vcpu_trap_get_fault(vcpu);
 595         if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
 596                 kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
 597                         kvm_vcpu_trap_get_class(vcpu), fault_status);
 598                 return -EFAULT;
 599         }
 600
 601         idx = srcu_read_lock(&vcpu->kvm->srcu);
 602
 603         gfn = fault_ipa >> PAGE_SHIFT;
 604         if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
 605                 if (is_iabt) {
 606                         /* Prefetch Abort on I/O address */
 607                         kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
 608                         ret = 1;
 609                         goto out_unlock;
 610                 }
 611
 612                 if (fault_status != FSC_FAULT) {
 613                         kvm_err("Unsupported fault status on io memory: %#lx\n",
 614                                 fault_status);
 615                         ret = -EFAULT;
 616                         goto out_unlock;
 617                 }
 618
 619                 /*
 620                  * The IPA is reported as [MAX:12], so we need to
 621                  * complement it with the bottom 12 bits from the
 622                  * faulting VA. This is always 12 bits, irrespective
 623                  * of the page size.
 624                  */
 625                 fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
 626                 ret = io_mem_abort(vcpu, run, fault_ipa);
 627                 goto out_unlock;
 628         }
 629
 630         memslot = gfn_to_memslot(vcpu->kvm, gfn);
 631
 632         ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
 633         if (ret == 0)
 634                 ret = 1;
 635 out_unlock:
 636         srcu_read_unlock(&vcpu->kvm->srcu, idx);
 637         return ret;
 638 }
 639
 640 static void handle_hva_to_gpa(struct kvm *kvm,
 641                               unsigned long start,
 642                               unsigned long end,
 643                               void (*handler)(struct kvm *kvm,
 644                                               gpa_t gpa, void *data),
 645                               void *data)
 646 {
 647         struct kvm_memslots *slots;
 648         struct kvm_memory_slot *memslot;
 649
 650         slots = kvm_memslots(kvm);
 651
 652         /* we only care about the pages that the guest sees */
 653         kvm_for_each_memslot(memslot, slots) {
 654                 unsigned long hva_start, hva_end;
 655                 gfn_t gfn, gfn_end;
 656
 657                 hva_start = max(start, memslot->userspace_addr);
 658                 hva_end = min(end, memslot->userspace_addr +
 659                                         (memslot->npages << PAGE_SHIFT));
 660                 if (hva_start >= hva_end)
 661                         continue;
 662
 663                 /*
 664                  * {gfn(page) | page intersects with [hva_start, hva_end)} =
 665                  * {gfn_start, gfn_start+1, ..., gfn_end-1}.
 666                  */
 667                 gfn = hva_to_gfn_memslot(hva_start, memslot);
 668                 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
 669
 670                 for (; gfn < gfn_end; ++gfn) {
 671                         gpa_t gpa = gfn << PAGE_SHIFT;
 672                         handler(kvm, gpa, data);
 673                 }
 674         }
 675 }
 676
 677 static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
 678 {
 679         unmap_stage2_range(kvm, gpa, PAGE_SIZE);
 680 }
 681
 682 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 683 {
 684         unsigned long end = hva + PAGE_SIZE;
 685
 686         if (!kvm->arch.pgd)
 687                 return 0;
 688
 689         trace_kvm_unmap_hva(hva);
 690         handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
 691         return 0;
 692 }
 693
 694 int kvm_unmap_hva_range(struct kvm *kvm,
 695                         unsigned long start, unsigned long end)
 696 {
 697         if (!kvm->arch.pgd)
 698                 return 0;
 699
 700         trace_kvm_unmap_hva_range(start, end);
 701         handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
 702         return 0;
 703 }
 704
 705 static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
 706 {
 707         pte_t *pte = (pte_t *)data;
 708
 709         stage2_set_pte(kvm, NULL, gpa, pte, false);
 710 }
 711
 712
 713 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 714 {
 715         unsigned long end = hva + PAGE_SIZE;
 716         pte_t stage2_pte;
 717
 718         if (!kvm->arch.pgd)
 719                 return;
 720
 721         trace_kvm_set_spte_hva(hva);
 722         stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
 723         handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
 724 }
 725
 726 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 727 {
 728         mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
 729 }
 730
 731 phys_addr_t kvm_mmu_get_httbr(void)
 732 {
 733         return virt_to_phys(hyp_pgd);
 734 }
 735
 736 phys_addr_t kvm_mmu_get_boot_httbr(void)
 737 {
 738         return virt_to_phys(boot_hyp_pgd);
 739 }
 740
 741 phys_addr_t kvm_get_idmap_vector(void)
 742 {
 743         return hyp_idmap_vector;
 744 }
 745
 746 int kvm_mmu_init(void)
 747 {
 748         int err;
 749
 750         hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
 751         hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
 752         hyp_idmap_vector = virt_to_phys(__kvm_hyp_init);
 753
 754         if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
 755                 /*
 756                  * Our init code is crossing a page boundary. Allocate
 757                  * a bounce page, copy the code over and use that.
 758                  */
 759                 size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
 760                 phys_addr_t phys_base;
 761
 762                 init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
 763                 if (!init_bounce_page) {
 764                         kvm_err("Couldn't allocate HYP init bounce page\n");
 765                         err = -ENOMEM;
 766                         goto out;
 767                 }
 768
 769                 memcpy(init_bounce_page, __hyp_idmap_text_start, len);
 770                 /*
 771                  * Warning: the code we just copied to the bounce page
 772                  * must be flushed to the point of coherency.
 773                  * Otherwise, the data may be sitting in L2, and HYP
 774                  * mode won't be able to observe it as it runs with
 775                  * caches off at that point.
 776                  */
 777                 kvm_flush_dcache_to_poc(init_bounce_page, len);
 778
 779                 phys_base = virt_to_phys(init_bounce_page);
 780                 hyp_idmap_vector += phys_base - hyp_idmap_start;
 781                 hyp_idmap_start = phys_base;
 782                 hyp_idmap_end = phys_base + len;
 783
 784                 kvm_info("Using HYP init bounce page @%lx\n",
 785                          (unsigned long)phys_base);
 786         }
 787
 788         hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
 789         boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
 790         if (!hyp_pgd || !boot_hyp_pgd) {
 791                 kvm_err("Hyp mode PGD not allocated\n");
 792                 err = -ENOMEM;
 793                 goto out;
 794         }
 795
 796         /* Create the idmap in the boot page tables */
 797         err =   __create_hyp_mappings(boot_hyp_pgd,
 798                                       hyp_idmap_start, hyp_idmap_end,
 799                                       __phys_to_pfn(hyp_idmap_start),
 800                                       PAGE_HYP);
 801
 802         if (err) {
 803                 kvm_err("Failed to idmap %lx-%lx\n",
 804                         hyp_idmap_start, hyp_idmap_end);
 805                 goto out;
 806         }
 807
 808         /* Map the very same page at the trampoline VA */
 809         err =   __create_hyp_mappings(boot_hyp_pgd,
 810                                       TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
 811                                       __phys_to_pfn(hyp_idmap_start),
 812                                       PAGE_HYP);
 813         if (err) {
 814                 kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n",
 815                         TRAMPOLINE_VA);
 816                 goto out;
 817         }
 818
 819         /* Map the same page again into the runtime page tables */
 820         err =   __create_hyp_mappings(hyp_pgd,
 821                                       TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
 822                                       __phys_to_pfn(hyp_idmap_start),
 823                                       PAGE_HYP);
 824         if (err) {
 825                 kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n",
 826                         TRAMPOLINE_VA);
 827                 goto out;
 828         }
 829
 830         return 0;
 831 out:
 832         free_hyp_pgds();
 833         return err;
 834 }