struct kvm_cpuid_entry2 entries[0];
};
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
-#define KVM_CPUID_FLAG_STATEFUL_FUNC 2
-#define KVM_CPUID_FLAG_STATE_READ_NEXT 4
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
struct kvm_cpuid_entry2 {
__u32 function;
PPC | KVM_REG_PPC_TLB3PS | 32
PPC | KVM_REG_PPC_EPTCFG | 32
PPC | KVM_REG_PPC_ICP_STATE | 64
+ PPC | KVM_REG_PPC_TB_OFFSET | 64
+ PPC | KVM_REG_PPC_SPMC1 | 32
+ PPC | KVM_REG_PPC_SPMC2 | 32
+ PPC | KVM_REG_PPC_IAMR | 64
+ PPC | KVM_REG_PPC_TFHAR | 64
+ PPC | KVM_REG_PPC_TFIAR | 64
+ PPC | KVM_REG_PPC_TEXASR | 64
+ PPC | KVM_REG_PPC_FSCR | 64
+ PPC | KVM_REG_PPC_PSPB | 32
+ PPC | KVM_REG_PPC_EBBHR | 64
+ PPC | KVM_REG_PPC_EBBRR | 64
+ PPC | KVM_REG_PPC_BESCR | 64
+ PPC | KVM_REG_PPC_TAR | 64
+ PPC | KVM_REG_PPC_DPDES | 64
+ PPC | KVM_REG_PPC_DAWR | 64
+ PPC | KVM_REG_PPC_DAWRX | 64
+ PPC | KVM_REG_PPC_CIABR | 64
+ PPC | KVM_REG_PPC_IC | 64
+ PPC | KVM_REG_PPC_VTB | 64
+ PPC | KVM_REG_PPC_CSIGR | 64
+ PPC | KVM_REG_PPC_TACR | 64
+ PPC | KVM_REG_PPC_TCSCR | 64
+ PPC | KVM_REG_PPC_PID | 64
+ PPC | KVM_REG_PPC_ACOP | 64
+ PPC | KVM_REG_PPC_VRSAVE | 32
+ PPC | KVM_REG_PPC_LPCR | 64
+ PPC | KVM_REG_PPC_PPR | 64
+ PPC | KVM_REG_PPC_ARCH_COMPAT 32
+ PPC | KVM_REG_PPC_TM_GPR0 | 64
+ ...
+ PPC | KVM_REG_PPC_TM_GPR31 | 64
+ PPC | KVM_REG_PPC_TM_VSR0 | 128
+ ...
+ PPC | KVM_REG_PPC_TM_VSR63 | 128
+ PPC | KVM_REG_PPC_TM_CR | 64
+ PPC | KVM_REG_PPC_TM_LR | 64
+ PPC | KVM_REG_PPC_TM_CTR | 64
+ PPC | KVM_REG_PPC_TM_FPSCR | 64
+ PPC | KVM_REG_PPC_TM_AMR | 64
+ PPC | KVM_REG_PPC_TM_PPR | 64
+ PPC | KVM_REG_PPC_TM_VRSAVE | 64
+ PPC | KVM_REG_PPC_TM_VSCR | 32
+ PPC | KVM_REG_PPC_TM_DSCR | 64
+ PPC | KVM_REG_PPC_TM_TAR | 64
ARM registers are mapped using the lower 32 bits. The upper 16 of that
is the register group type, or coprocessor number:
};
+4.81 KVM_GET_EMULATED_CPUID
+
+Capability: KVM_CAP_EXT_EMUL_CPUID
+Architectures: x86
+Type: system ioctl
+Parameters: struct kvm_cpuid2 (in/out)
+Returns: 0 on success, -1 on error
+
+struct kvm_cpuid2 {
+ __u32 nent;
+ __u32 flags;
+ struct kvm_cpuid_entry2 entries[0];
+};
+
+The member 'flags' is used for passing flags from userspace.
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
+
+struct kvm_cpuid_entry2 {
+ __u32 function;
+ __u32 index;
+ __u32 flags;
+ __u32 eax;
+ __u32 ebx;
+ __u32 ecx;
+ __u32 edx;
+ __u32 padding[3];
+};
+
+This ioctl returns x86 cpuid features which are emulated by
+kvm.Userspace can use the information returned by this ioctl to query
+which features are emulated by kvm instead of being present natively.
+
+Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
+structure with the 'nent' field indicating the number of entries in
+the variable-size array 'entries'. If the number of entries is too low
+to describe the cpu capabilities, an error (E2BIG) is returned. If the
+number is too high, the 'nent' field is adjusted and an error (ENOMEM)
+is returned. If the number is just right, the 'nent' field is adjusted
+to the number of valid entries in the 'entries' array, which is then
+filled.
+
+The entries returned are the set CPUID bits of the respective features
+which kvm emulates, as returned by the CPUID instruction, with unknown
+or unsupported feature bits cleared.
+
+Features like x2apic, for example, may not be present in the host cpu
+but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
+emulated efficiently and thus not included here.
+
+The fields in each entry are defined as follows:
+
+ function: the eax value used to obtain the entry
+ index: the ecx value used to obtain the entry (for entries that are
+ affected by ecx)
+ flags: an OR of zero or more of the following:
+ KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
+ if the index field is valid
+ KVM_CPUID_FLAG_STATEFUL_FUNC:
+ if cpuid for this function returns different values for successive
+ invocations; there will be several entries with the same function,
+ all with this flag set
+ KVM_CPUID_FLAG_STATE_READ_NEXT:
+ for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
+ the first entry to be read by a cpu
+ eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+ this function/index combination
+
+
6. Capabilities that can be enabled
-----------------------------------
#define TS_FPR(i) fpr[i][TS_FPROFFSET]
#define TS_TRANS_FPR(i) transact_fpr[i][TS_FPROFFSET]
- struct thread_struct {
- unsigned long ksp; /* Kernel stack pointer */
- #ifdef CONFIG_PPC64
- unsigned long ksp_vsid;
- #endif
- struct pt_regs *regs; /* Pointer to saved register state */
- mm_segment_t fs; /* for get_fs() validation */
- #ifdef CONFIG_BOOKE
- /* BookE base exception scratch space; align on cacheline */
- unsigned long normsave[8] ____cacheline_aligned;
- #endif
- #ifdef CONFIG_PPC32
- void *pgdir; /* root of page-table tree */
- unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */
- #endif
+ struct debug_reg {
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
/*
* The following help to manage the use of Debug Control Registers
unsigned long dvc2;
#endif
#endif
- unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */
+ };
+
+ struct thread_struct {
+ unsigned long ksp; /* Kernel stack pointer */
+
+ #ifdef CONFIG_PPC64
+ unsigned long ksp_vsid;
+ #endif
+ struct pt_regs *regs; /* Pointer to saved register state */
+ mm_segment_t fs; /* for get_fs() validation */
+ #ifdef CONFIG_BOOKE
+ /* BookE base exception scratch space; align on cacheline */
+ unsigned long normsave[8] ____cacheline_aligned;
+ #endif
+ #ifdef CONFIG_PPC32
+ void *pgdir; /* root of page-table tree */
++ unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */
+ #endif
+ /* Debug Registers */
+ struct debug_reg debug;
+
/* FP and VSX 0-31 register set */
double fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
struct {
#else
#define INIT_THREAD { \
.ksp = INIT_SP, \
- .ksp_limit = INIT_SP_LIMIT, \
.regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
.fs = KERNEL_DS, \
.fpr = {{0}}, \
DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr));
#else
DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
+ DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
+ DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
#endif /* CONFIG_PPC64 */
DEFINE(KSP, offsetof(struct thread_struct, ksp));
- DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
#ifdef CONFIG_BOOKE
DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
#endif /* CONFIG_SPE */
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
+ DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, debug.dbcr0));
#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
- #ifdef CONFIG_KVM_BOOK3S_64_HV
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr));
DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0));
DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1));
DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
/* book3s */
- #ifdef CONFIG_KVM_BOOK3S_64_HV
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
+ DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar));
+ DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar));
DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid));
DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
+ DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
+ DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
- DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
- offsetof(struct kvmppc_vcpu_book3s, vcpu));
+ DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
+ DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
+ DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr));
DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
#ifdef CONFIG_PPC_BOOK3S_64
- #ifdef CONFIG_KVM_BOOK3S_PR
+ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu));
# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f))
#else
# define SVCPU_FIELD(x, f)
HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
HSTATE_FIELD(HSTATE_NAPPING, napping);
- #ifdef CONFIG_KVM_BOOK3S_64_HV
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req);
HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_DABR, dabr);
HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
DEFINE(IPI_PRIORITY, IPI_PRIORITY);
- #endif /* CONFIG_KVM_BOOK3S_64_HV */
+ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_PPC_BOOK3S_64
HSTATE_FIELD(HSTATE_CFAR, cfar);
+ HSTATE_FIELD(HSTATE_PPR, ppr);
#endif /* CONFIG_PPC_BOOK3S_64 */
#else /* CONFIG_PPC_BOOK3S */
*/
static void set_debug_reg_defaults(struct thread_struct *thread)
{
- thread->iac1 = thread->iac2 = 0;
+ thread->debug.iac1 = thread->debug.iac2 = 0;
#if CONFIG_PPC_ADV_DEBUG_IACS > 2
- thread->iac3 = thread->iac4 = 0;
+ thread->debug.iac3 = thread->debug.iac4 = 0;
#endif
- thread->dac1 = thread->dac2 = 0;
+ thread->debug.dac1 = thread->debug.dac2 = 0;
#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- thread->dvc1 = thread->dvc2 = 0;
+ thread->debug.dvc1 = thread->debug.dvc2 = 0;
#endif
- thread->dbcr0 = 0;
+ thread->debug.dbcr0 = 0;
#ifdef CONFIG_BOOKE
/*
* Force User/Supervisor bits to b11 (user-only MSR[PR]=1)
*/
- thread->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | \
+ thread->debug.dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
DBCR1_IAC3US | DBCR1_IAC4US;
/*
* Force Data Address Compare User/Supervisor bits to be User-only
* (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0.
*/
- thread->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+ thread->debug.dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
#else
- thread->dbcr1 = 0;
+ thread->debug.dbcr1 = 0;
#endif
}
*/
mtmsr(mfmsr() & ~MSR_DE);
- mtspr(SPRN_IAC1, thread->iac1);
- mtspr(SPRN_IAC2, thread->iac2);
+ mtspr(SPRN_IAC1, thread->debug.iac1);
+ mtspr(SPRN_IAC2, thread->debug.iac2);
#if CONFIG_PPC_ADV_DEBUG_IACS > 2
- mtspr(SPRN_IAC3, thread->iac3);
- mtspr(SPRN_IAC4, thread->iac4);
+ mtspr(SPRN_IAC3, thread->debug.iac3);
+ mtspr(SPRN_IAC4, thread->debug.iac4);
#endif
- mtspr(SPRN_DAC1, thread->dac1);
- mtspr(SPRN_DAC2, thread->dac2);
+ mtspr(SPRN_DAC1, thread->debug.dac1);
+ mtspr(SPRN_DAC2, thread->debug.dac2);
#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- mtspr(SPRN_DVC1, thread->dvc1);
- mtspr(SPRN_DVC2, thread->dvc2);
+ mtspr(SPRN_DVC1, thread->debug.dvc1);
+ mtspr(SPRN_DVC2, thread->debug.dvc2);
#endif
- mtspr(SPRN_DBCR0, thread->dbcr0);
- mtspr(SPRN_DBCR1, thread->dbcr1);
+ mtspr(SPRN_DBCR0, thread->debug.dbcr0);
+ mtspr(SPRN_DBCR1, thread->debug.dbcr1);
#ifdef CONFIG_BOOKE
- mtspr(SPRN_DBCR2, thread->dbcr2);
+ mtspr(SPRN_DBCR2, thread->debug.dbcr2);
#endif
}
/*
* debug registers, set the debug registers from the values
* stored in the new thread.
*/
- static void switch_booke_debug_regs(struct thread_struct *new_thread)
+ void switch_booke_debug_regs(struct thread_struct *new_thread)
{
- if ((current->thread.dbcr0 & DBCR0_IDM)
- || (new_thread->dbcr0 & DBCR0_IDM))
+ if ((current->thread.debug.dbcr0 & DBCR0_IDM)
+ || (new_thread->debug.dbcr0 & DBCR0_IDM))
prime_debug_regs(new_thread);
}
+ EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
#ifndef CONFIG_HAVE_HW_BREAKPOINT
static void set_debug_reg_defaults(struct thread_struct *thread)
kregs = (struct pt_regs *) sp;
sp -= STACK_FRAME_OVERHEAD;
p->thread.ksp = sp;
+#ifdef CONFIG_PPC32
p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
_ALIGN_UP(sizeof(struct thread_info), 16);
-
+#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
p->thread.ptrace_bps[0] = NULL;
#endif
#error Need to fix lppaca and SLB shadow accesses in little endian mode
#endif
- /*****************************************************************************
- * *
- * Real Mode handlers that need to be in the linear mapping *
- * *
- ****************************************************************************/
-
- .globl kvmppc_skip_interrupt
- kvmppc_skip_interrupt:
- mfspr r13,SPRN_SRR0
- addi r13,r13,4
- mtspr SPRN_SRR0,r13
- GET_SCRATCH0(r13)
- rfid
- b .
-
- .globl kvmppc_skip_Hinterrupt
- kvmppc_skip_Hinterrupt:
- mfspr r13,SPRN_HSRR0
- addi r13,r13,4
- mtspr SPRN_HSRR0,r13
- GET_SCRATCH0(r13)
- hrfid
- b .
-
/*
* Call kvmppc_hv_entry in real mode.
* Must be called with interrupts hard-disabled.
* LR = return address to continue at after eventually re-enabling MMU
*/
_GLOBAL(kvmppc_hv_entry_trampoline)
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -112(r1)
mfmsr r10
- LOAD_REG_ADDR(r5, kvmppc_hv_entry)
+ LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
li r0,MSR_RI
andc r0,r10,r0
li r6,MSR_IR | MSR_DR
mtsrr1 r6
RFI
- /******************************************************************************
- * *
- * Entry code *
- * *
- *****************************************************************************/
+ kvmppc_call_hv_entry:
+ bl kvmppc_hv_entry
+
+ /* Back from guest - restore host state and return to caller */
+
+ /* Restore host DABR and DABRX */
+ ld r5,HSTATE_DABR(r13)
+ li r6,7
+ mtspr SPRN_DABR,r5
+ mtspr SPRN_DABRX,r6
+
+ /* Restore SPRG3 */
+ ld r3,PACA_SPRG3(r13)
+ mtspr SPRN_SPRG3,r3
+
+ /*
+ * Reload DEC. HDEC interrupts were disabled when
+ * we reloaded the host's LPCR value.
+ */
+ ld r3, HSTATE_DECEXP(r13)
+ mftb r4
+ subf r4, r4, r3
+ mtspr SPRN_DEC, r4
+
+ /* Reload the host's PMU registers */
+ ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
+ lbz r4, LPPACA_PMCINUSE(r3)
+ cmpwi r4, 0
+ beq 23f /* skip if not */
+ lwz r3, HSTATE_PMC(r13)
+ lwz r4, HSTATE_PMC + 4(r13)
+ lwz r5, HSTATE_PMC + 8(r13)
+ lwz r6, HSTATE_PMC + 12(r13)
+ lwz r8, HSTATE_PMC + 16(r13)
+ lwz r9, HSTATE_PMC + 20(r13)
+ BEGIN_FTR_SECTION
+ lwz r10, HSTATE_PMC + 24(r13)
+ lwz r11, HSTATE_PMC + 28(r13)
+ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ mtspr SPRN_PMC1, r3
+ mtspr SPRN_PMC2, r4
+ mtspr SPRN_PMC3, r5
+ mtspr SPRN_PMC4, r6
+ mtspr SPRN_PMC5, r8
+ mtspr SPRN_PMC6, r9
+ BEGIN_FTR_SECTION
+ mtspr SPRN_PMC7, r10
+ mtspr SPRN_PMC8, r11
+ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ ld r3, HSTATE_MMCR(r13)
+ ld r4, HSTATE_MMCR + 8(r13)
+ ld r5, HSTATE_MMCR + 16(r13)
+ mtspr SPRN_MMCR1, r4
+ mtspr SPRN_MMCRA, r5
+ mtspr SPRN_MMCR0, r3
+ isync
+ 23:
+
+ /*
+ * For external and machine check interrupts, we need
+ * to call the Linux handler to process the interrupt.
+ * We do that by jumping to absolute address 0x500 for
+ * external interrupts, or the machine_check_fwnmi label
+ * for machine checks (since firmware might have patched
+ * the vector area at 0x200). The [h]rfid at the end of the
+ * handler will return to the book3s_hv_interrupts.S code.
+ * For other interrupts we do the rfid to get back
+ * to the book3s_hv_interrupts.S code here.
+ */
+ ld r8, 112+PPC_LR_STKOFF(r1)
+ addi r1, r1, 112
+ ld r7, HSTATE_HOST_MSR(r13)
+
+ cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
+ BEGIN_FTR_SECTION
+ beq 11f
+ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* RFI into the highmem handler, or branch to interrupt handler */
+ mfmsr r6
+ li r0, MSR_RI
+ andc r6, r6, r0
+ mtmsrd r6, 1 /* Clear RI in MSR */
+ mtsrr0 r8
+ mtsrr1 r7
+ beqa 0x500 /* external interrupt (PPC970) */
+ beq cr1, 13f /* machine check */
+ RFI
+
+ /* On POWER7, we have external interrupts set to use HSRR0/1 */
+ 11: mtspr SPRN_HSRR0, r8
+ mtspr SPRN_HSRR1, r7
+ ba 0x500
+
+ 13: b machine_check_fwnmi
+
/*
* We come in here when wakened from nap mode on a secondary hw thread.
cmpdi r4,0
/* if we have no vcpu to run, go back to sleep */
beq kvm_no_guest
- b kvmppc_hv_entry
+ b 30f
27: /* XXX should handle hypervisor maintenance interrupts etc. here */
b kvm_no_guest
stw r8,HSTATE_SAVED_XIRR(r13)
b kvm_no_guest
+ 30: bl kvmppc_hv_entry
+
+ /* Back from the guest, go back to nap */
+ /* Clear our vcpu pointer so we don't come back in early */
+ li r0, 0
+ std r0, HSTATE_KVM_VCPU(r13)
+ lwsync
+ /* Clear any pending IPI - we're an offline thread */
+ ld r5, HSTATE_XICS_PHYS(r13)
+ li r7, XICS_XIRR
+ lwzcix r3, r5, r7 /* ack any pending interrupt */
+ rlwinm. r0, r3, 0, 0xffffff /* any pending? */
+ beq 37f
+ sync
+ li r0, 0xff
+ li r6, XICS_MFRR
+ stbcix r0, r5, r6 /* clear the IPI */
+ stwcix r3, r5, r7 /* EOI it */
+ 37: sync
+
+ /* increment the nap count and then go to nap mode */
+ ld r4, HSTATE_KVM_VCORE(r13)
+ addi r4, r4, VCORE_NAP_COUNT
+ lwsync /* make previous updates visible */
+ 51: lwarx r3, 0, r4
+ addi r3, r3, 1
+ stwcx. r3, 0, r4
+ bne 51b
+
+ kvm_no_guest:
+ li r0, KVM_HWTHREAD_IN_NAP
+ stb r0, HSTATE_HWTHREAD_STATE(r13)
+ li r3, LPCR_PECE0
+ mfspr r4, SPRN_LPCR
+ rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
+ mtspr SPRN_LPCR, r4
+ isync
+ std r0, HSTATE_SCRATCH0(r13)
+ ptesync
+ ld r0, HSTATE_SCRATCH0(r13)
+ 1: cmpd r0, r0
+ bne 1b
+ nap
+ b .
+
+ /******************************************************************************
+ * *
+ * Entry code *
+ * *
+ *****************************************************************************/
+
.global kvmppc_hv_entry
kvmppc_hv_entry:
* all other volatile GPRS = free
*/
mflr r0
- std r0, HSTATE_VMHANDLER(r13)
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -112(r1)
/* Set partition DABR */
/* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
ld r3, VCPU_MMCR(r4)
ld r5, VCPU_MMCR + 8(r4)
ld r6, VCPU_MMCR + 16(r4)
+ ld r7, VCPU_SIAR(r4)
+ ld r8, VCPU_SDAR(r4)
mtspr SPRN_MMCR1, r5
mtspr SPRN_MMCRA, r6
+ mtspr SPRN_SIAR, r7
+ mtspr SPRN_SDAR, r8
mtspr SPRN_MMCR0, r3
isync
/* Save R1 in the PACA */
std r1, HSTATE_HOST_R1(r13)
- /* Increment yield count if they have a VPA */
- ld r3, VCPU_VPA(r4)
- cmpdi r3, 0
- beq 25f
- lwz r5, LPPACA_YIELDCOUNT(r3)
- addi r5, r5, 1
- stw r5, LPPACA_YIELDCOUNT(r3)
- li r6, 1
- stb r6, VCPU_VPA_DIRTY(r4)
- 25:
/* Load up DAR and DSISR */
ld r5, VCPU_DAR(r4)
lwz r6, VCPU_DSISR(r4)
mtspr SPRN_DAR, r5
mtspr SPRN_DSISR, r6
+ li r6, KVM_GUEST_MODE_HOST_HV
+ stb r6, HSTATE_IN_GUEST(r13)
+
BEGIN_FTR_SECTION
/* Restore AMR and UAMOR, set AMOR to all 1s */
ld r5,VCPU_AMR(r4)
bdnz 28b
ptesync
- 22: li r0,1
+ /* Add timebase offset onto timebase */
+ 22: ld r8,VCORE_TB_OFFSET(r5)
+ cmpdi r8,0
+ beq 37f
+ mftb r6 /* current host timebase */
+ add r8,r8,r6
+ mtspr SPRN_TBU40,r8 /* update upper 40 bits */
+ mftb r7 /* check if lower 24 bits overflowed */
+ clrldi r6,r6,40
+ clrldi r7,r7,40
+ cmpld r7,r6
+ bge 37f
+ addis r8,r8,0x100 /* if so, increment upper 40 bits */
+ mtspr SPRN_TBU40,r8
+
+ /* Load guest PCR value to select appropriate compat mode */
+ 37: ld r7, VCORE_PCR(r5)
+ cmpdi r7, 0
+ beq 38f
+ mtspr SPRN_PCR, r7
+ 38:
+ li r0,1
stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
b 10f
beq 20b
/* Set LPCR and RMOR. */
- 10: ld r8,KVM_LPCR(r9)
+ 10: ld r8,VCORE_LPCR(r5)
mtspr SPRN_LPCR,r8
ld r8,KVM_RMOR(r9)
mtspr SPRN_RMOR,r8
isync
+ /* Increment yield count if they have a VPA */
+ ld r3, VCPU_VPA(r4)
+ cmpdi r3, 0
+ beq 25f
+ lwz r5, LPPACA_YIELDCOUNT(r3)
+ addi r5, r5, 1
+ stw r5, LPPACA_YIELDCOUNT(r3)
+ li r6, 1
+ stb r6, VCPU_VPA_DIRTY(r4)
+ 25:
/* Check if HDEC expires soon */
mfspr r3,SPRN_HDEC
cmpwi r3,10
bne 24b
isync
- ld r7,KVM_LPCR(r9) /* use kvm->arch.lpcr to store HID4 */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ ld r7,VCORE_LPCR(r5) /* use vcore->lpcr to store HID4 */
li r0,0x18f
rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
or r0,r7,r0
mtspr SPRN_HSRR1,r11
/* Activate guest mode, so faults get handled by KVM */
- li r9, KVM_GUEST_MODE_GUEST
+ li r9, KVM_GUEST_MODE_GUEST_HV
stb r9, HSTATE_IN_GUEST(r13)
/* Enter guest */
ld r5, VCPU_CFAR(r4)
mtspr SPRN_CFAR, r5
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ BEGIN_FTR_SECTION
+ ld r0, VCPU_PPR(r4)
+ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r5, VCPU_LR(r4)
lwz r6, VCPU_CR(r4)
mtlr r5
mtcr r6
- ld r0, VCPU_GPR(R0)(r4)
ld r1, VCPU_GPR(R1)(r4)
ld r2, VCPU_GPR(R2)(r4)
ld r3, VCPU_GPR(R3)(r4)
ld r12, VCPU_GPR(R12)(r4)
ld r13, VCPU_GPR(R13)(r4)
+ BEGIN_FTR_SECTION
+ mtspr SPRN_PPR, r0
+ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ ld r0, VCPU_GPR(R0)(r4)
ld r4, VCPU_GPR(R4)(r4)
hrfid
/*
* We come here from the first-level interrupt handlers.
*/
- .globl kvmppc_interrupt
- kvmppc_interrupt:
+ .globl kvmppc_interrupt_hv
+ kvmppc_interrupt_hv:
/*
* Register contents:
* R12 = interrupt vector
*/
/* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */
std r9, HSTATE_HOST_R2(r13)
+
+ lbz r9, HSTATE_IN_GUEST(r13)
+ cmpwi r9, KVM_GUEST_MODE_HOST_HV
+ beq kvmppc_bad_host_intr
+ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ cmpwi r9, KVM_GUEST_MODE_GUEST
+ ld r9, HSTATE_HOST_R2(r13)
+ beq kvmppc_interrupt_pr
+ #endif
+ /* We're now back in the host but in guest MMU context */
+ li r9, KVM_GUEST_MODE_HOST_HV
+ stb r9, HSTATE_IN_GUEST(r13)
+
ld r9, HSTATE_KVM_VCPU(r13)
/* Save registers */
ld r3, HSTATE_CFAR(r13)
std r3, VCPU_CFAR(r9)
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ BEGIN_FTR_SECTION
+ ld r4, HSTATE_PPR(r13)
+ std r4, VCPU_PPR(r9)
+ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Restore R1/R2 so we can handle faults */
ld r1, HSTATE_HOST_R1(r13)
std r3, VCPU_GPR(R13)(r9)
std r4, VCPU_LR(r9)
- /* Unset guest mode */
- li r0, KVM_GUEST_MODE_NONE
- stb r0, HSTATE_IN_GUEST(r13)
-
stw r12,VCPU_TRAP(r9)
/* Save HEIR (HV emulation assist reg) in last_inst
* set, we know the host wants us out so let's do it now
*/
do_ext_interrupt:
- lbz r0, HSTATE_HOST_IPI(r13)
- cmpwi r0, 0
- bne ext_interrupt_to_host
-
- /* Now read the interrupt from the ICP */
- ld r5, HSTATE_XICS_PHYS(r13)
- li r7, XICS_XIRR
- cmpdi r5, 0
- beq- ext_interrupt_to_host
- lwzcix r3, r5, r7
- rlwinm. r0, r3, 0, 0xffffff
- sync
- beq 3f /* if nothing pending in the ICP */
-
- /* We found something in the ICP...
- *
- * If it's not an IPI, stash it in the PACA and return to
- * the host, we don't (yet) handle directing real external
- * interrupts directly to the guest
- */
- cmpwi r0, XICS_IPI
- bne ext_stash_for_host
-
- /* It's an IPI, clear the MFRR and EOI it */
- li r0, 0xff
- li r6, XICS_MFRR
- stbcix r0, r5, r6 /* clear the IPI */
- stwcix r3, r5, r7 /* EOI it */
- sync
-
- /* We need to re-check host IPI now in case it got set in the
- * meantime. If it's clear, we bounce the interrupt to the
- * guest
- */
- lbz r0, HSTATE_HOST_IPI(r13)
- cmpwi r0, 0
- bne- 1f
+ bl kvmppc_read_intr
+ cmpdi r3, 0
+ bgt ext_interrupt_to_host
/* Allright, looks like an IPI for the guest, we need to set MER */
- 3:
/* Check if any CPU is heading out to the host, if so head out too */
ld r5, HSTATE_KVM_VCORE(r13)
lwz r0, VCORE_ENTRY_EXIT(r5)
mtspr SPRN_LPCR, r8
b fast_guest_return
- /* We raced with the host, we need to resend that IPI, bummer */
- 1: li r0, IPI_PRIORITY
- stbcix r0, r5, r6 /* set the IPI */
- sync
- b ext_interrupt_to_host
-
- ext_stash_for_host:
- /* It's not an IPI and it's for the host, stash it in the PACA
- * before exit, it will be picked up by the host ICP driver
- */
- stw r3, HSTATE_SAVED_XIRR(r13)
ext_interrupt_to_host:
guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
- /* Save DEC */
- mfspr r5,SPRN_DEC
- mftb r6
- extsw r5,r5
- add r5,r5,r6
- std r5,VCPU_DEC_EXPIRES(r9)
-
/* Save more register state */
mfdar r6
mfdsisr r7
mtspr SPRN_SDR1,r6 /* switch to partition page table */
mtspr SPRN_LPID,r7
isync
- li r0,0
+
+ /* Subtract timebase offset from timebase */
+ ld r8,VCORE_TB_OFFSET(r5)
+ cmpdi r8,0
+ beq 17f
+ mftb r6 /* current host timebase */
+ subf r8,r8,r6
+ mtspr SPRN_TBU40,r8 /* update upper 40 bits */
+ mftb r7 /* check if lower 24 bits overflowed */
+ clrldi r6,r6,40
+ clrldi r7,r7,40
+ cmpld r7,r6
+ bge 17f
+ addis r8,r8,0x100 /* if so, increment upper 40 bits */
+ mtspr SPRN_TBU40,r8
+
+ /* Reset PCR */
+ 17: ld r0, VCORE_PCR(r5)
+ cmpdi r0, 0
+ beq 18f
+ li r0, 0
+ mtspr SPRN_PCR, r0
+ 18:
+ /* Signal secondary CPUs to continue */
stb r0,VCORE_IN_GUEST(r5)
lis r8,0x7fff /* MAX_INT@h */
mtspr SPRN_HDEC,r8
1: addi r8,r8,16
.endr
+ /* Save DEC */
+ mfspr r5,SPRN_DEC
+ mftb r6
+ extsw r5,r5
+ add r5,r5,r6
+ std r5,VCPU_DEC_EXPIRES(r9)
+
/* Save and reset AMR and UAMOR before turning on the MMU */
BEGIN_FTR_SECTION
mfspr r5,SPRN_AMR
mtspr SPRN_AMR,r6
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ /* Unset guest mode */
+ li r0, KVM_GUEST_MODE_NONE
+ stb r0, HSTATE_IN_GUEST(r13)
+
/* Switch DSCR back to host value */
BEGIN_FTR_SECTION
mfspr r8, SPRN_DSCR
ld r7, HSTATE_DSCR(r13)
- std r8, VCPU_DSCR(r7)
+ std r8, VCPU_DSCR(r9)
mtspr SPRN_DSCR, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
b 22f
21: mfspr r5, SPRN_MMCR1
+ mfspr r7, SPRN_SIAR
+ mfspr r8, SPRN_SDAR
std r4, VCPU_MMCR(r9)
std r5, VCPU_MMCR + 8(r9)
std r6, VCPU_MMCR + 16(r9)
+ std r7, VCPU_SIAR(r9)
+ std r8, VCPU_SDAR(r9)
mfspr r3, SPRN_PMC1
mfspr r4, SPRN_PMC2
mfspr r5, SPRN_PMC3
stw r11, VCPU_PMC + 28(r9)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
22:
+ ld r0, 112+PPC_LR_STKOFF(r1)
+ addi r1, r1, 112
+ mtlr r0
+ blr
+ secondary_too_late:
+ ld r5,HSTATE_KVM_VCORE(r13)
+ HMT_LOW
+ 13: lbz r3,VCORE_IN_GUEST(r5)
+ cmpwi r3,0
+ bne 13b
+ HMT_MEDIUM
+ li r0, KVM_GUEST_MODE_NONE
+ stb r0, HSTATE_IN_GUEST(r13)
+ ld r11,PACA_SLBSHADOWPTR(r13)
- /* Secondary threads go off to take a nap on POWER7 */
- BEGIN_FTR_SECTION
- lwz r0,VCPU_PTID(r9)
- cmpwi r0,0
- bne secondary_nap
- END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-
- /* Restore host DABR and DABRX */
- ld r5,HSTATE_DABR(r13)
- li r6,7
- mtspr SPRN_DABR,r5
- mtspr SPRN_DABRX,r6
-
- /* Restore SPRG3 */
- ld r3,PACA_SPRG3(r13)
- mtspr SPRN_SPRG3,r3
-
- /*
- * Reload DEC. HDEC interrupts were disabled when
- * we reloaded the host's LPCR value.
- */
- ld r3, HSTATE_DECEXP(r13)
- mftb r4
- subf r4, r4, r3
- mtspr SPRN_DEC, r4
-
- /* Reload the host's PMU registers */
- ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
- lbz r4, LPPACA_PMCINUSE(r3)
- cmpwi r4, 0
- beq 23f /* skip if not */
- lwz r3, HSTATE_PMC(r13)
- lwz r4, HSTATE_PMC + 4(r13)
- lwz r5, HSTATE_PMC + 8(r13)
- lwz r6, HSTATE_PMC + 12(r13)
- lwz r8, HSTATE_PMC + 16(r13)
- lwz r9, HSTATE_PMC + 20(r13)
- BEGIN_FTR_SECTION
- lwz r10, HSTATE_PMC + 24(r13)
- lwz r11, HSTATE_PMC + 28(r13)
- END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- mtspr SPRN_PMC1, r3
- mtspr SPRN_PMC2, r4
- mtspr SPRN_PMC3, r5
- mtspr SPRN_PMC4, r6
- mtspr SPRN_PMC5, r8
- mtspr SPRN_PMC6, r9
- BEGIN_FTR_SECTION
- mtspr SPRN_PMC7, r10
- mtspr SPRN_PMC8, r11
- END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- ld r3, HSTATE_MMCR(r13)
- ld r4, HSTATE_MMCR + 8(r13)
- ld r5, HSTATE_MMCR + 16(r13)
- mtspr SPRN_MMCR1, r4
- mtspr SPRN_MMCRA, r5
- mtspr SPRN_MMCR0, r3
- isync
- 23:
- /*
- * For external and machine check interrupts, we need
- * to call the Linux handler to process the interrupt.
- * We do that by jumping to absolute address 0x500 for
- * external interrupts, or the machine_check_fwnmi label
- * for machine checks (since firmware might have patched
- * the vector area at 0x200). The [h]rfid at the end of the
- * handler will return to the book3s_hv_interrupts.S code.
- * For other interrupts we do the rfid to get back
- * to the book3s_hv_interrupts.S code here.
- */
- ld r8, HSTATE_VMHANDLER(r13)
- ld r7, HSTATE_HOST_MSR(r13)
-
- cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
- cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
- BEGIN_FTR_SECTION
- beq 11f
- END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-
- /* RFI into the highmem handler, or branch to interrupt handler */
- mfmsr r6
- li r0, MSR_RI
- andc r6, r6, r0
- mtmsrd r6, 1 /* Clear RI in MSR */
- mtsrr0 r8
- mtsrr1 r7
- beqa 0x500 /* external interrupt (PPC970) */
- beq cr1, 13f /* machine check */
- RFI
-
- /* On POWER7, we have external interrupts set to use HSRR0/1 */
- 11: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- ba 0x500
-
- 13: b machine_check_fwnmi
+ .rept SLB_NUM_BOLTED
+ ld r5,SLBSHADOW_SAVEAREA(r11)
+ ld r6,SLBSHADOW_SAVEAREA+8(r11)
+ andis. r7,r5,SLB_ESID_V@h
+ beq 1f
+ slbmte r6,r5
+ 1: addi r11,r11,16
+ .endr
+ b 22b
/*
* Check whether an HDSI is an HPTE not found fault or something else.
stw r8, VCPU_LAST_INST(r9)
/* Unset guest mode. */
- li r0, KVM_GUEST_MODE_NONE
+ li r0, KVM_GUEST_MODE_HOST_HV
stb r0, HSTATE_IN_GUEST(r13)
b guest_exit_cont
rotldi r11, r11, 63
b fast_interrupt_c_return
- secondary_too_late:
- ld r5,HSTATE_KVM_VCORE(r13)
- HMT_LOW
- 13: lbz r3,VCORE_IN_GUEST(r5)
- cmpwi r3,0
- bne 13b
- HMT_MEDIUM
- ld r11,PACA_SLBSHADOWPTR(r13)
-
- .rept SLB_NUM_BOLTED
- ld r5,SLBSHADOW_SAVEAREA(r11)
- ld r6,SLBSHADOW_SAVEAREA+8(r11)
- andis. r7,r5,SLB_ESID_V@h
- beq 1f
- slbmte r6,r5
- 1: addi r11,r11,16
- .endr
+ /*
+ * Determine what sort of external interrupt is pending (if any).
+ * Returns:
+ * 0 if no interrupt is pending
+ * 1 if an interrupt is pending that needs to be handled by the host
+ * -1 if there was a guest wakeup IPI (which has now been cleared)
+ */
+ kvmppc_read_intr:
+ /* see if a host IPI is pending */
+ li r3, 1
+ lbz r0, HSTATE_HOST_IPI(r13)
+ cmpwi r0, 0
+ bne 1f
- secondary_nap:
- /* Clear our vcpu pointer so we don't come back in early */
- li r0, 0
- std r0, HSTATE_KVM_VCPU(r13)
- lwsync
- /* Clear any pending IPI - assume we're a secondary thread */
- ld r5, HSTATE_XICS_PHYS(r13)
+ /* Now read the interrupt from the ICP */
+ ld r6, HSTATE_XICS_PHYS(r13)
li r7, XICS_XIRR
- lwzcix r3, r5, r7 /* ack any pending interrupt */
- rlwinm. r0, r3, 0, 0xffffff /* any pending? */
- beq 37f
+ cmpdi r6, 0
+ beq- 1f
+ lwzcix r0, r6, r7
+ rlwinm. r3, r0, 0, 0xffffff
sync
- li r0, 0xff
- li r6, XICS_MFRR
- stbcix r0, r5, r6 /* clear the IPI */
- stwcix r3, r5, r7 /* EOI it */
- 37: sync
+ beq 1f /* if nothing pending in the ICP */
- /* increment the nap count and then go to nap mode */
- ld r4, HSTATE_KVM_VCORE(r13)
- addi r4, r4, VCORE_NAP_COUNT
- lwsync /* make previous updates visible */
- 51: lwarx r3, 0, r4
- addi r3, r3, 1
- stwcx. r3, 0, r4
- bne 51b
+ /* We found something in the ICP...
+ *
+ * If it's not an IPI, stash it in the PACA and return to
+ * the host, we don't (yet) handle directing real external
+ * interrupts directly to the guest
+ */
+ cmpwi r3, XICS_IPI /* if there is, is it an IPI? */
+ li r3, 1
+ bne 42f
- kvm_no_guest:
- li r0, KVM_HWTHREAD_IN_NAP
- stb r0, HSTATE_HWTHREAD_STATE(r13)
+ /* It's an IPI, clear the MFRR and EOI it */
+ li r3, 0xff
+ li r8, XICS_MFRR
+ stbcix r3, r6, r8 /* clear the IPI */
+ stwcix r0, r6, r7 /* EOI it */
+ sync
- li r3, LPCR_PECE0
- mfspr r4, SPRN_LPCR
- rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
- mtspr SPRN_LPCR, r4
- isync
- std r0, HSTATE_SCRATCH0(r13)
- ptesync
- ld r0, HSTATE_SCRATCH0(r13)
- 1: cmpd r0, r0
- bne 1b
- nap
- b .
+ /* We need to re-check host IPI now in case it got set in the
+ * meantime. If it's clear, we bounce the interrupt to the
+ * guest
+ */
+ lbz r0, HSTATE_HOST_IPI(r13)
+ cmpwi r0, 0
+ bne- 43f
+
+ /* OK, it's an IPI for us */
+ li r3, -1
+ 1: blr
+
+ 42: /* It's not an IPI and it's for the host, stash it in the PACA
+ * before exit, it will be picked up by the host ICP driver
+ */
+ stw r0, HSTATE_SAVED_XIRR(r13)
+ b 1b
+
+ 43: /* We raced with the host, we need to resend that IPI, bummer */
+ li r0, IPI_PRIORITY
+ stbcix r0, r6, r8 /* set the IPI */
+ sync
+ b 1b
/*
* Save away FP, VMX and VSX registers.
lwz r7,VCPU_VRSAVE(r4)
mtspr SPRN_VRSAVE,r7
blr
+
+ /*
+ * We come here if we get any exception or interrupt while we are
+ * executing host real mode code while in guest MMU context.
+ * For now just spin, but we should do something better.
+ */
+ kvmppc_bad_host_intr:
+ b .
#include <asm/kvm_ppc.h>
#include "e500.h"
- #include "trace.h"
#include "timing.h"
#include "e500_mmu_host.h"
+ #include "trace_booke.h"
+
#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)
static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
ref->pfn = pfn;
ref->flags |= E500_TLB_VALID;
+ /* Mark the page accessed */
+ kvm_set_pfn_accessed(pfn);
+
if (tlbe_is_writable(gtlbe))
kvm_set_pfn_dirty(pfn);
}
unsigned long hva;
int pfnmap = 0;
int tsize = BOOK3E_PAGESZ_4K;
+ int ret = 0;
+ unsigned long mmu_seq;
+ struct kvm *kvm = vcpu_e500->vcpu.kvm;
+
+ /* used to check for invalidations in progress */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
/*
* Translate guest physical to true physical, acquiring
gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
}
+ spin_lock(&kvm->mmu_lock);
+ if (mmu_notifier_retry(kvm, mmu_seq)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
kvmppc_e500_ref_setup(ref, gtlbe, pfn);
kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
/* Clear i-cache for new pages */
kvmppc_mmu_flush_icache(pfn);
+out:
+ spin_unlock(&kvm->mmu_lock);
+
/* Drop refcount on page, so that mmu notifiers can clear it */
kvm_release_pfn_clean(pfn);
- return 0;
+ return ret;
}
/* XXX only map the one-one case, for now use TLB0 */
int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
u64 xcr0;
+ u64 valid_bits;
/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
if (index != XCR_XFEATURE_ENABLED_MASK)
return 1;
if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
return 1;
- if (xcr0 & ~vcpu->arch.guest_supported_xcr0)
+
+ /*
+ * Do not allow the guest to set bits that we do not support
+ * saving. However, xcr0 bit 0 is always set, even if the
+ * emulated CPU does not support XSAVE (see fx_init).
+ */
+ valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP;
+ if (xcr0 & ~valid_bits)
return 1;
+
kvm_put_guest_xcr0(vcpu);
vcpu->arch.xcr0 = xcr0;
return 0;
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
+ case KVM_CAP_EXT_EMUL_CPUID:
case KVM_CAP_CLOCKSOURCE:
case KVM_CAP_PIT:
case KVM_CAP_NOP_IO_DELAY:
r = 0;
break;
}
- case KVM_GET_SUPPORTED_CPUID: {
+ case KVM_GET_SUPPORTED_CPUID:
+ case KVM_GET_EMULATED_CPUID: {
struct kvm_cpuid2 __user *cpuid_arg = argp;
struct kvm_cpuid2 cpuid;
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
- r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
- cpuid_arg->entries);
+
+ r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
+ ioctl);
if (r)
goto out;
static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
{
- return vcpu->kvm->arch.iommu_domain &&
- !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
+ return kvm_arch_has_noncoherent_dma(vcpu->kvm);
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
for (i = 0; i < guest_xcrs->nr_xcrs; i++)
/* Only support XCR0 currently */
- if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) {
+ if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
- guest_xcrs->xcrs[0].value);
+ guest_xcrs->xcrs[i].value);
break;
}
if (r)
static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
- memset(&ctxt->twobyte, 0,
- (void *)&ctxt->_regs - (void *)&ctxt->twobyte);
+ memset(&ctxt->opcode_len, 0,
+ (void *)&ctxt->_regs - (void *)&ctxt->opcode_len);
ctxt->fetch.start = 0;
ctxt->fetch.end = 0;
ctxt->have_exception = false;
ctxt->perm_ok = false;
- ctxt->only_vendor_specific_insn
- = emulation_type & EMULTYPE_TRAP_UD;
+ ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
r = x86_decode_insn(ctxt, insn, insn_len);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+ atomic_set(&kvm->arch.noncoherent_dma_count, 0);
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
}
- void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
int i;
}
}
- int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
int i;
kvm_x86_ops->interrupt_allowed(vcpu);
}
+void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
+{
+ atomic_inc(&kvm->arch.noncoherent_dma_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
+
+void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
+{
+ atomic_dec(&kvm->arch.noncoherent_dma_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
+
+bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
+{
+ return atomic_read(&kvm->arch.noncoherent_dma_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
+
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
struct kvm_userspace_memory_region *mem);
int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
- void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont);
- int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
+ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages);
void kvm_arch_memslots_updated(struct kvm *kvm);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
}
#endif
+#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
+void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
+void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
+bool kvm_arch_has_noncoherent_dma(struct kvm *kvm);
+#else
+static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
+{
+}
+
+static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
+{
+}
+
+static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
+{
+ return false;
+}
+#endif
+
static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
{
#ifdef __KVM_HAVE_ARCH_WQP
int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
-/* For vcpu->arch.iommu_flags */
-#define KVM_IOMMU_CACHE_COHERENCY 0x1
-
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
/* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
- * is very similar to exiting to userspase from rcu point of view. In
+ * is very similar to exiting to userspace from rcu point of view. In
* addition CPU may stay in a guest mode for quite a long time (up to
* one time slice). Lets treat guest mode as quiescent state, just like
* we do with user-mode execution.
extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_xics_ops;
+extern struct kvm_device_ops kvm_vfio_ops;
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
/* machine type bits, to be used as argument to KVM_CREATE_VM */
#define KVM_VM_S390_UCONTROL 1
+ /* on ppc, 0 indicate default, 1 should force HV and 2 PR */
+ #define KVM_VM_PPC_HV 1
+ #define KVM_VM_PPC_PR 2
+
#define KVM_S390_SIE_PAGE_OFFSET 1
/*
#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06
#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07
#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08
+#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
/*
* Extension capability list.
#define KVM_CAP_IRQ_XICS 92
#define KVM_CAP_ARM_EL1_32BIT 93
#define KVM_CAP_SPAPR_MULTITCE 94
+#define KVM_CAP_EXT_EMUL_CPUID 95
#ifdef KVM_CAP_IRQ_ROUTING
#define KVM_DEV_TYPE_FSL_MPIC_20 1
#define KVM_DEV_TYPE_FSL_MPIC_42 2
#define KVM_DEV_TYPE_XICS 3
+#define KVM_DEV_TYPE_VFIO 4
+#define KVM_DEV_VFIO_GROUP 1
+#define KVM_DEV_VFIO_GROUP_ADD 1
+#define KVM_DEV_VFIO_GROUP_DEL 2
/*
* ioctls for VM fds
++kvm->stat.remote_tlb_flush;
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
}
+ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
void kvm_reload_remote_mmus(struct kvm *kvm)
{
/*
* Free any memory in @free but not in @dont.
*/
- static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+ static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
kvm_destroy_dirty_bitmap(free);
- kvm_arch_free_memslot(free, dont);
+ kvm_arch_free_memslot(kvm, free, dont);
free->npages = 0;
}
struct kvm_memory_slot *memslot;
kvm_for_each_memslot(memslot, slots)
- kvm_free_physmem_slot(memslot, NULL);
+ kvm_free_physmem_slot(kvm, memslot, NULL);
kfree(kvm->memslots);
}
if (change == KVM_MR_CREATE) {
new.userspace_addr = mem->userspace_addr;
- if (kvm_arch_create_memslot(&new, npages))
+ if (kvm_arch_create_memslot(kvm, &new, npages))
goto out_free;
}
goto out_free;
}
- /*
- * IOMMU mapping: New slots need to be mapped. Old slots need to be
- * un-mapped and re-mapped if their base changes. Since base change
- * unmapping is handled above with slot deletion, mapping alone is
- * needed here. Anything else the iommu might care about for existing
- * slots (size changes, userspace addr changes and read-only flag
- * changes) is disallowed above, so any other attribute changes getting
- * here can be skipped.
- */
- if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
- r = kvm_iommu_map_pages(kvm, &new);
- if (r)
- goto out_slots;
- }
-
/* actual memory is freed via old in kvm_free_physmem_slot below */
if (change == KVM_MR_DELETE) {
new.dirty_bitmap = NULL;
kvm_arch_commit_memory_region(kvm, mem, &old, change);
- kvm_free_physmem_slot(&old, &new);
+ kvm_free_physmem_slot(kvm, &old, &new);
kfree(old_memslots);
+ /*
+ * IOMMU mapping: New slots need to be mapped. Old slots need to be
+ * un-mapped and re-mapped if their base changes. Since base change
+ * unmapping is handled above with slot deletion, mapping alone is
+ * needed here. Anything else the iommu might care about for existing
+ * slots (size changes, userspace addr changes and read-only flag
+ * changes) is disallowed above, so any other attribute changes getting
+ * here can be skipped.
+ */
+ if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+ r = kvm_iommu_map_pages(kvm, &new);
+ return r;
+ }
+
return 0;
out_slots:
kfree(slots);
out_free:
- kvm_free_physmem_slot(&new, &old);
+ kvm_free_physmem_slot(kvm, &new, &old);
out:
return r;
}
out:
return r;
}
+ EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
bool kvm_largepages_enabled(void)
{
unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
{
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
- if (writable)
+ unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
+
+ if (!kvm_is_error_hva(hva) && writable)
*writable = !memslot_is_readonly(slot);
- return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
+ return hva;
}
static int kvm_read_hva(void *data, void __user *hva, int len)
memslot = gfn_to_memslot(kvm, gfn);
mark_page_dirty_in_slot(kvm, memslot, gfn);
}
+ EXPORT_SYMBOL_GPL(mark_page_dirty);
/*
* The vCPU has executed a HLT instruction with in-kernel mode enabled.
finish_wait(&vcpu->wq, &wait);
}
+ EXPORT_SYMBOL_GPL(kvm_vcpu_block);
#ifndef CONFIG_S390
/*
case KVM_DEV_TYPE_XICS:
ops = &kvm_xics_ops;
break;
+#endif
+#ifdef CONFIG_KVM_VFIO
+ case KVM_DEV_TYPE_VFIO:
+ ops = &kvm_vfio_ops;
+ break;
#endif
default:
return -ENODEV;