Commit 95a6432c authored by Paul Mackerras's avatar Paul Mackerras Committed by Michael Ellerman

KVM: PPC: Book3S HV: Streamlined guest entry/exit path on P9 for radix guests

This creates an alternative guest entry/exit path which is used for
radix guests on POWER9 systems when we have indep_threads_mode=Y.  In
these circumstances there is exactly one vcpu per vcore and there is
no coordination required between vcpus or vcores; the vcpu can enter
the guest without needing to synchronize with anything else.

The new fast path is implemented almost entirely in C in book3s_hv.c
and runs with the MMU on until the guest is entered.  On guest exit
we use the existing path until the point where we are committed to
exiting the guest (as distinct from handling an interrupt in the
low-level code and returning to the guest) and we have pulled the
guest context from the XIVE.  At that point we check a flag in the
stack frame to see whether we came in via the old path and the new
path; if we came in via the new path then we go back to C code to do
the rest of the process of saving the guest context and restoring the
host context.

The C code is split into separate functions for handling the
OS-accessible state and the hypervisor state, with the idea that the
latter can be replaced by a hypercall when we implement nested
virtualization.
Signed-off-by: default avatarPaul Mackerras <paulus@ozlabs.org>
Reviewed-by: default avatarDavid Gibson <david@gibson.dropbear.id.au>
[mpe: Fix CONFIG_ALTIVEC=n build]
Signed-off-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
parent 53655ddd
...@@ -165,4 +165,6 @@ void kvmhv_load_host_pmu(void); ...@@ -165,4 +165,6 @@ void kvmhv_load_host_pmu(void);
void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use); void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu); void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
...@@ -583,6 +583,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval); ...@@ -583,6 +583,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status); int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
#else #else
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority) { return -1; } u32 priority) { return -1; }
...@@ -605,6 +606,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur ...@@ -605,6 +606,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status) { return -ENODEV; } int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
#endif /* CONFIG_KVM_XIVE */ #endif /* CONFIG_KVM_XIVE */
/* /*
......
...@@ -3079,6 +3079,273 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) ...@@ -3079,6 +3079,273 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 1); trace_kvmppc_run_core(vc, 1);
} }
/*
* Load up hypervisor-mode registers on P9.
*/
static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
s64 hdec;
u64 tb, purr, spurr;
int trap;
unsigned long host_hfscr = mfspr(SPRN_HFSCR);
unsigned long host_ciabr = mfspr(SPRN_CIABR);
unsigned long host_dawr = mfspr(SPRN_DAWR);
unsigned long host_dawrx = mfspr(SPRN_DAWRX);
unsigned long host_psscr = mfspr(SPRN_PSSCR);
unsigned long host_pidr = mfspr(SPRN_PID);
hdec = time_limit - mftb();
if (hdec < 0)
return BOOK3S_INTERRUPT_HV_DECREMENTER;
mtspr(SPRN_HDEC, hdec);
if (vc->tb_offset) {
u64 new_tb = mftb() + vc->tb_offset;
mtspr(SPRN_TBU40, new_tb);
tb = mftb();
if ((tb & 0xffffff) < (new_tb & 0xffffff))
mtspr(SPRN_TBU40, new_tb + 0x1000000);
vc->tb_offset_applied = vc->tb_offset;
}
if (vc->pcr)
mtspr(SPRN_PCR, vc->pcr);
mtspr(SPRN_DPDES, vc->dpdes);
mtspr(SPRN_VTB, vc->vtb);
local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
mtspr(SPRN_PURR, vcpu->arch.purr);
mtspr(SPRN_SPURR, vcpu->arch.spurr);
if (cpu_has_feature(CPU_FTR_DAWR)) {
mtspr(SPRN_DAWR, vcpu->arch.dawr);
mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
}
mtspr(SPRN_CIABR, vcpu->arch.ciabr);
mtspr(SPRN_IC, vcpu->arch.ic);
mtspr(SPRN_PID, vcpu->arch.pid);
mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
(local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
mtspr(SPRN_AMOR, ~0UL);
mtspr(SPRN_LPCR, vc->lpcr);
isync();
kvmppc_xive_push_vcpu(vcpu);
mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
trap = __kvmhv_vcpu_entry_p9(vcpu);
/* Advance host PURR/SPURR by the amount used by guest */
purr = mfspr(SPRN_PURR);
spurr = mfspr(SPRN_SPURR);
mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
purr - vcpu->arch.purr);
mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
spurr - vcpu->arch.spurr);
vcpu->arch.purr = purr;
vcpu->arch.spurr = spurr;
vcpu->arch.ic = mfspr(SPRN_IC);
vcpu->arch.pid = mfspr(SPRN_PID);
vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
mtspr(SPRN_PSSCR, host_psscr);
mtspr(SPRN_HFSCR, host_hfscr);
mtspr(SPRN_CIABR, host_ciabr);
mtspr(SPRN_DAWR, host_dawr);
mtspr(SPRN_DAWRX, host_dawrx);
mtspr(SPRN_PID, host_pidr);
/*
* Since this is radix, do a eieio; tlbsync; ptesync sequence in
* case we interrupted the guest between a tlbie and a ptesync.
*/
asm volatile("eieio; tlbsync; ptesync");
mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid); /* restore host LPID */
isync();
vc->dpdes = mfspr(SPRN_DPDES);
vc->vtb = mfspr(SPRN_VTB);
mtspr(SPRN_DPDES, 0);
if (vc->pcr)
mtspr(SPRN_PCR, 0);
if (vc->tb_offset_applied) {
u64 new_tb = mftb() - vc->tb_offset_applied;
mtspr(SPRN_TBU40, new_tb);
tb = mftb();
if ((tb & 0xffffff) < (new_tb & 0xffffff))
mtspr(SPRN_TBU40, new_tb + 0x1000000);
vc->tb_offset_applied = 0;
}
mtspr(SPRN_HDEC, 0x7fffffff);
mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
return trap;
}
/*
* Virtual-mode guest entry for POWER9 and later when the host and
* guest are both using the radix MMU. The LPIDR has already been set.
*/
int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long host_dscr = mfspr(SPRN_DSCR);
unsigned long host_tidr = mfspr(SPRN_TIDR);
unsigned long host_iamr = mfspr(SPRN_IAMR);
s64 dec;
u64 tb;
int trap, save_pmu;
dec = mfspr(SPRN_DEC);
tb = mftb();
if (dec < 512)
return BOOK3S_INTERRUPT_HV_DECREMENTER;
local_paca->kvm_hstate.dec_expires = dec + tb;
if (local_paca->kvm_hstate.dec_expires < time_limit)
time_limit = local_paca->kvm_hstate.dec_expires;
vcpu->arch.ceded = 0;
kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */
kvmppc_subcore_enter_guest();
vc->entry_exit_map = 1;
vc->in_guest = 1;
if (vcpu->arch.vpa.pinned_addr) {
struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
lp->yield_count = cpu_to_be32(yield_count);
vcpu->arch.vpa.dirty = 1;
}
if (cpu_has_feature(CPU_FTR_TM) ||
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
kvmhv_load_guest_pmu(vcpu);
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
load_fp_state(&vcpu->arch.fp);
#ifdef CONFIG_ALTIVEC
load_vr_state(&vcpu->arch.vr);
#endif
mtspr(SPRN_DSCR, vcpu->arch.dscr);
mtspr(SPRN_IAMR, vcpu->arch.iamr);
mtspr(SPRN_PSPB, vcpu->arch.pspb);
mtspr(SPRN_FSCR, vcpu->arch.fscr);
mtspr(SPRN_TAR, vcpu->arch.tar);
mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
mtspr(SPRN_BESCR, vcpu->arch.bescr);
mtspr(SPRN_WORT, vcpu->arch.wort);
mtspr(SPRN_TIDR, vcpu->arch.tid);
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
mtspr(SPRN_AMR, vcpu->arch.amr);
mtspr(SPRN_UAMOR, vcpu->arch.uamor);
if (!(vcpu->arch.ctrl & 1))
mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
if (vcpu->arch.doorbell_request) {
vc->dpdes = 1;
smp_wmb();
vcpu->arch.doorbell_request = 0;
}
trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit);
vcpu->arch.slb_max = 0;
dec = mfspr(SPRN_DEC);
tb = mftb();
vcpu->arch.dec_expires = dec + tb;
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
vcpu->arch.iamr = mfspr(SPRN_IAMR);
vcpu->arch.pspb = mfspr(SPRN_PSPB);
vcpu->arch.fscr = mfspr(SPRN_FSCR);
vcpu->arch.tar = mfspr(SPRN_TAR);
vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
vcpu->arch.bescr = mfspr(SPRN_BESCR);
vcpu->arch.wort = mfspr(SPRN_WORT);
vcpu->arch.tid = mfspr(SPRN_TIDR);
vcpu->arch.amr = mfspr(SPRN_AMR);
vcpu->arch.uamor = mfspr(SPRN_UAMOR);
vcpu->arch.dscr = mfspr(SPRN_DSCR);
mtspr(SPRN_PSPB, 0);
mtspr(SPRN_WORT, 0);
mtspr(SPRN_AMR, 0);
mtspr(SPRN_UAMOR, 0);
mtspr(SPRN_DSCR, host_dscr);
mtspr(SPRN_TIDR, host_tidr);
mtspr(SPRN_IAMR, host_iamr);
mtspr(SPRN_PSPB, 0);
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
store_fp_state(&vcpu->arch.fp);
#ifdef CONFIG_ALTIVEC
store_vr_state(&vcpu->arch.vr);
#endif
if (cpu_has_feature(CPU_FTR_TM) ||
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
save_pmu = 1;
if (vcpu->arch.vpa.pinned_addr) {
struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
lp->yield_count = cpu_to_be32(yield_count);
vcpu->arch.vpa.dirty = 1;
save_pmu = lp->pmcregs_in_use;
}
kvmhv_save_guest_pmu(vcpu, save_pmu);
vc->entry_exit_map = 0x101;
vc->in_guest = 0;
mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
kvmhv_load_host_pmu();
kvmppc_subcore_exit_guest();
return trap;
}
/* /*
* Wait for some other vcpu thread to execute us, and * Wait for some other vcpu thread to execute us, and
* wake us up when we need to handle something in the host. * wake us up when we need to handle something in the host.
...@@ -3405,6 +3672,167 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ...@@ -3405,6 +3672,167 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
return vcpu->arch.ret; return vcpu->arch.ret;
} }
static int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
struct kvm_vcpu *vcpu, u64 time_limit)
{
int trap, r, pcpu, pcpu0;
int srcu_idx;
struct kvmppc_vcore *vc;
struct kvm *kvm = vcpu->kvm;
trace_kvmppc_run_vcpu_enter(vcpu);
kvm_run->exit_reason = 0;
vcpu->arch.ret = RESUME_GUEST;
vcpu->arch.trap = 0;
vc = vcpu->arch.vcore;
vcpu->arch.ceded = 0;
vcpu->arch.run_task = current;
vcpu->arch.kvm_run = kvm_run;
vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
vcpu->arch.busy_preempt = TB_NIL;
vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
vc->runnable_threads[0] = vcpu;
vc->n_runnable = 1;
vc->runner = vcpu;
/* See if the MMU is ready to go */
if (!kvm->arch.mmu_ready) {
r = kvmhv_setup_mmu(vcpu);
if (r) {
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
kvm_run->fail_entry.
hardware_entry_failure_reason = 0;
vcpu->arch.ret = r;
goto out;
}
}
if (need_resched())
cond_resched();
kvmppc_update_vpas(vcpu);
init_vcore_to_run(vc);
vc->preempt_tb = TB_NIL;
preempt_disable();
pcpu = smp_processor_id();
vc->pcpu = pcpu;
kvmppc_prepare_radix_vcpu(vcpu, pcpu);
local_irq_disable();
hard_irq_disable();
if (signal_pending(current))
goto sigpend;
if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready)
goto out;
kvmppc_core_prepare_to_enter(vcpu);
kvmppc_clear_host_core(pcpu);
local_paca->kvm_hstate.tid = 0;
local_paca->kvm_hstate.napping = 0;
local_paca->kvm_hstate.kvm_split_mode = NULL;
kvmppc_start_thread(vcpu, vc);
kvmppc_create_dtl_entry(vcpu, vc);
trace_kvm_guest_enter(vcpu);
vc->vcore_state = VCORE_RUNNING;
trace_kvmppc_run_core(vc, 0);
mtspr(SPRN_LPID, vc->kvm->arch.lpid);
isync();
/* See comment above in kvmppc_run_core() about this */
pcpu0 = pcpu;
if (cpu_has_feature(CPU_FTR_ARCH_300))
pcpu0 &= ~0x3UL;
if (cpumask_test_cpu(pcpu0, &kvm->arch.need_tlb_flush)) {
radix__local_flush_tlb_lpid_guest(kvm->arch.lpid);
/* Clear the bit after the TLB flush */
cpumask_clear_cpu(pcpu0, &kvm->arch.need_tlb_flush);
}
trace_hardirqs_on();
guest_enter_irqoff();
srcu_idx = srcu_read_lock(&kvm->srcu);
this_cpu_disable_ftrace();
trap = kvmhv_p9_guest_entry(vcpu, time_limit);
vcpu->arch.trap = trap;
this_cpu_enable_ftrace();
srcu_read_unlock(&kvm->srcu, srcu_idx);
mtspr(SPRN_LPID, kvm->arch.host_lpid);
isync();
trace_hardirqs_off();
set_irq_happened(trap);
kvmppc_set_host_core(pcpu);
local_irq_enable();
guest_exit();
cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
preempt_enable();
/* cancel pending decrementer exception if DEC is now positive */
if (get_tb() < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu))
kvmppc_core_dequeue_dec(vcpu);
trace_kvm_guest_exit(vcpu);
r = RESUME_GUEST;
if (trap)
r = kvmppc_handle_exit_hv(kvm_run, vcpu, current);
vcpu->arch.ret = r;
if (is_kvmppc_resume_guest(r) && vcpu->arch.ceded &&
!kvmppc_vcpu_woken(vcpu)) {
kvmppc_set_timer(vcpu);
while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) {
if (signal_pending(current)) {
vcpu->stat.signal_exits++;
kvm_run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
break;
}
spin_lock(&vc->lock);
kvmppc_vcore_blocked(vc);
spin_unlock(&vc->lock);
}
}
vcpu->arch.ceded = 0;
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_run_core(vc, 1);
done:
kvmppc_remove_runnable(vc, vcpu);
trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
return vcpu->arch.ret;
sigpend:
vcpu->stat.signal_exits++;
kvm_run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
out:
local_irq_enable();
preempt_enable();
goto done;
}
static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
{ {
int r; int r;
...@@ -3480,7 +3908,10 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) ...@@ -3480,7 +3908,10 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
do { do {
r = kvmppc_run_vcpu(run, vcpu); if (kvm->arch.threads_indep && kvm_is_radix(kvm))
r = kvmhv_run_single_vcpu(run, vcpu, ~(u64)0);
else
r = kvmppc_run_vcpu(run, vcpu);
if (run->exit_reason == KVM_EXIT_PAPR_HCALL && if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
!(vcpu->arch.shregs.msr & MSR_PR)) { !(vcpu->arch.shregs.msr & MSR_PR)) {
......
...@@ -177,6 +177,7 @@ void kvmppc_subcore_enter_guest(void) ...@@ -177,6 +177,7 @@ void kvmppc_subcore_enter_guest(void)
local_paca->sibling_subcore_state->in_guest[subcore_id] = 1; local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
} }
EXPORT_SYMBOL_GPL(kvmppc_subcore_enter_guest);
void kvmppc_subcore_exit_guest(void) void kvmppc_subcore_exit_guest(void)
{ {
...@@ -187,6 +188,7 @@ void kvmppc_subcore_exit_guest(void) ...@@ -187,6 +188,7 @@ void kvmppc_subcore_exit_guest(void)
local_paca->sibling_subcore_state->in_guest[subcore_id] = 0; local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
} }
EXPORT_SYMBOL_GPL(kvmppc_subcore_exit_guest);
static bool kvmppc_tb_resync_required(void) static bool kvmppc_tb_resync_required(void)
{ {
......
...@@ -47,8 +47,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) ...@@ -47,8 +47,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define NAPPING_NOVCPU 2 #define NAPPING_NOVCPU 2
/* Stack frame offsets for kvmppc_hv_entry */ /* Stack frame offsets for kvmppc_hv_entry */
#define SFS 160 #define SFS 208
#define STACK_SLOT_TRAP (SFS-4) #define STACK_SLOT_TRAP (SFS-4)
#define STACK_SLOT_SHORT_PATH (SFS-8)
#define STACK_SLOT_TID (SFS-16) #define STACK_SLOT_TID (SFS-16)
#define STACK_SLOT_PSSCR (SFS-24) #define STACK_SLOT_PSSCR (SFS-24)
#define STACK_SLOT_PID (SFS-32) #define STACK_SLOT_PID (SFS-32)
...@@ -57,6 +58,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) ...@@ -57,6 +58,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_DAWR (SFS-56) #define STACK_SLOT_DAWR (SFS-56)
#define STACK_SLOT_DAWRX (SFS-64) #define STACK_SLOT_DAWRX (SFS-64)
#define STACK_SLOT_HFSCR (SFS-72) #define STACK_SLOT_HFSCR (SFS-72)
/* the following is used by the P9 short path */
#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
/* /*
* Call kvmppc_hv_entry in real mode. * Call kvmppc_hv_entry in real mode.
...@@ -1020,6 +1023,9 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) ...@@ -1020,6 +1023,9 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
no_xive: no_xive:
#endif /* CONFIG_KVM_XICS */ #endif /* CONFIG_KVM_XICS */
li r0, 0
stw r0, STACK_SLOT_SHORT_PATH(r1)
deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */ deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */
/* Check if we can deliver an external or decrementer interrupt now */ /* Check if we can deliver an external or decrementer interrupt now */
ld r0, VCPU_PENDING_EXC(r4) ld r0, VCPU_PENDING_EXC(r4)
...@@ -1034,13 +1040,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ...@@ -1034,13 +1040,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
bl kvmppc_guest_entry_inject_int bl kvmppc_guest_entry_inject_int
ld r4, HSTATE_KVM_VCPU(r13) ld r4, HSTATE_KVM_VCPU(r13)
71: 71:
ld r10, VCPU_PC(r4)
ld r11, VCPU_MSR(r4)
ld r6, VCPU_SRR0(r4) ld r6, VCPU_SRR0(r4)
ld r7, VCPU_SRR1(r4) ld r7, VCPU_SRR1(r4)
mtspr SPRN_SRR0, r6 mtspr SPRN_SRR0, r6
mtspr SPRN_SRR1, r7 mtspr SPRN_SRR1, r7
fast_guest_entry_c:
ld r10, VCPU_PC(r4)
ld r11, VCPU_MSR(r4)
/* r11 = vcpu->arch.msr & ~MSR_HV */ /* r11 = vcpu->arch.msr & ~MSR_HV */
rldicl r11, r11, 63 - MSR_HV_LG, 1 rldicl r11, r11, 63 - MSR_HV_LG, 1
rotldi r11, r11, 1 + MSR_HV_LG rotldi r11, r11, 1 + MSR_HV_LG
...@@ -1117,6 +1124,83 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ...@@ -1117,6 +1124,83 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
HRFI_TO_GUEST HRFI_TO_GUEST
b . b .
/*
* Enter the guest on a P9 or later system where we have exactly
* one vcpu per vcore and we don't need to go to real mode
* (which implies that host and guest are both using radix MMU mode).
* r3 = vcpu pointer
* Most SPRs and all the VSRs have been loaded already.
*/
_GLOBAL(__kvmhv_vcpu_entry_p9)
EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
mflr r0
std r0, PPC_LR_STKOFF(r1)
stdu r1, -SFS(r1)
li r0, 1
stw r0, STACK_SLOT_SHORT_PATH(r1)
std r3, HSTATE_KVM_VCPU(r13)
mfcr r4
stw r4, SFS+8(r1)
std r1, HSTATE_HOST_R1(r13)
reg = 14
.rept 18
std reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
reg = reg + 1
.endr
reg = 14
.rept 18
ld reg, __VCPU_GPR(reg)(r3)
reg = reg + 1
.endr
mfmsr r10
std r10, HSTATE_HOST_MSR(r13)
mr r4, r3
b fast_guest_entry_c
guest_exit_short_path:
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
reg = 14
.rept 18
std reg, __VCPU_GPR(reg)(r9)
reg = reg + 1
.endr
reg = 14
.rept 18
ld reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
reg = reg + 1
.endr
lwz r4, SFS+8(r1)
mtcr r4
mr r3, r12 /* trap number */
addi r1, r1, SFS
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
/* If we are in real mode, do a rfid to get back to the caller */
mfmsr r4
andi. r5, r4, MSR_IR
bnelr
rldicl r5, r4, 64 - MSR_TS_S_LG, 62 /* extract TS field */
mtspr SPRN_SRR0, r0
ld r10, HSTATE_HOST_MSR(r13)
rldimi r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
mtspr SPRN_SRR1, r10
RFI_TO_KERNEL
b .
secondary_too_late: secondary_too_late:
li r12, 0 li r12, 0
stw r12, STACK_SLOT_TRAP(r1) stw r12, STACK_SLOT_TRAP(r1)
...@@ -1377,6 +1461,11 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ ...@@ -1377,6 +1461,11 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
1: 1:
#endif /* CONFIG_KVM_XICS */ #endif /* CONFIG_KVM_XICS */
/* If we came in through the P9 short path, go back out to C now */
lwz r0, STACK_SLOT_SHORT_PATH(r1)
cmpwi r0, 0
bne guest_exit_short_path
/* For hash guest, read the guest SLB and save it away */ /* For hash guest, read the guest SLB and save it away */
ld r5, VCPU_KVM(r9) ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5) lbz r0, KVM_RADIX(r5)
......
...@@ -61,6 +61,69 @@ ...@@ -61,6 +61,69 @@
*/ */
#define XIVE_Q_GAP 2 #define XIVE_Q_GAP 2
/*
* Push a vcpu's context to the XIVE on guest entry.
* This assumes we are in virtual mode (MMU on)
*/
void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
{
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
u64 pq;
if (!tima)
return;
eieio();
__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
vcpu->arch.xive_pushed = 1;
eieio();
/*
* We clear the irq_pending flag. There is a small chance of a
* race vs. the escalation interrupt happening on another
* processor setting it again, but the only consequence is to
* cause a spurious wakeup on the next H_CEDE, which is not an
* issue.
*/
vcpu->arch.irq_pending = 0;
/*
* In single escalation mode, if the escalation interrupt is
* on, we mask it.
*/
if (vcpu->arch.xive_esc_on) {
pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
XIVE_ESB_SET_PQ_01));
mb();
/*
* We have a possible subtle race here: The escalation
* interrupt might have fired and be on its way to the
* host queue while we mask it, and if we unmask it
* early enough (re-cede right away), there is a
* theorical possibility that it fires again, thus
* landing in the target queue more than once which is
* a big no-no.
*
* Fortunately, solving this is rather easy. If the
* above load setting PQ to 01 returns a previous
* value where P is set, then we know the escalation
* interrupt is somewhere on its way to the host. In
* that case we simply don't clear the xive_esc_on
* flag below. It will be eventually cleared by the
* handler for the escalation interrupt.
*
* Then, when doing a cede, we check that flag again
* before re-enabling the escalation interrupt, and if
* set, we abort the cede.
*/
if (!(pq & XIVE_ESB_VAL_P))
/* Now P is 0, we can clear the flag */
vcpu->arch.xive_esc_on = 0;
}
}
EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
/* /*
* This is a simple trigger for a generic XIVE IRQ. This must * This is a simple trigger for a generic XIVE IRQ. This must
* only be called for interrupts that support a trigger page * only be called for interrupts that support a trigger page
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment