Commit 5a213b92 authored by Paolo Bonzini's avatar Paolo Bonzini

Merge branch 'topic/ppc-kvm' of...

Merge branch 'topic/ppc-kvm' of https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux into HEAD

Fix conflicts between memslot overhaul and commit 511d25d6 ("KVM:
PPC: Book3S: Suppress warnings when allocating too big memory slots")
from the powerpc tree.
parents d8f6ef45 63fa47ba
......@@ -4144,6 +4144,14 @@
Override pmtimer IOPort with a hex value.
e.g. pmtmr=0x508
pmu_override= [PPC] Override the PMU.
This option takes over the PMU facility, so it is no
longer usable by perf. Setting this option starts the
PMU counters by setting MMCR0 to 0 (the FC bit is
cleared). If a number is given, then MMCR1 is set to
that number, otherwise (e.g., 'pmu_override=on'), MMCR1
remains 0.
pm_debug_messages [SUSPEND,KNL]
Enable suspend/resume debug messages during boot up.
......
......@@ -141,11 +141,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
bool preserve_nv) { }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
void kvmhv_save_host_pmu(void);
void kvmhv_load_host_pmu(void);
void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
......
......@@ -79,6 +79,7 @@
#define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800
#define BOOK3S_INTERRUPT_DECREMENTER 0x900
#define BOOK3S_INTERRUPT_HV_DECREMENTER 0x980
#define BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER 0x1980
#define BOOK3S_INTERRUPT_DOORBELL 0xa00
#define BOOK3S_INTERRUPT_SYSCALL 0xc00
#define BOOK3S_INTERRUPT_TRACE 0xd00
......
......@@ -406,6 +406,12 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
return vcpu->arch.fault_dar;
}
/* Expiry time of vcpu DEC relative to host TB */
static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu)
{
return vcpu->arch.dec_expires - vcpu->arch.vcore->tb_offset;
}
static inline bool is_kvmppc_resume_guest(int r)
{
return (r == RESUME_GUEST || r == RESUME_GUEST_NV);
......
......@@ -44,7 +44,6 @@ struct kvm_nested_guest {
struct mutex tlb_lock; /* serialize page faults and tlbies */
struct kvm_nested_guest *next;
cpumask_t need_tlb_flush;
cpumask_t cpu_in_guest;
short prev_cpu[NR_CPUS];
u8 radix; /* is this nested guest radix */
};
......@@ -154,7 +153,9 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
return radix;
}
int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr);
unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr);
int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb);
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
......
......@@ -287,7 +287,6 @@ struct kvm_arch {
u32 online_vcores;
atomic_t hpte_mod_interest;
cpumask_t need_tlb_flush;
cpumask_t cpu_in_guest;
u8 radix;
u8 fwnmi_enabled;
u8 secure_guest;
......@@ -579,6 +578,10 @@ struct kvm_vcpu_arch {
ulong cfar;
ulong ppr;
u32 pspb;
u8 load_ebb;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
u8 load_tm;
#endif
ulong fscr;
ulong shadow_fscr;
ulong ebbhr;
......@@ -741,7 +744,7 @@ struct kvm_vcpu_arch {
struct hrtimer dec_timer;
u64 dec_jiffies;
u64 dec_expires;
u64 dec_expires; /* Relative to guest timebase. */
unsigned long pending_exceptions;
u8 ceded;
u8 prodded;
......
......@@ -550,8 +550,7 @@ extern void kvm_hv_vm_activated(void);
extern void kvm_hv_vm_deactivated(void);
extern bool kvm_hv_mode_active(void);
extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
struct kvm_nested_guest *nested);
extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu);
#else
static inline void __init kvm_cma_reserve(void)
......@@ -758,6 +757,7 @@ void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
void kvmppc_subcore_enter_guest(void);
void kvmppc_subcore_exit_guest(void);
long kvmppc_realmode_hmi_handler(void);
long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu);
long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel);
long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
......
......@@ -112,6 +112,9 @@ static inline void clear_task_ebb(struct task_struct *t)
#endif
}
void kvmppc_save_user_regs(void);
void kvmppc_save_current_sprs(void);
extern int set_thread_tidr(struct task_struct *t);
#endif /* _ASM_POWERPC_SWITCH_TO_H */
......@@ -18,6 +18,8 @@
#include <asm/vdso/timebase.h>
/* time.c */
extern u64 decrementer_max;
extern unsigned long tb_ticks_per_jiffy;
extern unsigned long tb_ticks_per_usec;
extern unsigned long tb_ticks_per_sec;
......@@ -97,19 +99,16 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low,
extern void secondary_cpu_time_init(void);
extern void __init time_init(void);
#ifdef CONFIG_PPC64
static inline unsigned long test_irq_work_pending(void)
{
unsigned long x;
DECLARE_PER_CPU(u64, decrementers_next_tb);
asm volatile("lbz %0,%1(13)"
: "=r" (x)
: "i" (offsetof(struct paca_struct, irq_work_pending)));
return x;
static inline u64 timer_get_next_tb(void)
{
return __this_cpu_read(decrementers_next_tb);
}
#endif
DECLARE_PER_CPU(u64, decrementers_next_tb);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
void timer_rearm_host_dec(u64 now);
#endif
/* Convert timebase ticks to nanoseconds */
unsigned long long tb_to_ns(unsigned long long tb_ticks);
......
......@@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
static void init_PMU(void)
{
mtspr(SPRN_MMCRA, 0);
mtspr(SPRN_MMCR0, 0);
mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
}
......@@ -123,7 +123,7 @@ static void init_PMU_ISA31(void)
{
mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
}
/*
......@@ -137,6 +137,7 @@ void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t)
return;
mtspr(SPRN_LPID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
}
......@@ -150,6 +151,7 @@ void __restore_cpu_power7(void)
return;
mtspr(SPRN_LPID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
}
......@@ -164,6 +166,7 @@ void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t)
return;
mtspr(SPRN_LPID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
init_HFSCR();
......@@ -184,6 +187,7 @@ void __restore_cpu_power8(void)
return;
mtspr(SPRN_LPID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
init_HFSCR();
......@@ -202,6 +206,7 @@ void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
......@@ -223,6 +228,7 @@ void __restore_cpu_power9(void)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
......@@ -242,6 +248,7 @@ void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
......@@ -264,6 +271,7 @@ void __restore_cpu_power10(void)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
......
......@@ -80,6 +80,7 @@ static void __restore_cpu_cpufeatures(void)
mtspr(SPRN_LPCR, system_registers.lpcr);
if (hv_mode) {
mtspr(SPRN_LPID, 0);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
mtspr(SPRN_PCR, system_registers.pcr);
}
......@@ -216,6 +217,7 @@ static int __init feat_enable_hv(struct dt_cpu_feature *f)
}
mtspr(SPRN_LPID, 0);
mtspr(SPRN_AMOR, ~0);
lpcr = mfspr(SPRN_LPCR);
lpcr &= ~LPCR_LPES0; /* HV external interrupts */
......@@ -351,7 +353,7 @@ static void init_pmu_power8(void)
}
mtspr(SPRN_MMCRA, 0);
mtspr(SPRN_MMCR0, 0);
mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
mtspr(SPRN_MMCRS, 0);
......@@ -390,7 +392,7 @@ static void init_pmu_power9(void)
mtspr(SPRN_MMCRC, 0);
mtspr(SPRN_MMCRA, 0);
mtspr(SPRN_MMCR0, 0);
mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
}
......@@ -426,7 +428,7 @@ static void init_pmu_power10(void)
mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
}
static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
......
......@@ -1156,6 +1156,40 @@ static inline void save_sprs(struct thread_struct *t)
#endif
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
void kvmppc_save_user_regs(void)
{
unsigned long usermsr;
if (!current->thread.regs)
return;
usermsr = current->thread.regs->msr;
if (usermsr & MSR_FP)
save_fpu(current);
if (usermsr & MSR_VEC)
save_altivec(current);
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (usermsr & MSR_TM) {
current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
current->thread.tm_texasr = mfspr(SPRN_TEXASR);
current->thread.regs->msr &= ~MSR_TM;
}
#endif
}
EXPORT_SYMBOL_GPL(kvmppc_save_user_regs);
void kvmppc_save_current_sprs(void)
{
save_sprs(&current->thread);
}
EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
static inline void restore_sprs(struct thread_struct *old_thread,
struct thread_struct *new_thread)
{
......
......@@ -88,6 +88,7 @@ static struct clocksource clocksource_timebase = {
#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */
static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev);
......@@ -107,6 +108,7 @@ struct clock_event_device decrementer_clockevent = {
EXPORT_SYMBOL(decrementer_clockevent);
DEFINE_PER_CPU(u64, decrementers_next_tb);
EXPORT_SYMBOL_GPL(decrementers_next_tb);
static DEFINE_PER_CPU(struct clock_event_device, decrementers);
#define XSEC_PER_SEC (1024*1024)
......@@ -496,6 +498,16 @@ EXPORT_SYMBOL(profile_pc);
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
*/
#ifdef CONFIG_PPC64
static inline unsigned long test_irq_work_pending(void)
{
unsigned long x;
asm volatile("lbz %0,%1(13)"
: "=r" (x)
: "i" (offsetof(struct paca_struct, irq_work_pending)));
return x;
}
static inline void set_irq_work_pending_flag(void)
{
asm volatile("stb %0,%1(13)" : :
......@@ -539,13 +551,44 @@ void arch_irq_work_raise(void)
preempt_enable();
}
static void set_dec_or_work(u64 val)
{
set_dec(val);
/* We may have raced with new irq work */
if (unlikely(test_irq_work_pending()))
set_dec(1);
}
#else /* CONFIG_IRQ_WORK */
#define test_irq_work_pending() 0
#define clear_irq_work_pending()
static void set_dec_or_work(u64 val)
{
set_dec(val);
}
#endif /* CONFIG_IRQ_WORK */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
void timer_rearm_host_dec(u64 now)
{
u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
WARN_ON_ONCE(!arch_irqs_disabled());
WARN_ON_ONCE(mfmsr() & MSR_EE);
if (now >= *next_tb) {
local_paca->irq_happened |= PACA_IRQ_DEC;
} else {
now = *next_tb - now;
if (now <= decrementer_max)
set_dec_or_work(now);
}
}
EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
#endif
/*
* timer_interrupt - gets called when the decrementer overflows,
* with interrupts disabled.
......@@ -606,10 +649,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
} else {
now = *next_tb - now;
if (now <= decrementer_max)
set_dec(now);
/* We may have raced with new irq work */
if (test_irq_work_pending())
set_dec(1);
set_dec_or_work(now);
__this_cpu_inc(irq_stat.timer_irqs_others);
}
......@@ -843,11 +883,7 @@ static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev)
{
__this_cpu_write(decrementers_next_tb, get_tb() + evt);
set_dec(evt);
/* We may have raced with new irq work */
if (test_irq_work_pending())
set_dec(1);
set_dec_or_work(evt);
return 0;
}
......
......@@ -131,6 +131,21 @@ config KVM_BOOK3S_HV_EXIT_TIMING
If unsure, say N.
config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND
bool "Nested L0 host workaround for L1 KVM host PMU handling bug" if EXPERT
depends on KVM_BOOK3S_HV_POSSIBLE
default !EXPERT
help
Old nested HV capable Linux guests have a bug where they don't
reflect the PMU in-use status of their L2 guest to the L0 host
while the L2 PMU registers are live. This can result in loss
of L2 PMU register state, causing perf to not work correctly in
L2 guests.
Selecting this option for the L0 host implements a workaround for
those buggy L1s which saves the L2 state, at the cost of performance
in all nested-capable guest entry/exit.
config KVM_BOOKE_HV
bool
......
......@@ -374,11 +374,16 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
BEGIN_FTR_SECTION
mtspr SPRN_DAWRX1,r10
END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
mtspr SPRN_PID,r10
/*
* Switch to host MMU mode
* Switch to host MMU mode (don't have the real host PID but we aren't
* going back to userspace).
*/
hwsync
isync
mtspr SPRN_PID,r10
ld r10, HSTATE_KVM_VCPU(r13)
ld r10, VCPU_KVM(r10)
lwz r10, KVM_HOST_LPID(r10)
......@@ -389,6 +394,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
ld r10, KVM_HOST_LPCR(r10)
mtspr SPRN_LPCR,r10
isync
/*
* Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear
* MSR_RI in r12 ([H]SRR1) so the handler won't try to return.
......
......@@ -57,6 +57,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
preempt_disable();
asm volatile("hwsync" ::: "memory");
isync();
/* switch the lpid first to avoid running host with unallocated pid */
old_lpid = mfspr(SPRN_LPID);
if (old_lpid != lpid)
......@@ -75,6 +77,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
ret = __copy_to_user_inatomic((void __user *)to, from, n);
pagefault_enable();
asm volatile("hwsync" ::: "memory");
isync();
/* switch the pid first to avoid running host with unallocated pid */
if (quadrant == 1 && pid != old_pid)
mtspr(SPRN_PID, old_pid);
......
......@@ -80,6 +80,7 @@
#include <asm/plpar_wrappers.h>
#include "book3s.h"
#include "book3s_hv.h"
#define CREATE_TRACE_POINTS
#include "trace_hv.h"
......@@ -127,11 +128,6 @@ static bool nested = true;
module_param(nested, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)");
static inline bool nesting_enabled(struct kvm *kvm)
{
return kvm->arch.nested_enable && kvm_is_radix(kvm);
}
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
/*
......@@ -276,22 +272,26 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
* they should never fail.)
*/
static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb)
{
unsigned long flags;
WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
spin_lock_irqsave(&vc->stoltb_lock, flags);
vc->preempt_tb = mftb();
vc->preempt_tb = tb;
spin_unlock_irqrestore(&vc->stoltb_lock, flags);
}
static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc, u64 tb)
{
unsigned long flags;
WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
spin_lock_irqsave(&vc->stoltb_lock, flags);
if (vc->preempt_tb != TB_NIL) {
vc->stolen_tb += mftb() - vc->preempt_tb;
vc->stolen_tb += tb - vc->preempt_tb;
vc->preempt_tb = TB_NIL;
}
spin_unlock_irqrestore(&vc->stoltb_lock, flags);
......@@ -301,6 +301,12 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags;
u64 now;
if (cpu_has_feature(CPU_FTR_ARCH_300))
return;
now = mftb();
/*
* We can test vc->runner without taking the vcore lock,
......@@ -309,12 +315,12 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
* ever sets it to NULL.
*/
if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
kvmppc_core_end_stolen(vc);
kvmppc_core_end_stolen(vc, now);
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
vcpu->arch.busy_preempt != TB_NIL) {
vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
vcpu->arch.busy_stolen += now - vcpu->arch.busy_preempt;
vcpu->arch.busy_preempt = TB_NIL;
}
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
......@@ -324,13 +330,19 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags;
u64 now;
if (cpu_has_feature(CPU_FTR_ARCH_300))
return;
now = mftb();
if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
kvmppc_core_start_stolen(vc);
kvmppc_core_start_stolen(vc, now);
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
vcpu->arch.busy_preempt = mftb();
vcpu->arch.busy_preempt = now;
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}
......@@ -675,6 +687,8 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
u64 p;
unsigned long flags;
WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
spin_lock_irqsave(&vc->stoltb_lock, flags);
p = vc->stolen_tb;
if (vc->vcore_state != VCORE_INACTIVE &&
......@@ -684,35 +698,30 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
return p;
}
static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
struct kvmppc_vcore *vc)
static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
unsigned int pcpu, u64 now,
unsigned long stolen)
{
struct dtl_entry *dt;
struct lppaca *vpa;
unsigned long stolen;
unsigned long core_stolen;
u64 now;
unsigned long flags;
dt = vcpu->arch.dtl_ptr;
vpa = vcpu->arch.vpa.pinned_addr;
now = mftb();
core_stolen = vcore_stolen_time(vc, now);
stolen = core_stolen - vcpu->arch.stolen_logged;
vcpu->arch.stolen_logged = core_stolen;
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
stolen += vcpu->arch.busy_stolen;
vcpu->arch.busy_stolen = 0;
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
if (!dt || !vpa)
return;
memset(dt, 0, sizeof(struct dtl_entry));
dt->dispatch_reason = 7;
dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid);
dt->timebase = cpu_to_be64(now + vc->tb_offset);
dt->preempt_reason = 0;
dt->processor_id = cpu_to_be16(pcpu + vcpu->arch.ptid);
dt->enqueue_to_dispatch_time = cpu_to_be32(stolen);
dt->ready_to_enqueue_time = 0;
dt->waiting_to_ready_time = 0;
dt->timebase = cpu_to_be64(now);
dt->fault_addr = 0;
dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu));
dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr);
++dt;
if (dt == vcpu->arch.dtl.pinned_end)
dt = vcpu->arch.dtl.pinned_addr;
......@@ -723,6 +732,27 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
vcpu->arch.dtl.dirty = true;
}
static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
struct kvmppc_vcore *vc)
{
unsigned long stolen;
unsigned long core_stolen;
u64 now;
unsigned long flags;
now = mftb();
core_stolen = vcore_stolen_time(vc, now);
stolen = core_stolen - vcpu->arch.stolen_logged;
vcpu->arch.stolen_logged = core_stolen;
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
stolen += vcpu->arch.busy_stolen;
vcpu->arch.busy_stolen = 0;
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
__kvmppc_create_dtl_entry(vcpu, vc->pcpu, now + vc->tb_offset, stolen);
}
/* See if there is a doorbell interrupt pending for a vcpu */
static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
{
......@@ -731,6 +761,8 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
if (vcpu->arch.doorbell_request)
return true;
if (cpu_has_feature(CPU_FTR_ARCH_300))
return false;
/*
* Ensure that the read of vcore->dpdes comes after the read
* of vcpu->doorbell_request. This barrier matches the
......@@ -900,13 +932,14 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
* mode handler is not called but no other threads are in the
* source vcore.
*/
spin_lock(&vcore->lock);
if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
vcore->vcore_state != VCORE_INACTIVE &&
vcore->runner)
target = vcore->runner;
spin_unlock(&vcore->lock);
if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
spin_lock(&vcore->lock);
if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
vcore->vcore_state != VCORE_INACTIVE &&
vcore->runner)
target = vcore->runner;
spin_unlock(&vcore->lock);
}
return kvm_vcpu_yield_to(target);
}
......@@ -1421,6 +1454,43 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
/*
* If the lppaca had pmcregs_in_use clear when we exited the guest, then
* HFSCR_PM is cleared for next entry. If the guest then tries to access
* the PMU SPRs, we get this facility unavailable interrupt. Putting HFSCR_PM
* back in the guest HFSCR will cause the next entry to load the PMU SPRs and
* allow the guest access to continue.
*/
static int kvmppc_pmu_unavailable(struct kvm_vcpu *vcpu)
{
if (!(vcpu->arch.hfscr_permitted & HFSCR_PM))
return EMULATE_FAIL;
vcpu->arch.hfscr |= HFSCR_PM;
return RESUME_GUEST;
}
static int kvmppc_ebb_unavailable(struct kvm_vcpu *vcpu)
{
if (!(vcpu->arch.hfscr_permitted & HFSCR_EBB))
return EMULATE_FAIL;
vcpu->arch.hfscr |= HFSCR_EBB;
return RESUME_GUEST;
}
static int kvmppc_tm_unavailable(struct kvm_vcpu *vcpu)
{
if (!(vcpu->arch.hfscr_permitted & HFSCR_TM))
return EMULATE_FAIL;
vcpu->arch.hfscr |= HFSCR_TM;
return RESUME_GUEST;
}
static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
......@@ -1451,6 +1521,10 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
run->ready_for_interrupt_injection = 1;
switch (vcpu->arch.trap) {
/* We're good on these - the host merely wanted to get our attention */
case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
WARN_ON_ONCE(1); /* Should never happen */
vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
fallthrough;
case BOOK3S_INTERRUPT_HV_DECREMENTER:
vcpu->stat.dec_exits++;
r = RESUME_GUEST;
......@@ -1575,7 +1649,8 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
unsigned long vsid;
long err;
if (vcpu->arch.fault_dsisr == HDSISR_CANARY) {
if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
unlikely(vcpu->arch.fault_dsisr == HDSISR_CANARY)) {
r = RESUME_GUEST; /* Just retry if it's the canary */
break;
}
......@@ -1702,16 +1777,26 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
* to emulate.
* Otherwise, we just generate a program interrupt to the guest.
*/
case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
u64 cause = vcpu->arch.hfscr >> 56;
r = EMULATE_FAIL;
if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
cpu_has_feature(CPU_FTR_ARCH_300))
r = kvmppc_emulate_doorbell_instr(vcpu);
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (cause == FSCR_MSGP_LG)
r = kvmppc_emulate_doorbell_instr(vcpu);
if (cause == FSCR_PM_LG)
r = kvmppc_pmu_unavailable(vcpu);
if (cause == FSCR_EBB_LG)
r = kvmppc_ebb_unavailable(vcpu);
if (cause == FSCR_TM_LG)
r = kvmppc_tm_unavailable(vcpu);
}
if (r == EMULATE_FAIL) {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
}
break;
}
case BOOK3S_INTERRUPT_HV_RM_HARD:
r = RESUME_PASSTHROUGH;
......@@ -1768,6 +1853,12 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
/* These need to go to the nested HV */
case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
vcpu->stat.dec_exits++;
r = RESUME_HOST;
break;
/* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
case BOOK3S_INTERRUPT_HMI:
case BOOK3S_INTERRUPT_PERFMON:
......@@ -2096,8 +2187,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
* either vcore->dpdes or doorbell_request.
* On POWER8, doorbell_request is 0.
*/
*val = get_reg_val(id, vcpu->arch.vcore->dpdes |
vcpu->arch.doorbell_request);
if (cpu_has_feature(CPU_FTR_ARCH_300))
*val = get_reg_val(id, vcpu->arch.doorbell_request);
else
*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
break;
case KVM_REG_PPC_VTB:
*val = get_reg_val(id, vcpu->arch.vcore->vtb);
......@@ -2238,8 +2331,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
break;
case KVM_REG_PPC_DEC_EXPIRY:
*val = get_reg_val(id, vcpu->arch.dec_expires +
vcpu->arch.vcore->tb_offset);
*val = get_reg_val(id, vcpu->arch.dec_expires);
break;
case KVM_REG_PPC_ONLINE:
*val = get_reg_val(id, vcpu->arch.online);
......@@ -2335,7 +2427,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
vcpu->arch.pspb = set_reg_val(id, *val);
break;
case KVM_REG_PPC_DPDES:
vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
if (cpu_has_feature(CPU_FTR_ARCH_300))
vcpu->arch.doorbell_request = set_reg_val(id, *val) & 1;
else
vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
break;
case KVM_REG_PPC_VTB:
vcpu->arch.vcore->vtb = set_reg_val(id, *val);
......@@ -2491,8 +2586,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_DEC_EXPIRY:
vcpu->arch.dec_expires = set_reg_val(id, *val) -
vcpu->arch.vcore->tb_offset;
vcpu->arch.dec_expires = set_reg_val(id, *val);
break;
case KVM_REG_PPC_ONLINE:
i = set_reg_val(id, *val);
......@@ -2715,6 +2809,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
#endif
#endif
vcpu->arch.mmcr[0] = MMCR0_FC;
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT;
vcpu->arch.mmcra = MMCRA_BHRB_DISABLE;
}
vcpu->arch.ctrl = CTRL_RUNLATCH;
/* default to host PVR, since we can't spoof it */
kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
......@@ -2745,6 +2844,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
vcpu->arch.hfscr_permitted = vcpu->arch.hfscr;
/*
* PM, EBB, TM are demand-faulted so start with it clear.
*/
vcpu->arch.hfscr &= ~(HFSCR_PM | HFSCR_EBB | HFSCR_TM);
kvmppc_mmu_book3s_hv_init(vcpu);
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
......@@ -2869,13 +2973,13 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
unsigned long dec_nsec, now;
now = get_tb();
if (now > vcpu->arch.dec_expires) {
if (now > kvmppc_dec_expires_host_tb(vcpu)) {
/* decrementer has already gone negative */
kvmppc_core_queue_dec(vcpu);
kvmppc_core_prepare_to_enter(vcpu);
return;
}
dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now);
dec_nsec = tb_to_ns(kvmppc_dec_expires_host_tb(vcpu) - now);
hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
vcpu->arch.timer_running = 1;
}
......@@ -2883,14 +2987,14 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
extern int __kvmppc_vcore_entry(void);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
struct kvm_vcpu *vcpu)
struct kvm_vcpu *vcpu, u64 tb)
{
u64 now;
if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
return;
spin_lock_irq(&vcpu->arch.tbacct_lock);
now = mftb();
now = tb;
vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
vcpu->arch.stolen_logged;
vcpu->arch.busy_preempt = now;
......@@ -2945,30 +3049,59 @@ static void kvmppc_release_hwthread(int cpu)
tpaca->kvm_hstate.kvm_split_mode = NULL;
}
static DEFINE_PER_CPU(struct kvm *, cpu_in_guest);
static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
{
struct kvm_nested_guest *nested = vcpu->arch.nested;
cpumask_t *cpu_in_guest;
cpumask_t *need_tlb_flush;
int i;
if (nested)
need_tlb_flush = &nested->need_tlb_flush;
else
need_tlb_flush = &kvm->arch.need_tlb_flush;
cpu = cpu_first_tlb_thread_sibling(cpu);
if (nested) {
cpumask_set_cpu(cpu, &nested->need_tlb_flush);
cpu_in_guest = &nested->cpu_in_guest;
} else {
cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
cpu_in_guest = &kvm->arch.cpu_in_guest;
}
for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
i += cpu_tlb_thread_sibling_step())
cpumask_set_cpu(i, need_tlb_flush);
/*
* Make sure setting of bit in need_tlb_flush precedes
* testing of cpu_in_guest bits. The matching barrier on
* the other side is the first smp_mb() in kvmppc_run_core().
* Make sure setting of bit in need_tlb_flush precedes testing of
* cpu_in_guest. The matching barrier on the other side is hwsync
* when switching to guest MMU mode, which happens between
* cpu_in_guest being set to the guest kvm, and need_tlb_flush bit
* being tested.
*/
smp_mb();
for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
i += cpu_tlb_thread_sibling_step())
if (cpumask_test_cpu(i, cpu_in_guest))
i += cpu_tlb_thread_sibling_step()) {
struct kvm *running = *per_cpu_ptr(&cpu_in_guest, i);
if (running == kvm)
smp_call_function_single(i, do_nothing, NULL, 1);
}
}
static void do_migrate_away_vcpu(void *arg)
{
struct kvm_vcpu *vcpu = arg;
struct kvm *kvm = vcpu->kvm;
/*
* If the guest has GTSE, it may execute tlbie, so do a eieio; tlbsync;
* ptesync sequence on the old CPU before migrating to a new one, in
* case we interrupted the guest between a tlbie ; eieio ;
* tlbsync; ptesync sequence.
*
* Otherwise, ptesync is sufficient for ordering tlbiel sequences.
*/
if (kvm->arch.lpcr & LPCR_GTSE)
asm volatile("eieio; tlbsync; ptesync");
else
asm volatile("ptesync");
}
static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
......@@ -2994,14 +3127,17 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
* can move around between pcpus. To cope with this, when
* a vcpu moves from one pcpu to another, we need to tell
* any vcpus running on the same core as this vcpu previously
* ran to flush the TLB. The TLB is shared between threads,
* so we use a single bit in .need_tlb_flush for all 4 threads.
* ran to flush the TLB.
*/
if (prev_cpu != pcpu) {
if (prev_cpu >= 0 &&
cpu_first_tlb_thread_sibling(prev_cpu) !=
cpu_first_tlb_thread_sibling(pcpu))
radix_flush_cpu(kvm, prev_cpu, vcpu);
if (prev_cpu >= 0) {
if (cpu_first_tlb_thread_sibling(prev_cpu) !=
cpu_first_tlb_thread_sibling(pcpu))
radix_flush_cpu(kvm, prev_cpu, vcpu);
smp_call_function_single(prev_cpu,
do_migrate_away_vcpu, vcpu, 1);
}
if (nested)
nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
else
......@@ -3013,7 +3149,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
{
int cpu;
struct paca_struct *tpaca;
struct kvm *kvm = vc->kvm;
cpu = vc->pcpu;
if (vcpu) {
......@@ -3024,7 +3159,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
cpu += vcpu->arch.ptid;
vcpu->cpu = vc->pcpu;
vcpu->arch.thread_cpu = cpu;
cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
}
tpaca = paca_ptrs[cpu];
tpaca->kvm_hstate.kvm_vcpu = vcpu;
......@@ -3125,6 +3259,8 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
{
struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
vc->vcore_state = VCORE_PREEMPT;
vc->pcpu = smp_processor_id();
if (vc->num_threads < threads_per_vcore(vc->kvm)) {
......@@ -3134,14 +3270,16 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
}
/* Start accumulating stolen time */
kvmppc_core_start_stolen(vc);
kvmppc_core_start_stolen(vc, mftb());
}
static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
{
struct preempted_vcore_list *lp;
kvmppc_core_end_stolen(vc);
WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
kvmppc_core_end_stolen(vc, mftb());
if (!list_empty(&vc->preempt_list)) {
lp = &per_cpu(preempted_vcores, vc->pcpu);
spin_lock(&lp->lock);
......@@ -3268,7 +3406,7 @@ static void prepare_threads(struct kvmppc_vcore *vc)
vcpu->arch.ret = RESUME_GUEST;
else
continue;
kvmppc_remove_runnable(vc, vcpu);
kvmppc_remove_runnable(vc, vcpu, mftb());
wake_up(&vcpu->arch.cpu_run);
}
}
......@@ -3287,7 +3425,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
list_del_init(&pvc->preempt_list);
if (pvc->runner == NULL) {
pvc->vcore_state = VCORE_INACTIVE;
kvmppc_core_end_stolen(pvc);
kvmppc_core_end_stolen(pvc, mftb());
}
spin_unlock(&pvc->lock);
continue;
......@@ -3296,7 +3434,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
spin_unlock(&pvc->lock);
continue;
}
kvmppc_core_end_stolen(pvc);
kvmppc_core_end_stolen(pvc, mftb());
pvc->vcore_state = VCORE_PIGGYBACK;
if (cip->total_threads >= target_threads)
break;
......@@ -3340,7 +3478,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
*/
spin_unlock(&vc->lock);
/* cancel pending dec exception if dec is positive */
if (now < vcpu->arch.dec_expires &&
if (now < kvmppc_dec_expires_host_tb(vcpu) &&
kvmppc_core_pending_dec(vcpu))
kvmppc_core_dequeue_dec(vcpu);
......@@ -3363,7 +3501,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
else
++still_running;
} else {
kvmppc_remove_runnable(vc, vcpu);
kvmppc_remove_runnable(vc, vcpu, mftb());
wake_up(&vcpu->arch.cpu_run);
}
}
......@@ -3372,7 +3510,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
kvmppc_vcore_preempt(vc);
} else if (vc->runner) {
vc->vcore_state = VCORE_PREEMPT;
kvmppc_core_start_stolen(vc);
kvmppc_core_start_stolen(vc, mftb());
} else {
vc->vcore_state = VCORE_INACTIVE;
}
......@@ -3503,7 +3641,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
for_each_runnable_thread(i, vcpu, vc) {
vcpu->arch.ret = -EBUSY;
kvmppc_remove_runnable(vc, vcpu);
kvmppc_remove_runnable(vc, vcpu, mftb());
wake_up(&vcpu->arch.cpu_run);
}
goto out;
......@@ -3748,7 +3886,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_release_hwthread(pcpu + i);
if (sip && sip->napped[i])
kvmppc_ipi_thread(pcpu + i);
cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
}
spin_unlock(&vc->lock);
......@@ -3770,211 +3907,137 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 1);
}
static void load_spr_state(struct kvm_vcpu *vcpu)
{
mtspr(SPRN_DSCR, vcpu->arch.dscr);
mtspr(SPRN_IAMR, vcpu->arch.iamr);
mtspr(SPRN_PSPB, vcpu->arch.pspb);
mtspr(SPRN_FSCR, vcpu->arch.fscr);
mtspr(SPRN_TAR, vcpu->arch.tar);
mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
mtspr(SPRN_BESCR, vcpu->arch.bescr);
mtspr(SPRN_TIDR, vcpu->arch.tid);
mtspr(SPRN_AMR, vcpu->arch.amr);
mtspr(SPRN_UAMOR, vcpu->arch.uamor);
/*
* DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
* clear (or hstate set appropriately to catch those registers
* being clobbered if we take a MCE or SRESET), so those are done
* later.
*/
if (!(vcpu->arch.ctrl & 1))
mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
}
static void store_spr_state(struct kvm_vcpu *vcpu)
{
vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
vcpu->arch.iamr = mfspr(SPRN_IAMR);
vcpu->arch.pspb = mfspr(SPRN_PSPB);
vcpu->arch.fscr = mfspr(SPRN_FSCR);
vcpu->arch.tar = mfspr(SPRN_TAR);
vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
vcpu->arch.bescr = mfspr(SPRN_BESCR);
vcpu->arch.tid = mfspr(SPRN_TIDR);
vcpu->arch.amr = mfspr(SPRN_AMR);
vcpu->arch.uamor = mfspr(SPRN_UAMOR);
vcpu->arch.dscr = mfspr(SPRN_DSCR);
}
/*
* Privileged (non-hypervisor) host registers to save.
*/
struct p9_host_os_sprs {
unsigned long dscr;
unsigned long tidr;
unsigned long iamr;
unsigned long amr;
unsigned long fscr;
};
static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
{
host_os_sprs->dscr = mfspr(SPRN_DSCR);
host_os_sprs->tidr = mfspr(SPRN_TIDR);
host_os_sprs->iamr = mfspr(SPRN_IAMR);
host_os_sprs->amr = mfspr(SPRN_AMR);
host_os_sprs->fscr = mfspr(SPRN_FSCR);
}
/* vcpu guest regs must already be saved */
static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs)
{
mtspr(SPRN_PSPB, 0);
mtspr(SPRN_UAMOR, 0);
mtspr(SPRN_DSCR, host_os_sprs->dscr);
mtspr(SPRN_TIDR, host_os_sprs->tidr);
mtspr(SPRN_IAMR, host_os_sprs->iamr);
if (host_os_sprs->amr != vcpu->arch.amr)
mtspr(SPRN_AMR, host_os_sprs->amr);
if (host_os_sprs->fscr != vcpu->arch.fscr)
mtspr(SPRN_FSCR, host_os_sprs->fscr);
/* Save guest CTRL register, set runlatch to 1 */
if (!(vcpu->arch.ctrl & 1))
mtspr(SPRN_CTRLT, 1);
}
static inline bool hcall_is_xics(unsigned long req)
{
return req == H_EOI || req == H_CPPR || req == H_IPI ||
req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
}
/*
* Guest entry for POWER9 and later CPUs.
*/
static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr)
static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
{
struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
if (lp) {
u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
lp->yield_count = cpu_to_be32(yield_count);
vcpu->arch.vpa.dirty = 1;
}
}
/* call our hypervisor to load up HV regs and go */
static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long host_psscr;
unsigned long msr;
struct hv_guest_state hvregs;
struct p9_host_os_sprs host_os_sprs;
s64 dec;
u64 tb;
int trap, save_pmu;
WARN_ON_ONCE(vcpu->arch.ceded);
int trap;
dec = mfspr(SPRN_DEC);
tb = mftb();
if (dec < 0)
return BOOK3S_INTERRUPT_HV_DECREMENTER;
local_paca->kvm_hstate.dec_expires = dec + tb;
if (local_paca->kvm_hstate.dec_expires < time_limit)
time_limit = local_paca->kvm_hstate.dec_expires;
msr = mfmsr();
save_p9_host_os_sprs(&host_os_sprs);
kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */
/*
* We need to save and restore the guest visible part of the
* psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
* doesn't do this for us. Note only required if pseries since
* this is done in kvmhv_vcpu_entry_p9() below otherwise.
*/
host_psscr = mfspr(SPRN_PSSCR_PR);
kvmppc_subcore_enter_guest();
kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
if (lazy_irq_pending())
return 0;
vc->entry_exit_map = 1;
vc->in_guest = 1;
if (unlikely(load_vcpu_state(vcpu, &host_os_sprs)))
msr = mfmsr(); /* TM restore can update msr */
if (vcpu->arch.vpa.pinned_addr) {
struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
lp->yield_count = cpu_to_be32(yield_count);
vcpu->arch.vpa.dirty = 1;
}
if (cpu_has_feature(CPU_FTR_TM) ||
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
if (vcpu->arch.psscr != host_psscr)
mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
#ifdef CONFIG_PPC_PSERIES
if (kvmhv_on_pseries()) {
barrier();
if (vcpu->arch.vpa.pinned_addr) {
struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use;
} else {
get_lppaca()->pmcregs_in_use = 1;
}
barrier();
kvmhv_save_hv_regs(vcpu, &hvregs);
hvregs.lpcr = lpcr;
vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
hvregs.version = HV_GUEST_STATE_VERSION;
if (vcpu->arch.nested) {
hvregs.lpid = vcpu->arch.nested->shadow_lpid;
hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
} else {
hvregs.lpid = vcpu->kvm->arch.lpid;
hvregs.vcpu_token = vcpu->vcpu_id;
}
#endif
kvmhv_load_guest_pmu(vcpu);
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
load_fp_state(&vcpu->arch.fp);
#ifdef CONFIG_ALTIVEC
load_vr_state(&vcpu->arch.vr);
#endif
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
load_spr_state(vcpu);
hvregs.hdec_expiry = time_limit;
/*
* When setting DEC, we must always deal with irq_work_raise via NMI vs
* setting DEC. The problem occurs right as we switch into guest mode
* if a NMI hits and sets pending work and sets DEC, then that will
* apply to the guest and not bring us back to the host.
* When setting DEC, we must always deal with irq_work_raise
* via NMI vs setting DEC. The problem occurs right as we
* switch into guest mode if a NMI hits and sets pending work
* and sets DEC, then that will apply to the guest and not
* bring us back to the host.
*
* irq_work_raise could check a flag (or possibly LPCR[HDICE] for
* example) and set HDEC to 1? That wouldn't solve the nested hv
* case which needs to abort the hcall or zero the time limit.
* irq_work_raise could check a flag (or possibly LPCR[HDICE]
* for example) and set HDEC to 1? That wouldn't solve the
* nested hv case which needs to abort the hcall or zero the
* time limit.
*
* XXX: Another day's problem.
*/
mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
mtspr(SPRN_DEC, kvmppc_dec_expires_host_tb(vcpu) - *tb);
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
switch_pmu_to_guest(vcpu, &host_os_sprs);
trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
__pa(&vcpu->arch.regs));
kvmhv_restore_hv_return_state(vcpu, &hvregs);
switch_pmu_to_host(vcpu, &host_os_sprs);
vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
store_vcpu_state(vcpu);
if (kvmhv_on_pseries()) {
/*
* We need to save and restore the guest visible part of the
* psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
* doesn't do this for us. Note only required if pseries since
* this is done in kvmhv_vcpu_entry_p9() below otherwise.
*/
unsigned long host_psscr;
/* call our hypervisor to load up HV regs and go */
struct hv_guest_state hvregs;
dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
dec = (s32) dec;
*tb = mftb();
vcpu->arch.dec_expires = dec + (*tb + vc->tb_offset);
host_psscr = mfspr(SPRN_PSSCR_PR);
mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
kvmhv_save_hv_regs(vcpu, &hvregs);
hvregs.lpcr = lpcr;
vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
hvregs.version = HV_GUEST_STATE_VERSION;
if (vcpu->arch.nested) {
hvregs.lpid = vcpu->arch.nested->shadow_lpid;
hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
} else {
hvregs.lpid = vcpu->kvm->arch.lpid;
hvregs.vcpu_token = vcpu->vcpu_id;
}
hvregs.hdec_expiry = time_limit;
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
__pa(&vcpu->arch.regs));
kvmhv_restore_hv_return_state(vcpu, &hvregs);
vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
timer_rearm_host_dec(*tb);
restore_p9_host_os_sprs(vcpu, &host_os_sprs);
if (vcpu->arch.psscr != host_psscr)
mtspr(SPRN_PSSCR_PR, host_psscr);
return trap;
}
/*
* Guest entry for POWER9 and later CPUs.
*/
static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr, u64 *tb)
{
u64 next_timer;
int trap;
next_timer = timer_get_next_tb();
if (*tb >= next_timer)
return BOOK3S_INTERRUPT_HV_DECREMENTER;
if (next_timer < time_limit)
time_limit = next_timer;
else if (*tb >= time_limit) /* nested time limit */
return BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER;
vcpu->arch.ceded = 0;
vcpu_vpa_increment_dispatch(vcpu);
if (kvmhv_on_pseries()) {
trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb);
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
......@@ -3982,9 +4045,16 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
kvmppc_set_gpr(vcpu, 3, 0);
trap = 0;
}
} else {
struct kvm *kvm = vcpu->kvm;
kvmppc_xive_push_vcpu(vcpu);
trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr);
__this_cpu_write(cpu_in_guest, kvm);
trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
__this_cpu_write(cpu_in_guest, NULL);
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
!(vcpu->arch.shregs.msr & MSR_PR)) {
unsigned long req = kvmppc_get_gpr(vcpu, 3);
......@@ -4009,65 +4079,11 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
}
kvmppc_xive_pull_vcpu(vcpu);
if (kvm_is_radix(vcpu->kvm))
if (kvm_is_radix(kvm))
vcpu->arch.slb_max = 0;
}
dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
dec = (s32) dec;
tb = mftb();
vcpu->arch.dec_expires = dec + tb;
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
store_spr_state(vcpu);
restore_p9_host_os_sprs(vcpu, &host_os_sprs);
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
store_fp_state(&vcpu->arch.fp);
#ifdef CONFIG_ALTIVEC
store_vr_state(&vcpu->arch.vr);
#endif
vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
if (cpu_has_feature(CPU_FTR_TM) ||
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
save_pmu = 1;
if (vcpu->arch.vpa.pinned_addr) {
struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
lp->yield_count = cpu_to_be32(yield_count);
vcpu->arch.vpa.dirty = 1;
save_pmu = lp->pmcregs_in_use;
}
/* Must save pmu if this guest is capable of running nested guests */
save_pmu |= nesting_enabled(vcpu->kvm);
kvmhv_save_guest_pmu(vcpu, save_pmu);
#ifdef CONFIG_PPC_PSERIES
if (kvmhv_on_pseries()) {
barrier();
get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
barrier();
}
#endif
vc->entry_exit_map = 0x101;
vc->in_guest = 0;
mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
/* We may have raced with new irq work */
if (test_irq_work_pending())
set_dec(1);
mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
kvmhv_load_host_pmu();
kvmppc_subcore_exit_guest();
vcpu_vpa_increment_dispatch(vcpu);
return trap;
}
......@@ -4132,6 +4148,13 @@ static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
return false;
}
static bool kvmppc_vcpu_check_block(struct kvm_vcpu *vcpu)
{
if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
return true;
return false;
}
/*
* Check to see if any of the runnable vcpus on the vcore have pending
* exceptions or are no longer ceded
......@@ -4142,7 +4165,7 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
int i;
for_each_runnable_thread(i, vcpu, vc) {
if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
if (kvmppc_vcpu_check_block(vcpu))
return 1;
}
......@@ -4159,6 +4182,8 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
int do_sleep = 1;
u64 block_ns;
WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
/* Poll for pending exceptions and ceded state */
cur = start_poll = ktime_get();
if (vc->halt_poll_ns) {
......@@ -4355,7 +4380,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
for_each_runnable_thread(i, v, vc) {
kvmppc_core_prepare_to_enter(v);
if (signal_pending(v->arch.run_task)) {
kvmppc_remove_runnable(vc, v);
kvmppc_remove_runnable(vc, v, mftb());
v->stat.signal_exits++;
v->run->exit_reason = KVM_EXIT_INTR;
v->arch.ret = -EINTR;
......@@ -4396,7 +4421,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
kvmppc_vcore_end_preempt(vc);
if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
kvmppc_remove_runnable(vc, vcpu);
kvmppc_remove_runnable(vc, vcpu, mftb());
vcpu->stat.signal_exits++;
run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
......@@ -4417,12 +4442,15 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr)
{
struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
struct kvm_run *run = vcpu->run;
int trap, r, pcpu;
int srcu_idx;
struct kvmppc_vcore *vc;
struct kvm *kvm = vcpu->kvm;
struct kvm_nested_guest *nested = vcpu->arch.nested;
unsigned long flags;
u64 tb;
trace_kvmppc_run_vcpu_enter(vcpu);
......@@ -4433,16 +4461,11 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc = vcpu->arch.vcore;
vcpu->arch.ceded = 0;
vcpu->arch.run_task = current;
vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
vcpu->arch.busy_preempt = TB_NIL;
vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
vc->runnable_threads[0] = vcpu;
vc->n_runnable = 1;
vc->runner = vcpu;
/* See if the MMU is ready to go */
if (!kvm->arch.mmu_ready) {
if (unlikely(!kvm->arch.mmu_ready)) {
r = kvmhv_setup_mmu(vcpu);
if (r) {
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
......@@ -4457,29 +4480,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
kvmppc_update_vpas(vcpu);
init_vcore_to_run(vc);
vc->preempt_tb = TB_NIL;
preempt_disable();
pcpu = smp_processor_id();
vc->pcpu = pcpu;
if (kvm_is_radix(kvm))
kvmppc_prepare_radix_vcpu(vcpu, pcpu);
local_irq_disable();
hard_irq_disable();
/* flags save not required, but irq_pmu has no disable/enable API */
powerpc_local_irq_pmu_save(flags);
if (signal_pending(current))
goto sigpend;
if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready)
if (need_resched() || !kvm->arch.mmu_ready)
goto out;
if (!nested) {
kvmppc_core_prepare_to_enter(vcpu);
if (vcpu->arch.doorbell_request) {
vc->dpdes = 1;
smp_wmb();
vcpu->arch.doorbell_request = 0;
}
if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
&vcpu->arch.pending_exceptions))
lpcr |= LPCR_MER;
......@@ -4490,16 +4505,23 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
goto out;
}
kvmppc_clear_host_core(pcpu);
if (vcpu->arch.timer_running) {
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
vcpu->arch.timer_running = 0;
}
local_paca->kvm_hstate.napping = 0;
local_paca->kvm_hstate.kvm_split_mode = NULL;
kvmppc_start_thread(vcpu, vc);
kvmppc_create_dtl_entry(vcpu, vc);
trace_kvm_guest_enter(vcpu);
tb = mftb();
vc->vcore_state = VCORE_RUNNING;
trace_kvmppc_run_core(vc, 0);
vcpu->cpu = pcpu;
vcpu->arch.thread_cpu = pcpu;
vc->pcpu = pcpu;
local_paca->kvm_hstate.kvm_vcpu = vcpu;
local_paca->kvm_hstate.ptid = 0;
local_paca->kvm_hstate.fake_suspend = 0;
__kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0);
trace_kvm_guest_enter(vcpu);
guest_enter_irqoff();
......@@ -4510,7 +4532,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
/* Tell lockdep that we're about to enable interrupts */
trace_hardirqs_on();
trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr);
trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr, &tb);
vcpu->arch.trap = trap;
trace_hardirqs_off();
......@@ -4521,8 +4543,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
set_irq_happened(trap);
kvmppc_set_host_core(pcpu);
context_tracking_guest_exit();
if (!vtime_accounting_enabled_this_cpu()) {
local_irq_enable();
......@@ -4538,9 +4558,10 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
}
vtime_account_guest_exit();
local_irq_enable();
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
powerpc_local_irq_pmu_restore(flags);
preempt_enable();
......@@ -4550,7 +4571,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
* by L2 and the L1 decrementer is provided in hdec_expires
*/
if (kvmppc_core_pending_dec(vcpu) &&
((get_tb() < vcpu->arch.dec_expires) ||
((tb < kvmppc_dec_expires_host_tb(vcpu)) ||
(trap == BOOK3S_INTERRUPT_SYSCALL &&
kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED)))
kvmppc_core_dequeue_dec(vcpu);
......@@ -4565,28 +4586,31 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
}
vcpu->arch.ret = r;
if (is_kvmppc_resume_guest(r) && vcpu->arch.ceded &&
!kvmppc_vcpu_woken(vcpu)) {
if (is_kvmppc_resume_guest(r) && !kvmppc_vcpu_check_block(vcpu)) {
kvmppc_set_timer(vcpu);
while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) {
prepare_to_rcuwait(wait);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (signal_pending(current)) {
vcpu->stat.signal_exits++;
run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
break;
}
spin_lock(&vc->lock);
kvmppc_vcore_blocked(vc);
spin_unlock(&vc->lock);
if (kvmppc_vcpu_check_block(vcpu))
break;
trace_kvmppc_vcore_blocked(vc, 0);
schedule();
trace_kvmppc_vcore_blocked(vc, 1);
}
finish_rcuwait(wait);
}
vcpu->arch.ceded = 0;
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_run_core(vc, 1);
done:
kvmppc_remove_runnable(vc, vcpu);
trace_kvmppc_run_vcpu_exit(vcpu);
return vcpu->arch.ret;
......@@ -4596,7 +4620,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
out:
local_irq_enable();
powerpc_local_irq_pmu_restore(flags);
preempt_enable();
goto done;
}
......@@ -4606,23 +4630,25 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
struct kvm_run *run = vcpu->run;
int r;
int srcu_idx;
unsigned long ebb_regs[3] = {}; /* shut up GCC */
unsigned long user_tar = 0;
unsigned int user_vrsave;
struct kvm *kvm;
unsigned long msr;
if (!vcpu->arch.sane) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
/* No need to go into the guest when all we'll do is come back out */
if (signal_pending(current)) {
run->exit_reason = KVM_EXIT_INTR;
return -EINTR;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
* Don't allow entry with a suspended transaction, because
* the guest entry/exit code will lose it.
* If the guest has TM enabled, save away their TM-related SPRs
* (they will get restored by the TM unavailable interrupt).
*/
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
(current->thread.regs->msr & MSR_TM)) {
if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
......@@ -4630,12 +4656,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
run->fail_entry.hardware_entry_failure_reason = 0;
return -EINVAL;
}
/* Enable TM so we can read the TM SPRs */
mtmsr(mfmsr() | MSR_TM);
current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
current->thread.tm_texasr = mfspr(SPRN_TEXASR);
current->thread.regs->msr &= ~MSR_TM;
}
#endif
......@@ -4650,29 +4670,30 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
kvmppc_core_prepare_to_enter(vcpu);
/* No need to go into the guest when all we'll do is come back out */
if (signal_pending(current)) {
run->exit_reason = KVM_EXIT_INTR;
return -EINTR;
}
kvm = vcpu->kvm;
atomic_inc(&kvm->arch.vcpus_running);
/* Order vcpus_running vs. mmu_ready, see kvmppc_alloc_reset_hpt */
smp_mb();
flush_all_to_thread(current);
msr = 0;
if (IS_ENABLED(CONFIG_PPC_FPU))
msr |= MSR_FP;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
msr |= MSR_VEC;
if (cpu_has_feature(CPU_FTR_VSX))
msr |= MSR_VSX;
if ((cpu_has_feature(CPU_FTR_TM) ||
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
(vcpu->arch.hfscr & HFSCR_TM))
msr |= MSR_TM;
msr = msr_check_and_set(msr);
/* Save userspace EBB and other register values */
if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
ebb_regs[0] = mfspr(SPRN_EBBHR);
ebb_regs[1] = mfspr(SPRN_EBBRR);
ebb_regs[2] = mfspr(SPRN_BESCR);
user_tar = mfspr(SPRN_TAR);
}
user_vrsave = mfspr(SPRN_VRSAVE);
kvmppc_save_user_regs();
vcpu->arch.waitp = &vcpu->arch.vcore->wait;
kvmppc_save_current_sprs();
if (!cpu_has_feature(CPU_FTR_ARCH_300))
vcpu->arch.waitp = &vcpu->arch.vcore->wait;
vcpu->arch.pgdir = kvm->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
......@@ -4711,15 +4732,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
}
} while (is_kvmppc_resume_guest(r));
/* Restore userspace EBB and other register values */
if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
mtspr(SPRN_EBBHR, ebb_regs[0]);
mtspr(SPRN_EBBRR, ebb_regs[1]);
mtspr(SPRN_BESCR, ebb_regs[2]);
mtspr(SPRN_TAR, user_tar);
}
mtspr(SPRN_VRSAVE, user_vrsave);
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
atomic_dec(&kvm->arch.vcpus_running);
......@@ -4859,8 +4871,12 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
enum kvm_mr_change change)
{
if (change == KVM_MR_CREATE) {
new->arch.rmap = vzalloc(array_size(new->npages,
sizeof(*new->arch.rmap)));
unsigned long size = array_size(new->npages, sizeof(*new->arch.rmap));
if ((size >> PAGE_SHIFT) > totalram_pages())
return -ENOMEM;
new->arch.rmap = vzalloc(size);
if (!new->arch.rmap)
return -ENOMEM;
} else if (change != KVM_MR_DELETE) {
......@@ -5069,6 +5085,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
*/
int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
{
unsigned long lpcr, lpcr_mask;
if (nesting_enabled(kvm))
kvmhv_release_all_nested(kvm);
kvmppc_rmap_reset(kvm);
......@@ -5078,8 +5096,13 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
kvm->arch.radix = 0;
spin_unlock(&kvm->mmu_lock);
kvmppc_free_radix(kvm);
kvmppc_update_lpcr(kvm, LPCR_VPM1,
LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
lpcr = LPCR_VPM1;
lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR;
if (cpu_has_feature(CPU_FTR_ARCH_31))
lpcr_mask |= LPCR_HAIL;
kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
return 0;
}
......@@ -5089,6 +5112,7 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
*/
int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
{
unsigned long lpcr, lpcr_mask;
int err;
err = kvmppc_init_vm_radix(kvm);
......@@ -5100,8 +5124,17 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
kvm->arch.radix = 1;
spin_unlock(&kvm->mmu_lock);
kvmppc_free_hpt(&kvm->arch.hpt);
kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
lpcr = LPCR_UPRT | LPCR_GTSE | LPCR_HR;
lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR;
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
lpcr_mask |= LPCR_HAIL;
if (cpu_has_feature(CPU_FTR_HVMODE) &&
(kvm->arch.host_lpcr & LPCR_HAIL))
lpcr |= LPCR_HAIL;
}
kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
return 0;
}
......@@ -5123,6 +5156,9 @@ void kvmppc_alloc_host_rm_ops(void)
int cpu, core;
int size;
if (cpu_has_feature(CPU_FTR_ARCH_300))
return;
/* Not the first time here ? */
if (kvmppc_host_rm_ops_hv != NULL)
return;
......@@ -5265,6 +5301,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
kvm->arch.mmu_ready = 1;
lpcr &= ~LPCR_VPM1;
lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
if (cpu_has_feature(CPU_FTR_HVMODE) &&
cpu_has_feature(CPU_FTR_ARCH_31) &&
(kvm->arch.host_lpcr & LPCR_HAIL))
lpcr |= LPCR_HAIL;
ret = kvmppc_init_vm_radix(kvm);
if (ret) {
kvmppc_free_lpid(kvm->arch.lpid);
......@@ -6061,9 +6101,11 @@ static int kvmppc_book3s_init_hv(void)
if (r)
return r;
r = kvm_init_subcore_bitmap();
if (r)
return r;
if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
r = kvm_init_subcore_bitmap();
if (r)
return r;
}
/*
* We need a way of accessing the XICS interrupt controller,
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Privileged (non-hypervisor) host registers to save.
*/
struct p9_host_os_sprs {
unsigned long iamr;
unsigned long amr;
unsigned int pmc1;
unsigned int pmc2;
unsigned int pmc3;
unsigned int pmc4;
unsigned int pmc5;
unsigned int pmc6;
unsigned long mmcr0;
unsigned long mmcr1;
unsigned long mmcr2;
unsigned long mmcr3;
unsigned long mmcra;
unsigned long siar;
unsigned long sier1;
unsigned long sier2;
unsigned long sier3;
unsigned long sdar;
};
static inline bool nesting_enabled(struct kvm *kvm)
{
return kvm->arch.nested_enable && kvm_is_radix(kvm);
}
bool load_vcpu_state(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs);
void store_vcpu_state(struct kvm_vcpu *vcpu);
void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs);
void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs);
void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs);
void switch_pmu_to_host(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs);
......@@ -649,6 +649,8 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
int ext;
unsigned long lpcr;
WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
/* Insert EXTERNAL bit into LPCR at the MER bit position */
ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
lpcr = mfspr(SPRN_LPCR);
......@@ -682,60 +684,23 @@ static void flush_guest_tlb(struct kvm *kvm)
unsigned long rb, set;
rb = PPC_BIT(52); /* IS = 2 */
if (kvm_is_radix(kvm)) {
/* R=1 PRS=1 RIC=2 */
for (set = 0; set < kvm->arch.tlb_sets; ++set) {
/* R=0 PRS=0 RIC=0 */
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r" (rb), "i" (1), "i" (1), "i" (2),
: : "r" (rb), "i" (0), "i" (0), "i" (0),
"r" (0) : "memory");
for (set = 1; set < kvm->arch.tlb_sets; ++set) {
rb += PPC_BIT(51); /* increment set number */
/* R=1 PRS=1 RIC=0 */
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r" (rb), "i" (1), "i" (1), "i" (0),
"r" (0) : "memory");
}
asm volatile("ptesync": : :"memory");
// POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
} else {
for (set = 0; set < kvm->arch.tlb_sets; ++set) {
/* R=0 PRS=0 RIC=0 */
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r" (rb), "i" (0), "i" (0), "i" (0),
"r" (0) : "memory");
rb += PPC_BIT(51); /* increment set number */
}
asm volatile("ptesync": : :"memory");
// POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
if (cpu_has_feature(CPU_FTR_ARCH_300))
asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
rb += PPC_BIT(51); /* increment set number */
}
asm volatile("ptesync": : :"memory");
}
void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
struct kvm_nested_guest *nested)
void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu)
{
cpumask_t *need_tlb_flush;
/*
* On POWER9, individual threads can come in here, but the
* TLB is shared between the 4 threads in a core, hence
* invalidating on one thread invalidates for all.
* Thus we make all 4 threads use the same bit.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
pcpu = cpu_first_tlb_thread_sibling(pcpu);
if (nested)
need_tlb_flush = &nested->need_tlb_flush;
else
need_tlb_flush = &kvm->arch.need_tlb_flush;
if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
if (cpumask_test_cpu(pcpu, &kvm->arch.need_tlb_flush)) {
flush_guest_tlb(kvm);
/* Clear the bit after the TLB flush */
cpumask_clear_cpu(pcpu, need_tlb_flush);
cpumask_clear_cpu(pcpu, &kvm->arch.need_tlb_flush);
}
}
EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush);
......@@ -20,10 +20,15 @@ void wait_for_subcore_guest_exit(void)
/*
* NULL bitmap pointer indicates that KVM module hasn't
* been loaded yet and hence no guests are running.
* been loaded yet and hence no guests are running, or running
* on POWER9 or newer CPU.
*
* If no KVM is in use, no need to co-ordinate among threads
* as all of them will always be in host and no one is going
* to modify TB other than the opal hmi handler.
*
* POWER9 and newer don't need this synchronisation.
*
* Hence, just return from here.
*/
if (!local_paca->sibling_subcore_state)
......
......@@ -104,7 +104,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtlr r0
blr
_GLOBAL(kvmhv_save_host_pmu)
/*
* void kvmhv_save_host_pmu(void)
*/
kvmhv_save_host_pmu:
BEGIN_FTR_SECTION
/* Work around P8 PMAE bug */
li r3, -1
......@@ -138,14 +141,6 @@ BEGIN_FTR_SECTION
std r8, HSTATE_MMCR2(r13)
std r9, HSTATE_SIER(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
mfspr r5, SPRN_MMCR3
mfspr r6, SPRN_SIER2
mfspr r7, SPRN_SIER3
std r5, HSTATE_MMCR3(r13)
std r6, HSTATE_SIER2(r13)
std r7, HSTATE_SIER3(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
mfspr r3, SPRN_PMC1
mfspr r5, SPRN_PMC2
mfspr r6, SPRN_PMC3
......
......@@ -358,6 +358,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
/* convert TB values/offsets to host (L0) values */
hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
vc->tb_offset += l2_hv.tb_offset;
vcpu->arch.dec_expires += l2_hv.tb_offset;
/* set L1 state to L2 state */
vcpu->arch.nested = l2;
......@@ -374,11 +375,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
vcpu->arch.ret = RESUME_GUEST;
vcpu->arch.trap = 0;
do {
if (mftb() >= hdec_exp) {
vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
r = RESUME_HOST;
break;
}
r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
} while (is_kvmppc_resume_guest(r));
......@@ -399,6 +395,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
if (l2_regs.msr & MSR_TS_MASK)
vcpu->arch.shregs.msr |= MSR_TS_S;
vc->tb_offset = saved_l1_hv.tb_offset;
/* XXX: is this always the same delta as saved_l1_hv.tb_offset? */
vcpu->arch.dec_expires -= l2_hv.tb_offset;
restore_hv_regs(vcpu, &saved_l1_hv);
vcpu->arch.purr += delta_purr;
vcpu->arch.spurr += delta_spurr;
......@@ -582,7 +580,7 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
if (eaddr & (0xFFFUL << 52))
return H_PARAMETER;
buf = kzalloc(n, GFP_KERNEL);
buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN);
if (!buf)
return H_NO_MEM;
......
......@@ -4,8 +4,439 @@
#include <asm/asm-prototypes.h>
#include <asm/dbell.h>
#include <asm/kvm_ppc.h>
#include <asm/pmc.h>
#include <asm/ppc-opcode.h>
#include "book3s_hv.h"
static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
{
if (!(mmcr0 & MMCR0_FC))
goto do_freeze;
if (mmcra & MMCRA_SAMPLE_ENABLE)
goto do_freeze;
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
if (!(mmcr0 & MMCR0_PMCCEXT))
goto do_freeze;
if (!(mmcra & MMCRA_BHRB_DISABLE))
goto do_freeze;
}
return;
do_freeze:
mmcr0 = MMCR0_FC;
mmcra = 0;
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
mmcr0 |= MMCR0_PMCCEXT;
mmcra = MMCRA_BHRB_DISABLE;
}
mtspr(SPRN_MMCR0, mmcr0);
mtspr(SPRN_MMCRA, mmcra);
isync();
}
void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs)
{
struct lppaca *lp;
int load_pmu = 1;
lp = vcpu->arch.vpa.pinned_addr;
if (lp)
load_pmu = lp->pmcregs_in_use;
/* Save host */
if (ppc_get_pmu_inuse()) {
/*
* It might be better to put PMU handling (at least for the
* host) in the perf subsystem because it knows more about what
* is being used.
*/
/* POWER9, POWER10 do not implement HPMC or SPMC */
host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
host_os_sprs->sdar = mfspr(SPRN_SDAR);
host_os_sprs->siar = mfspr(SPRN_SIAR);
host_os_sprs->sier1 = mfspr(SPRN_SIER);
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
host_os_sprs->sier2 = mfspr(SPRN_SIER2);
host_os_sprs->sier3 = mfspr(SPRN_SIER3);
}
}
#ifdef CONFIG_PPC_PSERIES
/* After saving PMU, before loading guest PMU, flip pmcregs_in_use */
if (kvmhv_on_pseries()) {
barrier();
get_lppaca()->pmcregs_in_use = load_pmu;
barrier();
}
#endif
/*
* Load guest. If the VPA said the PMCs are not in use but the guest
* tried to access them anyway, HFSCR[PM] will be set by the HFAC
* fault so we can make forward progress.
*/
if (load_pmu || (vcpu->arch.hfscr & HFSCR_PM)) {
mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
mtspr(SPRN_SDAR, vcpu->arch.sdar);
mtspr(SPRN_SIAR, vcpu->arch.siar);
mtspr(SPRN_SIER, vcpu->arch.sier[0]);
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]);
mtspr(SPRN_SIER2, vcpu->arch.sier[1]);
mtspr(SPRN_SIER3, vcpu->arch.sier[2]);
}
/* Set MMCRA then MMCR0 last */
mtspr(SPRN_MMCRA, vcpu->arch.mmcra);
mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]);
/* No isync necessary because we're starting counters */
if (!vcpu->arch.nested &&
(vcpu->arch.hfscr_permitted & HFSCR_PM))
vcpu->arch.hfscr |= HFSCR_PM;
}
}
EXPORT_SYMBOL_GPL(switch_pmu_to_guest);
void switch_pmu_to_host(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs)
{
struct lppaca *lp;
int save_pmu = 1;
lp = vcpu->arch.vpa.pinned_addr;
if (lp)
save_pmu = lp->pmcregs_in_use;
if (IS_ENABLED(CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND)) {
/*
* Save pmu if this guest is capable of running nested guests.
* This is option is for old L1s that do not set their
* lppaca->pmcregs_in_use properly when entering their L2.
*/
save_pmu |= nesting_enabled(vcpu->kvm);
}
if (save_pmu) {
vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0);
vcpu->arch.mmcra = mfspr(SPRN_MMCRA);
freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra);
vcpu->arch.pmc[0] = mfspr(SPRN_PMC1);
vcpu->arch.pmc[1] = mfspr(SPRN_PMC2);
vcpu->arch.pmc[2] = mfspr(SPRN_PMC3);
vcpu->arch.pmc[3] = mfspr(SPRN_PMC4);
vcpu->arch.pmc[4] = mfspr(SPRN_PMC5);
vcpu->arch.pmc[5] = mfspr(SPRN_PMC6);
vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1);
vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2);
vcpu->arch.sdar = mfspr(SPRN_SDAR);
vcpu->arch.siar = mfspr(SPRN_SIAR);
vcpu->arch.sier[0] = mfspr(SPRN_SIER);
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3);
vcpu->arch.sier[1] = mfspr(SPRN_SIER2);
vcpu->arch.sier[2] = mfspr(SPRN_SIER3);
}
} else if (vcpu->arch.hfscr & HFSCR_PM) {
/*
* The guest accessed PMC SPRs without specifying they should
* be preserved, or it cleared pmcregs_in_use after the last
* access. Just ensure they are frozen.
*/
freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
/*
* Demand-fault PMU register access in the guest.
*
* This is used to grab the guest's VPA pmcregs_in_use value
* and reflect it into the host's VPA in the case of a nested
* hypervisor.
*
* It also avoids having to zero-out SPRs after each guest
* exit to avoid side-channels when.
*
* This is cleared here when we exit the guest, so later HFSCR
* interrupt handling can add it back to run the guest with
* PM enabled next time.
*/
if (!vcpu->arch.nested)
vcpu->arch.hfscr &= ~HFSCR_PM;
} /* otherwise the PMU should still be frozen */
#ifdef CONFIG_PPC_PSERIES
if (kvmhv_on_pseries()) {
barrier();
get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
barrier();
}
#endif
if (ppc_get_pmu_inuse()) {
mtspr(SPRN_PMC1, host_os_sprs->pmc1);
mtspr(SPRN_PMC2, host_os_sprs->pmc2);
mtspr(SPRN_PMC3, host_os_sprs->pmc3);
mtspr(SPRN_PMC4, host_os_sprs->pmc4);
mtspr(SPRN_PMC5, host_os_sprs->pmc5);
mtspr(SPRN_PMC6, host_os_sprs->pmc6);
mtspr(SPRN_MMCR1, host_os_sprs->mmcr1);
mtspr(SPRN_MMCR2, host_os_sprs->mmcr2);
mtspr(SPRN_SDAR, host_os_sprs->sdar);
mtspr(SPRN_SIAR, host_os_sprs->siar);
mtspr(SPRN_SIER, host_os_sprs->sier1);
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
mtspr(SPRN_MMCR3, host_os_sprs->mmcr3);
mtspr(SPRN_SIER2, host_os_sprs->sier2);
mtspr(SPRN_SIER3, host_os_sprs->sier3);
}
/* Set MMCRA then MMCR0 last */
mtspr(SPRN_MMCRA, host_os_sprs->mmcra);
mtspr(SPRN_MMCR0, host_os_sprs->mmcr0);
isync();
}
}
EXPORT_SYMBOL_GPL(switch_pmu_to_host);
static void load_spr_state(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs)
{
/* TAR is very fast */
mtspr(SPRN_TAR, vcpu->arch.tar);
#ifdef CONFIG_ALTIVEC
if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
current->thread.vrsave != vcpu->arch.vrsave)
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
#endif
if (vcpu->arch.hfscr & HFSCR_EBB) {
if (current->thread.ebbhr != vcpu->arch.ebbhr)
mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
if (current->thread.ebbrr != vcpu->arch.ebbrr)
mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
if (current->thread.bescr != vcpu->arch.bescr)
mtspr(SPRN_BESCR, vcpu->arch.bescr);
}
if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
current->thread.tidr != vcpu->arch.tid)
mtspr(SPRN_TIDR, vcpu->arch.tid);
if (host_os_sprs->iamr != vcpu->arch.iamr)
mtspr(SPRN_IAMR, vcpu->arch.iamr);
if (host_os_sprs->amr != vcpu->arch.amr)
mtspr(SPRN_AMR, vcpu->arch.amr);
if (vcpu->arch.uamor != 0)
mtspr(SPRN_UAMOR, vcpu->arch.uamor);
if (current->thread.fscr != vcpu->arch.fscr)
mtspr(SPRN_FSCR, vcpu->arch.fscr);
if (current->thread.dscr != vcpu->arch.dscr)
mtspr(SPRN_DSCR, vcpu->arch.dscr);
if (vcpu->arch.pspb != 0)
mtspr(SPRN_PSPB, vcpu->arch.pspb);
/*
* DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
* clear (or hstate set appropriately to catch those registers
* being clobbered if we take a MCE or SRESET), so those are done
* later.
*/
if (!(vcpu->arch.ctrl & 1))
mtspr(SPRN_CTRLT, 0);
}
static void store_spr_state(struct kvm_vcpu *vcpu)
{
vcpu->arch.tar = mfspr(SPRN_TAR);
#ifdef CONFIG_ALTIVEC
if (cpu_has_feature(CPU_FTR_ALTIVEC))
vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
#endif
if (vcpu->arch.hfscr & HFSCR_EBB) {
vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
vcpu->arch.bescr = mfspr(SPRN_BESCR);
}
if (cpu_has_feature(CPU_FTR_P9_TIDR))
vcpu->arch.tid = mfspr(SPRN_TIDR);
vcpu->arch.iamr = mfspr(SPRN_IAMR);
vcpu->arch.amr = mfspr(SPRN_AMR);
vcpu->arch.uamor = mfspr(SPRN_UAMOR);
vcpu->arch.fscr = mfspr(SPRN_FSCR);
vcpu->arch.dscr = mfspr(SPRN_DSCR);
vcpu->arch.pspb = mfspr(SPRN_PSPB);
vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
}
/* Returns true if current MSR and/or guest MSR may have changed */
bool load_vcpu_state(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs)
{
bool ret = false;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (cpu_has_feature(CPU_FTR_TM) ||
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
unsigned long guest_msr = vcpu->arch.shregs.msr;
if (MSR_TM_ACTIVE(guest_msr)) {
kvmppc_restore_tm_hv(vcpu, guest_msr, true);
ret = true;
} else if (vcpu->arch.hfscr & HFSCR_TM) {
mtspr(SPRN_TEXASR, vcpu->arch.texasr);
mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
}
}
#endif
load_spr_state(vcpu, host_os_sprs);
load_fp_state(&vcpu->arch.fp);
#ifdef CONFIG_ALTIVEC
load_vr_state(&vcpu->arch.vr);
#endif
return ret;
}
EXPORT_SYMBOL_GPL(load_vcpu_state);
void store_vcpu_state(struct kvm_vcpu *vcpu)
{
store_spr_state(vcpu);
store_fp_state(&vcpu->arch.fp);
#ifdef CONFIG_ALTIVEC
store_vr_state(&vcpu->arch.vr);
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (cpu_has_feature(CPU_FTR_TM) ||
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
unsigned long guest_msr = vcpu->arch.shregs.msr;
if (MSR_TM_ACTIVE(guest_msr)) {
kvmppc_save_tm_hv(vcpu, guest_msr, true);
} else if (vcpu->arch.hfscr & HFSCR_TM) {
vcpu->arch.texasr = mfspr(SPRN_TEXASR);
vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
vcpu->arch.tfiar = mfspr(SPRN_TFIAR);
if (!vcpu->arch.nested) {
vcpu->arch.load_tm++; /* see load_ebb comment */
if (!vcpu->arch.load_tm)
vcpu->arch.hfscr &= ~HFSCR_TM;
}
}
}
#endif
}
EXPORT_SYMBOL_GPL(store_vcpu_state);
void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
{
host_os_sprs->iamr = mfspr(SPRN_IAMR);
host_os_sprs->amr = mfspr(SPRN_AMR);
}
EXPORT_SYMBOL_GPL(save_p9_host_os_sprs);
/* vcpu guest regs must already be saved */
void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs)
{
/*
* current->thread.xxx registers must all be restored to host
* values before a potential context switch, othrewise the context
* switch itself will overwrite current->thread.xxx with the values
* from the guest SPRs.
*/
mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
current->thread.tidr != vcpu->arch.tid)
mtspr(SPRN_TIDR, current->thread.tidr);
if (host_os_sprs->iamr != vcpu->arch.iamr)
mtspr(SPRN_IAMR, host_os_sprs->iamr);
if (vcpu->arch.uamor != 0)
mtspr(SPRN_UAMOR, 0);
if (host_os_sprs->amr != vcpu->arch.amr)
mtspr(SPRN_AMR, host_os_sprs->amr);
if (current->thread.fscr != vcpu->arch.fscr)
mtspr(SPRN_FSCR, current->thread.fscr);
if (current->thread.dscr != vcpu->arch.dscr)
mtspr(SPRN_DSCR, current->thread.dscr);
if (vcpu->arch.pspb != 0)
mtspr(SPRN_PSPB, 0);
/* Save guest CTRL register, set runlatch to 1 */
if (!(vcpu->arch.ctrl & 1))
mtspr(SPRN_CTRLT, 1);
#ifdef CONFIG_ALTIVEC
if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
vcpu->arch.vrsave != current->thread.vrsave)
mtspr(SPRN_VRSAVE, current->thread.vrsave);
#endif
if (vcpu->arch.hfscr & HFSCR_EBB) {
if (vcpu->arch.bescr != current->thread.bescr)
mtspr(SPRN_BESCR, current->thread.bescr);
if (vcpu->arch.ebbhr != current->thread.ebbhr)
mtspr(SPRN_EBBHR, current->thread.ebbhr);
if (vcpu->arch.ebbrr != current->thread.ebbrr)
mtspr(SPRN_EBBRR, current->thread.ebbrr);
if (!vcpu->arch.nested) {
/*
* This is like load_fp in context switching, turn off
* the facility after it wraps the u8 to try avoiding
* saving and restoring the registers each partition
* switch.
*/
vcpu->arch.load_ebb++;
if (!vcpu->arch.load_ebb)
vcpu->arch.hfscr &= ~HFSCR_EBB;
}
}
if (vcpu->arch.tar != current->thread.tar)
mtspr(SPRN_TAR, current->thread.tar);
}
EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs);
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
{
......@@ -56,10 +487,22 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator
#define accumulate_time(vcpu, next) do {} while (0)
#endif
static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
static inline u64 mfslbv(unsigned int idx)
{
asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx));
asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx));
u64 slbev;
asm volatile("slbmfev %0,%1" : "=r" (slbev) : "r" (idx));
return slbev;
}
static inline u64 mfslbe(unsigned int idx)
{
u64 slbee;
asm volatile("slbmfee %0,%1" : "=r" (slbee) : "r" (idx));
return slbee;
}
static inline void mtslb(u64 slbee, u64 slbev)
......@@ -100,17 +543,19 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u6
lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
/*
* All the isync()s are overkill but trivially follow the ISA
* requirements. Some can likely be replaced with justification
* comment for why they are not needed.
* Prior memory accesses to host PID Q3 must be completed before we
* start switching, and stores must be drained to avoid not-my-LPAR
* logic (see switch_mmu_to_host).
*/
asm volatile("hwsync" ::: "memory");
isync();
mtspr(SPRN_LPID, lpid);
isync();
mtspr(SPRN_LPCR, lpcr);
isync();
mtspr(SPRN_PID, vcpu->arch.pid);
isync();
/*
* isync not required here because we are HRFID'ing to guest before
* any guest context access, which is context synchronising.
*/
}
static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
......@@ -120,25 +565,41 @@ static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64
lpid = kvm->arch.lpid;
/*
* See switch_mmu_to_guest_radix. ptesync should not be required here
* even if the host is in HPT mode because speculative accesses would
* not cause RC updates (we are in real mode).
*/
asm volatile("hwsync" ::: "memory");
isync();
mtspr(SPRN_LPID, lpid);
mtspr(SPRN_LPCR, lpcr);
mtspr(SPRN_PID, vcpu->arch.pid);
for (i = 0; i < vcpu->arch.slb_max; i++)
mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
isync();
/*
* isync not required here, see switch_mmu_to_guest_radix.
*/
}
static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
{
/*
* The guest has exited, so guest MMU context is no longer being
* non-speculatively accessed, but a hwsync is needed before the
* mtLPIDR / mtPIDR switch, in order to ensure all stores are drained,
* so the not-my-LPAR tlbie logic does not overlook them.
*/
asm volatile("hwsync" ::: "memory");
isync();
mtspr(SPRN_PID, pid);
isync();
mtspr(SPRN_LPID, kvm->arch.host_lpid);
isync();
mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
isync();
/*
* isync is not required after the switch, because mtmsrd with L=0
* is performed after this switch, which is context synchronising.
*/
if (!radix_enabled())
slb_restore_bolted_realmode();
......@@ -171,8 +632,10 @@ static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
*/
for (i = 0; i < vcpu->arch.slb_nr; i++) {
u64 slbee, slbev;
mfslb(i, &slbee, &slbev);
slbee = mfslbe(i);
if (slbee & SLB_ESID_V) {
slbev = mfslbv(i);
vcpu->arch.slb[nr].orige = slbee | i;
vcpu->arch.slb[nr].origv = slbev;
nr++;
......@@ -183,15 +646,128 @@ static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
}
}
int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
static void flush_guest_tlb(struct kvm *kvm)
{
unsigned long rb, set;
rb = PPC_BIT(52); /* IS = 2 */
if (kvm_is_radix(kvm)) {
/* R=1 PRS=1 RIC=2 */
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r" (rb), "i" (1), "i" (1), "i" (2),
"r" (0) : "memory");
for (set = 1; set < kvm->arch.tlb_sets; ++set) {
rb += PPC_BIT(51); /* increment set number */
/* R=1 PRS=1 RIC=0 */
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r" (rb), "i" (1), "i" (1), "i" (0),
"r" (0) : "memory");
}
asm volatile("ptesync": : :"memory");
// POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
} else {
for (set = 0; set < kvm->arch.tlb_sets; ++set) {
/* R=0 PRS=0 RIC=0 */
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r" (rb), "i" (0), "i" (0), "i" (0),
"r" (0) : "memory");
rb += PPC_BIT(51); /* increment set number */
}
asm volatile("ptesync": : :"memory");
// POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
}
}
static void check_need_tlb_flush(struct kvm *kvm, int pcpu,
struct kvm_nested_guest *nested)
{
cpumask_t *need_tlb_flush;
bool all_set = true;
int i;
if (nested)
need_tlb_flush = &nested->need_tlb_flush;
else
need_tlb_flush = &kvm->arch.need_tlb_flush;
if (likely(!cpumask_test_cpu(pcpu, need_tlb_flush)))
return;
/*
* Individual threads can come in here, but the TLB is shared between
* the 4 threads in a core, hence invalidating on one thread
* invalidates for all, so only invalidate the first time (if all bits
* were set. The others must still execute a ptesync.
*
* If a race occurs and two threads do the TLB flush, that is not a
* problem, just sub-optimal.
*/
for (i = cpu_first_tlb_thread_sibling(pcpu);
i <= cpu_last_tlb_thread_sibling(pcpu);
i += cpu_tlb_thread_sibling_step()) {
if (!cpumask_test_cpu(i, need_tlb_flush)) {
all_set = false;
break;
}
}
if (all_set)
flush_guest_tlb(kvm);
else
asm volatile("ptesync" ::: "memory");
/* Clear the bit after the TLB flush */
cpumask_clear_cpu(pcpu, need_tlb_flush);
}
unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr)
{
unsigned long msr_needed = 0;
msr &= ~MSR_EE;
/* MSR bits may have been cleared by context switch so must recheck */
if (IS_ENABLED(CONFIG_PPC_FPU))
msr_needed |= MSR_FP;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
msr_needed |= MSR_VEC;
if (cpu_has_feature(CPU_FTR_VSX))
msr_needed |= MSR_VSX;
if ((cpu_has_feature(CPU_FTR_TM) ||
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
(vcpu->arch.hfscr & HFSCR_TM))
msr_needed |= MSR_TM;
/*
* This could be combined with MSR[RI] clearing, but that expands
* the unrecoverable window. It would be better to cover unrecoverable
* with KVM bad interrupt handling rather than use MSR[RI] at all.
*
* Much more difficult and less worthwhile to combine with IR/DR
* disable.
*/
if ((msr & msr_needed) != msr_needed) {
msr |= msr_needed;
__mtmsrd(msr, 0);
} else {
__hard_irq_disable();
}
local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
return msr;
}
EXPORT_SYMBOL_GPL(kvmppc_msr_hard_disable_set_facilities);
int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb)
{
struct p9_host_os_sprs host_os_sprs;
struct kvm *kvm = vcpu->kvm;
struct kvm_nested_guest *nested = vcpu->arch.nested;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
s64 hdec;
u64 tb, purr, spurr;
s64 hdec, dec;
u64 purr, spurr;
u64 *exsave;
bool ri_set;
int trap;
unsigned long msr;
unsigned long host_hfscr;
......@@ -199,11 +775,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
unsigned long host_dawr0;
unsigned long host_dawrx0;
unsigned long host_psscr;
unsigned long host_hpsscr;
unsigned long host_pidr;
unsigned long host_dawr1;
unsigned long host_dawrx1;
unsigned long dpdes;
hdec = time_limit - mftb();
hdec = time_limit - *tb;
if (hdec < 0)
return BOOK3S_INTERRUPT_HV_DECREMENTER;
......@@ -214,51 +792,84 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
vcpu->arch.ceded = 0;
if (vc->tb_offset) {
u64 new_tb = mftb() + vc->tb_offset;
mtspr(SPRN_TBU40, new_tb);
tb = mftb();
if ((tb & 0xffffff) < (new_tb & 0xffffff))
mtspr(SPRN_TBU40, new_tb + 0x1000000);
vc->tb_offset_applied = vc->tb_offset;
}
msr = mfmsr();
/* Save MSR for restore, with EE clear. */
msr = mfmsr() & ~MSR_EE;
host_hfscr = mfspr(SPRN_HFSCR);
host_ciabr = mfspr(SPRN_CIABR);
host_dawr0 = mfspr(SPRN_DAWR0);
host_dawrx0 = mfspr(SPRN_DAWRX0);
host_psscr = mfspr(SPRN_PSSCR);
host_psscr = mfspr(SPRN_PSSCR_PR);
if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
host_hpsscr = mfspr(SPRN_PSSCR);
host_pidr = mfspr(SPRN_PID);
if (cpu_has_feature(CPU_FTR_DAWR1)) {
host_dawr1 = mfspr(SPRN_DAWR1);
host_dawrx1 = mfspr(SPRN_DAWRX1);
}
if (vc->pcr)
mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
mtspr(SPRN_DPDES, vc->dpdes);
mtspr(SPRN_VTB, vc->vtb);
if (dawr_enabled()) {
host_dawr0 = mfspr(SPRN_DAWR0);
host_dawrx0 = mfspr(SPRN_DAWRX0);
if (cpu_has_feature(CPU_FTR_DAWR1)) {
host_dawr1 = mfspr(SPRN_DAWR1);
host_dawrx1 = mfspr(SPRN_DAWRX1);
}
}
local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
save_p9_host_os_sprs(&host_os_sprs);
msr = kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
if (lazy_irq_pending()) {
trap = 0;
goto out;
}
if (unlikely(load_vcpu_state(vcpu, &host_os_sprs)))
msr = mfmsr(); /* MSR may have been updated */
if (vc->tb_offset) {
u64 new_tb = *tb + vc->tb_offset;
mtspr(SPRN_TBU40, new_tb);
if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
new_tb += 0x1000000;
mtspr(SPRN_TBU40, new_tb);
}
*tb = new_tb;
vc->tb_offset_applied = vc->tb_offset;
}
mtspr(SPRN_VTB, vc->vtb);
mtspr(SPRN_PURR, vcpu->arch.purr);
mtspr(SPRN_SPURR, vcpu->arch.spurr);
if (vc->pcr)
mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
if (vcpu->arch.doorbell_request) {
vcpu->arch.doorbell_request = 0;
mtspr(SPRN_DPDES, 1);
}
if (dawr_enabled()) {
mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
if (vcpu->arch.dawr0 != host_dawr0)
mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
if (vcpu->arch.dawrx0 != host_dawrx0)
mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
if (cpu_has_feature(CPU_FTR_DAWR1)) {
mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
if (vcpu->arch.dawr1 != host_dawr1)
mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
if (vcpu->arch.dawrx1 != host_dawrx1)
mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
}
}
mtspr(SPRN_CIABR, vcpu->arch.ciabr);
mtspr(SPRN_IC, vcpu->arch.ic);
if (vcpu->arch.ciabr != host_ciabr)
mtspr(SPRN_CIABR, vcpu->arch.ciabr);
mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
(local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
(local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
} else {
if (vcpu->arch.psscr != host_psscr)
mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
}
mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
......@@ -276,18 +887,34 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
* HDSI which should correctly update the HDSISR the second time HDSI
* entry.
*
* Just do this on all p9 processors for now.
* The "radix prefetch bug" test can be used to test for this bug, as
* it also exists fo DD2.1 and below.
*/
mtspr(SPRN_HDSISR, HDSISR_CANARY);
if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
mtspr(SPRN_HDSISR, HDSISR_CANARY);
mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
mtspr(SPRN_AMOR, ~0UL);
/*
* It might be preferable to load_vcpu_state here, in order to get the
* GPR/FP register loads executing in parallel with the previous mtSPR
* instructions, but for now that can't be done because the TM handling
* in load_vcpu_state can change some SPRs and vcpu state (nip, msr).
* But TM could be split out if this would be a significant benefit.
*/
local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9;
/*
* MSR[RI] does not need to be cleared (and is not, for radix guests
* with no prefetch bug), because in_guest is set. If we take a SRESET
* or MCE with in_guest set but still in HV mode, then
* kvmppc_p9_bad_interrupt handles the interrupt, which effectively
* clears MSR[RI] and doesn't return.
*/
WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_HV_P9);
barrier(); /* Open in_guest critical section */
/*
* Hash host, hash guest, or radix guest with prefetch bug, all have
......@@ -299,17 +926,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
save_clear_host_mmu(kvm);
if (kvm_is_radix(kvm)) {
if (kvm_is_radix(kvm))
switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
__mtmsrd(0, 1); /* clear RI */
} else {
else
switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
}
/* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
check_need_tlb_flush(kvm, vc->pcpu, nested);
/*
* P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
......@@ -317,6 +940,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
*/
mtspr(SPRN_HDEC, hdec);
mtspr(SPRN_DEC, vcpu->arch.dec_expires - *tb);
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
tm_return_to_guest:
#endif
......@@ -327,7 +952,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
accumulate_time(vcpu, &vcpu->arch.guest_time);
switch_pmu_to_guest(vcpu, &host_os_sprs);
kvmppc_p9_enter_guest(vcpu);
switch_pmu_to_host(vcpu, &host_os_sprs);
accumulate_time(vcpu, &vcpu->arch.rm_intr);
......@@ -340,36 +967,27 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
/* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
trap = local_paca->kvm_hstate.scratch0 & ~0x2;
/* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */
ri_set = false;
if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) {
if (trap != BOOK3S_INTERRUPT_SYSCALL &&
(vcpu->arch.shregs.msr & MSR_RI))
ri_set = true;
if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK))
exsave = local_paca->exgen;
} else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) {
else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET)
exsave = local_paca->exnmi;
} else { /* trap == 0x200 */
else /* trap == 0x200 */
exsave = local_paca->exmc;
}
vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
/*
* Only set RI after reading machine check regs (DAR, DSISR, SRR0/1)
* and hstate scratch (which we need to move into exsave to make
* re-entrant vs SRESET/MCE)
* After reading machine check regs (DAR, DSISR, SRR0/1) and hstate
* scratch (which we need to move into exsave to make re-entrant vs
* SRESET/MCE), register state is protected from reentrancy. However
* timebase, MMU, among other state is still set to guest, so don't
* enable MSR[RI] here. It gets enabled at the end, after in_guest
* is cleared.
*
* It is possible an NMI could come in here, which is why it is
* important to save the above state early so it can be debugged.
*/
if (ri_set) {
if (unlikely(!(mfmsr() & MSR_RI))) {
__mtmsrd(MSR_RI, 1);
WARN_ON_ONCE(1);
}
} else {
WARN_ON_ONCE(mfmsr() & MSR_RI);
__mtmsrd(MSR_RI, 1);
}
vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
......@@ -388,7 +1006,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
kvmppc_realmode_machine_check(vcpu);
} else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
kvmppc_realmode_hmi_handler();
kvmppc_p9_realmode_hmi_handler(vcpu);
} else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
......@@ -427,13 +1045,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
*/
mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr);
/*
* tm_return_to_guest re-loads SRR0/1, DAR,
* DSISR after RI is cleared, in case they had
* been clobbered by a MCE.
*/
__mtmsrd(0, 1); /* clear RI */
goto tm_return_to_guest;
}
}
......@@ -445,81 +1056,109 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
/* Advance host PURR/SPURR by the amount used by guest */
purr = mfspr(SPRN_PURR);
spurr = mfspr(SPRN_SPURR);
mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
purr - vcpu->arch.purr);
mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
spurr - vcpu->arch.spurr);
local_paca->kvm_hstate.host_purr += purr - vcpu->arch.purr;
local_paca->kvm_hstate.host_spurr += spurr - vcpu->arch.spurr;
vcpu->arch.purr = purr;
vcpu->arch.spurr = spurr;
vcpu->arch.ic = mfspr(SPRN_IC);
vcpu->arch.pid = mfspr(SPRN_PID);
vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
/* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
mtspr(SPRN_PSSCR, host_psscr |
(local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
mtspr(SPRN_HFSCR, host_hfscr);
mtspr(SPRN_CIABR, host_ciabr);
mtspr(SPRN_DAWR0, host_dawr0);
mtspr(SPRN_DAWRX0, host_dawrx0);
if (cpu_has_feature(CPU_FTR_DAWR1)) {
mtspr(SPRN_DAWR1, host_dawr1);
mtspr(SPRN_DAWRX1, host_dawrx1);
}
if (kvm_is_radix(kvm)) {
/*
* Since this is radix, do a eieio; tlbsync; ptesync sequence
* in case we interrupted the guest between a tlbie and a
* ptesync.
*/
asm volatile("eieio; tlbsync; ptesync");
}
dpdes = mfspr(SPRN_DPDES);
if (dpdes)
vcpu->arch.doorbell_request = 1;
/*
* cp_abort is required if the processor supports local copy-paste
* to clear the copy buffer that was under control of the guest.
*/
if (cpu_has_feature(CPU_FTR_ARCH_31))
asm volatile(PPC_CP_ABORT);
vc->dpdes = mfspr(SPRN_DPDES);
vc->vtb = mfspr(SPRN_VTB);
mtspr(SPRN_DPDES, 0);
if (vc->pcr)
mtspr(SPRN_PCR, PCR_MASK);
dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
dec = (s32) dec;
*tb = mftb();
vcpu->arch.dec_expires = dec + *tb;
if (vc->tb_offset_applied) {
u64 new_tb = mftb() - vc->tb_offset_applied;
u64 new_tb = *tb - vc->tb_offset_applied;
mtspr(SPRN_TBU40, new_tb);
tb = mftb();
if ((tb & 0xffffff) < (new_tb & 0xffffff))
mtspr(SPRN_TBU40, new_tb + 0x1000000);
if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
new_tb += 0x1000000;
mtspr(SPRN_TBU40, new_tb);
}
*tb = new_tb;
vc->tb_offset_applied = 0;
}
mtspr(SPRN_HDEC, 0x7fffffff);
save_clear_guest_mmu(kvm, vcpu);
switch_mmu_to_host(kvm, host_pidr);
local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
/*
* If we are in real mode, only switch MMU on after the MMU is
* switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
* Enable MSR here in order to have facilities enabled to save
* guest registers. This enables MMU (if we were in realmode), so
* only switch MMU on after the MMU is switched to host, to avoid
* the P9_RADIX_PREFETCH_BUG or hash guest context.
*/
if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
vcpu->arch.shregs.msr & MSR_TS_MASK)
vcpu->arch.shregs.msr & MSR_TS_MASK)
msr |= MSR_TS_S;
__mtmsrd(msr, 0);
store_vcpu_state(vcpu);
mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr);
mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr);
if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
/* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
mtspr(SPRN_PSSCR, host_hpsscr |
(local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
}
mtspr(SPRN_HFSCR, host_hfscr);
if (vcpu->arch.ciabr != host_ciabr)
mtspr(SPRN_CIABR, host_ciabr);
if (dawr_enabled()) {
if (vcpu->arch.dawr0 != host_dawr0)
mtspr(SPRN_DAWR0, host_dawr0);
if (vcpu->arch.dawrx0 != host_dawrx0)
mtspr(SPRN_DAWRX0, host_dawrx0);
if (cpu_has_feature(CPU_FTR_DAWR1)) {
if (vcpu->arch.dawr1 != host_dawr1)
mtspr(SPRN_DAWR1, host_dawr1);
if (vcpu->arch.dawrx1 != host_dawrx1)
mtspr(SPRN_DAWRX1, host_dawrx1);
}
}
if (dpdes)
mtspr(SPRN_DPDES, 0);
if (vc->pcr)
mtspr(SPRN_PCR, PCR_MASK);
/* HDEC must be at least as large as DEC, so decrementer_max fits */
mtspr(SPRN_HDEC, decrementer_max);
timer_rearm_host_dec(*tb);
restore_p9_host_os_sprs(vcpu, &host_os_sprs);
barrier(); /* Close in_guest critical section */
WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_NONE);
/* Interrupts are recoverable at this point */
/*
* cp_abort is required if the processor supports local copy-paste
* to clear the copy buffer that was under control of the guest.
*/
if (cpu_has_feature(CPU_FTR_ARCH_31))
asm volatile(PPC_CP_ABORT);
out:
end_timing(vcpu);
return trap;
......
......@@ -136,6 +136,60 @@ void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
vcpu->arch.mce_evt = mce_evt;
}
long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
long ret = 0;
/*
* Unapply and clear the offset first. That way, if the TB was not
* resynced then it will remain in host-offset, and if it was resynced
* then it is brought into host-offset. Then the tb offset is
* re-applied before continuing with the KVM exit.
*
* This way, we don't need to actually know whether not OPAL resynced
* the timebase or do any of the complicated dance that the P7/8
* path requires.
*/
if (vc->tb_offset_applied) {
u64 new_tb = mftb() - vc->tb_offset_applied;
mtspr(SPRN_TBU40, new_tb);
if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
new_tb += 0x1000000;
mtspr(SPRN_TBU40, new_tb);
}
vc->tb_offset_applied = 0;
}
local_paca->hmi_irqs++;
if (hmi_handle_debugtrig(NULL) >= 0) {
ret = 1;
goto out;
}
if (ppc_md.hmi_exception_early)
ppc_md.hmi_exception_early(NULL);
out:
if (vc->tb_offset) {
u64 new_tb = mftb() + vc->tb_offset;
mtspr(SPRN_TBU40, new_tb);
if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
new_tb += 0x1000000;
mtspr(SPRN_TBU40, new_tb);
}
vc->tb_offset_applied = vc->tb_offset;
}
return ret;
}
/*
* The following subcore HMI handling is all only for pre-POWER9 CPUs.
*/
/* Check if dynamic split is in force and return subcore size accordingly. */
static inline int kvmppc_cur_subcore_size(void)
{
......
......@@ -55,12 +55,6 @@ static int global_invalidates(struct kvm *kvm)
smp_wmb();
cpumask_setall(&kvm->arch.need_tlb_flush);
cpu = local_paca->kvm_hstate.kvm_vcore->pcpu;
/*
* On POWER9, threads are independent but the TLB is shared,
* so use the bit for the first thread to represent the core.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
cpu = cpu_first_tlb_thread_sibling(cpu);
cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
}
......
......@@ -778,17 +778,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Restore AMR and UAMOR, set AMOR to all 1s */
ld r5,VCPU_AMR(r4)
ld r6,VCPU_UAMOR(r4)
li r7,-1
mtspr SPRN_AMR,r5
mtspr SPRN_UAMOR,r6
mtspr SPRN_AMOR,r7
/* Restore state of CTRL run bit; assume 1 on entry */
/* Restore state of CTRL run bit; the host currently has it set to 1 */
lwz r5,VCPU_CTRL(r4)
andi. r5,r5,1
bne 4f
mfspr r6,SPRN_CTRLF
clrrdi r6,r6,1
li r6,0
mtspr SPRN_CTRLT,r6
4:
/* Secondary threads wait for primary to have done partition switch */
......@@ -817,10 +814,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
* Set the decrementer to the guest decrementer.
*/
ld r8,VCPU_DEC_EXPIRES(r4)
/* r8 is a host timebase value here, convert to guest TB */
ld r5,HSTATE_KVM_VCORE(r13)
ld r6,VCORE_TB_OFFSET_APPL(r5)
add r8,r8,r6
mftb r7
subf r3,r7,r8
mtspr SPRN_DEC,r3
......@@ -1195,9 +1188,6 @@ guest_bypass:
mftb r6
extsw r5,r5
16: add r5,r5,r6
/* r5 is a guest timebase value here, convert to host TB */
ld r4,VCORE_TB_OFFSET_APPL(r3)
subf r5,r4,r5
std r5,VCPU_DEC_EXPIRES(r9)
/* Increment exit count, poke other threads to exit */
......@@ -1211,12 +1201,12 @@ guest_bypass:
stw r0, VCPU_CPU(r9)
stw r0, VCPU_THREAD_CPU(r9)
/* Save guest CTRL register, set runlatch to 1 */
/* Save guest CTRL register, set runlatch to 1 if it was clear */
mfspr r6,SPRN_CTRLF
stw r6,VCPU_CTRL(r9)
andi. r0,r6,1
bne 4f
ori r6,r6,1
li r6,1
mtspr SPRN_CTRLT,r6
4:
/*
......@@ -2163,9 +2153,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/* save expiry time of guest decrementer */
add r3, r3, r5
ld r4, HSTATE_KVM_VCPU(r13)
ld r5, HSTATE_KVM_VCORE(r13)
ld r6, VCORE_TB_OFFSET_APPL(r5)
subf r3, r6, r3 /* convert to host TB value */
std r3, VCPU_DEC_EXPIRES(r4)
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
......@@ -2186,8 +2173,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
* Also clear the runlatch bit before napping.
*/
kvm_do_nap:
mfspr r0, SPRN_CTRLF
clrrdi r0, r0, 1
li r0,0
mtspr SPRN_CTRLT, r0
li r0,1
......@@ -2206,8 +2192,7 @@ kvm_nap_sequence: /* desired LPCR value in r5 */
bl isa206_idle_insn_mayloss
mfspr r0, SPRN_CTRLF
ori r0, r0, 1
li r0,1
mtspr SPRN_CTRLT, r0
mtspr SPRN_SRR1, r3
......@@ -2264,9 +2249,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/* Restore guest decrementer */
ld r3, VCPU_DEC_EXPIRES(r4)
ld r5, HSTATE_KVM_VCORE(r13)
ld r6, VCORE_TB_OFFSET_APPL(r5)
add r3, r3, r6 /* convert host TB to guest TB value */
mftb r7
subf r3, r7, r3
mtspr SPRN_DEC, r3
......@@ -2778,10 +2760,11 @@ kvmppc_msr_interrupt:
blr
/*
* void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu)
*
* Load up guest PMU state. R3 points to the vcpu struct.
*/
_GLOBAL(kvmhv_load_guest_pmu)
EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
kvmhv_load_guest_pmu:
mr r4, r3
mflr r0
li r3, 1
......@@ -2815,27 +2798,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
mtspr SPRN_MMCRA, r6
mtspr SPRN_SIAR, r7
mtspr SPRN_SDAR, r8
BEGIN_FTR_SECTION
ld r5, VCPU_MMCR + 24(r4)
ld r6, VCPU_SIER + 8(r4)
ld r7, VCPU_SIER + 16(r4)
mtspr SPRN_MMCR3, r5
mtspr SPRN_SIER2, r6
mtspr SPRN_SIER3, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
BEGIN_FTR_SECTION
ld r5, VCPU_MMCR + 16(r4)
ld r6, VCPU_SIER(r4)
mtspr SPRN_MMCR2, r5
mtspr SPRN_SIER, r6
BEGIN_FTR_SECTION_NESTED(96)
lwz r7, VCPU_PMC + 24(r4)
lwz r8, VCPU_PMC + 28(r4)
ld r9, VCPU_MMCRS(r4)
mtspr SPRN_SPMC1, r7
mtspr SPRN_SPMC2, r8
mtspr SPRN_MMCRS, r9
END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtspr SPRN_MMCR0, r3
isync
......@@ -2843,10 +2816,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
blr
/*
* void kvmhv_load_host_pmu(void)
*
* Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
*/
_GLOBAL(kvmhv_load_host_pmu)
EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
kvmhv_load_host_pmu:
mflr r0
lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
cmpwi r4, 0
......@@ -2884,25 +2858,18 @@ BEGIN_FTR_SECTION
mtspr SPRN_MMCR2, r8
mtspr SPRN_SIER, r9
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
ld r5, HSTATE_MMCR3(r13)
ld r6, HSTATE_SIER2(r13)
ld r7, HSTATE_SIER3(r13)
mtspr SPRN_MMCR3, r5
mtspr SPRN_SIER2, r6
mtspr SPRN_SIER3, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
mtspr SPRN_MMCR0, r3
isync
mtlr r0
23: blr
/*
* void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use)
*
* Save guest PMU state into the vcpu struct.
* r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
*/
_GLOBAL(kvmhv_save_guest_pmu)
EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
kvmhv_save_guest_pmu:
mr r9, r3
mr r8, r4
BEGIN_FTR_SECTION
......@@ -2951,14 +2918,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
std r10, VCPU_MMCR + 16(r9)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
mfspr r5, SPRN_MMCR3
mfspr r6, SPRN_SIER2
mfspr r7, SPRN_SIER3
std r5, VCPU_MMCR + 24(r9)
std r6, VCPU_SIER + 8(r9)
std r7, VCPU_SIER + 16(r9)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
std r7, VCPU_SIAR(r9)
std r8, VCPU_SDAR(r9)
mfspr r3, SPRN_PMC1
......@@ -2976,7 +2935,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
BEGIN_FTR_SECTION
mfspr r5, SPRN_SIER
std r5, VCPU_SIER(r9)
BEGIN_FTR_SECTION_NESTED(96)
mfspr r6, SPRN_SPMC1
mfspr r7, SPRN_SPMC2
mfspr r8, SPRN_MMCRS
......@@ -2985,7 +2943,6 @@ BEGIN_FTR_SECTION_NESTED(96)
std r8, VCPU_MMCRS(r9)
lis r4, 0x8000
mtspr SPRN_MMCRS, r4
END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
22: blr
......
......@@ -572,18 +572,6 @@ void __init radix__early_init_devtree(void)
return;
}
static void radix_init_amor(void)
{
/*
* In HV mode, we init AMOR (Authority Mask Override Register) so that
* the hypervisor and guest can setup IAMR (Instruction Authority Mask
* Register), enable key 0 and set it to 1.
*
* AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
*/
mtspr(SPRN_AMOR, (3ul << 62));
}
void __init radix__early_init_mmu(void)
{
unsigned long lpcr;
......@@ -644,7 +632,6 @@ void __init radix__early_init_mmu(void)
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
radix_init_amor();
} else {
radix_init_pseries();
}
......@@ -668,8 +655,6 @@ void radix__early_init_mmu_secondary(void)
set_ptcr_when_no_uv(__pa(partition_tb) |
(PATB_SIZE_SHIFT - 12));
radix_init_amor();
}
radix__switch_mmu_context(NULL, &init_mm);
......
......@@ -2419,8 +2419,24 @@ int register_power_pmu(struct power_pmu *pmu)
}
#ifdef CONFIG_PPC64
static bool pmu_override = false;
static unsigned long pmu_override_val;
static void do_pmu_override(void *data)
{
ppc_set_pmu_inuse(1);
if (pmu_override_val)
mtspr(SPRN_MMCR1, pmu_override_val);
mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
}
static int __init init_ppc64_pmu(void)
{
if (cpu_has_feature(CPU_FTR_HVMODE) && pmu_override) {
pr_warn("disabling perf due to pmu_override= command line option.\n");
on_each_cpu(do_pmu_override, NULL, 1);
return 0;
}
/* run through all the pmu drivers one at a time */
if (!init_power5_pmu())
return 0;
......@@ -2442,4 +2458,23 @@ static int __init init_ppc64_pmu(void)
return init_generic_compat_pmu();
}
early_initcall(init_ppc64_pmu);
static int __init pmu_setup(char *str)
{
unsigned long val;
if (!early_cpu_has_feature(CPU_FTR_HVMODE))
return 0;
pmu_override = true;
if (kstrtoul(str, 0, &val))
val = 0;
pmu_override_val = val;
return 1;
}
__setup("pmu_override=", pmu_setup);
#endif
......@@ -306,8 +306,8 @@ struct p7_sprs {
/* per thread SPRs that get lost in shallow states */
u64 amr;
u64 iamr;
u64 amor;
u64 uamor;
/* amor is restored to constant ~0 */
};
static unsigned long power7_idle_insn(unsigned long type)
......@@ -378,7 +378,6 @@ static unsigned long power7_idle_insn(unsigned long type)
if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
sprs.amr = mfspr(SPRN_AMR);
sprs.iamr = mfspr(SPRN_IAMR);
sprs.amor = mfspr(SPRN_AMOR);
sprs.uamor = mfspr(SPRN_UAMOR);
}
......@@ -397,7 +396,7 @@ static unsigned long power7_idle_insn(unsigned long type)
*/
mtspr(SPRN_AMR, sprs.amr);
mtspr(SPRN_IAMR, sprs.iamr);
mtspr(SPRN_AMOR, sprs.amor);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_UAMOR, sprs.uamor);
}
}
......@@ -589,7 +588,6 @@ struct p9_sprs {
u64 purr;
u64 spurr;
u64 dscr;
u64 wort;
u64 ciabr;
u64 mmcra;
......@@ -687,7 +685,6 @@ static unsigned long power9_idle_stop(unsigned long psscr)
sprs.amr = mfspr(SPRN_AMR);
sprs.iamr = mfspr(SPRN_IAMR);
sprs.amor = mfspr(SPRN_AMOR);
sprs.uamor = mfspr(SPRN_UAMOR);
srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */
......@@ -708,7 +705,7 @@ static unsigned long power9_idle_stop(unsigned long psscr)
*/
mtspr(SPRN_AMR, sprs.amr);
mtspr(SPRN_IAMR, sprs.iamr);
mtspr(SPRN_AMOR, sprs.amor);
mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_UAMOR, sprs.uamor);
/*
......
......@@ -2107,8 +2107,14 @@ static void dump_300_sprs(void)
if (!cpu_has_feature(CPU_FTR_ARCH_300))
return;
printf("pidr = %.16lx tidr = %.16lx\n",
mfspr(SPRN_PID), mfspr(SPRN_TIDR));
if (cpu_has_feature(CPU_FTR_P9_TIDR)) {
printf("pidr = %.16lx tidr = %.16lx\n",
mfspr(SPRN_PID), mfspr(SPRN_TIDR));
} else {
printf("pidr = %.16lx\n",
mfspr(SPRN_PID));
}
printf("psscr = %.16lx\n",
hv ? mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment