Commit e0b7ec05 authored by Paul Mackerras's avatar Paul Mackerras Committed by Alexander Graf

KVM: PPC: Book3S HV: Align physical and virtual CPU thread numbers

On a threaded processor such as POWER7, we group VCPUs into virtual
cores and arrange that the VCPUs in a virtual core run on the same
physical core.  Currently we don't enforce any correspondence between
virtual thread numbers within a virtual core and physical thread
numbers.  Physical threads are allocated starting at 0 on a first-come
first-served basis to runnable virtual threads (VCPUs).

POWER8 implements a new "msgsndp" instruction which guest kernels can
use to interrupt other threads in the same core or sub-core.  Since
the instruction takes the destination physical thread ID as a parameter,
it becomes necessary to align the physical thread IDs with the virtual
thread IDs, that is, to make sure virtual thread N within a virtual
core always runs on physical thread N.

This means that it's possible that thread 0, which is where we call
__kvmppc_vcore_entry, may end up running some other vcpu than the
one whose task called kvmppc_run_core(), or it may end up running
no vcpu at all, if for example thread 0 of the virtual core is
currently executing in userspace.  However, we do need thread 0
to be responsible for switching the MMU -- a previous version of
this patch that had other threads switching the MMU was found to
be responsible for occasional memory corruption and machine check
interrupts in the guest on POWER7 machines.

To accommodate this, we no longer pass the vcpu pointer to
__kvmppc_vcore_entry, but instead let the assembly code load it from
the PACA.  Since the assembly code will need to know the kvm pointer
and the thread ID for threads which don't have a vcpu, we move the
thread ID into the PACA and we add a kvm pointer to the virtual core
structure.

In the case where thread 0 has no vcpu to run, it still calls into
kvmppc_hv_entry in order to do the MMU switch, and then naps until
either its vcpu is ready to run in the guest, or some other thread
needs to exit the guest.  In the latter case, thread 0 jumps to the
code that switches the MMU back to the host.  This control flow means
that now we switch the MMU before loading any guest vcpu state.
Similarly, on guest exit we now save all the guest vcpu state before
switching the MMU back to the host.  This has required substantial
code movement, making the diff rather large.
Signed-off-by: default avatarPaul Mackerras <paulus@samba.org>
Signed-off-by: default avatarAlexander Graf <agraf@suse.de>
parent eee7ff9d
......@@ -87,6 +87,7 @@ struct kvmppc_host_state {
u8 hwthread_req;
u8 hwthread_state;
u8 host_ipi;
u8 ptid;
struct kvm_vcpu *kvm_vcpu;
struct kvmppc_vcore *kvm_vcore;
unsigned long xics_phys;
......
......@@ -288,6 +288,7 @@ struct kvmppc_vcore {
int n_woken;
int nap_count;
int napping_threads;
int first_vcpuid;
u16 pcpu;
u16 last_cpu;
u8 vcore_state;
......@@ -298,6 +299,7 @@ struct kvmppc_vcore {
u64 stolen_tb;
u64 preempt_tb;
struct kvm_vcpu *runner;
struct kvm *kvm;
u64 tb_offset; /* guest timebase - host timebase */
ulong lpcr;
u32 arch_compat;
......
......@@ -506,7 +506,6 @@ int main(void)
DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid));
DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
......@@ -514,6 +513,7 @@ int main(void)
DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm));
DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr));
......@@ -583,6 +583,7 @@ int main(void)
HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_PTID, ptid);
HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
HSTATE_FIELD(HSTATE_PMC, host_pmc);
HSTATE_FIELD(HSTATE_PURR, host_purr);
......
......@@ -990,6 +990,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
init_waitqueue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = core * threads_per_core;
vcore->kvm = kvm;
}
kvm->arch.vcores[core] = vcore;
kvm->arch.online_vcores++;
......@@ -1003,6 +1005,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
++vcore->num_threads;
spin_unlock(&vcore->lock);
vcpu->arch.vcore = vcore;
vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
vcpu->arch.cpu_type = KVM_CPU_3S_64;
kvmppc_sanity_check(vcpu);
......@@ -1066,7 +1069,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
}
}
extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern void __kvmppc_vcore_entry(void);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
struct kvm_vcpu *vcpu)
......@@ -1140,15 +1143,16 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
tpaca = &paca[cpu];
tpaca->kvm_hstate.kvm_vcpu = vcpu;
tpaca->kvm_hstate.kvm_vcore = vc;
tpaca->kvm_hstate.napping = 0;
tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
vcpu->cpu = vc->pcpu;
smp_wmb();
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
if (vcpu->arch.ptid) {
if (cpu != smp_processor_id()) {
#ifdef CONFIG_KVM_XICS
xics_wake_cpu(cpu);
#endif
++vc->n_woken;
if (vcpu->arch.ptid)
++vc->n_woken;
}
#endif
}
......@@ -1205,10 +1209,10 @@ static int on_primary_thread(void)
*/
static void kvmppc_run_core(struct kvmppc_vcore *vc)
{
struct kvm_vcpu *vcpu, *vcpu0, *vnext;
struct kvm_vcpu *vcpu, *vnext;
long ret;
u64 now;
int ptid, i, need_vpa_update;
int i, need_vpa_update;
int srcu_idx;
struct kvm_vcpu *vcpus_to_update[threads_per_core];
......@@ -1245,25 +1249,6 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
spin_lock(&vc->lock);
}
/*
* Assign physical thread IDs, first to non-ceded vcpus
* and then to ceded ones.
*/
ptid = 0;
vcpu0 = NULL;
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
if (!vcpu->arch.ceded) {
if (!ptid)
vcpu0 = vcpu;
vcpu->arch.ptid = ptid++;
}
}
if (!vcpu0)
goto out; /* nothing to run; should never happen */
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
if (vcpu->arch.ceded)
vcpu->arch.ptid = ptid++;
/*
* Make sure we are running on thread 0, and that
* secondary threads are offline.
......@@ -1280,15 +1265,19 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_create_dtl_entry(vcpu, vc);
}
/* Set this explicitly in case thread 0 doesn't have a vcpu */
get_paca()->kvm_hstate.kvm_vcore = vc;
get_paca()->kvm_hstate.ptid = 0;
vc->vcore_state = VCORE_RUNNING;
preempt_disable();
spin_unlock(&vc->lock);
kvm_guest_enter();
srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);
srcu_idx = srcu_read_lock(&vc->kvm->srcu);
__kvmppc_vcore_entry(NULL, vcpu0);
__kvmppc_vcore_entry();
spin_lock(&vc->lock);
/* disable sending of IPIs on virtual external irqs */
......@@ -1303,7 +1292,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
vc->vcore_state = VCORE_EXITING;
spin_unlock(&vc->lock);
srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx);
srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
/* make sure updates to secondary vcpu structs are visible now */
smp_mb();
......@@ -1411,7 +1400,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (!signal_pending(current)) {
if (vc->vcore_state == VCORE_RUNNING &&
VCORE_EXIT_COUNT(vc) == 0) {
vcpu->arch.ptid = vc->n_runnable - 1;
kvmppc_create_dtl_entry(vcpu, vc);
kvmppc_start_thread(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
......
......@@ -35,7 +35,7 @@
****************************************************************************/
/* Registers:
* r4: vcpu pointer
* none
*/
_GLOBAL(__kvmppc_vcore_entry)
......@@ -71,7 +71,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtmsrd r10,1
/* Save host PMU registers */
/* R4 is live here (vcpu pointer) but not r3 or r5 */
li r3, 1
sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
mfspr r7, SPRN_MMCR0 /* save MMCR0 */
......@@ -136,16 +135,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
* enters the guest with interrupts enabled.
*/
BEGIN_FTR_SECTION
ld r4, HSTATE_KVM_VCPU(r13)
ld r0, VCPU_PENDING_EXC(r4)
li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
and. r0, r0, r7
beq 32f
mr r31, r4
lhz r3, PACAPACAINDEX(r13)
bl smp_send_reschedule
nop
mr r4, r31
32:
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
#endif /* CONFIG_SMP */
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment