Commit a56ee9f8 authored by Yongji Xie, committed by Paul Mackerras

KVM: PPC: Book3S HV: Add a per vcpu cache for recently page faulted MMIO entries

This keeps a per-vcpu cache of recently page-faulted MMIO entries.
On a page fault, if the entry exists in the cache, we can avoid some
time-consuming paths, for example looking up the HPT, locking the HPTE
twice and searching for the MMIO gfn in the memslots, and instead call
kvmppc_hv_emulate_mmio() directly.

In the current implementation, we limit the size of the cache to four
entries. We think that is enough to cover the high-frequency MMIO HPTEs
in most cases. For example, consider the case of using virtio devices:
for virtio legacy devices, one HPTE can handle notifications from up to
1024 (64K page / 64-byte port I/O register) devices, so one cache entry
is enough; for virtio modern devices, we always need one HPTE per device
to handle notifications, because a modern device uses an 8M MMIO
register to notify the host instead of a port I/O register. A typical
system configuration should not exceed four virtio devices per vcpu, so
four cache entries are also enough in this case. Of course, if needed,
we could also turn the macro into a module parameter in the future.
Signed-off-by: Yongji Xie <xyjxie@linux.vnet.ibm.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
parent f0585982
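
For readers skimming the diff below, here is a minimal, self-contained sketch of the caching scheme the commit message describes. The names (mmio_cache, cache_lookup, cache_fill, generation) and the plain C types are illustrative stand-ins, not the kernel code; the real versions are struct mmio_hpte_cache, mmio_cache_search(), next_mmio_cache_entry() and the kvm->arch.mmio_update counter in the diff itself.

/*
 * Simplified sketch of a per-vcpu MMIO translation cache with a
 * global generation counter for invalidation.  Plain C, no kernel
 * atomics or locking; names and types are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

#define MMIO_CACHE_SIZE 4

struct mmio_cache_entry {
	uint64_t eaddr;		/* faulting effective address */
	uint64_t slb_v;		/* SLB entry the translation came from */
	uint64_t rpte;		/* guest real-mode PTE (holds the gpa) */
	int64_t  generation;	/* value of the global counter at fill time */
	unsigned int pshift;	/* base page shift of the segment */
};

struct mmio_cache {
	struct mmio_cache_entry entry[MMIO_CACHE_SIZE];
	unsigned int next;	/* round-robin replacement index */
};

/*
 * Lookup: an entry only hits if its generation matches the current
 * global counter, so bumping the counter invalidates every entry at once.
 */
static struct mmio_cache_entry *
cache_lookup(struct mmio_cache *c, uint64_t eaddr, uint64_t slb_v,
	     int64_t generation)
{
	unsigned int i;

	for (i = 0; i < MMIO_CACHE_SIZE; i++) {
		struct mmio_cache_entry *e = &c->entry[i];

		if (e->generation == generation &&
		    e->slb_v == slb_v &&
		    (e->eaddr >> e->pshift) == (eaddr >> e->pshift))
			return e;
	}
	return NULL;
}

/* Fill: overwrite the oldest slot (simple round robin, no LRU). */
static struct mmio_cache_entry *
cache_fill(struct mmio_cache *c, uint64_t eaddr, uint64_t slb_v,
	   uint64_t rpte, unsigned int pshift, int64_t generation)
{
	struct mmio_cache_entry *e = &c->entry[c->next];

	c->next = (c->next + 1) % MMIO_CACHE_SIZE;
	*e = (struct mmio_cache_entry){
		.eaddr = eaddr, .slb_v = slb_v, .rpte = rpte,
		.pshift = pshift, .generation = generation,
	};
	return e;
}

int main(void)
{
	struct mmio_cache c = { 0 };
	int64_t generation = 0;	/* stands in for kvm->arch.mmio_update */

	cache_fill(&c, 0xd0000000, 0x400, 0x123456789000, 16, generation);

	/* Hit: same 64K page, same segment, same generation. */
	printf("hit: %p\n",
	       (void *)cache_lookup(&c, 0xd0000040, 0x400, generation));

	/* A memslot update or MMIO HPTE removal bumps the generation ... */
	generation++;

	/* ... so every cached translation now misses. */
	printf("after invalidate: %p\n",
	       (void *)cache_lookup(&c, 0xd0000040, 0x400, generation));
	return 0;
}

The point of the generation counter is that invalidation is O(1): any path that removes or re-protects an MMIO HPTE, or changes the memslot layout, just increments the counter, and every cached translation on every vcpu becomes stale at its next lookup.
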
@@ -246,6 +246,7 @@ struct kvm_arch {
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	unsigned long hpt_virt;
 	struct revmap_entry *revmap;
+	atomic64_t mmio_update;
 	unsigned int host_lpid;
 	unsigned long host_lpcr;
 	unsigned long sdr1;
@@ -408,6 +409,24 @@ struct kvmppc_passthru_irqmap {
 #define KVMPPC_IRQ_MPIC		1
 #define KVMPPC_IRQ_XICS		2
 
+#define MMIO_HPTE_CACHE_SIZE	4
+
+struct mmio_hpte_cache_entry {
+	unsigned long hpte_v;
+	unsigned long hpte_r;
+	unsigned long rpte;
+	unsigned long pte_index;
+	unsigned long eaddr;
+	unsigned long slb_v;
+	long mmio_update;
+	unsigned int slb_base_pshift;
+};
+
+struct mmio_hpte_cache {
+	struct mmio_hpte_cache_entry entry[MMIO_HPTE_CACHE_SIZE];
+	unsigned int index;
+};
+
 struct openpic;
 
 struct kvm_vcpu_arch {
@@ -655,9 +674,11 @@ struct kvm_vcpu_arch {
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	struct kvm_vcpu_arch_shared shregs;
 
+	struct mmio_hpte_cache mmio_cache;
 	unsigned long pgfault_addr;
 	long pgfault_index;
 	unsigned long pgfault_hpte[2];
+	struct mmio_hpte_cache_entry *pgfault_cache;
 
 	struct task_struct *run_task;
 	struct kvm_run *kvm_run;
@@ -88,6 +88,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 	/* 128 (2**7) bytes in each HPTEG */
 	kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
 
+	atomic64_set(&kvm->arch.mmio_update, 0);
+
 	/* Allocate reverse map array */
 	rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
 	if (!rev) {
@@ -451,6 +453,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	unsigned int writing, write_ok;
 	struct vm_area_struct *vma;
 	unsigned long rcbits;
+	long mmio_update;
 
 	/*
 	 * Real-mode code has already searched the HPT and found the
@@ -460,6 +463,19 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 */
 	if (ea != vcpu->arch.pgfault_addr)
 		return RESUME_GUEST;
+
+	if (vcpu->arch.pgfault_cache) {
+		mmio_update = atomic64_read(&kvm->arch.mmio_update);
+		if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) {
+			r = vcpu->arch.pgfault_cache->rpte;
+			psize = hpte_page_size(vcpu->arch.pgfault_hpte[0], r);
+			gpa_base = r & HPTE_R_RPN & ~(psize - 1);
+			gfn_base = gpa_base >> PAGE_SHIFT;
+			gpa = gpa_base | (ea & (psize - 1));
+			return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
+						dsisr & DSISR_ISSTORE);
+		}
+	}
 	index = vcpu->arch.pgfault_index;
 	hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
 	rev = &kvm->arch.revmap[index];
@@ -2970,6 +2970,15 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
 
+	/*
+	 * If we are making a new memslot, it might make
+	 * some address that was previously cached as emulated
+	 * MMIO be no longer emulated MMIO, so invalidate
+	 * all the caches of emulated MMIO translations.
+	 */
+	if (npages)
+		atomic64_inc(&kvm->arch.mmio_update);
+
 	if (npages && old->npages) {
 		/*
 		 * If modifying a memslot, reset all the rmap dirty bits.
@@ -390,6 +390,13 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 #define LOCK_TOKEN	(*(u32 *)(&get_paca()->paca_index))
 #endif
 
+static inline int is_mmio_hpte(unsigned long v, unsigned long r)
+{
+	return ((v & HPTE_V_ABSENT) &&
+		(r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
+		(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
+}
+
 static inline int try_lock_tlbie(unsigned int *lock)
 {
 	unsigned int tmp, old;
@@ -456,6 +463,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 
 	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 	v = pte & ~HPTE_V_HVLOCK;
+	pte = be64_to_cpu(hpte[1]);
 	if (v & HPTE_V_VALID) {
 		hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
 		rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index);
@@ -476,6 +484,9 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 		note_hpte_modification(kvm, rev);
 	unlock_hpte(hpte, 0);
 
+	if (is_mmio_hpte(v, pte))
+		atomic64_inc(&kvm->arch.mmio_update);
+
 	if (v & HPTE_V_ABSENT)
 		v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
 	hpret[0] = v;
@@ -502,7 +513,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 	int global;
 	long int ret = H_SUCCESS;
 	struct revmap_entry *rev, *revs[4];
-	u64 hp0;
+	u64 hp0, hp1;
 
 	global = global_invalidates(kvm, 0);
 	for (i = 0; i < 4 && ret == H_SUCCESS; ) {
@@ -535,6 +546,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 		}
 		found = 0;
 		hp0 = be64_to_cpu(hp[0]);
+		hp1 = be64_to_cpu(hp[1]);
 		if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
 			switch (flags & 3) {
 			case 0:		/* absolute */
@@ -565,6 +577,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
 			args[j] |= rcbits << (56 - 5);
 			hp[0] = 0;
+			if (is_mmio_hpte(hp0, hp1))
+				atomic64_inc(&kvm->arch.mmio_update);
 			continue;
 		}
@@ -625,6 +639,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 	}
 
 	v = pte;
+	pte = be64_to_cpu(hpte[1]);
 	bits = (flags << 55) & HPTE_R_PP0;
 	bits |= (flags << 48) & HPTE_R_KEY_HI;
 	bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
@@ -646,7 +661,6 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 		 * readonly to writable.  If it should be writable, we'll
 		 * take a trap and let the page fault code sort it out.
 		 */
-		pte = be64_to_cpu(hpte[1]);
 		r = (pte & ~mask) | bits;
 		if (hpte_is_writable(r) && !hpte_is_writable(pte))
 			r = hpte_make_readonly(r);
@@ -662,6 +676,9 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 	}
 	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
 	asm volatile("ptesync" : : : "memory");
+	if (is_mmio_hpte(v, pte))
+		atomic64_inc(&kvm->arch.mmio_update);
+
 	return H_SUCCESS;
 }
@@ -832,6 +849,37 @@ static int slb_base_page_shift[4] = {
 	20,	/* 1M, unsupported */
 };
 
+static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu,
+		unsigned long eaddr, unsigned long slb_v, long mmio_update)
+{
+	struct mmio_hpte_cache_entry *entry = NULL;
+	unsigned int pshift;
+	unsigned int i;
+
+	for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) {
+		entry = &vcpu->arch.mmio_cache.entry[i];
+		if (entry->mmio_update == mmio_update) {
+			pshift = entry->slb_base_pshift;
+			if ((entry->eaddr >> pshift) == (eaddr >> pshift) &&
+			    entry->slb_v == slb_v)
+				return entry;
+		}
+	}
+	return NULL;
+}
+
+static struct mmio_hpte_cache_entry *
+			next_mmio_cache_entry(struct kvm_vcpu *vcpu)
+{
+	unsigned int index = vcpu->arch.mmio_cache.index;
+
+	vcpu->arch.mmio_cache.index++;
+	if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE)
+		vcpu->arch.mmio_cache.index = 0;
+
+	return &vcpu->arch.mmio_cache.entry[index];
+}
+
 /* When called from virtmode, this func should be protected by
  * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK
  * can trigger deadlock issue.
@@ -933,25 +981,36 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 	unsigned long valid;
 	struct revmap_entry *rev;
 	unsigned long pp, key;
+	struct mmio_hpte_cache_entry *cache_entry = NULL;
+	long mmio_update = 0;
 
 	/* For protection fault, expect to find a valid HPTE */
 	valid = HPTE_V_VALID;
-	if (status & DSISR_NOHPTE)
+	if (status & DSISR_NOHPTE) {
 		valid |= HPTE_V_ABSENT;
-
-	index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
-	if (index < 0) {
-		if (status & DSISR_NOHPTE)
-			return status;	/* there really was no HPTE */
-		return 0;		/* for prot fault, HPTE disappeared */
+		mmio_update = atomic64_read(&kvm->arch.mmio_update);
+		cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update);
 	}
-	hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
-	v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
-	r = be64_to_cpu(hpte[1]);
-	rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
-	gr = rev->guest_rpte;
-
-	unlock_hpte(hpte, v);
+	if (cache_entry) {
+		index = cache_entry->pte_index;
+		v = cache_entry->hpte_v;
+		r = cache_entry->hpte_r;
+		gr = cache_entry->rpte;
+	} else {
+		index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
+		if (index < 0) {
+			if (status & DSISR_NOHPTE)
+				return status;	/* there really was no HPTE */
+			return 0;	/* for prot fault, HPTE disappeared */
+		}
+		hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
+		v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
+		r = be64_to_cpu(hpte[1]);
+		rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
+		gr = rev->guest_rpte;
+
+		unlock_hpte(hpte, v);
+	}
 
 	/* For not found, if the HPTE is valid by now, retry the instruction */
 	if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
@@ -989,12 +1048,32 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 	vcpu->arch.pgfault_index = index;
 	vcpu->arch.pgfault_hpte[0] = v;
 	vcpu->arch.pgfault_hpte[1] = r;
+	vcpu->arch.pgfault_cache = cache_entry;
 
 	/* Check the storage key to see if it is possibly emulated MMIO */
-	if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
-	    (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
-	    (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
-		return -2;	/* MMIO emulation - load instr word */
+	if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
+	    (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) {
+		if (!cache_entry) {
+			unsigned int pshift = 12;
+			unsigned int pshift_index;
+
+			if (slb_v & SLB_VSID_L) {
+				pshift_index = ((slb_v & SLB_VSID_LP) >> 4);
+				pshift = slb_base_page_shift[pshift_index];
+			}
+			cache_entry = next_mmio_cache_entry(vcpu);
+			cache_entry->eaddr = addr;
+			cache_entry->slb_base_pshift = pshift;
+			cache_entry->pte_index = index;
+			cache_entry->hpte_v = v;
+			cache_entry->hpte_r = r;
+			cache_entry->rpte = gr;
+			cache_entry->slb_v = slb_v;
+			cache_entry->mmio_update = mmio_update;
+		}
+		if (data && (vcpu->arch.shregs.msr & MSR_IR))
+			return -2;	/* MMIO emulation - load instr word */
+	}
 
 	return -1;		/* send fault up to host kernel mode */
 }
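
As an aside on the fast path added to kvmppc_book3s_hv_page_fault() above: when the cached entry is still current, the guest physical address is rebuilt purely from the cached rpte plus the faulting effective address. A standalone illustration of that arithmetic, using made-up values and an illustrative RPN mask (not the kernel's HPTE_R_RPN definition), assuming a 64K page:

#include <stdint.h>
#include <stdio.h>

/* Mask for the real-page-number field of the second HPTE doubleword;
 * the value here is illustrative, not copied from the kernel headers. */
#define RPN_MASK 0x0ffffffffffff000ULL

int main(void)
{
	uint64_t rpte  = 0x00000002000f0196ULL; /* made-up cached guest rpte */
	uint64_t ea    = 0xd0000000000100c8ULL; /* made-up faulting address  */
	uint64_t psize = 0x10000;               /* 64K page mapped by HPTE   */

	/* gpa_base = page frame from the rpte, aligned to the page size */
	uint64_t gpa_base = rpte & RPN_MASK & ~(psize - 1);
	/* gpa = page frame plus the offset of the access within the page */
	uint64_t gpa = gpa_base | (ea & (psize - 1));

	printf("gpa = 0x%016llx\n", (unsigned long long)gpa);
	return 0;
}
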