Commit 6d6ab940 authored by Paolo Bonzini's avatar Paolo Bonzini

Merge branch 'kvm-ppc-next' of...

Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD

Apart from various bugfixes and code cleanups, the major new feature
is the ability to run guests using the hashed page table (HPT) MMU
mode on a host that is using the radix MMU mode.  Because of limitations
in the current POWER9 chip (all SMT threads in each core must use the
same MMU mode, HPT or radix), this requires the host to be configured
to run similar to POWER8: the host runs in single-threaded mode (only
thread 0 of each core online), and have KVM be able to wake up the other
threads when a KVM guest is to be run, and use the other threads for
running guest VCPUs.  A new module parameter, called "indep_threads_mode",
is normally Y on POWER9 but must be set to N before any HPT guests can
be run on a radix host:

    # echo N >/sys/module/kvm_hv/parameters/indep_threads_mode
    # ppc64_cpu --smt=off
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parents 9ffd986c c0101509
......@@ -216,7 +216,8 @@ extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa,
bool writing, bool *writable);
extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
unsigned long *rmap, long pte_index, int realmode);
extern void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize);
extern void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot,
unsigned long gfn, unsigned long psize);
extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
unsigned long pte_index);
void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
......
......@@ -20,6 +20,8 @@
#ifndef __ASM_KVM_BOOK3S_64_H__
#define __ASM_KVM_BOOK3S_64_H__
#include <linux/string.h>
#include <asm/bitops.h>
#include <asm/book3s/64/mmu-hash.h>
/* Power architecture requires HPT is at least 256kiB, at most 64TiB */
......@@ -107,18 +109,96 @@ static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
hpte[0] = cpu_to_be64(hpte_v);
}
/*
* These functions encode knowledge of the POWER7/8/9 hardware
* interpretations of the HPTE LP (large page size) field.
*/
static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
{
unsigned int lphi;
if (!(h & HPTE_V_LARGE))
return 12; /* 4kB */
lphi = (l >> 16) & 0xf;
switch ((l >> 12) & 0xf) {
case 0:
return !lphi ? 24 : -1; /* 16MB */
break;
case 1:
return 16; /* 64kB */
break;
case 3:
return !lphi ? 34 : -1; /* 16GB */
break;
case 7:
return (16 << 8) + 12; /* 64kB in 4kB */
break;
case 8:
if (!lphi)
return (24 << 8) + 16; /* 16MB in 64kkB */
if (lphi == 3)
return (24 << 8) + 12; /* 16MB in 4kB */
break;
}
return -1;
}
static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
{
return kvmppc_hpte_page_shifts(h, l) & 0xff;
}
static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l)
{
int tmp = kvmppc_hpte_page_shifts(h, l);
if (tmp >= 0x100)
tmp >>= 8;
return tmp;
}
static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
{
return 1ul << kvmppc_hpte_actual_page_shift(v, r);
}
static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
{
switch (base_shift) {
case 12:
switch (actual_shift) {
case 12:
return 0;
case 16:
return 7;
case 24:
return 0x38;
}
break;
case 16:
switch (actual_shift) {
case 16:
return 1;
case 24:
return 8;
}
break;
case 24:
return 0;
}
return -1;
}
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
unsigned long pte_index)
{
int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
unsigned int penc;
int a_pgshift, b_pgshift;
unsigned long rb = 0, va_low, sllp;
unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
if (v & HPTE_V_LARGE) {
i = hpte_page_sizes[lp];
b_psize = i & 0xf;
a_psize = i >> 4;
b_pgshift = a_pgshift = kvmppc_hpte_page_shifts(v, r);
if (a_pgshift >= 0x100) {
b_pgshift &= 0xff;
a_pgshift >>= 8;
}
/*
......@@ -152,37 +232,33 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
va_low ^= v >> (SID_SHIFT_1T - 16);
va_low &= 0x7ff;
switch (b_psize) {
case MMU_PAGE_4K:
sllp = get_sllp_encoding(a_psize);
rb |= sllp << 5; /* AP field */
if (b_pgshift == 12) {
if (a_pgshift > 12) {
sllp = (a_pgshift == 16) ? 5 : 4;
rb |= sllp << 5; /* AP field */
}
rb |= (va_low & 0x7ff) << 12; /* remaining 11 bits of AVA */
break;
default:
{
} else {
int aval_shift;
/*
* remaining bits of AVA/LP fields
* Also contain the rr bits of LP
*/
rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000;
rb |= (va_low << b_pgshift) & 0x7ff000;
/*
* Now clear not needed LP bits based on actual psize
*/
rb &= ~((1ul << mmu_psize_defs[a_psize].shift) - 1);
rb &= ~((1ul << a_pgshift) - 1);
/*
* AVAL field 58..77 - base_page_shift bits of va
* we have space for 58..64 bits, Missing bits should
* be zero filled. +1 is to take care of L bit shift
*/
aval_shift = 64 - (77 - mmu_psize_defs[b_psize].shift) + 1;
aval_shift = 64 - (77 - b_pgshift) + 1;
rb |= ((va_low << aval_shift) & 0xfe);
rb |= 1; /* L field */
penc = mmu_psize_defs[b_psize].penc[a_psize];
rb |= penc << 12; /* LP field */
break;
}
rb |= r & 0xff000 & ((1ul << a_pgshift) - 1); /* LP field */
}
rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */
return rb;
......@@ -370,6 +446,28 @@ static inline unsigned long kvmppc_hpt_mask(struct kvm_hpt_info *hpt)
return (1UL << (hpt->order - 7)) - 1;
}
/* Set bits in a dirty bitmap, which is in LE format */
static inline void set_dirty_bits(unsigned long *map, unsigned long i,
unsigned long npages)
{
if (npages >= 8)
memset((char *)map + i / 8, 0xff, npages / 8);
else
for (; npages; ++i, --npages)
__set_bit_le(i, map);
}
static inline void set_dirty_bits_atomic(unsigned long *map, unsigned long i,
unsigned long npages)
{
if (npages >= 8)
memset((char *)map + i / 8, 0xff, npages / 8);
else
for (; npages; ++i, --npages)
set_bit_le(i, map);
}
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#endif /* __ASM_KVM_BOOK3S_64_H__ */
......@@ -82,6 +82,16 @@ struct kvm_split_mode {
u8 do_nap;
u8 napped[MAX_SMT_THREADS];
struct kvmppc_vcore *vc[MAX_SUBCORES];
/* Bits for changing lpcr on P9 */
unsigned long lpcr_req;
unsigned long lpidr_req;
unsigned long host_lpcr;
u32 do_set;
u32 do_restore;
union {
u32 allphases;
u8 phase[4];
} lpcr_sync;
};
/*
......@@ -104,14 +114,11 @@ struct kvmppc_host_state {
u8 napping;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* hwthread_req/hwthread_state pair is used to pull sibling threads
* out of guest on pre-ISAv3.0B CPUs where threads share MMU.
*/
u8 hwthread_req;
u8 hwthread_state;
u8 host_ipi;
u8 ptid;
u8 ptid; /* thread number within subcore when split */
u8 tid; /* thread number within whole core */
struct kvm_vcpu *kvm_vcpu;
struct kvmppc_vcore *kvm_vcore;
void __iomem *xics_phys;
......
......@@ -235,10 +235,7 @@ struct revmap_entry {
*/
#define KVMPPC_RMAP_LOCK_BIT 63
#define KVMPPC_RMAP_RC_SHIFT 32
#define KVMPPC_RMAP_CHG_SHIFT 48
#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_CHG_ORDER (0x3ful << KVMPPC_RMAP_CHG_SHIFT)
#define KVMPPC_RMAP_PRESENT 0x100000000ul
#define KVMPPC_RMAP_INDEX 0xfffffffful
......@@ -276,7 +273,7 @@ struct kvm_arch {
int tlbie_lock;
unsigned long lpcr;
unsigned long vrma_slb_v;
int hpte_setup_done;
int mmu_ready;
atomic_t vcpus_running;
u32 online_vcores;
atomic_t hpte_mod_interest;
......@@ -284,6 +281,7 @@ struct kvm_arch {
cpumask_t cpu_in_guest;
u8 radix;
u8 fwnmi_enabled;
bool threads_indep;
pgd_t *pgtable;
u64 process_table;
struct dentry *debugfs_dir;
......
......@@ -168,6 +168,7 @@ extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
extern void kvmppc_rmap_reset(struct kvm *kvm);
extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
......@@ -177,6 +178,8 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
struct iommu_group *grp);
extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
struct iommu_group *grp);
extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce_64 *args);
......
......@@ -642,6 +642,7 @@ int main(void)
HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_PTID, ptid);
HSTATE_FIELD(HSTATE_TID, tid);
HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]);
......@@ -667,6 +668,8 @@ int main(void)
OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar);
OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap);
OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped);
OFFSET(KVM_SPLIT_DO_SET, kvm_split_mode, do_set);
OFFSET(KVM_SPLIT_DO_RESTORE, kvm_split_mode, do_restore);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_PPC_BOOK3S_64
......
......@@ -319,20 +319,13 @@ enter_winkle:
/*
* r3 - PSSCR value corresponding to the requested stop state.
*/
power_enter_stop:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
power_enter_stop_kvm_rm:
/*
* This is currently unused because POWER9 KVM does not have to
* gather secondary threads into sibling mode, but the code is
* here in case that function is required.
*
* Tell KVM we're entering idle.
*/
/* Tell KVM we're entering idle */
li r4,KVM_HWTHREAD_IN_IDLE
/* DO THIS IN REAL MODE! See comment above. */
stb r4,HSTATE_HWTHREAD_STATE(r13)
#endif
power_enter_stop:
/*
* Check if we are executing the lite variant with ESL=EC=0
*/
......@@ -496,18 +489,6 @@ pnv_powersave_wakeup_mce:
b pnv_powersave_wakeup
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
kvm_start_guest_check:
li r0,KVM_HWTHREAD_IN_KERNEL
stb r0,HSTATE_HWTHREAD_STATE(r13)
/* Order setting hwthread_state vs. testing hwthread_req */
sync
lbz r0,HSTATE_HWTHREAD_REQ(r13)
cmpwi r0,0
beqlr
b kvm_start_guest
#endif
/*
* Called from reset vector for powersave wakeups.
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
......@@ -532,9 +513,15 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mr r3,r12
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
BEGIN_FTR_SECTION
bl kvm_start_guest_check
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
li r0,KVM_HWTHREAD_IN_KERNEL
stb r0,HSTATE_HWTHREAD_STATE(r13)
/* Order setting hwthread_state vs. testing hwthread_req */
sync
lbz r0,HSTATE_HWTHREAD_REQ(r13)
cmpwi r0,0
beq 1f
b kvm_start_guest
1:
#endif
/* Return SRR1 from power7_nap() */
......
This diff is collapsed.
......@@ -474,26 +474,6 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
return ret;
}
static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn, unsigned int order)
{
unsigned long i, limit;
unsigned long *dp;
if (!memslot->dirty_bitmap)
return;
limit = 1ul << order;
if (limit < BITS_PER_LONG) {
for (i = 0; i < limit; ++i)
mark_page_dirty(kvm, gfn + i);
return;
}
dp = memslot->dirty_bitmap + (gfn - memslot->base_gfn);
limit /= BITS_PER_LONG;
for (i = 0; i < limit; ++i)
*dp++ = ~0ul;
}
/* Called with kvm->lock held */
int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn)
......@@ -508,12 +488,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
gpa, shift);
kvmppc_radix_tlbie_page(kvm, gpa, shift);
if (old & _PAGE_DIRTY) {
if (!shift)
mark_page_dirty(kvm, gfn);
else
mark_pages_dirty(kvm, memslot,
gfn, shift - PAGE_SHIFT);
if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
unsigned long npages = 1;
if (shift)
npages = 1ul << (shift - PAGE_SHIFT);
kvmppc_update_dirty_map(memslot, gfn, npages);
}
}
return 0;
......@@ -579,20 +558,8 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
struct kvm_memory_slot *memslot, unsigned long *map)
{
unsigned long i, j;
unsigned long n, *p;
int npages;
/*
* Radix accumulates dirty bits in the first half of the
* memslot's dirty_bitmap area, for when pages are paged
* out or modified by the host directly. Pick up these
* bits and add them to the map.
*/
n = kvm_dirty_bitmap_bytes(memslot) / sizeof(long);
p = memslot->dirty_bitmap;
for (i = 0; i < n; ++i)
map[i] |= xchg(&p[i], 0);
for (i = 0; i < memslot->npages; i = j) {
npages = kvm_radix_test_clear_dirty(kvm, memslot, i);
......@@ -604,9 +571,10 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
* real address, if npages > 1 we can skip to i + npages.
*/
j = i + 1;
if (npages)
for (j = i; npages; ++j, --npages)
__set_bit_le(j, map);
if (npages) {
set_dirty_bits(map, i, npages);
i = j + npages;
}
}
return 0;
}
......@@ -694,6 +662,7 @@ void kvmppc_free_radix(struct kvm *kvm)
pgd_clear(pgd);
}
pgd_free(kvm->mm, kvm->arch.pgtable);
kvm->arch.pgtable = NULL;
}
static void pte_ctor(void *addr)
......
......@@ -113,7 +113,7 @@ slb_do_enter:
/* Remove all SLB entries that are in use. */
li r0, r0
li r0, 0
slbmte r0, r0
slbia
......
This diff is collapsed.
......@@ -278,7 +278,8 @@ void kvmhv_commence_exit(int trap)
struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
int ptid = local_paca->kvm_hstate.ptid;
struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
int me, ee, i;
int me, ee, i, t;
int cpu0;
/* Set our bit in the threads-exiting-guest map in the 0xff00
bits of vcore->entry_exit_map */
......@@ -320,6 +321,22 @@ void kvmhv_commence_exit(int trap)
if ((ee >> 8) == 0)
kvmhv_interrupt_vcore(vc, ee);
}
/*
* On POWER9 when running a HPT guest on a radix host (sip != NULL),
* we have to interrupt inactive CPU threads to get them to
* restore the host LPCR value.
*/
if (sip->lpcr_req) {
if (cmpxchg(&sip->do_restore, 0, 1) == 0) {
vc = local_paca->kvm_hstate.kvm_vcore;
cpu0 = vc->pcpu + ptid - local_paca->kvm_hstate.tid;
for (t = 1; t < threads_per_core; ++t) {
if (sip->napped[t])
kvmhv_rm_send_ipi(cpu0 + t);
}
}
}
}
struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
......@@ -529,6 +546,8 @@ static inline bool is_rm(void)
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
if (xive_enabled()) {
if (is_rm())
return xive_rm_h_xirr(vcpu);
......@@ -541,6 +560,8 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
vcpu->arch.gpr[5] = get_tb();
if (xive_enabled()) {
if (is_rm())
......@@ -554,6 +575,8 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
if (xive_enabled()) {
if (is_rm())
return xive_rm_h_ipoll(vcpu, server);
......@@ -567,6 +590,8 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
if (xive_enabled()) {
if (is_rm())
return xive_rm_h_ipi(vcpu, server, mfrr);
......@@ -579,6 +604,8 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
if (xive_enabled()) {
if (is_rm())
return xive_rm_h_cppr(vcpu, cppr);
......@@ -591,6 +618,8 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
if (xive_enabled()) {
if (is_rm())
return xive_rm_h_eoi(vcpu, xirr);
......@@ -601,3 +630,89 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
return xics_rm_h_eoi(vcpu, xirr);
}
#endif /* CONFIG_KVM_XICS */
void kvmppc_bad_interrupt(struct pt_regs *regs)
{
die("Bad interrupt in KVM entry/exit code", regs, SIGABRT);
panic("Bad KVM trap");
}
/*
* Functions used to switch LPCR HR and UPRT bits on all threads
* when entering and exiting HPT guests on a radix host.
*/
#define PHASE_REALMODE 1 /* in real mode */
#define PHASE_SET_LPCR 2 /* have set LPCR */
#define PHASE_OUT_OF_GUEST 4 /* have finished executing in guest */
#define PHASE_RESET_LPCR 8 /* have reset LPCR to host value */
#define ALL(p) (((p) << 24) | ((p) << 16) | ((p) << 8) | (p))
static void wait_for_sync(struct kvm_split_mode *sip, int phase)
{
int thr = local_paca->kvm_hstate.tid;
sip->lpcr_sync.phase[thr] |= phase;
phase = ALL(phase);
while ((sip->lpcr_sync.allphases & phase) != phase) {
HMT_low();
barrier();
}
HMT_medium();
}
void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip)
{
unsigned long rb, set;
/* wait for every other thread to get to real mode */
wait_for_sync(sip, PHASE_REALMODE);
/* Set LPCR and LPIDR */
mtspr(SPRN_LPCR, sip->lpcr_req);
mtspr(SPRN_LPID, sip->lpidr_req);
isync();
/* Invalidate the TLB on thread 0 */
if (local_paca->kvm_hstate.tid == 0) {
sip->do_set = 0;
asm volatile("ptesync" : : : "memory");
for (set = 0; set < POWER9_TLB_SETS_RADIX; ++set) {
rb = TLBIEL_INVAL_SET_LPID +
(set << TLBIEL_INVAL_SET_SHIFT);
asm volatile(PPC_TLBIEL(%0, %1, 0, 0, 0) : :
"r" (rb), "r" (0));
}
asm volatile("ptesync" : : : "memory");
}
/* indicate that we have done so and wait for others */
wait_for_sync(sip, PHASE_SET_LPCR);
/* order read of sip->lpcr_sync.allphases vs. sip->do_set */
smp_rmb();
}
/*
* Called when a thread that has been in the guest needs
* to reload the host LPCR value - but only on POWER9 when
* running a HPT guest on a radix host.
*/
void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
{
/* we're out of the guest... */
wait_for_sync(sip, PHASE_OUT_OF_GUEST);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_LPCR, sip->host_lpcr);
isync();
if (local_paca->kvm_hstate.tid == 0) {
sip->do_restore = 0;
smp_wmb(); /* order store of do_restore vs. phase */
}
wait_for_sync(sip, PHASE_RESET_LPCR);
smp_mb();
local_paca->kvm_hstate.kvm_split_mode = NULL;
}
......@@ -107,30 +107,50 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
/* Update the changed page order field of an rmap entry */
void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
/* Update the dirty bitmap of a memslot */
void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot,
unsigned long gfn, unsigned long psize)
{
unsigned long order;
unsigned long npages;
if (!psize)
if (!psize || !memslot->dirty_bitmap)
return;
order = ilog2(psize);
order <<= KVMPPC_RMAP_CHG_SHIFT;
if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER))
*rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order;
npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE;
gfn -= memslot->base_gfn;
set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages);
}
EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map);
static void kvmppc_set_dirty_from_hpte(struct kvm *kvm,
unsigned long hpte_v, unsigned long hpte_gr)
{
struct kvm_memory_slot *memslot;
unsigned long gfn;
unsigned long psize;
psize = kvmppc_actual_pgsz(hpte_v, hpte_gr);
gfn = hpte_rpn(hpte_gr, psize);
memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
if (memslot && memslot->dirty_bitmap)
kvmppc_update_dirty_map(memslot, gfn, psize);
}
EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);
/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
unsigned long hpte_gr)
unsigned long hpte_gr,
struct kvm_memory_slot **memslotp,
unsigned long *gfnp)
{
struct kvm_memory_slot *memslot;
unsigned long *rmap;
unsigned long gfn;
gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr));
gfn = hpte_rpn(hpte_gr, kvmppc_actual_pgsz(hpte_v, hpte_gr));
memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
if (memslotp)
*memslotp = memslot;
if (gfnp)
*gfnp = gfn;
if (!memslot)
return NULL;
......@@ -147,10 +167,12 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
unsigned long ptel, head;
unsigned long *rmap;
unsigned long rcbits;
struct kvm_memory_slot *memslot;
unsigned long gfn;
rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
ptel = rev->guest_rpte |= rcbits;
rmap = revmap_for_hpte(kvm, hpte_v, ptel);
rmap = revmap_for_hpte(kvm, hpte_v, ptel, &memslot, &gfn);
if (!rmap)
return;
lock_rmap(rmap);
......@@ -169,7 +191,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
}
*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
if (rcbits & HPTE_R_C)
kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r));
kvmppc_update_dirty_map(memslot, gfn,
kvmppc_actual_pgsz(hpte_v, hpte_r));
unlock_rmap(rmap);
}
......@@ -193,7 +216,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
if (kvm_is_radix(kvm))
return H_FUNCTION;
psize = hpte_page_size(pteh, ptel);
psize = kvmppc_actual_pgsz(pteh, ptel);
if (!psize)
return H_PARAMETER;
writing = hpte_is_writable(ptel);
......@@ -797,7 +820,7 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
gr |= r & (HPTE_R_R | HPTE_R_C);
if (r & HPTE_R_R) {
kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
rmap = revmap_for_hpte(kvm, v, gr);
rmap = revmap_for_hpte(kvm, v, gr, NULL, NULL);
if (rmap) {
lock_rmap(rmap);
*rmap |= KVMPPC_RMAP_REFERENCED;
......@@ -819,7 +842,6 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
__be64 *hpte;
unsigned long v, r, gr;
struct revmap_entry *rev;
unsigned long *rmap;
long ret = H_NOT_FOUND;
if (kvm_is_radix(kvm))
......@@ -848,16 +870,9 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
r = be64_to_cpu(hpte[1]);
gr |= r & (HPTE_R_R | HPTE_R_C);
if (r & HPTE_R_C) {
unsigned long psize = hpte_page_size(v, r);
hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
eieio();
rmap = revmap_for_hpte(kvm, v, gr);
if (rmap) {
lock_rmap(rmap);
*rmap |= KVMPPC_RMAP_CHANGED;
kvmppc_update_rmap_change(rmap, psize);
unlock_rmap(rmap);
}
kvmppc_set_dirty_from_hpte(kvm, v, gr);
}
}
vcpu->arch.gpr[4] = gr;
......@@ -1014,7 +1029,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
* Check the HPTE again, including base page size
*/
if ((v & valid) && (v & mask) == val &&
hpte_base_page_size(v, r) == (1ul << pshift))
kvmppc_hpte_base_page_shift(v, r) == pshift)
/* Return with the HPTE still locked */
return (hash << 3) + (i >> 1);
......
......@@ -31,6 +31,7 @@
#include <asm/tm.h>
#include <asm/opal.h>
#include <asm/xive-regs.h>
#include <asm/thread_info.h>
/* Sign-extend HDEC if not on POWER9 */
#define EXTEND_HDEC(reg) \
......@@ -81,6 +82,19 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
RFI
kvmppc_call_hv_entry:
BEGIN_FTR_SECTION
/* On P9, do LPCR setting, if necessary */
ld r3, HSTATE_SPLIT_MODE(r13)
cmpdi r3, 0
beq 46f
lwz r4, KVM_SPLIT_DO_SET(r3)
cmpwi r4, 0
beq 46f
bl kvmhv_p9_set_lpcr
nop
46:
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r4, HSTATE_KVM_VCPU(r13)
bl kvmppc_hv_entry
......@@ -149,11 +163,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
subf r4, r4, r3
mtspr SPRN_DEC, r4
BEGIN_FTR_SECTION
/* hwthread_req may have got set by cede or no vcpu, so clear it */
li r0, 0
stb r0, HSTATE_HWTHREAD_REQ(r13)
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/*
* For external interrupts we need to call the Linux
......@@ -316,7 +328,6 @@ kvm_novcpu_exit:
* Relocation is off and most register values are lost.
* r13 points to the PACA.
* r3 contains the SRR1 wakeup value, SRR1 is trashed.
* This is not used by ISAv3.0B processors.
*/
.globl kvm_start_guest
kvm_start_guest:
......@@ -390,6 +401,7 @@ kvm_secondary_got_guest:
ld r6, HSTATE_SPLIT_MODE(r13)
cmpdi r6, 0
beq 63f
BEGIN_FTR_SECTION
ld r0, KVM_SPLIT_RPR(r6)
mtspr SPRN_RPR, r0
ld r0, KVM_SPLIT_PMMAR(r6)
......@@ -397,6 +409,15 @@ kvm_secondary_got_guest:
ld r0, KVM_SPLIT_LDBAR(r6)
mtspr SPRN_LDBAR, r0
isync
FTR_SECTION_ELSE
/* On P9 we use the split_info for coordinating LPCR changes */
lwz r4, KVM_SPLIT_DO_SET(r6)
cmpwi r4, 0
beq 63f
mr r3, r6
bl kvmhv_p9_set_lpcr
nop
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
63:
/* Order load of vcpu after load of vcore */
lwsync
......@@ -435,9 +456,6 @@ kvm_secondary_got_guest:
* While waiting we also need to check if we get given a vcpu to run.
*/
kvm_no_guest:
BEGIN_FTR_SECTION
twi 31,0,0
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r3, HSTATE_HWTHREAD_REQ(r13)
cmpwi r3, 0
bne 53f
......@@ -470,6 +488,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r3, HSTATE_SPLIT_MODE(r13)
cmpdi r3, 0
beq kvm_no_guest
lwz r0, KVM_SPLIT_DO_SET(r3)
cmpwi r0, 0
bne kvmhv_do_set
lwz r0, KVM_SPLIT_DO_RESTORE(r3)
cmpwi r0, 0
bne kvmhv_do_restore
lbz r0, KVM_SPLIT_DO_NAP(r3)
cmpwi r0, 0
beq kvm_no_guest
......@@ -482,6 +506,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
stb r0, HSTATE_HWTHREAD_STATE(r13)
b kvm_no_guest
kvmhv_do_set:
/* Set LPCR, LPIDR etc. on P9 */
HMT_MEDIUM
bl kvmhv_p9_set_lpcr
nop
b kvm_no_guest
kvmhv_do_restore:
HMT_MEDIUM
bl kvmhv_p9_restore_lpcr
nop
b kvm_no_guest
/*
* Here the primary thread is trying to return the core to
* whole-core mode, so we need to nap.
......@@ -519,8 +556,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Set kvm_split_mode.napped[tid] = 1 */
ld r3, HSTATE_SPLIT_MODE(r13)
li r0, 1
lhz r4, PACAPACAINDEX(r13)
clrldi r4, r4, 61 /* micro-threading => P8 => 8 threads/core */
lbz r4, HSTATE_TID(r13)
addi r4, r4, KVM_SPLIT_NAPPED
stbx r0, r3, r4
/* Check the do_nap flag again after setting napped[] */
......@@ -1914,10 +1950,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
19: lis r8,0x7fff /* MAX_INT@h */
mtspr SPRN_HDEC,r8
16: ld r8,KVM_HOST_LPCR(r4)
16:
BEGIN_FTR_SECTION
/* On POWER9 with HPT-on-radix we need to wait for all other threads */
ld r3, HSTATE_SPLIT_MODE(r13)
cmpdi r3, 0
beq 47f
lwz r8, KVM_SPLIT_DO_RESTORE(r3)
cmpwi r8, 0
beq 47f
stw r12, STACK_SLOT_TRAP(r1)
bl kvmhv_p9_restore_lpcr
nop
lwz r12, STACK_SLOT_TRAP(r1)
b 48f
47:
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r8,KVM_HOST_LPCR(r4)
mtspr SPRN_LPCR,r8
isync
48:
/* load host SLB entries */
BEGIN_MMU_FTR_SECTION
b 0f
......@@ -2543,10 +2595,8 @@ kvm_do_nap:
clrrdi r0, r0, 1
mtspr SPRN_CTRLT, r0
BEGIN_FTR_SECTION
li r0,1
stb r0,HSTATE_HWTHREAD_REQ(r13)
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mfspr r5,SPRN_LPCR
ori r5,r5,LPCR_PECE0 | LPCR_PECE1
BEGIN_FTR_SECTION
......@@ -3134,10 +3184,139 @@ kvmppc_restore_tm:
/*
* We come here if we get any exception or interrupt while we are
* executing host real mode code while in guest MMU context.
* For now just spin, but we should do something better.
* r12 is (CR << 32) | vector
* r13 points to our PACA
* r12 is saved in HSTATE_SCRATCH0(r13)
* ctr is saved in HSTATE_SCRATCH1(r13) if RELOCATABLE
* r9 is saved in HSTATE_SCRATCH2(r13)
* r13 is saved in HSPRG1
* cfar is saved in HSTATE_CFAR(r13)
* ppr is saved in HSTATE_PPR(r13)
*/
kvmppc_bad_host_intr:
/*
* Switch to the emergency stack, but start half-way down in
* case we were already on it.
*/
mr r9, r1
std r1, PACAR1(r13)
ld r1, PACAEMERGSP(r13)
subi r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE
std r9, 0(r1)
std r0, GPR0(r1)
std r9, GPR1(r1)
std r2, GPR2(r1)
SAVE_4GPRS(3, r1)
SAVE_2GPRS(7, r1)
srdi r0, r12, 32
clrldi r12, r12, 32
std r0, _CCR(r1)
std r12, _TRAP(r1)
andi. r0, r12, 2
beq 1f
mfspr r3, SPRN_HSRR0
mfspr r4, SPRN_HSRR1
mfspr r5, SPRN_HDAR
mfspr r6, SPRN_HDSISR
b 2f
1: mfspr r3, SPRN_SRR0
mfspr r4, SPRN_SRR1
mfspr r5, SPRN_DAR
mfspr r6, SPRN_DSISR
2: std r3, _NIP(r1)
std r4, _MSR(r1)
std r5, _DAR(r1)
std r6, _DSISR(r1)
ld r9, HSTATE_SCRATCH2(r13)
ld r12, HSTATE_SCRATCH0(r13)
GET_SCRATCH0(r0)
SAVE_4GPRS(9, r1)
std r0, GPR13(r1)
SAVE_NVGPRS(r1)
ld r5, HSTATE_CFAR(r13)
std r5, ORIG_GPR3(r1)
mflr r3
#ifdef CONFIG_RELOCATABLE
ld r4, HSTATE_SCRATCH1(r13)
#else
mfctr r4
#endif
mfxer r5
lbz r6, PACASOFTIRQEN(r13)
std r3, _LINK(r1)
std r4, _CTR(r1)
std r5, _XER(r1)
std r6, SOFTE(r1)
ld r2, PACATOC(r13)
LOAD_REG_IMMEDIATE(3, 0x7265677368657265)
std r3, STACK_FRAME_OVERHEAD-16(r1)
/*
* On POWER9 do a minimal restore of the MMU and call C code,
* which will print a message and panic.
* XXX On POWER7 and POWER8, we just spin here since we don't
* know what the other threads are doing (and we don't want to
* coordinate with them) - but at least we now have register state
* in memory that we might be able to look at from another CPU.
*/
BEGIN_FTR_SECTION
b .
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
ld r9, HSTATE_KVM_VCPU(r13)
ld r10, VCPU_KVM(r9)
li r0, 0
mtspr SPRN_AMR, r0
mtspr SPRN_IAMR, r0
mtspr SPRN_CIABR, r0
mtspr SPRN_DAWRX, r0
/* Flush the ERAT on radix P9 DD1 guest exit */
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
BEGIN_MMU_FTR_SECTION
b 4f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
slbmte r0, r0
slbia
ptesync
ld r8, PACA_SLBSHADOWPTR(r13)
.rept SLB_NUM_BOLTED
li r3, SLBSHADOW_SAVEAREA
LDX_BE r5, r8, r3
addi r3, r3, 8
LDX_BE r6, r8, r3
andis. r7, r5, SLB_ESID_V@h
beq 3f
slbmte r6, r5
3: addi r8, r8, 16
.endr
4: lwz r7, KVM_HOST_LPID(r10)
mtspr SPRN_LPID, r7
mtspr SPRN_PID, r0
ld r8, KVM_HOST_LPCR(r10)
mtspr SPRN_LPCR, r8
isync
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
/*
* Turn on the MMU and jump to C code
*/
bcl 20, 31, .+4
5: mflr r3
addi r3, r3, 9f - 5b
ld r4, PACAKMSR(r13)
mtspr SPRN_SRR0, r3
mtspr SPRN_SRR1, r4
rfid
9: addi r3, r1, STACK_FRAME_OVERHEAD
bl kvmppc_bad_interrupt
b 9b
/*
* This mimics the MSR transition on IRQ delivery. The new guest MSR is taken
......
......@@ -1326,12 +1326,22 @@ static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
kvmppc_set_pvr_pr(vcpu, sregs->pvr);
vcpu3s->sdr1 = sregs->u.s.sdr1;
#ifdef CONFIG_PPC_BOOK3S_64
if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
/* Flush all SLB entries */
vcpu->arch.mmu.slbmte(vcpu, 0, 0);
vcpu->arch.mmu.slbia(vcpu);
for (i = 0; i < 64; i++) {
vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
sregs->u.s.ppc64.slb[i].slbe);
u64 rb = sregs->u.s.ppc64.slb[i].slbe;
u64 rs = sregs->u.s.ppc64.slb[i].slbv;
if (rb & SLB_ESID_V)
vcpu->arch.mmu.slbmte(vcpu, rs, rb);
}
} else {
} else
#endif
{
for (i = 0; i < 16; i++) {
vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
}
......
......@@ -419,6 +419,8 @@ int kvmppc_hcall_impl_pr(unsigned long cmd)
case H_PROTECT:
case H_BULK_REMOVE:
case H_PUT_TCE:
case H_PUT_TCE_INDIRECT:
case H_STUFF_TCE:
case H_CEDE:
case H_LOGICAL_CI_LOAD:
case H_LOGICAL_CI_STORE:
......
......@@ -377,7 +377,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
start = vma->vm_pgoff;
end = start +
((vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
vma_pages(vma);
pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);
......
......@@ -590,8 +590,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !!(hv_enabled && radix_enabled());
break;
case KVM_CAP_PPC_MMU_HASH_V3:
r = !!(hv_enabled && !radix_enabled() &&
cpu_has_feature(CPU_FTR_ARCH_300));
r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300));
break;
#endif
case KVM_CAP_SYNC_MMU:
......@@ -644,8 +643,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
#endif
case KVM_CAP_PPC_HTM:
r = cpu_has_feature(CPU_FTR_TM_COMP) &&
is_kvmppc_hv_enabled(kvm);
r = is_kvmppc_hv_enabled(kvm) &&
(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP);
break;
default:
r = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment