Commit 9d67121a authored by Paul Mackerras's avatar Paul Mackerras

Merge remote-tracking branch 'remotes/powerpc/topic/ppc-kvm' into kvm-ppc-next

This merges in the "ppc-kvm" topic branch of the powerpc tree to get a
series of commits that touch both general arch/powerpc code and KVM
code.  These commits will be merged both via the KVM tree and the
powerpc tree.
Signed-off-by: default avatarPaul Mackerras <paulus@ozlabs.org>
parents aa227864 83a05510
...@@ -1922,6 +1922,7 @@ registers, find a list below: ...@@ -1922,6 +1922,7 @@ registers, find a list below:
PPC | KVM_REG_PPC_TIDR | 64 PPC | KVM_REG_PPC_TIDR | 64
PPC | KVM_REG_PPC_PSSCR | 64 PPC | KVM_REG_PPC_PSSCR | 64
PPC | KVM_REG_PPC_DEC_EXPIRY | 64 PPC | KVM_REG_PPC_DEC_EXPIRY | 64
PPC | KVM_REG_PPC_PTCR | 64
PPC | KVM_REG_PPC_TM_GPR0 | 64 PPC | KVM_REG_PPC_TM_GPR0 | 64
... ...
PPC | KVM_REG_PPC_TM_GPR31 | 64 PPC | KVM_REG_PPC_TM_GPR31 | 64
......
...@@ -150,4 +150,25 @@ extern s32 patch__memset_nocache, patch__memcpy_nocache; ...@@ -150,4 +150,25 @@ extern s32 patch__memset_nocache, patch__memcpy_nocache;
extern long flush_count_cache; extern long flush_count_cache;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
#else
static inline void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
bool preserve_nv) { }
static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
bool preserve_nv) { }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
void kvmhv_save_host_pmu(void);
void kvmhv_load_host_pmu(void);
void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
unsigned long dabrx);
#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
...@@ -203,6 +203,18 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) ...@@ -203,6 +203,18 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
BUG(); BUG();
} }
static inline unsigned int ap_to_shift(unsigned long ap)
{
int psize;
for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
if (mmu_psize_defs[psize].ap == ap)
return mmu_psize_defs[psize].shift;
}
return -1;
}
static inline unsigned long get_sllp_encoding(int psize) static inline unsigned long get_sllp_encoding(int psize)
{ {
unsigned long sllp; unsigned long sllp;
......
...@@ -53,6 +53,7 @@ extern void radix__flush_tlb_lpid_page(unsigned int lpid, ...@@ -53,6 +53,7 @@ extern void radix__flush_tlb_lpid_page(unsigned int lpid,
unsigned long addr, unsigned long addr,
unsigned long page_size); unsigned long page_size);
extern void radix__flush_pwc_lpid(unsigned int lpid); extern void radix__flush_pwc_lpid(unsigned int lpid);
extern void radix__flush_tlb_lpid(unsigned int lpid);
extern void radix__local_flush_tlb_lpid(unsigned int lpid); extern void radix__local_flush_tlb_lpid(unsigned int lpid);
extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid); extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
......
...@@ -322,6 +322,11 @@ ...@@ -322,6 +322,11 @@
#define H_GET_24X7_DATA 0xF07C #define H_GET_24X7_DATA 0xF07C
#define H_GET_PERF_COUNTER_INFO 0xF080 #define H_GET_PERF_COUNTER_INFO 0xF080
/* Platform-specific hcalls used for nested HV KVM */
#define H_SET_PARTITION_TABLE 0xF800
#define H_ENTER_NESTED 0xF804
#define H_TLB_INVALIDATE 0xF808
/* Values for 2nd argument to H_SET_MODE */ /* Values for 2nd argument to H_SET_MODE */
#define H_SET_MODE_RESOURCE_SET_CIABR 1 #define H_SET_MODE_RESOURCE_SET_CIABR 1
#define H_SET_MODE_RESOURCE_SET_DAWR 2 #define H_SET_MODE_RESOURCE_SET_DAWR 2
...@@ -461,6 +466,42 @@ struct h_cpu_char_result { ...@@ -461,6 +466,42 @@ struct h_cpu_char_result {
u64 behaviour; u64 behaviour;
}; };
/* Register state for entering a nested guest with H_ENTER_NESTED */
struct hv_guest_state {
u64 version; /* version of this structure layout */
u32 lpid;
u32 vcpu_token;
/* These registers are hypervisor privileged (at least for writing) */
u64 lpcr;
u64 pcr;
u64 amor;
u64 dpdes;
u64 hfscr;
s64 tb_offset;
u64 dawr0;
u64 dawrx0;
u64 ciabr;
u64 hdec_expiry;
u64 purr;
u64 spurr;
u64 ic;
u64 vtb;
u64 hdar;
u64 hdsisr;
u64 heir;
u64 asdr;
/* These are OS privileged but need to be set late in guest entry */
u64 srr0;
u64 srr1;
u64 sprg[4];
u64 pidr;
u64 cfar;
u64 ppr;
};
/* Latest version of hv_guest_state structure */
#define HV_GUEST_STATE_VERSION 1
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */ #endif /* _ASM_POWERPC_HVCALL_H */
...@@ -84,7 +84,6 @@ ...@@ -84,7 +84,6 @@
#define BOOK3S_INTERRUPT_INST_STORAGE 0x400 #define BOOK3S_INTERRUPT_INST_STORAGE 0x400
#define BOOK3S_INTERRUPT_INST_SEGMENT 0x480 #define BOOK3S_INTERRUPT_INST_SEGMENT 0x480
#define BOOK3S_INTERRUPT_EXTERNAL 0x500 #define BOOK3S_INTERRUPT_EXTERNAL 0x500
#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501
#define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502 #define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502
#define BOOK3S_INTERRUPT_ALIGNMENT 0x600 #define BOOK3S_INTERRUPT_ALIGNMENT 0x600
#define BOOK3S_INTERRUPT_PROGRAM 0x700 #define BOOK3S_INTERRUPT_PROGRAM 0x700
...@@ -134,8 +133,7 @@ ...@@ -134,8 +133,7 @@
#define BOOK3S_IRQPRIO_EXTERNAL 14 #define BOOK3S_IRQPRIO_EXTERNAL 14
#define BOOK3S_IRQPRIO_DECREMENTER 15 #define BOOK3S_IRQPRIO_DECREMENTER 15
#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 16 #define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 16
#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 17 #define BOOK3S_IRQPRIO_MAX 17
#define BOOK3S_IRQPRIO_MAX 18
#define BOOK3S_HFLAG_DCBZ32 0x1 #define BOOK3S_HFLAG_DCBZ32 0x1
#define BOOK3S_HFLAG_SLB 0x2 #define BOOK3S_HFLAG_SLB 0x2
......
...@@ -188,14 +188,37 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); ...@@ -188,14 +188,37 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run, extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
struct kvm_vcpu *vcpu, struct kvm_vcpu *vcpu,
unsigned long ea, unsigned long dsisr); unsigned long ea, unsigned long dsisr);
extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, u64 root,
u64 *pte_ret_p);
extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, u64 table,
int table_index, u64 *pte_ret_p);
extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, bool data, bool iswrite); struct kvmppc_pte *gpte, bool data, bool iswrite);
extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
unsigned int shift, struct kvm_memory_slot *memslot,
unsigned int lpid);
extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable,
bool writing, unsigned long gpa,
unsigned int lpid);
extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
unsigned long gpa,
struct kvm_memory_slot *memslot,
bool writing, bool kvm_ro,
pte_t *inserted_pte, unsigned int *levelp);
extern int kvmppc_init_vm_radix(struct kvm *kvm); extern int kvmppc_init_vm_radix(struct kvm *kvm);
extern void kvmppc_free_radix(struct kvm *kvm); extern void kvmppc_free_radix(struct kvm *kvm);
extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd,
unsigned int lpid);
extern int kvmppc_radix_init(void); extern int kvmppc_radix_init(void);
extern void kvmppc_radix_exit(void); extern void kvmppc_radix_exit(void);
extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn); unsigned long gfn);
extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
unsigned long gpa, unsigned int shift,
struct kvm_memory_slot *memslot,
unsigned int lpid);
extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn); unsigned long gfn);
extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
...@@ -271,6 +294,21 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {} ...@@ -271,6 +294,21 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {}
static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {} static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {}
#endif #endif
long kvmhv_nested_init(void);
void kvmhv_nested_exit(void);
void kvmhv_vm_nested_init(struct kvm *kvm);
long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
void kvmhv_release_all_nested(struct kvm *kvm);
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu,
u64 time_limit, unsigned long lpcr);
void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
struct hv_guest_state *hr);
long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu);
void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
extern int kvm_irq_bypass; extern int kvm_irq_bypass;
...@@ -301,12 +339,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) ...@@ -301,12 +339,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{ {
vcpu->arch.cr = val; vcpu->arch.regs.ccr = val;
} }
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
{ {
return vcpu->arch.cr; return vcpu->arch.regs.ccr;
} }
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
...@@ -384,9 +422,6 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu); ...@@ -384,9 +422,6 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
/* TO = 31 for unconditional trap */ /* TO = 31 for unconditional trap */
#define INS_TW 0x7fe00008 #define INS_TW 0x7fe00008
/* LPIDs we support with this build -- runtime limit may be lower */
#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
#define SPLIT_HACK_MASK 0xff000000 #define SPLIT_HACK_MASK 0xff000000
#define SPLIT_HACK_OFFS 0xfb000000 #define SPLIT_HACK_OFFS 0xfb000000
......
...@@ -23,6 +23,108 @@ ...@@ -23,6 +23,108 @@
#include <linux/string.h> #include <linux/string.h>
#include <asm/bitops.h> #include <asm/bitops.h>
#include <asm/book3s/64/mmu-hash.h> #include <asm/book3s/64/mmu-hash.h>
#include <asm/cpu_has_feature.h>
#include <asm/ppc-opcode.h>
#ifdef CONFIG_PPC_PSERIES
static inline bool kvmhv_on_pseries(void)
{
return !cpu_has_feature(CPU_FTR_HVMODE);
}
#else
static inline bool kvmhv_on_pseries(void)
{
return false;
}
#endif
/*
* Structure for a nested guest, that is, for a guest that is managed by
* one of our guests.
*/
struct kvm_nested_guest {
struct kvm *l1_host; /* L1 VM that owns this nested guest */
int l1_lpid; /* lpid L1 guest thinks this guest is */
int shadow_lpid; /* real lpid of this nested guest */
pgd_t *shadow_pgtable; /* our page table for this guest */
u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */
u64 process_table; /* process table entry for this guest */
long refcnt; /* number of pointers to this struct */
struct mutex tlb_lock; /* serialize page faults and tlbies */
struct kvm_nested_guest *next;
cpumask_t need_tlb_flush;
cpumask_t cpu_in_guest;
short prev_cpu[NR_CPUS];
};
/*
* We define a nested rmap entry as a single 64-bit quantity
* 0xFFF0000000000000 12-bit lpid field
* 0x000FFFFFFFFFF000 40-bit guest 4k page frame number
* 0x0000000000000001 1-bit single entry flag
*/
#define RMAP_NESTED_LPID_MASK 0xFFF0000000000000UL
#define RMAP_NESTED_LPID_SHIFT (52)
#define RMAP_NESTED_GPA_MASK 0x000FFFFFFFFFF000UL
#define RMAP_NESTED_IS_SINGLE_ENTRY 0x0000000000000001UL
/* Structure for a nested guest rmap entry */
struct rmap_nested {
struct llist_node list;
u64 rmap;
};
/*
* for_each_nest_rmap_safe - iterate over the list of nested rmap entries
* safe against removal of the list entry or NULL list
* @pos: a (struct rmap_nested *) to use as a loop cursor
* @node: pointer to the first entry
* NOTE: this can be NULL
* @rmapp: an (unsigned long *) in which to return the rmap entries on each
* iteration
* NOTE: this must point to already allocated memory
*
* The nested_rmap is a llist of (struct rmap_nested) entries pointed to by the
* rmap entry in the memslot. The list is always terminated by a "single entry"
* stored in the list element of the final entry of the llist. If there is ONLY
* a single entry then this is itself in the rmap entry of the memslot, not a
* llist head pointer.
*
* Note that the iterator below assumes that a nested rmap entry is always
* non-zero. This is true for our usage because the LPID field is always
* non-zero (zero is reserved for the host).
*
* This should be used to iterate over the list of rmap_nested entries with
* processing done on the u64 rmap value given by each iteration. This is safe
* against removal of list entries and it is always safe to call free on (pos).
*
* e.g.
* struct rmap_nested *cursor;
* struct llist_node *first;
* unsigned long rmap;
* for_each_nest_rmap_safe(cursor, first, &rmap) {
* do_something(rmap);
* free(cursor);
* }
*/
#define for_each_nest_rmap_safe(pos, node, rmapp) \
for ((pos) = llist_entry((node), typeof(*(pos)), list); \
(node) && \
(*(rmapp) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ? \
((u64) (node)) : ((pos)->rmap))) && \
(((node) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ? \
((struct llist_node *) ((pos) = NULL)) : \
(pos)->list.next)), true); \
(pos) = llist_entry((node), typeof(*(pos)), list))
struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
bool create);
void kvmhv_put_nested(struct kvm_nested_guest *gp);
int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid);
/* Encoding of first parameter for H_TLB_INVALIDATE */
#define H_TLBIE_P1_ENC(ric, prs, r) (___PPC_RIC(ric) | ___PPC_PRS(prs) | \
___PPC_R(r))
/* Power architecture requires HPT is at least 256kiB, at most 64TiB */ /* Power architecture requires HPT is at least 256kiB, at most 64TiB */
#define PPC_MIN_HPT_ORDER 18 #define PPC_MIN_HPT_ORDER 18
...@@ -435,6 +537,7 @@ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm) ...@@ -435,6 +537,7 @@ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
} }
extern void kvmppc_mmu_debugfs_init(struct kvm *kvm); extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
extern void kvmhv_radix_debugfs_init(struct kvm *kvm);
extern void kvmhv_rm_send_ipi(int cpu); extern void kvmhv_rm_send_ipi(int cpu);
...@@ -482,7 +585,7 @@ static inline u64 sanitize_msr(u64 msr) ...@@ -482,7 +585,7 @@ static inline u64 sanitize_msr(u64 msr)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu) static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
{ {
vcpu->arch.cr = vcpu->arch.cr_tm; vcpu->arch.regs.ccr = vcpu->arch.cr_tm;
vcpu->arch.regs.xer = vcpu->arch.xer_tm; vcpu->arch.regs.xer = vcpu->arch.xer_tm;
vcpu->arch.regs.link = vcpu->arch.lr_tm; vcpu->arch.regs.link = vcpu->arch.lr_tm;
vcpu->arch.regs.ctr = vcpu->arch.ctr_tm; vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
...@@ -499,7 +602,7 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu) ...@@ -499,7 +602,7 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu) static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
{ {
vcpu->arch.cr_tm = vcpu->arch.cr; vcpu->arch.cr_tm = vcpu->arch.regs.ccr;
vcpu->arch.xer_tm = vcpu->arch.regs.xer; vcpu->arch.xer_tm = vcpu->arch.regs.xer;
vcpu->arch.lr_tm = vcpu->arch.regs.link; vcpu->arch.lr_tm = vcpu->arch.regs.link;
vcpu->arch.ctr_tm = vcpu->arch.regs.ctr; vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
...@@ -515,6 +618,17 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu) ...@@ -515,6 +618,17 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
} }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
unsigned long gpa, unsigned int level,
unsigned long mmu_seq, unsigned int lpid,
unsigned long *rmapp, struct rmap_nested **n_rmap);
extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
struct rmap_nested **n_rmap);
extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
struct kvm_memory_slot *memslot,
unsigned long gpa, unsigned long hpa,
unsigned long nbytes);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#endif /* __ASM_KVM_BOOK3S_64_H__ */ #endif /* __ASM_KVM_BOOK3S_64_H__ */
...@@ -25,6 +25,9 @@ ...@@ -25,6 +25,9 @@
#define XICS_MFRR 0xc #define XICS_MFRR 0xc
#define XICS_IPI 2 /* interrupt source # for IPIs */ #define XICS_IPI 2 /* interrupt source # for IPIs */
/* LPIDs we support with this build -- runtime limit may be lower */
#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
/* Maximum number of threads per physical core */ /* Maximum number of threads per physical core */
#define MAX_SMT_THREADS 8 #define MAX_SMT_THREADS 8
......
...@@ -46,12 +46,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) ...@@ -46,12 +46,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{ {
vcpu->arch.cr = val; vcpu->arch.regs.ccr = val;
} }
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
{ {
return vcpu->arch.cr; return vcpu->arch.regs.ccr;
} }
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
......
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */ #include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */
#define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES) #define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES)
#define KVM_MAX_NESTED_GUESTS KVMPPC_NR_LPIDS
#else #else
#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
...@@ -94,6 +95,7 @@ struct dtl_entry; ...@@ -94,6 +95,7 @@ struct dtl_entry;
struct kvmppc_vcpu_book3s; struct kvmppc_vcpu_book3s;
struct kvmppc_book3s_shadow_vcpu; struct kvmppc_book3s_shadow_vcpu;
struct kvm_nested_guest;
struct kvm_vm_stat { struct kvm_vm_stat {
ulong remote_tlb_flush; ulong remote_tlb_flush;
...@@ -287,10 +289,12 @@ struct kvm_arch { ...@@ -287,10 +289,12 @@ struct kvm_arch {
u8 radix; u8 radix;
u8 fwnmi_enabled; u8 fwnmi_enabled;
bool threads_indep; bool threads_indep;
bool nested_enable;
pgd_t *pgtable; pgd_t *pgtable;
u64 process_table; u64 process_table;
struct dentry *debugfs_dir; struct dentry *debugfs_dir;
struct dentry *htab_dentry; struct dentry *htab_dentry;
struct dentry *radix_dentry;
struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */ struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
...@@ -311,6 +315,9 @@ struct kvm_arch { ...@@ -311,6 +315,9 @@ struct kvm_arch {
#endif #endif
struct kvmppc_ops *kvm_ops; struct kvmppc_ops *kvm_ops;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
u64 l1_ptcr;
int max_nested_lpid;
struct kvm_nested_guest *nested_guests[KVM_MAX_NESTED_GUESTS];
/* This array can grow quite large, keep it at the end */ /* This array can grow quite large, keep it at the end */
struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
#endif #endif
...@@ -360,7 +367,9 @@ struct kvmppc_pte { ...@@ -360,7 +367,9 @@ struct kvmppc_pte {
bool may_write : 1; bool may_write : 1;
bool may_execute : 1; bool may_execute : 1;
unsigned long wimg; unsigned long wimg;
unsigned long rc;
u8 page_size; /* MMU_PAGE_xxx */ u8 page_size; /* MMU_PAGE_xxx */
u8 page_shift;
}; };
struct kvmppc_mmu { struct kvmppc_mmu {
...@@ -537,8 +546,6 @@ struct kvm_vcpu_arch { ...@@ -537,8 +546,6 @@ struct kvm_vcpu_arch {
ulong tar; ulong tar;
#endif #endif
u32 cr;
#ifdef CONFIG_PPC_BOOK3S #ifdef CONFIG_PPC_BOOK3S
ulong hflags; ulong hflags;
ulong guest_owned_ext; ulong guest_owned_ext;
...@@ -707,6 +714,7 @@ struct kvm_vcpu_arch { ...@@ -707,6 +714,7 @@ struct kvm_vcpu_arch {
u8 hcall_needed; u8 hcall_needed;
u8 epr_flags; /* KVMPPC_EPR_xxx */ u8 epr_flags; /* KVMPPC_EPR_xxx */
u8 epr_needed; u8 epr_needed;
u8 external_oneshot; /* clear external irq after delivery */
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
...@@ -781,6 +789,10 @@ struct kvm_vcpu_arch { ...@@ -781,6 +789,10 @@ struct kvm_vcpu_arch {
u32 emul_inst; u32 emul_inst;
u32 online; u32 online;
/* For support of nested guests */
struct kvm_nested_guest *nested;
u32 nested_vcpu_id;
#endif #endif
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
......
...@@ -194,9 +194,7 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table( ...@@ -194,9 +194,7 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \ (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
(stt)->size, (ioba), (npages)) ? \ (stt)->size, (ioba), (npages)) ? \
H_PARAMETER : H_SUCCESS) H_PARAMETER : H_SUCCESS)
extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt, extern long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
unsigned long tce);
extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
unsigned long *ua, unsigned long **prmap); unsigned long *ua, unsigned long **prmap);
extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt, extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
unsigned long idx, unsigned long tce); unsigned long idx, unsigned long tce);
...@@ -585,6 +583,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval); ...@@ -585,6 +583,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status); int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
#else #else
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority) { return -1; } u32 priority) { return -1; }
...@@ -607,6 +606,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur ...@@ -607,6 +606,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status) { return -ENODEV; } int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
#endif /* CONFIG_KVM_XIVE */ #endif /* CONFIG_KVM_XIVE */
/* /*
...@@ -652,6 +652,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, ...@@ -652,6 +652,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr); unsigned long mfrr);
int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr); int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
/* /*
* Host-side operations we want to set up while running in real * Host-side operations we want to set up while running in real
......
...@@ -104,6 +104,7 @@ ...@@ -104,6 +104,7 @@
#define OP_31_XOP_LHZUX 311 #define OP_31_XOP_LHZUX 311
#define OP_31_XOP_MSGSNDP 142 #define OP_31_XOP_MSGSNDP 142
#define OP_31_XOP_MSGCLRP 174 #define OP_31_XOP_MSGCLRP 174
#define OP_31_XOP_TLBIE 306
#define OP_31_XOP_MFSPR 339 #define OP_31_XOP_MFSPR 339
#define OP_31_XOP_LWAX 341 #define OP_31_XOP_LWAX 341
#define OP_31_XOP_LHAX 343 #define OP_31_XOP_LHAX 343
......
...@@ -415,6 +415,7 @@ ...@@ -415,6 +415,7 @@
#define HFSCR_DSCR __MASK(FSCR_DSCR_LG) #define HFSCR_DSCR __MASK(FSCR_DSCR_LG)
#define HFSCR_VECVSX __MASK(FSCR_VECVSX_LG) #define HFSCR_VECVSX __MASK(FSCR_VECVSX_LG)
#define HFSCR_FP __MASK(FSCR_FP_LG) #define HFSCR_FP __MASK(FSCR_FP_LG)
#define HFSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56) /* interrupt cause */
#define SPRN_TAR 0x32f /* Target Address Register */ #define SPRN_TAR 0x32f /* Target Address Register */
#define SPRN_LPCR 0x13E /* LPAR Control Register */ #define SPRN_LPCR 0x13E /* LPAR Control Register */
#define LPCR_VPM0 ASM_CONST(0x8000000000000000) #define LPCR_VPM0 ASM_CONST(0x8000000000000000)
...@@ -766,6 +767,7 @@ ...@@ -766,6 +767,7 @@
#define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */ #define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */
#define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */ #define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */
#define HSRR1_DENORM 0x00100000 /* Denorm exception */ #define HSRR1_DENORM 0x00100000 /* Denorm exception */
#define HSRR1_HISI_WRITE 0x00010000 /* HISI bcs couldn't update mem */
#define SPRN_TBCTL 0x35f /* PA6T Timebase control register */ #define SPRN_TBCTL 0x35f /* PA6T Timebase control register */
#define TBCTL_FREEZE 0x0000000000000000ull /* Freeze all tbs */ #define TBCTL_FREEZE 0x0000000000000000ull /* Freeze all tbs */
......
...@@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char { ...@@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
/* Transactional Memory checkpointed state: /* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs * This is all GPRs, all VSX regs and a subset of SPRs
......
...@@ -438,7 +438,7 @@ int main(void) ...@@ -438,7 +438,7 @@ int main(void)
#ifdef CONFIG_PPC_BOOK3S #ifdef CONFIG_PPC_BOOK3S
OFFSET(VCPU_TAR, kvm_vcpu, arch.tar); OFFSET(VCPU_TAR, kvm_vcpu, arch.tar);
#endif #endif
OFFSET(VCPU_CR, kvm_vcpu, arch.cr); OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip); OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr); OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr);
...@@ -503,6 +503,7 @@ int main(void) ...@@ -503,6 +503,7 @@ int main(void)
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty); OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty);
OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst); OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst);
OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested);
OFFSET(VCPU_CPU, kvm_vcpu, cpu); OFFSET(VCPU_CPU, kvm_vcpu, cpu);
OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu); OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu);
#endif #endif
...@@ -695,7 +696,7 @@ int main(void) ...@@ -695,7 +696,7 @@ int main(void)
#endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_BOOK3S_64 */
#else /* CONFIG_PPC_BOOK3S */ #else /* CONFIG_PPC_BOOK3S */
OFFSET(VCPU_CR, kvm_vcpu, arch.cr); OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer); OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link); OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr); OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
......
...@@ -147,8 +147,8 @@ __init_hvmode_206: ...@@ -147,8 +147,8 @@ __init_hvmode_206:
rldicl. r0,r3,4,63 rldicl. r0,r3,4,63
bnelr bnelr
ld r5,CPU_SPEC_FEATURES(r4) ld r5,CPU_SPEC_FEATURES(r4)
LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE) LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST)
xor r5,r5,r6 andc r5,r5,r6
std r5,CPU_SPEC_FEATURES(r4) std r5,CPU_SPEC_FEATURES(r4)
blr blr
......
...@@ -75,7 +75,8 @@ kvm-hv-y += \ ...@@ -75,7 +75,8 @@ kvm-hv-y += \
book3s_hv.o \ book3s_hv.o \
book3s_hv_interrupts.o \ book3s_hv_interrupts.o \
book3s_64_mmu_hv.o \ book3s_64_mmu_hv.o \
book3s_64_mmu_radix.o book3s_64_mmu_radix.o \
book3s_hv_nested.o
kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \ kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
book3s_hv_tm.o book3s_hv_tm.o
......
...@@ -153,7 +153,6 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec) ...@@ -153,7 +153,6 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break; case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break;
case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break; case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break;
case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break; case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break;
case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL; break;
case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break; case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break;
case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break; case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break;
case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break; case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break;
...@@ -239,18 +238,35 @@ EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec); ...@@ -239,18 +238,35 @@ EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq) struct kvm_interrupt *irq)
{ {
unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL; /*
* This case (KVM_INTERRUPT_SET) should never actually arise for
if (irq->irq == KVM_INTERRUPT_SET_LEVEL) * a pseries guest (because pseries guests expect their interrupt
vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL; * controllers to continue asserting an external interrupt request
* until it is acknowledged at the interrupt controller), but is
* included to avoid ABI breakage and potentially for other
* sorts of guest.
*
* There is a subtlety here: HV KVM does not test the
* external_oneshot flag in the code that synthesizes
* external interrupts for the guest just before entering
* the guest. That is OK even if userspace did do a
* KVM_INTERRUPT_SET on a pseries guest vcpu, because the
* caller (kvm_vcpu_ioctl_interrupt) does a kvm_vcpu_kick()
* which ends up doing a smp_send_reschedule(), which will
* pull the guest all the way out to the host, meaning that
* we will call kvmppc_core_prepare_to_enter() before entering
* the guest again, and that will handle the external_oneshot
* flag correctly.
*/
if (irq->irq == KVM_INTERRUPT_SET)
vcpu->arch.external_oneshot = 1;
kvmppc_book3s_queue_irqprio(vcpu, vec); kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
} }
void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
{ {
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
} }
void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar, void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
...@@ -281,7 +297,6 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, ...@@ -281,7 +297,6 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
vec = BOOK3S_INTERRUPT_DECREMENTER; vec = BOOK3S_INTERRUPT_DECREMENTER;
break; break;
case BOOK3S_IRQPRIO_EXTERNAL: case BOOK3S_IRQPRIO_EXTERNAL:
case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit; deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
vec = BOOK3S_INTERRUPT_EXTERNAL; vec = BOOK3S_INTERRUPT_EXTERNAL;
break; break;
...@@ -355,8 +370,16 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) ...@@ -355,8 +370,16 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
case BOOK3S_IRQPRIO_DECREMENTER: case BOOK3S_IRQPRIO_DECREMENTER:
/* DEC interrupts get cleared by mtdec */ /* DEC interrupts get cleared by mtdec */
return false; return false;
case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: case BOOK3S_IRQPRIO_EXTERNAL:
/* External interrupts get cleared by userspace */ /*
* External interrupts get cleared by userspace
* except when set by the KVM_INTERRUPT ioctl with
* KVM_INTERRUPT_SET (not KVM_INTERRUPT_SET_LEVEL).
*/
if (vcpu->arch.external_oneshot) {
vcpu->arch.external_oneshot = 0;
return true;
}
return false; return false;
} }
......
...@@ -268,14 +268,13 @@ int kvmppc_mmu_hv_init(void) ...@@ -268,14 +268,13 @@ int kvmppc_mmu_hv_init(void)
{ {
unsigned long host_lpid, rsvd_lpid; unsigned long host_lpid, rsvd_lpid;
if (!cpu_has_feature(CPU_FTR_HVMODE))
return -EINVAL;
if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE)) if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
return -EINVAL; return -EINVAL;
/* POWER7 has 10-bit LPIDs (12-bit in POWER8) */ /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
host_lpid = mfspr(SPRN_LPID); host_lpid = 0;
if (cpu_has_feature(CPU_FTR_HVMODE))
host_lpid = mfspr(SPRN_LPID);
rsvd_lpid = LPID_RSVD; rsvd_lpid = LPID_RSVD;
kvmppc_init_lpid(rsvd_lpid + 1); kvmppc_init_lpid(rsvd_lpid + 1);
......
This diff is collapsed.
...@@ -363,6 +363,40 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, ...@@ -363,6 +363,40 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
return ret; return ret;
} }
static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long tce)
{
unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
enum dma_data_direction dir = iommu_tce_direction(tce);
struct kvmppc_spapr_tce_iommu_table *stit;
unsigned long ua = 0;
/* Allow userspace to poison TCE table */
if (dir == DMA_NONE)
return H_SUCCESS;
if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_TOO_HARD;
if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
return H_TOO_HARD;
list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
unsigned long hpa = 0;
struct mm_iommu_table_group_mem_t *mem;
long shift = stit->tbl->it_page_shift;
mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift);
if (!mem)
return H_TOO_HARD;
if (mm_iommu_ua_to_hpa(mem, ua, shift, &hpa))
return H_TOO_HARD;
}
return H_SUCCESS;
}
static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry) static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
{ {
unsigned long hpa = 0; unsigned long hpa = 0;
...@@ -401,7 +435,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm, ...@@ -401,7 +435,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
long ret; long ret;
if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir))) if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
return H_HARDWARE; return H_TOO_HARD;
if (dir == DMA_NONE) if (dir == DMA_NONE)
return H_SUCCESS; return H_SUCCESS;
...@@ -449,15 +483,15 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, ...@@ -449,15 +483,15 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
return H_TOO_HARD; return H_TOO_HARD;
if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa))) if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
return H_HARDWARE; return H_TOO_HARD;
if (mm_iommu_mapped_inc(mem)) if (mm_iommu_mapped_inc(mem))
return H_CLOSED; return H_TOO_HARD;
ret = iommu_tce_xchg(tbl, entry, &hpa, &dir); ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
if (WARN_ON_ONCE(ret)) { if (WARN_ON_ONCE(ret)) {
mm_iommu_mapped_dec(mem); mm_iommu_mapped_dec(mem);
return H_HARDWARE; return H_TOO_HARD;
} }
if (dir != DMA_NONE) if (dir != DMA_NONE)
...@@ -517,8 +551,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ...@@ -517,8 +551,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
idx = srcu_read_lock(&vcpu->kvm->srcu); idx = srcu_read_lock(&vcpu->kvm->srcu);
if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) {
ret = H_PARAMETER; ret = H_PARAMETER;
goto unlock_exit; goto unlock_exit;
} }
...@@ -533,14 +566,10 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ...@@ -533,14 +566,10 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
entry, ua, dir); entry, ua, dir);
if (ret == H_SUCCESS) if (ret != H_SUCCESS) {
continue; kvmppc_clear_tce(stit->tbl, entry);
if (ret == H_TOO_HARD)
goto unlock_exit; goto unlock_exit;
}
WARN_ON_ONCE(1);
kvmppc_clear_tce(stit->tbl, entry);
} }
kvmppc_tce_put(stt, entry, tce); kvmppc_tce_put(stt, entry, tce);
...@@ -583,7 +612,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -583,7 +612,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
return ret; return ret;
idx = srcu_read_lock(&vcpu->kvm->srcu); idx = srcu_read_lock(&vcpu->kvm->srcu);
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
ret = H_TOO_HARD; ret = H_TOO_HARD;
goto unlock_exit; goto unlock_exit;
} }
...@@ -599,10 +628,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -599,10 +628,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
ret = kvmppc_tce_validate(stt, tce); ret = kvmppc_tce_validate(stt, tce);
if (ret != H_SUCCESS) if (ret != H_SUCCESS)
goto unlock_exit; goto unlock_exit;
}
for (i = 0; i < npages; ++i) {
/*
* This looks unsafe, because we validate, then regrab
* the TCE from userspace which could have been changed by
* another thread.
*
* But it actually is safe, because the relevant checks will be
* re-executed in the following code. If userspace tries to
* change this dodgily it will result in a messier failure mode
* but won't threaten the host.
*/
if (get_user(tce, tces + i)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
tce = be64_to_cpu(tce);
if (kvmppc_gpa_to_ua(vcpu->kvm, if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
&ua, NULL))
return H_PARAMETER; return H_PARAMETER;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
...@@ -610,14 +655,10 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -610,14 +655,10 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
stit->tbl, entry + i, ua, stit->tbl, entry + i, ua,
iommu_tce_direction(tce)); iommu_tce_direction(tce));
if (ret == H_SUCCESS) if (ret != H_SUCCESS) {
continue; kvmppc_clear_tce(stit->tbl, entry);
if (ret == H_TOO_HARD)
goto unlock_exit; goto unlock_exit;
}
WARN_ON_ONCE(1);
kvmppc_clear_tce(stit->tbl, entry);
} }
kvmppc_tce_put(stt, entry + i, tce); kvmppc_tce_put(stt, entry + i, tce);
......
...@@ -87,6 +87,7 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm, ...@@ -87,6 +87,7 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
} }
EXPORT_SYMBOL_GPL(kvmppc_find_table); EXPORT_SYMBOL_GPL(kvmppc_find_table);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/* /*
* Validates TCE address. * Validates TCE address.
* At the moment flags and page mask are validated. * At the moment flags and page mask are validated.
...@@ -94,14 +95,14 @@ EXPORT_SYMBOL_GPL(kvmppc_find_table); ...@@ -94,14 +95,14 @@ EXPORT_SYMBOL_GPL(kvmppc_find_table);
* to the table and user space is supposed to process them), we can skip * to the table and user space is supposed to process them), we can skip
* checking other things (such as TCE is a guest RAM address or the page * checking other things (such as TCE is a guest RAM address or the page
* was actually allocated). * was actually allocated).
*
* WARNING: This will be called in real-mode on HV KVM and virtual
* mode on PR KVM
*/ */
long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce) static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long tce)
{ {
unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE); unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
enum dma_data_direction dir = iommu_tce_direction(tce); enum dma_data_direction dir = iommu_tce_direction(tce);
struct kvmppc_spapr_tce_iommu_table *stit;
unsigned long ua = 0;
/* Allow userspace to poison TCE table */ /* Allow userspace to poison TCE table */
if (dir == DMA_NONE) if (dir == DMA_NONE)
...@@ -110,9 +111,25 @@ long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce) ...@@ -110,9 +111,25 @@ long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
if (iommu_tce_check_gpa(stt->page_shift, gpa)) if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_PARAMETER; return H_PARAMETER;
if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
return H_TOO_HARD;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
unsigned long hpa = 0;
struct mm_iommu_table_group_mem_t *mem;
long shift = stit->tbl->it_page_shift;
mem = mm_iommu_lookup_rm(stt->kvm->mm, ua, 1ULL << shift);
if (!mem)
return H_TOO_HARD;
if (mm_iommu_ua_to_hpa_rm(mem, ua, shift, &hpa))
return H_TOO_HARD;
}
return H_SUCCESS; return H_SUCCESS;
} }
EXPORT_SYMBOL_GPL(kvmppc_tce_validate); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
/* Note on the use of page_address() in real mode, /* Note on the use of page_address() in real mode,
* *
...@@ -164,10 +181,10 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, ...@@ -164,10 +181,10 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
} }
EXPORT_SYMBOL_GPL(kvmppc_tce_put); EXPORT_SYMBOL_GPL(kvmppc_tce_put);
long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
unsigned long *ua, unsigned long **prmap) unsigned long *ua, unsigned long **prmap)
{ {
unsigned long gfn = gpa >> PAGE_SHIFT; unsigned long gfn = tce >> PAGE_SHIFT;
struct kvm_memory_slot *memslot; struct kvm_memory_slot *memslot;
memslot = search_memslots(kvm_memslots(kvm), gfn); memslot = search_memslots(kvm_memslots(kvm), gfn);
...@@ -175,7 +192,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, ...@@ -175,7 +192,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
return -EINVAL; return -EINVAL;
*ua = __gfn_to_hva_memslot(memslot, gfn) | *ua = __gfn_to_hva_memslot(memslot, gfn) |
(gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (prmap) if (prmap)
...@@ -184,7 +201,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, ...@@ -184,7 +201,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); EXPORT_SYMBOL_GPL(kvmppc_tce_to_ua);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
...@@ -300,10 +317,10 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, ...@@ -300,10 +317,10 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift, if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift,
&hpa))) &hpa)))
return H_HARDWARE; return H_TOO_HARD;
if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
return H_CLOSED; return H_TOO_HARD;
ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
if (ret) { if (ret) {
...@@ -368,13 +385,12 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ...@@ -368,13 +385,12 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (ret != H_SUCCESS) if (ret != H_SUCCESS)
return ret; return ret;
ret = kvmppc_tce_validate(stt, tce); ret = kvmppc_rm_tce_validate(stt, tce);
if (ret != H_SUCCESS) if (ret != H_SUCCESS)
return ret; return ret;
dir = iommu_tce_direction(tce); dir = iommu_tce_direction(tce);
if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
return H_PARAMETER; return H_PARAMETER;
entry = ioba >> stt->page_shift; entry = ioba >> stt->page_shift;
...@@ -387,14 +403,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ...@@ -387,14 +403,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
stit->tbl, entry, ua, dir); stit->tbl, entry, ua, dir);
if (ret == H_SUCCESS) if (ret != H_SUCCESS) {
continue; kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
if (ret == H_TOO_HARD)
return ret; return ret;
}
WARN_ON_ONCE_RM(1);
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
} }
kvmppc_tce_put(stt, entry, tce); kvmppc_tce_put(stt, entry, tce);
...@@ -480,7 +492,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -480,7 +492,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
*/ */
struct mm_iommu_table_group_mem_t *mem; struct mm_iommu_table_group_mem_t *mem;
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
return H_TOO_HARD; return H_TOO_HARD;
mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
...@@ -496,12 +508,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -496,12 +508,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
* We do not require memory to be preregistered in this case * We do not require memory to be preregistered in this case
* so lock rmap and do __find_linux_pte_or_hugepte(). * so lock rmap and do __find_linux_pte_or_hugepte().
*/ */
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
return H_TOO_HARD; return H_TOO_HARD;
rmap = (void *) vmalloc_to_phys(rmap); rmap = (void *) vmalloc_to_phys(rmap);
if (WARN_ON_ONCE_RM(!rmap)) if (WARN_ON_ONCE_RM(!rmap))
return H_HARDWARE; return H_TOO_HARD;
/* /*
* Synchronize with the MMU notifier callbacks in * Synchronize with the MMU notifier callbacks in
...@@ -521,14 +533,16 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -521,14 +533,16 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
for (i = 0; i < npages; ++i) { for (i = 0; i < npages; ++i) {
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ret = kvmppc_tce_validate(stt, tce); ret = kvmppc_rm_tce_validate(stt, tce);
if (ret != H_SUCCESS) if (ret != H_SUCCESS)
goto unlock_exit; goto unlock_exit;
}
for (i = 0; i < npages; ++i) {
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ua = 0; ua = 0;
if (kvmppc_gpa_to_ua(vcpu->kvm, if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
&ua, NULL))
return H_PARAMETER; return H_PARAMETER;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
...@@ -536,14 +550,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -536,14 +550,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
stit->tbl, entry + i, ua, stit->tbl, entry + i, ua,
iommu_tce_direction(tce)); iommu_tce_direction(tce));
if (ret == H_SUCCESS) if (ret != H_SUCCESS) {
continue; kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl,
entry);
if (ret == H_TOO_HARD)
goto unlock_exit; goto unlock_exit;
}
WARN_ON_ONCE_RM(1);
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
} }
kvmppc_tce_put(stt, entry + i, tce); kvmppc_tce_put(stt, entry + i, tce);
......
...@@ -36,7 +36,6 @@ ...@@ -36,7 +36,6 @@
#define OP_31_XOP_MTSR 210 #define OP_31_XOP_MTSR 210
#define OP_31_XOP_MTSRIN 242 #define OP_31_XOP_MTSRIN 242
#define OP_31_XOP_TLBIEL 274 #define OP_31_XOP_TLBIEL 274
#define OP_31_XOP_TLBIE 306
/* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */ /* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */
#define OP_31_XOP_FAKE_SC1 308 #define OP_31_XOP_FAKE_SC1 308
#define OP_31_XOP_SLBMTE 402 #define OP_31_XOP_SLBMTE 402
...@@ -110,7 +109,7 @@ static inline void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu) ...@@ -110,7 +109,7 @@ static inline void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu)
vcpu->arch.ctr_tm = vcpu->arch.regs.ctr; vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
vcpu->arch.tar_tm = vcpu->arch.tar; vcpu->arch.tar_tm = vcpu->arch.tar;
vcpu->arch.lr_tm = vcpu->arch.regs.link; vcpu->arch.lr_tm = vcpu->arch.regs.link;
vcpu->arch.cr_tm = vcpu->arch.cr; vcpu->arch.cr_tm = vcpu->arch.regs.ccr;
vcpu->arch.xer_tm = vcpu->arch.regs.xer; vcpu->arch.xer_tm = vcpu->arch.regs.xer;
vcpu->arch.vrsave_tm = vcpu->arch.vrsave; vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
} }
...@@ -129,7 +128,7 @@ static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu) ...@@ -129,7 +128,7 @@ static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu)
vcpu->arch.regs.ctr = vcpu->arch.ctr_tm; vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
vcpu->arch.tar = vcpu->arch.tar_tm; vcpu->arch.tar = vcpu->arch.tar_tm;
vcpu->arch.regs.link = vcpu->arch.lr_tm; vcpu->arch.regs.link = vcpu->arch.lr_tm;
vcpu->arch.cr = vcpu->arch.cr_tm; vcpu->arch.regs.ccr = vcpu->arch.cr_tm;
vcpu->arch.regs.xer = vcpu->arch.xer_tm; vcpu->arch.regs.xer = vcpu->arch.xer_tm;
vcpu->arch.vrsave = vcpu->arch.vrsave_tm; vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
} }
...@@ -141,7 +140,7 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val) ...@@ -141,7 +140,7 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val)
uint64_t texasr; uint64_t texasr;
/* CR0 = 0 | MSR[TS] | 0 */ /* CR0 = 0 | MSR[TS] | 0 */
vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) | vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
(((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1)) (((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
<< CR0_SHIFT); << CR0_SHIFT);
...@@ -220,7 +219,7 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) ...@@ -220,7 +219,7 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
tm_abort(ra_val); tm_abort(ra_val);
/* CR0 = 0 | MSR[TS] | 0 */ /* CR0 = 0 | MSR[TS] | 0 */
vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) | vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
(((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1)) (((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
<< CR0_SHIFT); << CR0_SHIFT);
...@@ -494,8 +493,8 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -494,8 +493,8 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (!(kvmppc_get_msr(vcpu) & MSR_PR)) { if (!(kvmppc_get_msr(vcpu) & MSR_PR)) {
preempt_disable(); preempt_disable();
vcpu->arch.cr = (CR0_TBEGIN_FAILURE | vcpu->arch.regs.ccr = (CR0_TBEGIN_FAILURE |
(vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT))); (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)));
vcpu->arch.texasr = (TEXASR_FS | TEXASR_EXACT | vcpu->arch.texasr = (TEXASR_FS | TEXASR_EXACT |
(((u64)(TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT)) (((u64)(TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
......
This diff is collapsed.
...@@ -231,6 +231,15 @@ void kvmhv_rm_send_ipi(int cpu) ...@@ -231,6 +231,15 @@ void kvmhv_rm_send_ipi(int cpu)
void __iomem *xics_phys; void __iomem *xics_phys;
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
/* For a nested hypervisor, use the XICS via hcall */
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
plpar_hcall_raw(H_IPI, retbuf, get_hard_smp_processor_id(cpu),
IPI_PRIORITY);
return;
}
/* On POWER9 we can use msgsnd for any destination cpu. */ /* On POWER9 we can use msgsnd for any destination cpu. */
if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (cpu_has_feature(CPU_FTR_ARCH_300)) {
msg |= get_hard_smp_processor_id(cpu); msg |= get_hard_smp_processor_id(cpu);
...@@ -460,12 +469,19 @@ static long kvmppc_read_one_intr(bool *again) ...@@ -460,12 +469,19 @@ static long kvmppc_read_one_intr(bool *again)
return 1; return 1;
/* Now read the interrupt from the ICP */ /* Now read the interrupt from the ICP */
xics_phys = local_paca->kvm_hstate.xics_phys; if (kvmhv_on_pseries()) {
rc = 0; unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
if (!xics_phys)
rc = opal_int_get_xirr(&xirr, false); rc = plpar_hcall_raw(H_XIRR, retbuf, 0xFF);
else xirr = cpu_to_be32(retbuf[0]);
xirr = __raw_rm_readl(xics_phys + XICS_XIRR); } else {
xics_phys = local_paca->kvm_hstate.xics_phys;
rc = 0;
if (!xics_phys)
rc = opal_int_get_xirr(&xirr, false);
else
xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
}
if (rc < 0) if (rc < 0)
return 1; return 1;
...@@ -494,7 +510,13 @@ static long kvmppc_read_one_intr(bool *again) ...@@ -494,7 +510,13 @@ static long kvmppc_read_one_intr(bool *again)
*/ */
if (xisr == XICS_IPI) { if (xisr == XICS_IPI) {
rc = 0; rc = 0;
if (xics_phys) { if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
plpar_hcall_raw(H_IPI, retbuf,
hard_smp_processor_id(), 0xff);
plpar_hcall_raw(H_EOI, retbuf, h_xirr);
} else if (xics_phys) {
__raw_rm_writeb(0xff, xics_phys + XICS_MFRR); __raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
__raw_rm_writel(xirr, xics_phys + XICS_XIRR); __raw_rm_writel(xirr, xics_phys + XICS_XIRR);
} else { } else {
...@@ -520,7 +542,13 @@ static long kvmppc_read_one_intr(bool *again) ...@@ -520,7 +542,13 @@ static long kvmppc_read_one_intr(bool *again)
/* We raced with the host, /* We raced with the host,
* we need to resend that IPI, bummer * we need to resend that IPI, bummer
*/ */
if (xics_phys) if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
plpar_hcall_raw(H_IPI, retbuf,
hard_smp_processor_id(),
IPI_PRIORITY);
} else if (xics_phys)
__raw_rm_writeb(IPI_PRIORITY, __raw_rm_writeb(IPI_PRIORITY,
xics_phys + XICS_MFRR); xics_phys + XICS_MFRR);
else else
...@@ -729,3 +757,51 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) ...@@ -729,3 +757,51 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
smp_mb(); smp_mb();
local_paca->kvm_hstate.kvm_split_mode = NULL; local_paca->kvm_hstate.kvm_split_mode = NULL;
} }
/*
* Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
* Can we inject a Decrementer or a External interrupt?
*/
void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
{
int ext;
unsigned long vec = 0;
unsigned long lpcr;
/* Insert EXTERNAL bit into LPCR at the MER bit position */
ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
lpcr = mfspr(SPRN_LPCR);
lpcr |= ext << LPCR_MER_SH;
mtspr(SPRN_LPCR, lpcr);
isync();
if (vcpu->arch.shregs.msr & MSR_EE) {
if (ext) {
vec = BOOK3S_INTERRUPT_EXTERNAL;
} else {
long int dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD))
dec = (int) dec;
if (dec < 0)
vec = BOOK3S_INTERRUPT_DECREMENTER;
}
}
if (vec) {
unsigned long msr, old_msr = vcpu->arch.shregs.msr;
kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
kvmppc_set_srr1(vcpu, old_msr);
kvmppc_set_pc(vcpu, vec);
msr = vcpu->arch.intr_msr;
if (MSR_TM_ACTIVE(old_msr))
msr |= MSR_TS_S;
vcpu->arch.shregs.msr = msr;
}
if (vcpu->arch.doorbell_request) {
mtspr(SPRN_DPDES, 1);
vcpu->arch.vcore->dpdes = 1;
smp_wmb();
vcpu->arch.doorbell_request = 0;
}
}
...@@ -64,52 +64,7 @@ BEGIN_FTR_SECTION ...@@ -64,52 +64,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Save host PMU registers */ /* Save host PMU registers */
BEGIN_FTR_SECTION bl kvmhv_save_host_pmu
/* Work around P8 PMAE bug */
li r3, -1
clrrdi r3, r3, 10
mfspr r8, SPRN_MMCR2
mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
isync
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r3, 1
sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
mfspr r7, SPRN_MMCR0 /* save MMCR0 */
mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
mfspr r6, SPRN_MMCRA
/* Clear MMCRA in order to disable SDAR updates */
li r5, 0
mtspr SPRN_MMCRA, r5
isync
lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */
cmpwi r5, 0
beq 31f /* skip if not */
mfspr r5, SPRN_MMCR1
mfspr r9, SPRN_SIAR
mfspr r10, SPRN_SDAR
std r7, HSTATE_MMCR0(r13)
std r5, HSTATE_MMCR1(r13)
std r6, HSTATE_MMCRA(r13)
std r9, HSTATE_SIAR(r13)
std r10, HSTATE_SDAR(r13)
BEGIN_FTR_SECTION
mfspr r9, SPRN_SIER
std r8, HSTATE_MMCR2(r13)
std r9, HSTATE_SIER(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mfspr r3, SPRN_PMC1
mfspr r5, SPRN_PMC2
mfspr r6, SPRN_PMC3
mfspr r7, SPRN_PMC4
mfspr r8, SPRN_PMC5
mfspr r9, SPRN_PMC6
stw r3, HSTATE_PMC1(r13)
stw r5, HSTATE_PMC2(r13)
stw r6, HSTATE_PMC3(r13)
stw r7, HSTATE_PMC4(r13)
stw r8, HSTATE_PMC5(r13)
stw r9, HSTATE_PMC6(r13)
31:
/* /*
* Put whatever is in the decrementer into the * Put whatever is in the decrementer into the
...@@ -161,3 +116,51 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ...@@ -161,3 +116,51 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r0, PPC_LR_STKOFF(r1) ld r0, PPC_LR_STKOFF(r1)
mtlr r0 mtlr r0
blr blr
_GLOBAL(kvmhv_save_host_pmu)
BEGIN_FTR_SECTION
/* Work around P8 PMAE bug */
li r3, -1
clrrdi r3, r3, 10
mfspr r8, SPRN_MMCR2
mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
isync
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r3, 1
sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
mfspr r7, SPRN_MMCR0 /* save MMCR0 */
mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
mfspr r6, SPRN_MMCRA
/* Clear MMCRA in order to disable SDAR updates */
li r5, 0
mtspr SPRN_MMCRA, r5
isync
lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */
cmpwi r5, 0
beq 31f /* skip if not */
mfspr r5, SPRN_MMCR1
mfspr r9, SPRN_SIAR
mfspr r10, SPRN_SDAR
std r7, HSTATE_MMCR0(r13)
std r5, HSTATE_MMCR1(r13)
std r6, HSTATE_MMCRA(r13)
std r9, HSTATE_SIAR(r13)
std r10, HSTATE_SDAR(r13)
BEGIN_FTR_SECTION
mfspr r9, SPRN_SIER
std r8, HSTATE_MMCR2(r13)
std r9, HSTATE_SIER(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mfspr r3, SPRN_PMC1
mfspr r5, SPRN_PMC2
mfspr r6, SPRN_PMC3
mfspr r7, SPRN_PMC4
mfspr r8, SPRN_PMC5
mfspr r9, SPRN_PMC6
stw r3, HSTATE_PMC1(r13)
stw r5, HSTATE_PMC2(r13)
stw r6, HSTATE_PMC3(r13)
stw r7, HSTATE_PMC4(r13)
stw r8, HSTATE_PMC5(r13)
stw r9, HSTATE_PMC6(r13)
31: blr
This diff is collapsed.
...@@ -177,6 +177,7 @@ void kvmppc_subcore_enter_guest(void) ...@@ -177,6 +177,7 @@ void kvmppc_subcore_enter_guest(void)
local_paca->sibling_subcore_state->in_guest[subcore_id] = 1; local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
} }
EXPORT_SYMBOL_GPL(kvmppc_subcore_enter_guest);
void kvmppc_subcore_exit_guest(void) void kvmppc_subcore_exit_guest(void)
{ {
...@@ -187,6 +188,7 @@ void kvmppc_subcore_exit_guest(void) ...@@ -187,6 +188,7 @@ void kvmppc_subcore_exit_guest(void)
local_paca->sibling_subcore_state->in_guest[subcore_id] = 0; local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
} }
EXPORT_SYMBOL_GPL(kvmppc_subcore_exit_guest);
static bool kvmppc_tb_resync_required(void) static bool kvmppc_tb_resync_required(void)
{ {
...@@ -331,5 +333,13 @@ long kvmppc_realmode_hmi_handler(void) ...@@ -331,5 +333,13 @@ long kvmppc_realmode_hmi_handler(void)
} else { } else {
wait_for_tb_resync(); wait_for_tb_resync();
} }
/*
* Reset tb_offset_applied so the guest exit code won't try
* to subtract the previous timebase offset from the timebase.
*/
if (local_paca->kvm_hstate.kvm_vcore)
local_paca->kvm_hstate.kvm_vcore->tb_offset_applied = 0;
return 0; return 0;
} }
...@@ -136,7 +136,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, ...@@ -136,7 +136,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
/* Mark the target VCPU as having an interrupt pending */ /* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++; vcpu->stat.queue_intr++;
set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); set_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
/* Kick self ? Just set MER and return */ /* Kick self ? Just set MER and return */
if (vcpu == this_vcpu) { if (vcpu == this_vcpu) {
...@@ -170,8 +170,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, ...@@ -170,8 +170,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu) static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
{ {
/* Note: Only called on self ! */ /* Note: Only called on self ! */
clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, clear_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
&vcpu->arch.pending_exceptions);
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER); mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
} }
...@@ -768,6 +767,14 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) ...@@ -768,6 +767,14 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
void __iomem *xics_phys; void __iomem *xics_phys;
int64_t rc; int64_t rc;
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
iosync();
plpar_hcall_raw(H_EOI, retbuf, hwirq);
return;
}
rc = pnv_opal_pci_msi_eoi(c, hwirq); rc = pnv_opal_pci_msi_eoi(c, hwirq);
if (rc) if (rc)
......
This diff is collapsed.
...@@ -130,7 +130,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) ...@@ -130,7 +130,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
return RESUME_GUEST; return RESUME_GUEST;
} }
/* Set CR0 to indicate previous transactional state */ /* Set CR0 to indicate previous transactional state */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
/* L=1 => tresume, L=0 => tsuspend */ /* L=1 => tresume, L=0 => tsuspend */
if (instr & (1 << 21)) { if (instr & (1 << 21)) {
...@@ -174,7 +174,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) ...@@ -174,7 +174,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
copy_from_checkpoint(vcpu); copy_from_checkpoint(vcpu);
/* Set CR0 to indicate previous transactional state */ /* Set CR0 to indicate previous transactional state */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
vcpu->arch.shregs.msr &= ~MSR_TS_MASK; vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
return RESUME_GUEST; return RESUME_GUEST;
...@@ -204,7 +204,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) ...@@ -204,7 +204,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
copy_to_checkpoint(vcpu); copy_to_checkpoint(vcpu);
/* Set CR0 to indicate previous transactional state */ /* Set CR0 to indicate previous transactional state */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
vcpu->arch.shregs.msr = msr | MSR_TS_S; vcpu->arch.shregs.msr = msr | MSR_TS_S;
return RESUME_GUEST; return RESUME_GUEST;
......
...@@ -89,7 +89,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) ...@@ -89,7 +89,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
if (instr & (1 << 21)) if (instr & (1 << 21))
vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
/* Set CR0 to 0b0010 */ /* Set CR0 to 0b0010 */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0x20000000; vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
0x20000000;
return 1; return 1;
} }
...@@ -105,5 +106,5 @@ void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu) ...@@ -105,5 +106,5 @@ void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */ vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */
vcpu->arch.regs.nip = vcpu->arch.tfhar; vcpu->arch.regs.nip = vcpu->arch.tfhar;
copy_from_checkpoint(vcpu); copy_from_checkpoint(vcpu);
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000; vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | 0xa0000000;
} }
...@@ -167,7 +167,7 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu) ...@@ -167,7 +167,7 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
svcpu->gpr[11] = vcpu->arch.regs.gpr[11]; svcpu->gpr[11] = vcpu->arch.regs.gpr[11];
svcpu->gpr[12] = vcpu->arch.regs.gpr[12]; svcpu->gpr[12] = vcpu->arch.regs.gpr[12];
svcpu->gpr[13] = vcpu->arch.regs.gpr[13]; svcpu->gpr[13] = vcpu->arch.regs.gpr[13];
svcpu->cr = vcpu->arch.cr; svcpu->cr = vcpu->arch.regs.ccr;
svcpu->xer = vcpu->arch.regs.xer; svcpu->xer = vcpu->arch.regs.xer;
svcpu->ctr = vcpu->arch.regs.ctr; svcpu->ctr = vcpu->arch.regs.ctr;
svcpu->lr = vcpu->arch.regs.link; svcpu->lr = vcpu->arch.regs.link;
...@@ -249,7 +249,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu) ...@@ -249,7 +249,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
vcpu->arch.regs.gpr[11] = svcpu->gpr[11]; vcpu->arch.regs.gpr[11] = svcpu->gpr[11];
vcpu->arch.regs.gpr[12] = svcpu->gpr[12]; vcpu->arch.regs.gpr[12] = svcpu->gpr[12];
vcpu->arch.regs.gpr[13] = svcpu->gpr[13]; vcpu->arch.regs.gpr[13] = svcpu->gpr[13];
vcpu->arch.cr = svcpu->cr; vcpu->arch.regs.ccr = svcpu->cr;
vcpu->arch.regs.xer = svcpu->xer; vcpu->arch.regs.xer = svcpu->xer;
vcpu->arch.regs.ctr = svcpu->ctr; vcpu->arch.regs.ctr = svcpu->ctr;
vcpu->arch.regs.link = svcpu->lr; vcpu->arch.regs.link = svcpu->lr;
...@@ -1246,7 +1246,6 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -1246,7 +1246,6 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST; r = RESUME_GUEST;
break; break;
case BOOK3S_INTERRUPT_EXTERNAL: case BOOK3S_INTERRUPT_EXTERNAL:
case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
case BOOK3S_INTERRUPT_EXTERNAL_HV: case BOOK3S_INTERRUPT_EXTERNAL_HV:
case BOOK3S_INTERRUPT_H_VIRT: case BOOK3S_INTERRUPT_H_VIRT:
vcpu->stat.ext_intr_exits++; vcpu->stat.ext_intr_exits++;
......
...@@ -310,7 +310,7 @@ static inline bool icp_try_update(struct kvmppc_icp *icp, ...@@ -310,7 +310,7 @@ static inline bool icp_try_update(struct kvmppc_icp *icp,
*/ */
if (new.out_ee) { if (new.out_ee) {
kvmppc_book3s_queue_irqprio(icp->vcpu, kvmppc_book3s_queue_irqprio(icp->vcpu,
BOOK3S_INTERRUPT_EXTERNAL_LEVEL); BOOK3S_INTERRUPT_EXTERNAL);
if (!change_self) if (!change_self)
kvmppc_fast_vcpu_kick(icp->vcpu); kvmppc_fast_vcpu_kick(icp->vcpu);
} }
...@@ -593,8 +593,7 @@ static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu) ...@@ -593,8 +593,7 @@ static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
u32 xirr; u32 xirr;
/* First, remove EE from the processor */ /* First, remove EE from the processor */
kvmppc_book3s_dequeue_irqprio(icp->vcpu, kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
/* /*
* ICP State: Accept_Interrupt * ICP State: Accept_Interrupt
...@@ -754,8 +753,7 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) ...@@ -754,8 +753,7 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
* We can remove EE from the current processor, the update * We can remove EE from the current processor, the update
* transaction will set it again if needed * transaction will set it again if needed
*/ */
kvmppc_book3s_dequeue_irqprio(icp->vcpu, kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
do { do {
old_state = new_state = READ_ONCE(icp->state); old_state = new_state = READ_ONCE(icp->state);
...@@ -1167,8 +1165,7 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval) ...@@ -1167,8 +1165,7 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
* Deassert the CPU interrupt request. * Deassert the CPU interrupt request.
* icp_try_update will reassert it if necessary. * icp_try_update will reassert it if necessary.
*/ */
kvmppc_book3s_dequeue_irqprio(icp->vcpu, kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
/* /*
* Note that if we displace an interrupt from old_state.xisr, * Note that if we displace an interrupt from old_state.xisr,
...@@ -1393,7 +1390,8 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type) ...@@ -1393,7 +1390,8 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
} }
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (cpu_has_feature(CPU_FTR_ARCH_206)) { if (cpu_has_feature(CPU_FTR_ARCH_206) &&
cpu_has_feature(CPU_FTR_HVMODE)) {
/* Enable real mode support */ /* Enable real mode support */
xics->real_mode = ENABLE_REALMODE; xics->real_mode = ENABLE_REALMODE;
xics->real_mode_dbg = DEBUG_REALMODE; xics->real_mode_dbg = DEBUG_REALMODE;
......
...@@ -61,6 +61,69 @@ ...@@ -61,6 +61,69 @@
*/ */
#define XIVE_Q_GAP 2 #define XIVE_Q_GAP 2
/*
* Push a vcpu's context to the XIVE on guest entry.
* This assumes we are in virtual mode (MMU on)
*/
void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
{
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
u64 pq;
if (!tima)
return;
eieio();
__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
vcpu->arch.xive_pushed = 1;
eieio();
/*
* We clear the irq_pending flag. There is a small chance of a
* race vs. the escalation interrupt happening on another
* processor setting it again, but the only consequence is to
* cause a spurious wakeup on the next H_CEDE, which is not an
* issue.
*/
vcpu->arch.irq_pending = 0;
/*
* In single escalation mode, if the escalation interrupt is
* on, we mask it.
*/
if (vcpu->arch.xive_esc_on) {
pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
XIVE_ESB_SET_PQ_01));
mb();
/*
* We have a possible subtle race here: The escalation
* interrupt might have fired and be on its way to the
* host queue while we mask it, and if we unmask it
* early enough (re-cede right away), there is a
* theorical possibility that it fires again, thus
* landing in the target queue more than once which is
* a big no-no.
*
* Fortunately, solving this is rather easy. If the
* above load setting PQ to 01 returns a previous
* value where P is set, then we know the escalation
* interrupt is somewhere on its way to the host. In
* that case we simply don't clear the xive_esc_on
* flag below. It will be eventually cleared by the
* handler for the escalation interrupt.
*
* Then, when doing a cede, we check that flag again
* before re-enabling the escalation interrupt, and if
* set, we abort the cede.
*/
if (!(pq & XIVE_ESB_VAL_P))
/* Now P is 0, we can clear the flag */
vcpu->arch.xive_esc_on = 0;
}
}
EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
/* /*
* This is a simple trigger for a generic XIVE IRQ. This must * This is a simple trigger for a generic XIVE IRQ. This must
* only be called for interrupts that support a trigger page * only be called for interrupts that support a trigger page
......
...@@ -280,14 +280,6 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu) ...@@ -280,14 +280,6 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
/* First collect pending bits from HW */ /* First collect pending bits from HW */
GLUE(X_PFX,ack_pending)(xc); GLUE(X_PFX,ack_pending)(xc);
/*
* Cleanup the old-style bits if needed (they may have been
* set by pull or an escalation interrupts).
*/
if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions))
clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
&vcpu->arch.pending_exceptions);
pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n", pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
xc->pending, xc->hw_cppr, xc->cppr); xc->pending, xc->hw_cppr, xc->cppr);
......
...@@ -182,7 +182,7 @@ ...@@ -182,7 +182,7 @@
*/ */
PPC_LL r4, PACACURRENT(r13) PPC_LL r4, PACACURRENT(r13)
PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4) PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4)
stw r10, VCPU_CR(r4) PPC_STL r10, VCPU_CR(r4)
PPC_STL r11, VCPU_GPR(R4)(r4) PPC_STL r11, VCPU_GPR(R4)(r4)
PPC_STL r5, VCPU_GPR(R5)(r4) PPC_STL r5, VCPU_GPR(R5)(r4)
PPC_STL r6, VCPU_GPR(R6)(r4) PPC_STL r6, VCPU_GPR(R6)(r4)
...@@ -292,7 +292,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1) ...@@ -292,7 +292,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
PPC_STL r4, VCPU_GPR(R4)(r11) PPC_STL r4, VCPU_GPR(R4)(r11)
PPC_LL r4, THREAD_NORMSAVE(0)(r10) PPC_LL r4, THREAD_NORMSAVE(0)(r10)
PPC_STL r5, VCPU_GPR(R5)(r11) PPC_STL r5, VCPU_GPR(R5)(r11)
stw r13, VCPU_CR(r11) PPC_STL r13, VCPU_CR(r11)
mfspr r5, \srr0 mfspr r5, \srr0
PPC_STL r3, VCPU_GPR(R10)(r11) PPC_STL r3, VCPU_GPR(R10)(r11)
PPC_LL r3, THREAD_NORMSAVE(2)(r10) PPC_LL r3, THREAD_NORMSAVE(2)(r10)
...@@ -319,7 +319,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1) ...@@ -319,7 +319,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
PPC_STL r4, VCPU_GPR(R4)(r11) PPC_STL r4, VCPU_GPR(R4)(r11)
PPC_LL r4, GPR9(r8) PPC_LL r4, GPR9(r8)
PPC_STL r5, VCPU_GPR(R5)(r11) PPC_STL r5, VCPU_GPR(R5)(r11)
stw r9, VCPU_CR(r11) PPC_STL r9, VCPU_CR(r11)
mfspr r5, \srr0 mfspr r5, \srr0
PPC_STL r3, VCPU_GPR(R8)(r11) PPC_STL r3, VCPU_GPR(R8)(r11)
PPC_LL r3, GPR10(r8) PPC_LL r3, GPR10(r8)
...@@ -643,7 +643,7 @@ lightweight_exit: ...@@ -643,7 +643,7 @@ lightweight_exit:
PPC_LL r3, VCPU_LR(r4) PPC_LL r3, VCPU_LR(r4)
PPC_LL r5, VCPU_XER(r4) PPC_LL r5, VCPU_XER(r4)
PPC_LL r6, VCPU_CTR(r4) PPC_LL r6, VCPU_CTR(r4)
lwz r7, VCPU_CR(r4) PPC_LL r7, VCPU_CR(r4)
PPC_LL r8, VCPU_PC(r4) PPC_LL r8, VCPU_PC(r4)
PPC_LD(r9, VCPU_SHARED_MSR, r11) PPC_LD(r9, VCPU_SHARED_MSR, r11)
PPC_LL r0, VCPU_GPR(R0)(r4) PPC_LL r0, VCPU_GPR(R0)(r4)
......
...@@ -117,7 +117,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) ...@@ -117,7 +117,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
emulated = EMULATE_FAIL; emulated = EMULATE_FAIL;
vcpu->arch.regs.msr = vcpu->arch.shared->msr; vcpu->arch.regs.msr = vcpu->arch.shared->msr;
vcpu->arch.regs.ccr = vcpu->arch.cr;
if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) { if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) {
int type = op.type & INSTR_TYPE_MASK; int type = op.type & INSTR_TYPE_MASK;
int size = GETSIZE(op.type); int size = GETSIZE(op.type);
......
...@@ -594,7 +594,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ...@@ -594,7 +594,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !!(hv_enabled && radix_enabled()); r = !!(hv_enabled && radix_enabled());
break; break;
case KVM_CAP_PPC_MMU_HASH_V3: case KVM_CAP_PPC_MMU_HASH_V3:
r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300)); r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300) &&
cpu_has_feature(CPU_FTR_HVMODE));
break; break;
#endif #endif
case KVM_CAP_SYNC_MMU: case KVM_CAP_SYNC_MMU:
......
This diff is collapsed.
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
{0x400, "INST_STORAGE"}, \ {0x400, "INST_STORAGE"}, \
{0x480, "INST_SEGMENT"}, \ {0x480, "INST_SEGMENT"}, \
{0x500, "EXTERNAL"}, \ {0x500, "EXTERNAL"}, \
{0x501, "EXTERNAL_LEVEL"}, \
{0x502, "EXTERNAL_HV"}, \ {0x502, "EXTERNAL_HV"}, \
{0x600, "ALIGNMENT"}, \ {0x600, "ALIGNMENT"}, \
{0x700, "PROGRAM"}, \ {0x700, "PROGRAM"}, \
......
...@@ -830,6 +830,15 @@ void radix__flush_pwc_lpid(unsigned int lpid) ...@@ -830,6 +830,15 @@ void radix__flush_pwc_lpid(unsigned int lpid)
} }
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid); EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
/*
* Flush partition scoped translations from LPID (=LPIDR)
*/
void radix__flush_tlb_lpid(unsigned int lpid)
{
_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid);
/* /*
* Flush partition scoped translations from LPID (=LPIDR) * Flush partition scoped translations from LPID (=LPIDR)
*/ */
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
{0x400, "INST_STORAGE"}, \ {0x400, "INST_STORAGE"}, \
{0x480, "INST_SEGMENT"}, \ {0x480, "INST_SEGMENT"}, \
{0x500, "EXTERNAL"}, \ {0x500, "EXTERNAL"}, \
{0x501, "EXTERNAL_LEVEL"}, \
{0x502, "EXTERNAL_HV"}, \ {0x502, "EXTERNAL_HV"}, \
{0x600, "ALIGNMENT"}, \ {0x600, "ALIGNMENT"}, \
{0x700, "PROGRAM"}, \ {0x700, "PROGRAM"}, \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment