Commit ffcb09f2 authored by Radim Krčmář

Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc

PPC KVM update for 4.10:

 * Support for KVM guests on POWER9 using the hashed page table MMU.
 * Updates and improvements to the halt-polling support on PPC, from
   Suraj Jitindar Singh.
 * An optimization to speed up emulated MMIO, from Yongji Xie.
 * Various other minor cleanups.
parents bf65014d 6ccad8ce
...@@ -6,6 +6,8 @@ cpuid.txt
- KVM-specific cpuid leaves (x86).
devices/
- KVM_CAP_DEVICE_CTRL userspace API.
halt-polling.txt
- notes on halt-polling
hypercalls.txt
- KVM hypercalls.
locking.txt
...
...@@ -2023,6 +2023,8 @@ registers, find a list below:
PPC | KVM_REG_PPC_WORT | 64
PPC | KVM_REG_PPC_SPRG9 | 64
PPC | KVM_REG_PPC_DBSR | 32
PPC | KVM_REG_PPC_TIDR | 64
PPC | KVM_REG_PPC_PSSCR | 64
PPC | KVM_REG_PPC_TM_GPR0 | 64
...
PPC | KVM_REG_PPC_TM_GPR31 | 64
...@@ -2039,6 +2041,7 @@ registers, find a list below:
PPC | KVM_REG_PPC_TM_VSCR | 32
PPC | KVM_REG_PPC_TM_DSCR | 64
PPC | KVM_REG_PPC_TM_TAR | 64
PPC | KVM_REG_PPC_TM_XER | 64
| |
MIPS | KVM_REG_MIPS_R0 | 64
...
...
The KVM halt polling system
===========================
The KVM halt polling system provides a feature within KVM whereby the latency
of a guest can, under some circumstances, be reduced by polling in the host
for some time period after the guest has ceded and elected to no longer run.
That is, when a guest vcpu has ceded, or in the case of powerpc when all of the
vcpus of a single vcore have ceded, the host kernel polls for wakeup conditions
before giving up the cpu to the scheduler in order to let something else run.

Polling provides a latency advantage in cases where the guest can be run again
very quickly, since it saves at least one trip through the scheduler, normally
on the order of a few microseconds, although the actual benefit is workload
dependent. If no wakeup source arrives during the polling interval, or if some
other task on the runqueue becomes runnable, the scheduler is invoked. Halt
polling is therefore especially useful for workloads with very short wakeup
periods, where the time spent halt polling is minimised and the savings from
not invoking the scheduler are significant.

The generic halt polling code is implemented in:

	virt/kvm/kvm_main.c: kvm_vcpu_block()

The powerpc kvm-hv specific case is implemented in:

	arch/powerpc/kvm/book3s_hv.c: kvmppc_vcore_blocked()

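To make the flow described above concrete, here is a minimal, self-contained
sketch of the polling loop. It is not the actual kvm_vcpu_block() code: the
helpers wakeup_pending(), other_task_runnable(), block_vcpu() and now_ns() are
hypothetical stand-ins for the real KVM internals.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for KVM internals; names are illustrative only. */
static bool wakeup_pending(void)      { return false; } /* e.g. interrupt for the vcpu */
static bool other_task_runnable(void) { return false; } /* someone else needs this cpu */
static void block_vcpu(void)          { puts("no wakeup: scheduler invoked"); }
static uint64_t now_ns(void)          { static uint64_t t; return t += 1000; }

/* Poll for up to halt_poll_ns before giving the cpu back to the scheduler. */
static void vcpu_halt(uint64_t halt_poll_ns)
{
        uint64_t start = now_ns();

        while (now_ns() - start < halt_poll_ns) {
                if (wakeup_pending()) {
                        puts("wakeup caught while polling: scheduler avoided");
                        return;
                }
                if (other_task_runnable())
                        break;          /* cease polling immediately */
        }
        block_vcpu();
}

int main(void)
{
        vcpu_halt(10000);               /* 10 us: the powerpc default set by this patch */
        return 0;
}
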
Halt Polling Interval
=====================
The maximum time for which to poll before invoking the scheduler, referred to
as the halt polling interval, is increased and decreased based on the perceived
effectiveness of the polling in an attempt to limit pointless polling.
This value is stored in either the vcpu struct:

	kvm_vcpu->halt_poll_ns

or in the case of powerpc kvm-hv, in the vcore struct:

	kvmppc_vcore->halt_poll_ns

Thus this is a per vcpu (or vcore) value.

During polling, if a wakeup source is received within the halt polling interval
the interval is left unchanged. If no wakeup source is received during the
polling interval (and thus schedule is invoked) there are two cases: either the
polling interval and total block time[0] were both less than the global max
polling interval (see module parameters below), or the total block time was
greater than the global max polling interval.

If both the polling interval and the total block time were less than the global
max polling interval, then the polling interval can be increased in the hope
that next time, during the longer polling interval, the wakeup source will
arrive while the host is still polling and the latency benefit will be
realised. The polling interval is grown in the function grow_halt_poll_ns()
and is multiplied by the module parameter halt_poll_ns_grow.

If the total block time was greater than the global max polling interval, then
the host will never poll for long enough (limited by the global max) to catch
the wakeup while polling, so the interval may as well be shrunk in order to
avoid pointless polling. The polling interval is shrunk in the function
shrink_halt_poll_ns() and is divided by the module parameter
halt_poll_ns_shrink, or set to 0 if halt_poll_ns_shrink == 0. A simplified
sketch of this adjustment logic follows the footnote below.

It is worth noting that this adjustment process attempts to home in on some
steady state polling interval, but it will only really do a good job for
wakeups which come at an approximately constant rate; otherwise there will be
constant adjustment of the polling interval.

[0] total block time: the time between when the halt polling function is
    invoked and when a wakeup source is received (irrespective of whether the
    scheduler is invoked within that function).

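The following standalone sketch illustrates the grow/shrink adjustment just
described. Only the module parameter names and their default values come from
this document; the surrounding code, including the capping of the grown value
at the global max, is a simplified approximation of the kernel logic, not the
exact implementation.

#include <stdint.h>
#include <stdio.h>

/* Module parameters (defaults as documented below; halt_poll_ns is per arch). */
static uint64_t halt_poll_ns        = 10000; /* global max polling interval */
static uint64_t halt_poll_ns_grow   = 2;
static uint64_t halt_poll_ns_shrink = 0;

/* Grow the per-vcpu interval, capped at the global maximum. */
static uint64_t grow_halt_poll_ns(uint64_t val)
{
        val *= halt_poll_ns_grow;
        if (val > halt_poll_ns)
                val = halt_poll_ns;
        return val;
}

/* Shrink the per-vcpu interval, or reset it to 0 when the divisor is 0. */
static uint64_t shrink_halt_poll_ns(uint64_t val)
{
        if (halt_poll_ns_shrink == 0)
                return 0;
        return val / halt_poll_ns_shrink;
}

int main(void)
{
        uint64_t interval = 500;        /* current vcpu->halt_poll_ns */
        uint64_t block_ns = 8000;       /* measured total block time */

        if (block_ns < halt_poll_ns && interval < halt_poll_ns)
                interval = grow_halt_poll_ns(interval);   /* poll longer next time */
        else if (block_ns > halt_poll_ns)
                interval = shrink_halt_poll_ns(interval); /* polling was pointless */

        printf("new halt_poll_ns = %lu\n", (unsigned long)interval);
        return 0;
}
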
Module Parameters
=================
The kvm module has 3 tunable module parameters to adjust the global max polling
interval, as well as the rate at which the polling interval is grown and
shrunk. These variables are defined in include/linux/kvm_host.h and as module
parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
powerpc kvm-hv case.

Module Parameter    | Description                      | Default Value
--------------------------------------------------------------------------------
halt_poll_ns        | The global max polling interval  | KVM_HALT_POLL_NS_DEFAULT
                    | which defines the ceiling value  |
                    | of the polling interval for      | (per arch value)
                    | each vcpu.                       |
--------------------------------------------------------------------------------
halt_poll_ns_grow   | The value by which the halt      | 2
                    | polling interval is multiplied   |
                    | in the grow_halt_poll_ns()       |
                    | function.                        |
--------------------------------------------------------------------------------
halt_poll_ns_shrink | The value by which the halt      | 0
                    | polling interval is divided in   |
                    | the shrink_halt_poll_ns()        |
                    | function.                        |
--------------------------------------------------------------------------------

These module parameters can be set via the sysfs files in:

	/sys/module/kvm/parameters/

Note: these module parameters are system-wide values and cannot be tuned on a
per-vm basis.

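As an illustration of the above (in practice a simple shell redirect as root
does the same job), the following small C program writes a new global max
polling interval through the sysfs path; the value 20000 is an arbitrary
example.

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        /* System-wide knob; requires root. Path as documented above. */
        const char *path = "/sys/module/kvm/parameters/halt_poll_ns";
        unsigned long new_max = 20000;  /* 20 us, arbitrary example value */
        FILE *f = fopen(path, "w");

        if (!f) {
                perror("fopen");
                return EXIT_FAILURE;
        }
        fprintf(f, "%lu\n", new_max);
        if (fclose(f) != 0) {
                perror("fclose");
                return EXIT_FAILURE;
        }
        printf("halt_poll_ns set to %lu\n", new_max);
        return 0;
}
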
Further Notes
=============
- Care should be taken when setting the halt_poll_ns module parameter, as a
large value has the potential to drive the cpu usage to 100% on a machine that
would otherwise be almost entirely idle. This is because, even if a guest has
wakeups during which very little work is done and which are quite far apart, if
the period is shorter than the global max polling interval (halt_poll_ns) then
the host will always poll for the entire block time and thus cpu utilisation
will go to 100%.

- Halt polling essentially presents a trade-off between power usage and
latency, and the module parameters should be used to tune this trade-off. Idle
cpu time is essentially converted to host kernel time with the aim of
decreasing latency when entering the guest.

- Halt polling will only be conducted by the host when no other tasks are
runnable on that cpu; otherwise the polling ceases immediately and schedule is
invoked to allow that other task to run. Thus halt polling does not allow a
guest to cause a denial of service of the cpu.
...@@ -14,6 +14,9 @@ ...@@ -14,6 +14,9 @@
#include <linux/threads.h> #include <linux/threads.h>
#include <linux/kprobes.h> #include <linux/kprobes.h>
#ifdef CONFIG_KVM
#include <linux/kvm_host.h>
#endif
#include <uapi/asm/ucontext.h> #include <uapi/asm/ucontext.h>
...@@ -109,4 +112,45 @@ void early_setup_secondary(void); ...@@ -109,4 +112,45 @@ void early_setup_secondary(void);
/* time */ /* time */
void accumulate_stolen_time(void); void accumulate_stolen_time(void);
/* kvm */
#ifdef CONFIG_KVM
long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce);
long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_list, unsigned long npages);
long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_value, unsigned long npages);
long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
unsigned int yield_count);
long kvmppc_h_random(struct kvm_vcpu *vcpu);
void kvmhv_commence_exit(int trap);
long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
void kvmppc_subcore_enter_guest(void);
void kvmppc_subcore_exit_guest(void);
long kvmppc_realmode_hmi_handler(void);
long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel);
long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index, unsigned long avpn);
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu);
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index, unsigned long avpn,
unsigned long va);
long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index);
long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index);
long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index);
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
unsigned long slb_v, unsigned int status, bool data);
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr);
int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
#endif
#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
...@@ -70,7 +70,9 @@ ...@@ -70,7 +70,9 @@
#define HPTE_V_SSIZE_SHIFT 62 #define HPTE_V_SSIZE_SHIFT 62
#define HPTE_V_AVPN_SHIFT 7 #define HPTE_V_AVPN_SHIFT 7
#define HPTE_V_COMMON_BITS ASM_CONST(0x000fffffffffffff)
#define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80) #define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80)
#define HPTE_V_AVPN_3_0 ASM_CONST(0x000fffffffffff80)
#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) #define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL)) #define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL))
#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) #define HPTE_V_BOLTED ASM_CONST(0x0000000000000010)
...@@ -80,14 +82,16 @@ ...@@ -80,14 +82,16 @@
#define HPTE_V_VALID ASM_CONST(0x0000000000000001) #define HPTE_V_VALID ASM_CONST(0x0000000000000001)
/* /*
* ISA 3.0 have a different HPTE format. * ISA 3.0 has a different HPTE format.
*/ */
#define HPTE_R_3_0_SSIZE_SHIFT 58 #define HPTE_R_3_0_SSIZE_SHIFT 58
#define HPTE_R_3_0_SSIZE_MASK (3ull << HPTE_R_3_0_SSIZE_SHIFT)
#define HPTE_R_PP0 ASM_CONST(0x8000000000000000) #define HPTE_R_PP0 ASM_CONST(0x8000000000000000)
#define HPTE_R_TS ASM_CONST(0x4000000000000000) #define HPTE_R_TS ASM_CONST(0x4000000000000000)
#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000)
#define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN_SHIFT 12
#define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000)
#define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000)
#define HPTE_R_PP ASM_CONST(0x0000000000000003) #define HPTE_R_PP ASM_CONST(0x0000000000000003)
#define HPTE_R_PPP ASM_CONST(0x8000000000000003) #define HPTE_R_PPP ASM_CONST(0x8000000000000003)
#define HPTE_R_N ASM_CONST(0x0000000000000004) #define HPTE_R_N ASM_CONST(0x0000000000000004)
...@@ -316,11 +320,42 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize, ...@@ -316,11 +320,42 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
*/ */
v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm); v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
v <<= HPTE_V_AVPN_SHIFT; v <<= HPTE_V_AVPN_SHIFT;
if (!cpu_has_feature(CPU_FTR_ARCH_300)) v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
return v; return v;
} }
/*
* ISA v3.0 defines a new HPTE format, which differs from the old
* format in having smaller AVPN and ARPN fields, and the B field
* in the second dword instead of the first.
*/
static inline unsigned long hpte_old_to_new_v(unsigned long v)
{
/* trim AVPN, drop B */
return v & HPTE_V_COMMON_BITS;
}
static inline unsigned long hpte_old_to_new_r(unsigned long v, unsigned long r)
{
/* move B field from 1st to 2nd dword, trim ARPN */
return (r & ~HPTE_R_3_0_SSIZE_MASK) |
(((v) >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT);
}
static inline unsigned long hpte_new_to_old_v(unsigned long v, unsigned long r)
{
/* insert B field */
return (v & HPTE_V_COMMON_BITS) |
((r & HPTE_R_3_0_SSIZE_MASK) <<
(HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT));
}
static inline unsigned long hpte_new_to_old_r(unsigned long r)
{
/* clear out B field */
return r & ~HPTE_R_3_0_SSIZE_MASK;
}
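The conversion helpers above are simple enough to exercise outside the kernel.
The following standalone sketch copies the relevant mask values from this
header and checks that a new-format HPTE converted to the old format and back
is unchanged (the old-to-new direction trims the wider old-format AVPN, as the
comments note). It is an illustration of the bit manipulation only, and assumes
a 64-bit unsigned long as on ppc64.

#include <assert.h>
#include <stdio.h>

/* Mask values copied from the header above; assumes 64-bit unsigned long. */
#define HPTE_V_COMMON_BITS     0x000fffffffffffffUL
#define HPTE_V_SSIZE_SHIFT     62
#define HPTE_R_3_0_SSIZE_SHIFT 58
#define HPTE_R_3_0_SSIZE_MASK  (3UL << HPTE_R_3_0_SSIZE_SHIFT)

static unsigned long hpte_old_to_new_v(unsigned long v)
{
        return v & HPTE_V_COMMON_BITS;                    /* trim AVPN, drop B */
}

static unsigned long hpte_old_to_new_r(unsigned long v, unsigned long r)
{
        return (r & ~HPTE_R_3_0_SSIZE_MASK) |             /* move B to 2nd dword */
               ((v >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT);
}

static unsigned long hpte_new_to_old_v(unsigned long v, unsigned long r)
{
        return (v & HPTE_V_COMMON_BITS) |                 /* reinsert B */
               ((r & HPTE_R_3_0_SSIZE_MASK) <<
                (HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT));
}

static unsigned long hpte_new_to_old_r(unsigned long r)
{
        return r & ~HPTE_R_3_0_SSIZE_MASK;                /* clear out B */
}

int main(void)
{
        /* Arbitrary new-format HPTE: B = 0b01 lives in the second dword. */
        unsigned long new_v = 0x0003456789abc001UL;
        unsigned long new_r = (1UL << HPTE_R_3_0_SSIZE_SHIFT) | 0x1234000UL;

        unsigned long old_v = hpte_new_to_old_v(new_v, new_r);
        unsigned long old_r = hpte_new_to_old_r(new_r);

        /* new -> old -> new round-trips exactly. */
        assert(hpte_old_to_new_v(old_v) == new_v);
        assert(hpte_old_to_new_r(old_v, old_r) == new_r);

        printf("old_v=%#lx old_r=%#lx\n", old_v, old_r);
        return 0;
}
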
/* /*
* This function sets the AVPN and L fields of the HPTE appropriately * This function sets the AVPN and L fields of the HPTE appropriately
* using the base page size and actual page size. * using the base page size and actual page size.
...@@ -341,12 +376,8 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize, ...@@ -341,12 +376,8 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize,
* aligned for the requested page size * aligned for the requested page size
*/ */
static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize, static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize,
int actual_psize, int ssize) int actual_psize)
{ {
if (cpu_has_feature(CPU_FTR_ARCH_300))
pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT;
/* A 4K page needs no special encoding */ /* A 4K page needs no special encoding */
if (actual_psize == MMU_PAGE_4K) if (actual_psize == MMU_PAGE_4K)
return pa & HPTE_R_RPN; return pa & HPTE_R_RPN;
......
...@@ -99,6 +99,7 @@ ...@@ -99,6 +99,7 @@
#define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40 #define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40
#define BOOK3S_INTERRUPT_HMI 0xe60 #define BOOK3S_INTERRUPT_HMI 0xe60
#define BOOK3S_INTERRUPT_H_DOORBELL 0xe80 #define BOOK3S_INTERRUPT_H_DOORBELL 0xe80
#define BOOK3S_INTERRUPT_H_VIRT 0xea0
#define BOOK3S_INTERRUPT_PERFMON 0xf00 #define BOOK3S_INTERRUPT_PERFMON 0xf00
#define BOOK3S_INTERRUPT_ALTIVEC 0xf20 #define BOOK3S_INTERRUPT_ALTIVEC 0xf20
#define BOOK3S_INTERRUPT_VSX 0xf40 #define BOOK3S_INTERRUPT_VSX 0xf40
......
...@@ -48,7 +48,7 @@ ...@@ -48,7 +48,7 @@
#ifdef CONFIG_KVM_MMIO #ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#endif #endif
#define KVM_HALT_POLL_NS_DEFAULT 500000 #define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */
/* These values are internal and can be increased later */ /* These values are internal and can be increased later */
#define KVM_NR_IRQCHIPS 1 #define KVM_NR_IRQCHIPS 1
...@@ -244,8 +244,10 @@ struct kvm_arch_memory_slot { ...@@ -244,8 +244,10 @@ struct kvm_arch_memory_slot {
struct kvm_arch { struct kvm_arch {
unsigned int lpid; unsigned int lpid;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
unsigned int tlb_sets;
unsigned long hpt_virt; unsigned long hpt_virt;
struct revmap_entry *revmap; struct revmap_entry *revmap;
atomic64_t mmio_update;
unsigned int host_lpid; unsigned int host_lpid;
unsigned long host_lpcr; unsigned long host_lpcr;
unsigned long sdr1; unsigned long sdr1;
...@@ -408,6 +410,24 @@ struct kvmppc_passthru_irqmap { ...@@ -408,6 +410,24 @@ struct kvmppc_passthru_irqmap {
#define KVMPPC_IRQ_MPIC 1 #define KVMPPC_IRQ_MPIC 1
#define KVMPPC_IRQ_XICS 2 #define KVMPPC_IRQ_XICS 2
#define MMIO_HPTE_CACHE_SIZE 4
struct mmio_hpte_cache_entry {
unsigned long hpte_v;
unsigned long hpte_r;
unsigned long rpte;
unsigned long pte_index;
unsigned long eaddr;
unsigned long slb_v;
long mmio_update;
unsigned int slb_base_pshift;
};
struct mmio_hpte_cache {
struct mmio_hpte_cache_entry entry[MMIO_HPTE_CACHE_SIZE];
unsigned int index;
};
struct openpic; struct openpic;
struct kvm_vcpu_arch { struct kvm_vcpu_arch {
...@@ -498,6 +518,8 @@ struct kvm_vcpu_arch { ...@@ -498,6 +518,8 @@ struct kvm_vcpu_arch {
ulong tcscr; ulong tcscr;
ulong acop; ulong acop;
ulong wort; ulong wort;
ulong tid;
ulong psscr;
ulong shadow_srr1; ulong shadow_srr1;
#endif #endif
u32 vrsave; /* also USPRG0 */ u32 vrsave; /* also USPRG0 */
...@@ -546,6 +568,7 @@ struct kvm_vcpu_arch { ...@@ -546,6 +568,7 @@ struct kvm_vcpu_arch {
u64 tfiar; u64 tfiar;
u32 cr_tm; u32 cr_tm;
u64 xer_tm;
u64 lr_tm; u64 lr_tm;
u64 ctr_tm; u64 ctr_tm;
u64 amr_tm; u64 amr_tm;
...@@ -655,9 +678,11 @@ struct kvm_vcpu_arch { ...@@ -655,9 +678,11 @@ struct kvm_vcpu_arch {
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
struct kvm_vcpu_arch_shared shregs; struct kvm_vcpu_arch_shared shregs;
struct mmio_hpte_cache mmio_cache;
unsigned long pgfault_addr; unsigned long pgfault_addr;
long pgfault_index; long pgfault_index;
unsigned long pgfault_hpte[2]; unsigned long pgfault_hpte[2];
struct mmio_hpte_cache_entry *pgfault_cache;
struct task_struct *run_task; struct task_struct *run_task;
struct kvm_run *kvm_run; struct kvm_run *kvm_run;
......
...@@ -483,9 +483,10 @@ extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq, ...@@ -483,9 +483,10 @@ extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq,
unsigned long host_irq); unsigned long host_irq);
extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq, extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
unsigned long host_irq); unsigned long host_irq);
extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr, extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, __be32 xirr,
struct kvmppc_irq_map *irq_map, struct kvmppc_irq_map *irq_map,
struct kvmppc_passthru_irqmap *pimap); struct kvmppc_passthru_irqmap *pimap,
bool *again);
extern int h_ipi_redirect; extern int h_ipi_redirect;
#else #else
static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
......
...@@ -208,6 +208,11 @@ extern u64 ppc64_rma_size; ...@@ -208,6 +208,11 @@ extern u64 ppc64_rma_size;
/* Cleanup function used by kexec */ /* Cleanup function used by kexec */
extern void mmu_cleanup_all(void); extern void mmu_cleanup_all(void);
extern void radix__mmu_cleanup_all(void); extern void radix__mmu_cleanup_all(void);
/* Functions for creating and updating partition table on POWER9 */
extern void mmu_partition_table_init(void);
extern void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
unsigned long dw1);
#endif /* CONFIG_PPC64 */ #endif /* CONFIG_PPC64 */
struct mm_struct; struct mm_struct;
......
...@@ -220,9 +220,12 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id, ...@@ -220,9 +220,12 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id,
int64_t opal_pci_poll2(uint64_t id, uint64_t data); int64_t opal_pci_poll2(uint64_t id, uint64_t data);
int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll); int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll);
int64_t opal_rm_int_get_xirr(__be32 *out_xirr, bool just_poll);
int64_t opal_int_set_cppr(uint8_t cppr); int64_t opal_int_set_cppr(uint8_t cppr);
int64_t opal_int_eoi(uint32_t xirr); int64_t opal_int_eoi(uint32_t xirr);
int64_t opal_rm_int_eoi(uint32_t xirr);
int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr); int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
int64_t opal_rm_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
uint32_t pe_num, uint32_t tce_size, uint32_t pe_num, uint32_t tce_size,
uint64_t dma_addr, uint32_t npages); uint64_t dma_addr, uint32_t npages);
......
...@@ -153,6 +153,8 @@ ...@@ -153,6 +153,8 @@
#define PSSCR_EC 0x00100000 /* Exit Criterion */ #define PSSCR_EC 0x00100000 /* Exit Criterion */
#define PSSCR_ESL 0x00200000 /* Enable State Loss */ #define PSSCR_ESL 0x00200000 /* Enable State Loss */
#define PSSCR_SD 0x00400000 /* Status Disable */ #define PSSCR_SD 0x00400000 /* Status Disable */
#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */
/* Floating Point Status and Control Register (FPSCR) Fields */ /* Floating Point Status and Control Register (FPSCR) Fields */
#define FPSCR_FX 0x80000000 /* FPU exception summary */ #define FPSCR_FX 0x80000000 /* FPU exception summary */
...@@ -236,6 +238,7 @@ ...@@ -236,6 +238,7 @@
#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ #define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */
#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ #define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */
#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
#define SPRN_TIDR 144 /* Thread ID register */
#define SPRN_CTRLF 0x088 #define SPRN_CTRLF 0x088
#define SPRN_CTRLT 0x098 #define SPRN_CTRLT 0x098
#define CTRL_CT 0xc0000000 /* current thread */ #define CTRL_CT 0xc0000000 /* current thread */
...@@ -294,6 +297,7 @@ ...@@ -294,6 +297,7 @@
#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */
#define SPRN_LMRR 0x32D /* Load Monitor Region Register */ #define SPRN_LMRR 0x32D /* Load Monitor Region Register */
#define SPRN_LMSER 0x32E /* Load Monitor Section Enable Register */ #define SPRN_LMSER 0x32E /* Load Monitor Section Enable Register */
#define SPRN_ASDR 0x330 /* Access segment descriptor register */
#define SPRN_IC 0x350 /* Virtual Instruction Count */ #define SPRN_IC 0x350 /* Virtual Instruction Count */
#define SPRN_VTB 0x351 /* Virtual Time Base */ #define SPRN_VTB 0x351 /* Virtual Time Base */
#define SPRN_LDBAR 0x352 /* LD Base Address Register */ #define SPRN_LDBAR 0x352 /* LD Base Address Register */
...@@ -305,6 +309,7 @@ ...@@ -305,6 +309,7 @@
/* HFSCR and FSCR bit numbers are the same */ /* HFSCR and FSCR bit numbers are the same */
#define FSCR_LM_LG 11 /* Enable Load Monitor Registers */ #define FSCR_LM_LG 11 /* Enable Load Monitor Registers */
#define FSCR_MSGP_LG 10 /* Enable MSGP */
#define FSCR_TAR_LG 8 /* Enable Target Address Register */ #define FSCR_TAR_LG 8 /* Enable Target Address Register */
#define FSCR_EBB_LG 7 /* Enable Event Based Branching */ #define FSCR_EBB_LG 7 /* Enable Event Based Branching */
#define FSCR_TM_LG 5 /* Enable Transactional Memory */ #define FSCR_TM_LG 5 /* Enable Transactional Memory */
...@@ -320,6 +325,7 @@ ...@@ -320,6 +325,7 @@
#define FSCR_DSCR __MASK(FSCR_DSCR_LG) #define FSCR_DSCR __MASK(FSCR_DSCR_LG)
#define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ #define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */
#define HFSCR_LM __MASK(FSCR_LM_LG) #define HFSCR_LM __MASK(FSCR_LM_LG)
#define HFSCR_MSGP __MASK(FSCR_MSGP_LG)
#define HFSCR_TAR __MASK(FSCR_TAR_LG) #define HFSCR_TAR __MASK(FSCR_TAR_LG)
#define HFSCR_EBB __MASK(FSCR_EBB_LG) #define HFSCR_EBB __MASK(FSCR_EBB_LG)
#define HFSCR_TM __MASK(FSCR_TM_LG) #define HFSCR_TM __MASK(FSCR_TM_LG)
...@@ -355,8 +361,10 @@ ...@@ -355,8 +361,10 @@
#define LPCR_PECE0 ASM_CONST(0x0000000000004000) /* ext. exceptions can cause exit */ #define LPCR_PECE0 ASM_CONST(0x0000000000004000) /* ext. exceptions can cause exit */
#define LPCR_PECE1 ASM_CONST(0x0000000000002000) /* decrementer can cause exit */ #define LPCR_PECE1 ASM_CONST(0x0000000000002000) /* decrementer can cause exit */
#define LPCR_PECE2 ASM_CONST(0x0000000000001000) /* machine check etc can cause exit */ #define LPCR_PECE2 ASM_CONST(0x0000000000001000) /* machine check etc can cause exit */
#define LPCR_PECE_HVEE ASM_CONST(0x0000400000000000) /* P9 Wakeup on HV interrupts */
#define LPCR_MER ASM_CONST(0x0000000000000800) /* Mediated External Exception */ #define LPCR_MER ASM_CONST(0x0000000000000800) /* Mediated External Exception */
#define LPCR_MER_SH 11 #define LPCR_MER_SH 11
#define LPCR_GTSE ASM_CONST(0x0000000000000400) /* Guest Translation Shootdown Enable */
#define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */ #define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */
#define LPCR_LPES 0x0000000c #define LPCR_LPES 0x0000000c
#define LPCR_LPES0 ASM_CONST(0x0000000000000008) /* LPAR Env selector 0 */ #define LPCR_LPES0 ASM_CONST(0x0000000000000008) /* LPAR Env selector 0 */
...@@ -377,6 +385,12 @@ ...@@ -377,6 +385,12 @@
#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ #define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
#define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */ #define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */
/*
* These bits are used in the function kvmppc_set_arch_compat() to specify and
* determine both the compatibility level which we want to emulate and the
* compatibility level which the host is capable of emulating.
*/
#define PCR_ARCH_207 0x8 /* Architecture 2.07 */
#define PCR_ARCH_206 0x4 /* Architecture 2.06 */ #define PCR_ARCH_206 0x4 /* Architecture 2.06 */
#define PCR_ARCH_205 0x2 /* Architecture 2.05 */ #define PCR_ARCH_205 0x2 /* Architecture 2.05 */
#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ #define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */
...@@ -1218,6 +1232,7 @@ ...@@ -1218,6 +1232,7 @@
#define PVR_ARCH_206 0x0f000003 #define PVR_ARCH_206 0x0f000003
#define PVR_ARCH_206p 0x0f100003 #define PVR_ARCH_206p 0x0f100003
#define PVR_ARCH_207 0x0f000004 #define PVR_ARCH_207 0x0f000004
#define PVR_ARCH_300 0x0f000005
/* Macros for setting and retrieving special purpose registers */ /* Macros for setting and retrieving special purpose registers */
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
......
...@@ -573,6 +573,10 @@ struct kvm_get_htab_header { ...@@ -573,6 +573,10 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) #define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb) #define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb)
/* POWER9 registers */
#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
/* Transactional Memory checkpointed state: /* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs * This is all GPRs, all VSX regs and a subset of SPRs
*/ */
...@@ -596,6 +600,7 @@ struct kvm_get_htab_header { ...@@ -596,6 +600,7 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) #define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67)
#define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) #define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68)
#define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) #define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69)
#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a)
/* PPC64 eXternal Interrupt Controller Specification */ /* PPC64 eXternal Interrupt Controller Specification */
#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
......
...@@ -487,6 +487,7 @@ int main(void) ...@@ -487,6 +487,7 @@ int main(void)
/* book3s */ /* book3s */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
DEFINE(KVM_TLB_SETS, offsetof(struct kvm, arch.tlb_sets));
DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
...@@ -548,6 +549,8 @@ int main(void) ...@@ -548,6 +549,8 @@ int main(void)
DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr)); DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr));
DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop));
DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort));
DEFINE(VCPU_TID, offsetof(struct kvm_vcpu, arch.tid));
DEFINE(VCPU_PSSCR, offsetof(struct kvm_vcpu, arch.psscr));
DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map)); DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map));
DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
...@@ -569,6 +572,7 @@ int main(void) ...@@ -569,6 +572,7 @@ int main(void)
DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr)); DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr));
DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm)); DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm));
DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm)); DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm));
DEFINE(VCPU_XER_TM, offsetof(struct kvm_vcpu, arch.xer_tm));
DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm)); DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm));
DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm)); DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm));
DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm)); DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm));
......
...@@ -174,7 +174,7 @@ __init_FSCR: ...@@ -174,7 +174,7 @@ __init_FSCR:
__init_HFSCR: __init_HFSCR:
mfspr r3,SPRN_HFSCR mfspr r3,SPRN_HFSCR
ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\ ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\
HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP
mtspr SPRN_HFSCR,r3 mtspr SPRN_HFSCR,r3
blr blr
......
...@@ -88,6 +88,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) ...@@ -88,6 +88,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
/* 128 (2**7) bytes in each HPTEG */ /* 128 (2**7) bytes in each HPTEG */
kvm->arch.hpt_mask = (1ul << (order - 7)) - 1; kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
atomic64_set(&kvm->arch.mmio_update, 0);
/* Allocate reverse map array */ /* Allocate reverse map array */
rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte); rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
if (!rev) { if (!rev) {
...@@ -255,7 +257,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) ...@@ -255,7 +257,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
kvmppc_set_msr(vcpu, msr); kvmppc_set_msr(vcpu, msr);
} }
long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh, long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret) unsigned long ptel, unsigned long *pte_idx_ret)
{ {
...@@ -312,7 +314,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, ...@@ -312,7 +314,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_slb *slbe; struct kvmppc_slb *slbe;
unsigned long slb_v; unsigned long slb_v;
unsigned long pp, key; unsigned long pp, key;
unsigned long v, gr; unsigned long v, orig_v, gr;
__be64 *hptep; __be64 *hptep;
int index; int index;
int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
...@@ -337,10 +339,12 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, ...@@ -337,10 +339,12 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
return -ENOENT; return -ENOENT;
} }
hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
if (cpu_has_feature(CPU_FTR_ARCH_300))
v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1]));
gr = kvm->arch.revmap[index].guest_rpte; gr = kvm->arch.revmap[index].guest_rpte;
unlock_hpte(hptep, v); unlock_hpte(hptep, orig_v);
preempt_enable(); preempt_enable();
gpte->eaddr = eaddr; gpte->eaddr = eaddr;
...@@ -438,6 +442,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -438,6 +442,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
{ {
struct kvm *kvm = vcpu->kvm; struct kvm *kvm = vcpu->kvm;
unsigned long hpte[3], r; unsigned long hpte[3], r;
unsigned long hnow_v, hnow_r;
__be64 *hptep; __be64 *hptep;
unsigned long mmu_seq, psize, pte_size; unsigned long mmu_seq, psize, pte_size;
unsigned long gpa_base, gfn_base; unsigned long gpa_base, gfn_base;
...@@ -451,6 +456,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -451,6 +456,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int writing, write_ok; unsigned int writing, write_ok;
struct vm_area_struct *vma; struct vm_area_struct *vma;
unsigned long rcbits; unsigned long rcbits;
long mmio_update;
/* /*
* Real-mode code has already searched the HPT and found the * Real-mode code has already searched the HPT and found the
...@@ -460,6 +466,19 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -460,6 +466,19 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/ */
if (ea != vcpu->arch.pgfault_addr) if (ea != vcpu->arch.pgfault_addr)
return RESUME_GUEST; return RESUME_GUEST;
if (vcpu->arch.pgfault_cache) {
mmio_update = atomic64_read(&kvm->arch.mmio_update);
if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) {
r = vcpu->arch.pgfault_cache->rpte;
psize = hpte_page_size(vcpu->arch.pgfault_hpte[0], r);
gpa_base = r & HPTE_R_RPN & ~(psize - 1);
gfn_base = gpa_base >> PAGE_SHIFT;
gpa = gpa_base | (ea & (psize - 1));
return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
dsisr & DSISR_ISSTORE);
}
}
index = vcpu->arch.pgfault_index; index = vcpu->arch.pgfault_index;
hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
rev = &kvm->arch.revmap[index]; rev = &kvm->arch.revmap[index];
...@@ -472,6 +491,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -472,6 +491,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
unlock_hpte(hptep, hpte[0]); unlock_hpte(hptep, hpte[0]);
preempt_enable(); preempt_enable();
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
hpte[0] = hpte_new_to_old_v(hpte[0], hpte[1]);
hpte[1] = hpte_new_to_old_r(hpte[1]);
}
if (hpte[0] != vcpu->arch.pgfault_hpte[0] || if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
hpte[1] != vcpu->arch.pgfault_hpte[1]) hpte[1] != vcpu->arch.pgfault_hpte[1])
return RESUME_GUEST; return RESUME_GUEST;
...@@ -575,16 +598,22 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -575,16 +598,22 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/ */
if (psize < PAGE_SIZE) if (psize < PAGE_SIZE)
psize = PAGE_SIZE; psize = PAGE_SIZE;
r = (r & ~(HPTE_R_PP0 - psize)) | ((pfn << PAGE_SHIFT) & ~(psize - 1)); r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) |
((pfn << PAGE_SHIFT) & ~(psize - 1));
if (hpte_is_writable(r) && !write_ok) if (hpte_is_writable(r) && !write_ok)
r = hpte_make_readonly(r); r = hpte_make_readonly(r);
ret = RESUME_GUEST; ret = RESUME_GUEST;
preempt_disable(); preempt_disable();
while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
cpu_relax(); cpu_relax();
if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] || hnow_v = be64_to_cpu(hptep[0]);
be64_to_cpu(hptep[1]) != hpte[1] || hnow_r = be64_to_cpu(hptep[1]);
rev->guest_rpte != hpte[2]) if (cpu_has_feature(CPU_FTR_ARCH_300)) {
hnow_v = hpte_new_to_old_v(hnow_v, hnow_r);
hnow_r = hpte_new_to_old_r(hnow_r);
}
if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] ||
rev->guest_rpte != hpte[2])
/* HPTE has been changed under us; let the guest retry */ /* HPTE has been changed under us; let the guest retry */
goto out_unlock; goto out_unlock;
hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
...@@ -615,6 +644,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -615,6 +644,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
} }
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
r = hpte_old_to_new_r(hpte[0], r);
hpte[0] = hpte_old_to_new_v(hpte[0]);
}
hptep[1] = cpu_to_be64(r); hptep[1] = cpu_to_be64(r);
eieio(); eieio();
__unlock_hpte(hptep, hpte[0]); __unlock_hpte(hptep, hpte[0]);
...@@ -758,6 +791,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, ...@@ -758,6 +791,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
hpte_rpn(ptel, psize) == gfn) { hpte_rpn(ptel, psize) == gfn) {
hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
kvmppc_invalidate_hpte(kvm, hptep, i); kvmppc_invalidate_hpte(kvm, hptep, i);
hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
/* Harvest R and C */ /* Harvest R and C */
rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
...@@ -1165,7 +1199,7 @@ static long record_hpte(unsigned long flags, __be64 *hptp, ...@@ -1165,7 +1199,7 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
unsigned long *hpte, struct revmap_entry *revp, unsigned long *hpte, struct revmap_entry *revp,
int want_valid, int first_pass) int want_valid, int first_pass)
{ {
unsigned long v, r; unsigned long v, r, hr;
unsigned long rcbits_unset; unsigned long rcbits_unset;
int ok = 1; int ok = 1;
int valid, dirty; int valid, dirty;
...@@ -1192,6 +1226,11 @@ static long record_hpte(unsigned long flags, __be64 *hptp, ...@@ -1192,6 +1226,11 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
cpu_relax(); cpu_relax();
v = be64_to_cpu(hptp[0]); v = be64_to_cpu(hptp[0]);
hr = be64_to_cpu(hptp[1]);
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
v = hpte_new_to_old_v(v, hr);
hr = hpte_new_to_old_r(hr);
}
/* re-evaluate valid and dirty from synchronized HPTE value */ /* re-evaluate valid and dirty from synchronized HPTE value */
valid = !!(v & HPTE_V_VALID); valid = !!(v & HPTE_V_VALID);
...@@ -1199,8 +1238,8 @@ static long record_hpte(unsigned long flags, __be64 *hptp, ...@@ -1199,8 +1238,8 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
/* Harvest R and C into guest view if necessary */ /* Harvest R and C into guest view if necessary */
rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) { if (valid && (rcbits_unset & hr)) {
revp->guest_rpte |= (be64_to_cpu(hptp[1]) & revp->guest_rpte |= (hr &
(HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED; (HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED;
dirty = 1; dirty = 1;
} }
...@@ -1608,7 +1647,7 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf, ...@@ -1608,7 +1647,7 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
return ret; return ret;
} }
ssize_t debugfs_htab_write(struct file *file, const char __user *buf, static ssize_t debugfs_htab_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos) size_t len, loff_t *ppos)
{ {
return -EACCES; return -EACCES;
......
...@@ -39,7 +39,7 @@ ...@@ -39,7 +39,7 @@
#include <asm/udbg.h> #include <asm/udbg.h>
#include <asm/iommu.h> #include <asm/iommu.h>
#include <asm/tce.h> #include <asm/tce.h>
#include <asm/iommu.h> #include <asm/asm-prototypes.h>
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
......
...@@ -26,6 +26,9 @@ ...@@ -26,6 +26,9 @@
#include <asm/dbell.h> #include <asm/dbell.h>
#include <asm/cputhreads.h> #include <asm/cputhreads.h>
#include <asm/io.h> #include <asm/io.h>
#include <asm/asm-prototypes.h>
#include <asm/opal.h>
#include <asm/smp.h>
#define KVM_CMA_CHUNK_ORDER 18 #define KVM_CMA_CHUNK_ORDER 18
...@@ -205,12 +208,18 @@ static inline void rm_writeb(unsigned long paddr, u8 val) ...@@ -205,12 +208,18 @@ static inline void rm_writeb(unsigned long paddr, u8 val)
void kvmhv_rm_send_ipi(int cpu) void kvmhv_rm_send_ipi(int cpu)
{ {
unsigned long xics_phys; unsigned long xics_phys;
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
/* On POWER8 for IPIs to threads in the same core, use msgsnd */ /* On POWER9 we can use msgsnd for any destination cpu. */
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
msg |= get_hard_smp_processor_id(cpu);
__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
return;
}
/* On POWER8 for IPIs to threads in the same core, use msgsnd. */
if (cpu_has_feature(CPU_FTR_ARCH_207S) && if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
cpu_first_thread_sibling(cpu) == cpu_first_thread_sibling(cpu) ==
cpu_first_thread_sibling(raw_smp_processor_id())) { cpu_first_thread_sibling(raw_smp_processor_id())) {
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
msg |= cpu_thread_in_core(cpu); msg |= cpu_thread_in_core(cpu);
__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
return; return;
...@@ -218,7 +227,11 @@ void kvmhv_rm_send_ipi(int cpu) ...@@ -218,7 +227,11 @@ void kvmhv_rm_send_ipi(int cpu)
/* Else poke the target with an IPI */ /* Else poke the target with an IPI */
xics_phys = paca[cpu].kvm_hstate.xics_phys; xics_phys = paca[cpu].kvm_hstate.xics_phys;
rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); if (xics_phys)
rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
else
opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu),
IPI_PRIORITY);
} }
/* /*
...@@ -329,7 +342,7 @@ static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap, ...@@ -329,7 +342,7 @@ static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap,
* saved a copy of the XIRR in the PACA, it will be picked up by * saved a copy of the XIRR in the PACA, it will be picked up by
* the host ICP driver. * the host ICP driver.
*/ */
static int kvmppc_check_passthru(u32 xisr, __be32 xirr) static int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
{ {
struct kvmppc_passthru_irqmap *pimap; struct kvmppc_passthru_irqmap *pimap;
struct kvmppc_irq_map *irq_map; struct kvmppc_irq_map *irq_map;
...@@ -348,11 +361,11 @@ static int kvmppc_check_passthru(u32 xisr, __be32 xirr) ...@@ -348,11 +361,11 @@ static int kvmppc_check_passthru(u32 xisr, __be32 xirr)
/* We're handling this interrupt, generic code doesn't need to */ /* We're handling this interrupt, generic code doesn't need to */
local_paca->kvm_hstate.saved_xirr = 0; local_paca->kvm_hstate.saved_xirr = 0;
return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap); return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap, again);
} }
#else #else
static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr) static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
{ {
return 1; return 1;
} }
...@@ -367,14 +380,31 @@ static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr) ...@@ -367,14 +380,31 @@ static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr)
* -1 if there was a guest wakeup IPI (which has now been cleared) * -1 if there was a guest wakeup IPI (which has now been cleared)
* -2 if there is PCI passthrough external interrupt that was handled * -2 if there is PCI passthrough external interrupt that was handled
*/ */
static long kvmppc_read_one_intr(bool *again);
long kvmppc_read_intr(void) long kvmppc_read_intr(void)
{
long ret = 0;
long rc;
bool again;
do {
again = false;
rc = kvmppc_read_one_intr(&again);
if (rc && (ret == 0 || rc > ret))
ret = rc;
} while (again);
return ret;
}
static long kvmppc_read_one_intr(bool *again)
{ {
unsigned long xics_phys; unsigned long xics_phys;
u32 h_xirr; u32 h_xirr;
__be32 xirr; __be32 xirr;
u32 xisr; u32 xisr;
u8 host_ipi; u8 host_ipi;
int64_t rc;
/* see if a host IPI is pending */ /* see if a host IPI is pending */
host_ipi = local_paca->kvm_hstate.host_ipi; host_ipi = local_paca->kvm_hstate.host_ipi;
...@@ -383,8 +413,14 @@ long kvmppc_read_intr(void) ...@@ -383,8 +413,14 @@ long kvmppc_read_intr(void)
/* Now read the interrupt from the ICP */ /* Now read the interrupt from the ICP */
xics_phys = local_paca->kvm_hstate.xics_phys; xics_phys = local_paca->kvm_hstate.xics_phys;
if (unlikely(!xics_phys)) if (!xics_phys) {
return 1; /* Use OPAL to read the XIRR */
rc = opal_rm_int_get_xirr(&xirr, false);
if (rc < 0)
return 1;
} else {
xirr = _lwzcix(xics_phys + XICS_XIRR);
}
/* /*
* Save XIRR for later. Since we get control in reverse endian * Save XIRR for later. Since we get control in reverse endian
...@@ -392,7 +428,6 @@ long kvmppc_read_intr(void) ...@@ -392,7 +428,6 @@ long kvmppc_read_intr(void)
* host endian. Note that xirr is the value read from the * host endian. Note that xirr is the value read from the
* XIRR register, while h_xirr is the host endian version. * XIRR register, while h_xirr is the host endian version.
*/ */
xirr = _lwzcix(xics_phys + XICS_XIRR);
h_xirr = be32_to_cpu(xirr); h_xirr = be32_to_cpu(xirr);
local_paca->kvm_hstate.saved_xirr = h_xirr; local_paca->kvm_hstate.saved_xirr = h_xirr;
xisr = h_xirr & 0xffffff; xisr = h_xirr & 0xffffff;
...@@ -411,8 +446,16 @@ long kvmppc_read_intr(void) ...@@ -411,8 +446,16 @@ long kvmppc_read_intr(void)
* If it is an IPI, clear the MFRR and EOI it. * If it is an IPI, clear the MFRR and EOI it.
*/ */
if (xisr == XICS_IPI) { if (xisr == XICS_IPI) {
_stbcix(xics_phys + XICS_MFRR, 0xff); if (xics_phys) {
_stwcix(xics_phys + XICS_XIRR, xirr); _stbcix(xics_phys + XICS_MFRR, 0xff);
_stwcix(xics_phys + XICS_XIRR, xirr);
} else {
opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff);
rc = opal_rm_int_eoi(h_xirr);
/* If rc > 0, there is another interrupt pending */
*again = rc > 0;
}
/* /*
* Need to ensure side effects of above stores * Need to ensure side effects of above stores
* complete before proceeding. * complete before proceeding.
...@@ -429,7 +472,11 @@ long kvmppc_read_intr(void) ...@@ -429,7 +472,11 @@ long kvmppc_read_intr(void)
/* We raced with the host, /* We raced with the host,
* we need to resend that IPI, bummer * we need to resend that IPI, bummer
*/ */
_stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); if (xics_phys)
_stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
else
opal_rm_int_set_mfrr(hard_smp_processor_id(),
IPI_PRIORITY);
/* Let side effects complete */ /* Let side effects complete */
smp_mb(); smp_mb();
return 1; return 1;
...@@ -440,5 +487,5 @@ long kvmppc_read_intr(void) ...@@ -440,5 +487,5 @@ long kvmppc_read_intr(void)
return -1; return -1;
} }
return kvmppc_check_passthru(xisr, xirr); return kvmppc_check_passthru(xisr, xirr, again);
} }
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <asm/machdep.h> #include <asm/machdep.h>
#include <asm/cputhreads.h> #include <asm/cputhreads.h>
#include <asm/hmi.h> #include <asm/hmi.h>
#include <asm/asm-prototypes.h>
/* SRR1 bits for machine check on POWER7 */ /* SRR1 bits for machine check on POWER7 */
#define SRR1_MC_LDSTERR (1ul << (63-42)) #define SRR1_MC_LDSTERR (1ul << (63-42))
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <asm/pnv-pci.h> #include <asm/pnv-pci.h>
#include <asm/opal.h> #include <asm/opal.h>
#include <asm/smp.h> #include <asm/smp.h>
#include <asm/asm-prototypes.h>
#include "book3s_xics.h" #include "book3s_xics.h"
...@@ -70,7 +71,11 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) ...@@ -70,7 +71,11 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
hcpu = hcore << threads_shift; hcpu = hcore << threads_shift;
kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu; kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION); smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
icp_native_cause_ipi_rm(hcpu); if (paca[hcpu].kvm_hstate.xics_phys)
icp_native_cause_ipi_rm(hcpu);
else
opal_rm_int_set_mfrr(get_hard_smp_processor_id(hcpu),
IPI_PRIORITY);
} }
#else #else
static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { } static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
...@@ -737,7 +742,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) ...@@ -737,7 +742,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
unsigned long eoi_rc; unsigned long eoi_rc;
static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr) static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
{ {
unsigned long xics_phys; unsigned long xics_phys;
int64_t rc; int64_t rc;
...@@ -751,7 +756,12 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr) ...@@ -751,7 +756,12 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr)
/* EOI it */ /* EOI it */
xics_phys = local_paca->kvm_hstate.xics_phys; xics_phys = local_paca->kvm_hstate.xics_phys;
_stwcix(xics_phys + XICS_XIRR, xirr); if (xics_phys) {
_stwcix(xics_phys + XICS_XIRR, xirr);
} else {
rc = opal_rm_int_eoi(be32_to_cpu(xirr));
*again = rc > 0;
}
} }
static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu) static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu)
...@@ -809,9 +819,10 @@ static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc) ...@@ -809,9 +819,10 @@ static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc)
} }
long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
u32 xirr, __be32 xirr,
struct kvmppc_irq_map *irq_map, struct kvmppc_irq_map *irq_map,
struct kvmppc_passthru_irqmap *pimap) struct kvmppc_passthru_irqmap *pimap,
bool *again)
{ {
struct kvmppc_xics *xics; struct kvmppc_xics *xics;
struct kvmppc_icp *icp; struct kvmppc_icp *icp;
...@@ -825,7 +836,8 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, ...@@ -825,7 +836,8 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
icp_rm_deliver_irq(xics, icp, irq); icp_rm_deliver_irq(xics, icp, irq);
/* EOI the interrupt */ /* EOI the interrupt */
icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr); icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr,
again);
if (check_too_hard(xics, icp) == H_TOO_HARD) if (check_too_hard(xics, icp) == H_TOO_HARD)
return 2; return 2;
......
...@@ -501,17 +501,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -501,17 +501,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
cmpwi r0, 0 cmpwi r0, 0
beq 57f beq 57f
li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4 li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
mfspr r4, SPRN_LPCR mfspr r5, SPRN_LPCR
rlwimi r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1) rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
mtspr SPRN_LPCR, r4 b kvm_nap_sequence
isync
std r0, HSTATE_SCRATCH0(r13)
ptesync
ld r0, HSTATE_SCRATCH0(r13)
1: cmpd r0, r0
bne 1b
nap
b .
57: li r0, 0 57: li r0, 0
stbx r0, r3, r4 stbx r0, r3, r4
...@@ -523,6 +515,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -523,6 +515,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* * * *
*****************************************************************************/ *****************************************************************************/
/* Stack frame offsets */
#define STACK_SLOT_TID (112-16)
#define STACK_SLOT_PSSCR (112-24)
.global kvmppc_hv_entry .global kvmppc_hv_entry
kvmppc_hv_entry: kvmppc_hv_entry:
...@@ -581,12 +577,14 @@ kvmppc_hv_entry: ...@@ -581,12 +577,14 @@ kvmppc_hv_entry:
ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
cmpwi r6,0 cmpwi r6,0
bne 10f bne 10f
ld r6,KVM_SDR1(r9)
lwz r7,KVM_LPID(r9) lwz r7,KVM_LPID(r9)
BEGIN_FTR_SECTION
ld r6,KVM_SDR1(r9)
li r0,LPID_RSVD /* switch to reserved LPID */ li r0,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r0 mtspr SPRN_LPID,r0
ptesync ptesync
mtspr SPRN_SDR1,r6 /* switch to partition page table */ mtspr SPRN_SDR1,r6 /* switch to partition page table */
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7 mtspr SPRN_LPID,r7
isync isync
...@@ -607,12 +605,8 @@ kvmppc_hv_entry: ...@@ -607,12 +605,8 @@ kvmppc_hv_entry:
stdcx. r7,0,r6 stdcx. r7,0,r6
bne 23b bne 23b
/* Flush the TLB of any entries for this LPID */ /* Flush the TLB of any entries for this LPID */
/* use arch 2.07S as a proxy for POWER8 */ lwz r6,KVM_TLB_SETS(r9)
BEGIN_FTR_SECTION li r0,0 /* RS for P9 version of tlbiel */
li r6,512 /* POWER8 has 512 sets */
FTR_SECTION_ELSE
li r6,128 /* POWER7 has 128 sets */
ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
mtctr r6 mtctr r6
li r7,0x800 /* IS field = 0b10 */ li r7,0x800 /* IS field = 0b10 */
ptesync ptesync
...@@ -698,6 +692,14 @@ kvmppc_got_guest: ...@@ -698,6 +692,14 @@ kvmppc_got_guest:
mtspr SPRN_PURR,r7 mtspr SPRN_PURR,r7
mtspr SPRN_SPURR,r8 mtspr SPRN_SPURR,r8
/* Save host values of some registers */
BEGIN_FTR_SECTION
mfspr r5, SPRN_TIDR
mfspr r6, SPRN_PSSCR
std r5, STACK_SLOT_TID(r1)
std r6, STACK_SLOT_PSSCR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION BEGIN_FTR_SECTION
/* Set partition DABR */ /* Set partition DABR */
/* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
...@@ -750,14 +752,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) ...@@ -750,14 +752,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
BEGIN_FTR_SECTION BEGIN_FTR_SECTION
ld r5, VCPU_MMCR + 24(r4) ld r5, VCPU_MMCR + 24(r4)
ld r6, VCPU_SIER(r4) ld r6, VCPU_SIER(r4)
mtspr SPRN_MMCR2, r5
mtspr SPRN_SIER, r6
BEGIN_FTR_SECTION_NESTED(96)
lwz r7, VCPU_PMC + 24(r4) lwz r7, VCPU_PMC + 24(r4)
lwz r8, VCPU_PMC + 28(r4) lwz r8, VCPU_PMC + 28(r4)
ld r9, VCPU_MMCR + 32(r4) ld r9, VCPU_MMCR + 32(r4)
mtspr SPRN_MMCR2, r5
mtspr SPRN_SIER, r6
mtspr SPRN_SPMC1, r7 mtspr SPRN_SPMC1, r7
mtspr SPRN_SPMC2, r8 mtspr SPRN_SPMC2, r8
mtspr SPRN_MMCRS, r9 mtspr SPRN_MMCRS, r9
END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtspr SPRN_MMCR0, r3 mtspr SPRN_MMCR0, r3
isync isync
...@@ -813,20 +817,30 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ...@@ -813,20 +817,30 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_EBBHR, r8 mtspr SPRN_EBBHR, r8
ld r5, VCPU_EBBRR(r4) ld r5, VCPU_EBBRR(r4)
ld r6, VCPU_BESCR(r4) ld r6, VCPU_BESCR(r4)
ld r7, VCPU_CSIGR(r4) lwz r7, VCPU_GUEST_PID(r4)
ld r8, VCPU_TACR(r4) ld r8, VCPU_WORT(r4)
mtspr SPRN_EBBRR, r5 mtspr SPRN_EBBRR, r5
mtspr SPRN_BESCR, r6 mtspr SPRN_BESCR, r6
mtspr SPRN_CSIGR, r7 mtspr SPRN_PID, r7
mtspr SPRN_TACR, r8 mtspr SPRN_WORT, r8
BEGIN_FTR_SECTION
/* POWER8-only registers */
ld r5, VCPU_TCSCR(r4) ld r5, VCPU_TCSCR(r4)
ld r6, VCPU_ACOP(r4) ld r6, VCPU_ACOP(r4)
lwz r7, VCPU_GUEST_PID(r4) ld r7, VCPU_CSIGR(r4)
ld r8, VCPU_WORT(r4) ld r8, VCPU_TACR(r4)
mtspr SPRN_TCSCR, r5 mtspr SPRN_TCSCR, r5
mtspr SPRN_ACOP, r6 mtspr SPRN_ACOP, r6
mtspr SPRN_PID, r7 mtspr SPRN_CSIGR, r7
mtspr SPRN_WORT, r8 mtspr SPRN_TACR, r8
FTR_SECTION_ELSE
/* POWER9-only registers */
ld r5, VCPU_TID(r4)
ld r6, VCPU_PSSCR(r4)
oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8: 8:
/* /*
...@@ -1341,20 +1355,29 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ...@@ -1341,20 +1355,29 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
std r8, VCPU_EBBHR(r9) std r8, VCPU_EBBHR(r9)
mfspr r5, SPRN_EBBRR mfspr r5, SPRN_EBBRR
mfspr r6, SPRN_BESCR mfspr r6, SPRN_BESCR
mfspr r7, SPRN_CSIGR mfspr r7, SPRN_PID
mfspr r8, SPRN_TACR mfspr r8, SPRN_WORT
std r5, VCPU_EBBRR(r9) std r5, VCPU_EBBRR(r9)
std r6, VCPU_BESCR(r9) std r6, VCPU_BESCR(r9)
std r7, VCPU_CSIGR(r9) stw r7, VCPU_GUEST_PID(r9)
std r8, VCPU_TACR(r9) std r8, VCPU_WORT(r9)
BEGIN_FTR_SECTION
mfspr r5, SPRN_TCSCR mfspr r5, SPRN_TCSCR
mfspr r6, SPRN_ACOP mfspr r6, SPRN_ACOP
mfspr r7, SPRN_PID mfspr r7, SPRN_CSIGR
mfspr r8, SPRN_WORT mfspr r8, SPRN_TACR
std r5, VCPU_TCSCR(r9) std r5, VCPU_TCSCR(r9)
std r6, VCPU_ACOP(r9) std r6, VCPU_ACOP(r9)
stw r7, VCPU_GUEST_PID(r9) std r7, VCPU_CSIGR(r9)
std r8, VCPU_WORT(r9) std r8, VCPU_TACR(r9)
FTR_SECTION_ELSE
mfspr r5, SPRN_TIDR
mfspr r6, SPRN_PSSCR
std r5, VCPU_TID(r9)
rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
rotldi r6, r6, 60
std r6, VCPU_PSSCR(r9)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/* /*
* Restore various registers to 0, where non-zero values * Restore various registers to 0, where non-zero values
* set by the guest could disrupt the host. * set by the guest could disrupt the host.
...@@ -1363,12 +1386,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ...@@ -1363,12 +1386,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_IAMR, r0 mtspr SPRN_IAMR, r0
mtspr SPRN_CIABR, r0 mtspr SPRN_CIABR, r0
mtspr SPRN_DAWRX, r0 mtspr SPRN_DAWRX, r0
mtspr SPRN_TCSCR, r0
mtspr SPRN_WORT, r0 mtspr SPRN_WORT, r0
BEGIN_FTR_SECTION
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1 li r0, 1
sldi r0, r0, 31 sldi r0, r0, 31
mtspr SPRN_MMCRS, r0 mtspr SPRN_MMCRS, r0
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
8: 8:
/* Save and reset AMR and UAMOR before turning on the MMU */ /* Save and reset AMR and UAMOR before turning on the MMU */
...@@ -1502,15 +1527,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -1502,15 +1527,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
stw r8, VCPU_PMC + 20(r9) stw r8, VCPU_PMC + 20(r9)
BEGIN_FTR_SECTION BEGIN_FTR_SECTION
mfspr r5, SPRN_SIER mfspr r5, SPRN_SIER
std r5, VCPU_SIER(r9)
BEGIN_FTR_SECTION_NESTED(96)
mfspr r6, SPRN_SPMC1 mfspr r6, SPRN_SPMC1
mfspr r7, SPRN_SPMC2 mfspr r7, SPRN_SPMC2
mfspr r8, SPRN_MMCRS mfspr r8, SPRN_MMCRS
std r5, VCPU_SIER(r9)
stw r6, VCPU_PMC + 24(r9) stw r6, VCPU_PMC + 24(r9)
stw r7, VCPU_PMC + 28(r9) stw r7, VCPU_PMC + 28(r9)
std r8, VCPU_MMCR + 32(r9) std r8, VCPU_MMCR + 32(r9)
lis r4, 0x8000 lis r4, 0x8000
mtspr SPRN_MMCRS, r4 mtspr SPRN_MMCRS, r4
END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
22: 22:
/* Clear out SLB */ /* Clear out SLB */
...@@ -1519,6 +1546,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -1519,6 +1546,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
slbia slbia
ptesync ptesync
/* Restore host values of some registers */
BEGIN_FTR_SECTION
ld r5, STACK_SLOT_TID(r1)
ld r6, STACK_SLOT_PSSCR(r1)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
/* /*
* POWER7/POWER8 guest -> host partition switch code. * POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do * We don't have to lock against tlbies but we do
...@@ -1552,12 +1587,14 @@ kvmhv_switch_to_host: ...@@ -1552,12 +1587,14 @@ kvmhv_switch_to_host:
beq 19f beq 19f
/* Primary thread switches back to host partition */ /* Primary thread switches back to host partition */
ld r6,KVM_HOST_SDR1(r4)
lwz r7,KVM_HOST_LPID(r4) lwz r7,KVM_HOST_LPID(r4)
BEGIN_FTR_SECTION
ld r6,KVM_HOST_SDR1(r4)
li r8,LPID_RSVD /* switch to reserved LPID */ li r8,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r8 mtspr SPRN_LPID,r8
ptesync ptesync
mtspr SPRN_SDR1,r6 /* switch to partition page table */ mtspr SPRN_SDR1,r6 /* switch to host page table */
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7 mtspr SPRN_LPID,r7
isync isync
...@@ -2211,6 +2248,21 @@ BEGIN_FTR_SECTION ...@@ -2211,6 +2248,21 @@ BEGIN_FTR_SECTION
ori r5, r5, LPCR_PECEDH ori r5, r5, LPCR_PECEDH
rlwimi r5, r3, 0, LPCR_PECEDP rlwimi r5, r3, 0, LPCR_PECEDP
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
kvm_nap_sequence: /* desired LPCR value in r5 */
BEGIN_FTR_SECTION
/*
* PSSCR bits: exit criterion = 1 (wakeup based on LPCR at sreset)
* enable state loss = 1 (allow SMT mode switch)
* requested level = 0 (just stop dispatching)
*/
lis r3, (PSSCR_EC | PSSCR_ESL)@h
mtspr SPRN_PSSCR, r3
/* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
li r4, LPCR_PECE_HVEE@higher
sldi r4, r4, 32
or r5, r5, r4
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_LPCR,r5 mtspr SPRN_LPCR,r5
isync isync
li r0, 0 li r0, 0
...@@ -2219,7 +2271,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -2219,7 +2271,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
ld r0, HSTATE_SCRATCH0(r13) ld r0, HSTATE_SCRATCH0(r13)
1: cmpd r0, r0 1: cmpd r0, r0
bne 1b bne 1b
BEGIN_FTR_SECTION
nap nap
FTR_SECTION_ELSE
PPC_STOP
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
b . b .
33: mr r4, r3 33: mr r4, r3
...@@ -2600,11 +2656,13 @@ kvmppc_save_tm: ...@@ -2600,11 +2656,13 @@ kvmppc_save_tm:
mfctr r7 mfctr r7
mfspr r8, SPRN_AMR mfspr r8, SPRN_AMR
mfspr r10, SPRN_TAR mfspr r10, SPRN_TAR
mfxer r11
std r5, VCPU_LR_TM(r9) std r5, VCPU_LR_TM(r9)
stw r6, VCPU_CR_TM(r9) stw r6, VCPU_CR_TM(r9)
std r7, VCPU_CTR_TM(r9) std r7, VCPU_CTR_TM(r9)
std r8, VCPU_AMR_TM(r9) std r8, VCPU_AMR_TM(r9)
std r10, VCPU_TAR_TM(r9) std r10, VCPU_TAR_TM(r9)
std r11, VCPU_XER_TM(r9)
/* Restore r12 as trap number. */ /* Restore r12 as trap number. */
lwz r12, VCPU_TRAP(r9) lwz r12, VCPU_TRAP(r9)
...@@ -2697,11 +2755,13 @@ kvmppc_restore_tm: ...@@ -2697,11 +2755,13 @@ kvmppc_restore_tm:
ld r7, VCPU_CTR_TM(r4) ld r7, VCPU_CTR_TM(r4)
ld r8, VCPU_AMR_TM(r4) ld r8, VCPU_AMR_TM(r4)
ld r9, VCPU_TAR_TM(r4) ld r9, VCPU_TAR_TM(r4)
ld r10, VCPU_XER_TM(r4)
mtlr r5 mtlr r5
mtcr r6 mtcr r6
mtctr r7 mtctr r7
mtspr SPRN_AMR, r8 mtspr SPRN_AMR, r8
mtspr SPRN_TAR, r9 mtspr SPRN_TAR, r9
mtxer r10
/* /*
* Load up PPR and DSCR values but don't put them in the actual SPRs * Load up PPR and DSCR values but don't put them in the actual SPRs
......
...@@ -536,7 +536,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ...@@ -536,7 +536,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE: case KVM_CAP_SPAPR_TCE:
case KVM_CAP_SPAPR_TCE_64: case KVM_CAP_SPAPR_TCE_64:
case KVM_CAP_PPC_ALLOC_HTAB:
case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL: case KVM_CAP_PPC_FIXUP_HCALL:
case KVM_CAP_PPC_ENABLE_HCALL: case KVM_CAP_PPC_ENABLE_HCALL:
...@@ -545,13 +544,20 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ...@@ -545,13 +544,20 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#endif #endif
r = 1; r = 1;
break; break;
case KVM_CAP_PPC_ALLOC_HTAB:
r = hv_enabled;
break;
#endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_SMT: case KVM_CAP_PPC_SMT:
if (hv_enabled) r = 0;
r = threads_per_subcore; if (hv_enabled) {
else if (cpu_has_feature(CPU_FTR_ARCH_300))
r = 0; r = 1;
else
r = threads_per_subcore;
}
break; break;
case KVM_CAP_PPC_RMA: case KVM_CAP_PPC_RMA:
r = 0; r = 0;
......
...@@ -449,7 +449,7 @@ TRACE_EVENT(kvmppc_vcore_wakeup, ...@@ -449,7 +449,7 @@ TRACE_EVENT(kvmppc_vcore_wakeup,
__entry->tgid = current->tgid; __entry->tgid = current->tgid;
), ),
TP_printk("%s time %lld ns, tgid=%d", TP_printk("%s time %llu ns, tgid=%d",
__entry->waited ? "wait" : "poll", __entry->waited ? "wait" : "poll",
__entry->ns, __entry->tgid) __entry->ns, __entry->tgid)
); );
......
...@@ -221,13 +221,18 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, ...@@ -221,13 +221,18 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
return -1; return -1;
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
if (!(vflags & HPTE_V_BOLTED)) { if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n", DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
i, hpte_v, hpte_r); i, hpte_v, hpte_r);
} }
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
hpte_r = hpte_old_to_new_r(hpte_v, hpte_r);
hpte_v = hpte_old_to_new_v(hpte_v);
}
hptep->r = cpu_to_be64(hpte_r); hptep->r = cpu_to_be64(hpte_r);
/* Guarantee the second dword is visible before the valid bit */ /* Guarantee the second dword is visible before the valid bit */
eieio(); eieio();
...@@ -295,6 +300,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, ...@@ -295,6 +300,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
vpn, want_v & HPTE_V_AVPN, slot, newpp); vpn, want_v & HPTE_V_AVPN, slot, newpp);
hpte_v = be64_to_cpu(hptep->v); hpte_v = be64_to_cpu(hptep->v);
if (cpu_has_feature(CPU_FTR_ARCH_300))
hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/* /*
* We need to invalidate the TLB always because hpte_remove doesn't do * We need to invalidate the TLB always because hpte_remove doesn't do
* a tlb invalidate. If a hash bucket gets full, we "evict" a more/less * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
...@@ -309,6 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, ...@@ -309,6 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
native_lock_hpte(hptep); native_lock_hpte(hptep);
/* recheck with locks held */ /* recheck with locks held */
hpte_v = be64_to_cpu(hptep->v); hpte_v = be64_to_cpu(hptep->v);
if (cpu_has_feature(CPU_FTR_ARCH_300))
hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) || if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
!(hpte_v & HPTE_V_VALID))) { !(hpte_v & HPTE_V_VALID))) {
ret = -1; ret = -1;
...@@ -350,6 +359,8 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) ...@@ -350,6 +359,8 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
for (i = 0; i < HPTES_PER_GROUP; i++) { for (i = 0; i < HPTES_PER_GROUP; i++) {
hptep = htab_address + slot; hptep = htab_address + slot;
hpte_v = be64_to_cpu(hptep->v); hpte_v = be64_to_cpu(hptep->v);
if (cpu_has_feature(CPU_FTR_ARCH_300))
hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
/* HPTE matches */ /* HPTE matches */
...@@ -409,6 +420,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, ...@@ -409,6 +420,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
want_v = hpte_encode_avpn(vpn, bpsize, ssize); want_v = hpte_encode_avpn(vpn, bpsize, ssize);
native_lock_hpte(hptep); native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v); hpte_v = be64_to_cpu(hptep->v);
if (cpu_has_feature(CPU_FTR_ARCH_300))
hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/* /*
* We need to invalidate the TLB always because hpte_remove doesn't do * We need to invalidate the TLB always because hpte_remove doesn't do
...@@ -467,6 +480,8 @@ static void native_hugepage_invalidate(unsigned long vsid, ...@@ -467,6 +480,8 @@ static void native_hugepage_invalidate(unsigned long vsid,
want_v = hpte_encode_avpn(vpn, psize, ssize); want_v = hpte_encode_avpn(vpn, psize, ssize);
native_lock_hpte(hptep); native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v); hpte_v = be64_to_cpu(hptep->v);
if (cpu_has_feature(CPU_FTR_ARCH_300))
hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/* Even if we miss, we need to invalidate the TLB */ /* Even if we miss, we need to invalidate the TLB */
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
...@@ -504,6 +519,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, ...@@ -504,6 +519,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
/* Look at the 8 bit LP value */ /* Look at the 8 bit LP value */
unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1); unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
hpte_v = hpte_new_to_old_v(hpte_v, hpte_r);
hpte_r = hpte_new_to_old_r(hpte_r);
}
if (!(hpte_v & HPTE_V_LARGE)) { if (!(hpte_v & HPTE_V_LARGE)) {
size = MMU_PAGE_4K; size = MMU_PAGE_4K;
a_size = MMU_PAGE_4K; a_size = MMU_PAGE_4K;
...@@ -512,11 +531,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, ...@@ -512,11 +531,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
a_size = hpte_page_sizes[lp] >> 4; a_size = hpte_page_sizes[lp] >> 4;
} }
/* This works for all page sizes, and for 256M and 1T segments */ /* This works for all page sizes, and for 256M and 1T segments */
if (cpu_has_feature(CPU_FTR_ARCH_300)) *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
*ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT;
else
*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
shift = mmu_psize_defs[size].shift; shift = mmu_psize_defs[size].shift;
avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm); avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
...@@ -639,6 +654,9 @@ static void native_flush_hash_range(unsigned long number, int local) ...@@ -639,6 +654,9 @@ static void native_flush_hash_range(unsigned long number, int local)
want_v = hpte_encode_avpn(vpn, psize, ssize); want_v = hpte_encode_avpn(vpn, psize, ssize);
native_lock_hpte(hptep); native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v); hpte_v = be64_to_cpu(hptep->v);
if (cpu_has_feature(CPU_FTR_ARCH_300))
hpte_v = hpte_new_to_old_v(hpte_v,
be64_to_cpu(hptep->r));
if (!HPTE_V_COMPARE(hpte_v, want_v) || if (!HPTE_V_COMPARE(hpte_v, want_v) ||
!(hpte_v & HPTE_V_VALID)) !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep); native_unlock_hpte(hptep);
......
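The hpte_new_to_old_v() calls added throughout this file convert a POWER9 (ISA v3.00) hash PTE back to the pre-3.00 layout before the existing compare/decode logic runs, and hpte_old_to_new_v()/hpte_old_to_new_r() perform the reverse conversion just before an entry is written in native_hpte_insert(). As the hpte_decode() hunk suggests, the difference handled here is that the segment-size (B) field moves from the first doubleword into the second. A minimal sketch of that conversion, using illustrative EX_* shift and mask names rather than the real kernel macros, looks like this:

/* Sketch only: ISA v3.00 moves the segment-size (B) field from HPTE
 * dword 0 into dword 1.  The EX_* shifts/masks are assumptions made
 * for this example, not verbatim kernel definitions. */
#define EX_V_SSIZE_SHIFT	62	/* B position in the old dword 0 */
#define EX_R_SSIZE_SHIFT	58	/* B position in the new dword 1 */
#define EX_R_SSIZE_MASK		(3UL << EX_R_SSIZE_SHIFT)

static unsigned long ex_old_to_new_v(unsigned long v)
{
	return v & ~(3UL << EX_V_SSIZE_SHIFT);		/* strip B from dword 0 */
}

static unsigned long ex_old_to_new_r(unsigned long v, unsigned long r)
{
	return r | ((v >> EX_V_SSIZE_SHIFT) << EX_R_SSIZE_SHIFT);
}

static unsigned long ex_new_to_old_v(unsigned long v, unsigned long r)
{
	return v | (((r & EX_R_SSIZE_MASK) >> EX_R_SSIZE_SHIFT) << EX_V_SSIZE_SHIFT);
}

static unsigned long ex_new_to_old_r(unsigned long r)
{
	return r & ~EX_R_SSIZE_MASK;
}

Doing the conversion only where the hardware table is read or written keeps the rest of the native hash code format-agnostic, which is why each hpte_v read above gains the same two-line cpu_has_feature(CPU_FTR_ARCH_300) guard.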
...@@ -792,37 +792,17 @@ static void update_hid_for_hash(void) ...@@ -792,37 +792,17 @@ static void update_hid_for_hash(void)
static void __init hash_init_partition_table(phys_addr_t hash_table, static void __init hash_init_partition_table(phys_addr_t hash_table,
unsigned long htab_size) unsigned long htab_size)
{ {
unsigned long ps_field; mmu_partition_table_init();
unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
/* /*
* slb llp encoding for the page size used in VPM real mode. * PS field (VRMA page size) is not used for LPID 0, hence set to 0.
* We can ignore that for lpid 0 * For now, UPRT is 0 and we have no segment table.
*/ */
ps_field = 0;
htab_size = __ilog2(htab_size) - 18; htab_size = __ilog2(htab_size) - 18;
mmu_partition_table_set_entry(0, hash_table | htab_size, 0);
BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
MEMBLOCK_ALLOC_ANYWHERE));
/* Initialize the Partition Table with no entries */
memset((void *)partition_tb, 0, patb_size);
partition_tb->patb0 = cpu_to_be64(ps_field | hash_table | htab_size);
/*
* FIXME!! This should be done via update_partition table
* For now UPRT is 0 for us.
*/
partition_tb->patb1 = 0;
pr_info("Partition table %p\n", partition_tb); pr_info("Partition table %p\n", partition_tb);
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) if (cpu_has_feature(CPU_FTR_POWER9_DD1))
update_hid_for_hash(); update_hid_for_hash();
/*
* update partition table control register,
* 64 K size.
*/
mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
} }
static void __init htab_initialize(void) static void __init htab_initialize(void)
......
...@@ -177,23 +177,15 @@ static void __init radix_init_pgtable(void) ...@@ -177,23 +177,15 @@ static void __init radix_init_pgtable(void)
static void __init radix_init_partition_table(void) static void __init radix_init_partition_table(void)
{ {
unsigned long rts_field; unsigned long rts_field, dw0;
mmu_partition_table_init();
rts_field = radix__get_tree_size(); rts_field = radix__get_tree_size();
dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
mmu_partition_table_set_entry(0, dw0, 0);
BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT);
partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) |
RADIX_PGD_INDEX_SIZE | PATB_HR);
pr_info("Initializing Radix MMU\n"); pr_info("Initializing Radix MMU\n");
pr_info("Partition table %p\n", partition_tb); pr_info("Partition table %p\n", partition_tb);
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
/*
* update partition table control register,
* 64 K size.
*/
mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
} }
void __init radix_init_native(void) void __init radix_init_native(void)
...@@ -378,6 +370,8 @@ void __init radix__early_init_mmu(void) ...@@ -378,6 +370,8 @@ void __init radix__early_init_mmu(void)
radix_init_partition_table(); radix_init_partition_table();
} }
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
radix_init_pgtable(); radix_init_pgtable();
} }
......
...@@ -431,3 +431,37 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) ...@@ -431,3 +431,37 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
} }
} }
#endif #endif
#ifdef CONFIG_PPC_BOOK3S_64
void __init mmu_partition_table_init(void)
{
unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
MEMBLOCK_ALLOC_ANYWHERE));
/* Initialize the Partition Table with no entries */
memset((void *)partition_tb, 0, patb_size);
/*
* update partition table control register,
* 64 K size.
*/
mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
}
void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
unsigned long dw1)
{
partition_tb[lpid].patb0 = cpu_to_be64(dw0);
partition_tb[lpid].patb1 = cpu_to_be64(dw1);
/* Global flush of TLBs and partition table caches for this lpid */
asm volatile("ptesync" : : : "memory");
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
#endif /* CONFIG_PPC_BOOK3S_64 */
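Both hash_init_partition_table() and radix_init_partition_table() above now delegate allocation of the partition table and the PTCR write to mmu_partition_table_init(), and install their LPID 0 entry through mmu_partition_table_set_entry(), which also publishes the update with the ptesync/tlbie/tlbsync sequence shown. A minimal caller, modelled directly on the radix hunk above (the dw0 layout shown is specific to that hunk, not a general recipe), could be sketched as:

/* Illustrative caller of the new helpers, mirroring
 * radix_init_partition_table() above: dw0 = RTS | PGD | index size |
 * host-radix bit, for LPID 0 with no process table yet. */
static void __init example_init_partition_table(void)
{
	unsigned long dw0;

	mmu_partition_table_init();	/* allocate table, program PTCR */

	dw0 = radix__get_tree_size() | __pa(init_mm.pgd) |
	      RADIX_PGD_INDEX_SIZE | PATB_HR;
	mmu_partition_table_set_entry(0, dw0, 0);
}

The EXPORT_SYMBOL_GPL above lets modular code (such as kvm-hv) install partition-table entries for other LPIDs through the same interface instead of poking the table directly.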
...@@ -304,8 +304,11 @@ OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE); ...@@ -304,8 +304,11 @@ OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE);
OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE); OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE);
OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE); OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE);
OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR); OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR);
OPAL_CALL_REAL(opal_rm_int_get_xirr, OPAL_INT_GET_XIRR);
OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR); OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR);
OPAL_CALL(opal_int_eoi, OPAL_INT_EOI); OPAL_CALL(opal_int_eoi, OPAL_INT_EOI);
OPAL_CALL_REAL(opal_rm_int_eoi, OPAL_INT_EOI);
OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR);
OPAL_CALL_REAL(opal_rm_int_set_mfrr, OPAL_INT_SET_MFRR);
OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL); OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL);
OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL); OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL);
...@@ -896,3 +896,5 @@ EXPORT_SYMBOL_GPL(opal_leds_get_ind); ...@@ -896,3 +896,5 @@ EXPORT_SYMBOL_GPL(opal_leds_get_ind);
EXPORT_SYMBOL_GPL(opal_leds_set_ind); EXPORT_SYMBOL_GPL(opal_leds_set_ind);
/* Export this symbol for PowerNV Operator Panel class driver */ /* Export this symbol for PowerNV Operator Panel class driver */
EXPORT_SYMBOL_GPL(opal_write_oppanel_async); EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
/* Export this for KVM */
EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
...@@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn, ...@@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
vflags &= ~HPTE_V_SECONDARY; vflags &= ~HPTE_V_SECONDARY;
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags; hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags;
spin_lock_irqsave(&ps3_htab_lock, flags); spin_lock_irqsave(&ps3_htab_lock, flags);
......
...@@ -145,7 +145,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, ...@@ -145,7 +145,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
hpte_group, vpn, pa, rflags, vflags, psize); hpte_group, vpn, pa, rflags, vflags, psize);
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
if (!(vflags & HPTE_V_BOLTED)) if (!(vflags & HPTE_V_BOLTED))
pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
......
...@@ -1113,6 +1113,10 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) ...@@ -1113,6 +1113,10 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
extern bool kvm_rebooting; extern bool kvm_rebooting;
extern unsigned int halt_poll_ns;
extern unsigned int halt_poll_ns_grow;
extern unsigned int halt_poll_ns_shrink;
struct kvm_device { struct kvm_device {
struct kvm_device_ops *ops; struct kvm_device_ops *ops;
struct kvm *kvm; struct kvm *kvm;
......
...@@ -651,6 +651,9 @@ struct kvm_enable_cap { ...@@ -651,6 +651,9 @@ struct kvm_enable_cap {
}; };
/* for KVM_PPC_GET_PVINFO */ /* for KVM_PPC_GET_PVINFO */
#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
struct kvm_ppc_pvinfo { struct kvm_ppc_pvinfo {
/* out */ /* out */
__u32 flags; __u32 flags;
...@@ -682,8 +685,6 @@ struct kvm_ppc_smmu_info { ...@@ -682,8 +685,6 @@ struct kvm_ppc_smmu_info {
struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
}; };
#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
#define KVMIO 0xAE #define KVMIO 0xAE
/* machine type bits, to be used as argument to KVM_CREATE_VM */ /* machine type bits, to be used as argument to KVM_CREATE_VM */
......
...@@ -70,16 +70,19 @@ MODULE_AUTHOR("Qumranet"); ...@@ -70,16 +70,19 @@ MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
/* Architectures should define their poll value according to the halt latency */ /* Architectures should define their poll value according to the halt latency */
static unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
EXPORT_SYMBOL_GPL(halt_poll_ns);
/* Default doubles per-vcpu halt_poll_ns. */ /* Default doubles per-vcpu halt_poll_ns. */
static unsigned int halt_poll_ns_grow = 2; unsigned int halt_poll_ns_grow = 2;
module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR); module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
/* Default resets per-vcpu halt_poll_ns . */ /* Default resets per-vcpu halt_poll_ns . */
static unsigned int halt_poll_ns_shrink; unsigned int halt_poll_ns_shrink;
module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR); module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
/* /*
* Ordering of locks: * Ordering of locks:
......
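The final hunk un-statics and exports the three generic halt-polling module parameters so that architecture-specific halt-polling code (the powerpc kvm-hv per-vcore polling in this series) can reuse the same tunables. A hypothetical per-vcore adjustment, assuming the generic multiply-to-grow / divide-to-shrink / zero-shrink-resets policy, might look like the following; only halt_poll_ns, halt_poll_ns_grow and halt_poll_ns_shrink come from the hunk above, everything else is made up for the example:

/* Hypothetical helpers: grow or shrink a per-vcore polling interval
 * using the exported module parameters.  Not the kernel's actual
 * book3s_hv implementation. */
static void example_grow_halt_poll_ns(unsigned int *poll_ns)
{
	if (*poll_ns == 0 && halt_poll_ns_grow)
		*poll_ns = 10000;		/* start with a 10us window */
	else
		*poll_ns *= halt_poll_ns_grow;
	if (*poll_ns > halt_poll_ns)
		*poll_ns = halt_poll_ns;	/* clamp to the global limit */
}

static void example_shrink_halt_poll_ns(unsigned int *poll_ns)
{
	if (halt_poll_ns_shrink == 0)
		*poll_ns = 0;			/* a zero divisor means "reset" */
	else
		*poll_ns /= halt_poll_ns_shrink;
}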