Commit 03f95332 authored by Paul Mackerras

KVM: PPC: Book3S: Allow XICS emulation to work in nested hosts using XIVE

Currently, the KVM code assumes that if the host kernel is using the
XIVE interrupt controller (the new interrupt controller that first
appeared in POWER9 systems), then the in-kernel XICS emulation will
use the XIVE hardware to deliver interrupts to the guest.  However,
this only works when the host is running in hypervisor mode and has
full access to all of the XIVE functionality.  It doesn't work in any
nested virtualization scenario, either with PR KVM or nested-HV KVM,
because the XICS-on-XIVE code calls directly into the native-XIVE
routines, which are not initialized and cannot function correctly
because they use OPAL calls, and OPAL is not available in a guest.

This means that using the in-kernel XICS emulation in a nested
hypervisor that is using XIVE as its interrupt controller will cause a
(nested) host kernel crash.  To fix this, we change most of the places
where the current code calls xive_enabled() to select between the
XICS-on-XIVE emulation and the plain XICS emulation to call a new
function, xics_on_xive(), which returns false in a guest.

However, there is a further twist.  The plain XICS emulation has some
functions which are used in real mode and access the underlying XICS
controller (the interrupt controller of the host) directly.  In the
case of a nested hypervisor, this means doing XICS hypercalls
directly.  When the nested host is using XIVE as its interrupt
controller, these hypercalls will fail.  Therefore this also adds
checks in the places where the XICS emulation wants to access the
underlying interrupt controller directly, and if that is XIVE, makes
the code use the virtual mode fallback paths, which call generic
kernel infrastructure rather than doing direct XICS access.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
parent f1adb9c4
...@@ -36,6 +36,8 @@ ...@@ -36,6 +36,8 @@
#endif #endif
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#include <asm/paca.h> #include <asm/paca.h>
#include <asm/xive.h>
#include <asm/cpu_has_feature.h>
#endif #endif
/* /*
...@@ -616,6 +618,18 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir ...@@ -616,6 +618,18 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
#endif /* CONFIG_KVM_XIVE */ #endif /* CONFIG_KVM_XIVE */
#ifdef CONFIG_PPC_POWERNV
/*
 * Whether the in-kernel XICS emulation should be backed by the XIVE
 * hardware (XICS-on-XIVE), as opposed to the plain software XICS
 * emulation.  True only when the host uses XIVE as its interrupt
 * controller AND is running in hypervisor mode (CPU_FTR_HVMODE):
 * in a nested/PR guest the native-XIVE routines cannot work because
 * they rely on OPAL calls, which are unavailable in a guest.
 */
static inline bool xics_on_xive(void)
{
return xive_enabled() && cpu_has_feature(CPU_FTR_HVMODE);
}
#else
/*
 * Without CONFIG_PPC_POWERNV there is no native XIVE access at all,
 * so XICS-on-XIVE is never usable.
 */
static inline bool xics_on_xive(void)
{
return false;
}
#endif
/* /*
* Prototypes for functions called only from assembler code. * Prototypes for functions called only from assembler code.
* Having prototypes reduces sparse errors. * Having prototypes reduces sparse errors.
......
...@@ -635,7 +635,7 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, ...@@ -635,7 +635,7 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
r = -ENXIO; r = -ENXIO;
break; break;
} }
if (xive_enabled()) if (xics_on_xive())
*val = get_reg_val(id, kvmppc_xive_get_icp(vcpu)); *val = get_reg_val(id, kvmppc_xive_get_icp(vcpu));
else else
*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
...@@ -708,7 +708,7 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, ...@@ -708,7 +708,7 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
r = -ENXIO; r = -ENXIO;
break; break;
} }
if (xive_enabled()) if (xics_on_xive())
r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val)); r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val));
else else
r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val)); r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
...@@ -984,7 +984,7 @@ int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall) ...@@ -984,7 +984,7 @@ int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall)
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status) bool line_status)
{ {
if (xive_enabled()) if (xics_on_xive())
return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level, return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level,
line_status); line_status);
else else
...@@ -1037,7 +1037,7 @@ static int kvmppc_book3s_init(void) ...@@ -1037,7 +1037,7 @@ static int kvmppc_book3s_init(void)
#ifdef CONFIG_KVM_XICS #ifdef CONFIG_KVM_XICS
#ifdef CONFIG_KVM_XIVE #ifdef CONFIG_KVM_XIVE
if (xive_enabled()) { if (xics_on_xive()) {
kvmppc_xive_init_module(); kvmppc_xive_init_module();
kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
} else } else
...@@ -1050,7 +1050,7 @@ static int kvmppc_book3s_init(void) ...@@ -1050,7 +1050,7 @@ static int kvmppc_book3s_init(void)
static void kvmppc_book3s_exit(void) static void kvmppc_book3s_exit(void)
{ {
#ifdef CONFIG_KVM_XICS #ifdef CONFIG_KVM_XICS
if (xive_enabled()) if (xics_on_xive())
kvmppc_xive_exit_module(); kvmppc_xive_exit_module();
#endif #endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
......
...@@ -922,7 +922,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) ...@@ -922,7 +922,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_IPOLL: case H_IPOLL:
case H_XIRR_X: case H_XIRR_X:
if (kvmppc_xics_enabled(vcpu)) { if (kvmppc_xics_enabled(vcpu)) {
if (xive_enabled()) { if (xics_on_xive()) {
ret = H_NOT_AVAILABLE; ret = H_NOT_AVAILABLE;
return RESUME_GUEST; return RESUME_GUEST;
} }
...@@ -1431,7 +1431,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) ...@@ -1431,7 +1431,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
case BOOK3S_INTERRUPT_HV_RM_HARD: case BOOK3S_INTERRUPT_HV_RM_HARD:
vcpu->arch.trap = 0; vcpu->arch.trap = 0;
r = RESUME_GUEST; r = RESUME_GUEST;
if (!xive_enabled()) if (!xics_on_xive())
kvmppc_xics_rm_complete(vcpu, 0); kvmppc_xics_rm_complete(vcpu, 0);
break; break;
default: default:
...@@ -3649,7 +3649,7 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) ...@@ -3649,7 +3649,7 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
#ifdef CONFIG_KVM_XICS #ifdef CONFIG_KVM_XICS
static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
{ {
if (!xive_enabled()) if (!xics_on_xive())
return false; return false;
return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr < return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
vcpu->arch.xive_saved_state.cppr; vcpu->arch.xive_saved_state.cppr;
...@@ -4209,7 +4209,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) ...@@ -4209,7 +4209,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
srcu_read_unlock(&kvm->srcu, srcu_idx); srcu_read_unlock(&kvm->srcu, srcu_idx);
} else if (r == RESUME_PASSTHROUGH) { } else if (r == RESUME_PASSTHROUGH) {
if (WARN_ON(xive_enabled())) if (WARN_ON(xics_on_xive()))
r = H_SUCCESS; r = H_SUCCESS;
else else
r = kvmppc_xics_rm_complete(vcpu, 0); r = kvmppc_xics_rm_complete(vcpu, 0);
...@@ -4733,7 +4733,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) ...@@ -4733,7 +4733,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
* If xive is enabled, we route 0x500 interrupts directly * If xive is enabled, we route 0x500 interrupts directly
* to the guest. * to the guest.
*/ */
if (xive_enabled()) if (xics_on_xive())
lpcr |= LPCR_LPES; lpcr |= LPCR_LPES;
} }
...@@ -4969,7 +4969,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) ...@@ -4969,7 +4969,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
if (i == pimap->n_mapped) if (i == pimap->n_mapped)
pimap->n_mapped++; pimap->n_mapped++;
if (xive_enabled()) if (xics_on_xive())
rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc); rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc);
else else
kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
...@@ -5010,7 +5010,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) ...@@ -5010,7 +5010,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
return -ENODEV; return -ENODEV;
} }
if (xive_enabled()) if (xics_on_xive())
rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc); rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc);
else else
kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
...@@ -5387,7 +5387,7 @@ static int kvmppc_book3s_init_hv(void) ...@@ -5387,7 +5387,7 @@ static int kvmppc_book3s_init_hv(void)
* indirectly, via OPAL. * indirectly, via OPAL.
*/ */
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
if (!xive_enabled() && !kvmhv_on_pseries() && if (!xics_on_xive() && !kvmhv_on_pseries() &&
!local_paca->kvm_hstate.xics_phys) { !local_paca->kvm_hstate.xics_phys) {
struct device_node *np; struct device_node *np;
......
...@@ -257,7 +257,7 @@ void kvmhv_rm_send_ipi(int cpu) ...@@ -257,7 +257,7 @@ void kvmhv_rm_send_ipi(int cpu)
} }
/* We should never reach this */ /* We should never reach this */
if (WARN_ON_ONCE(xive_enabled())) if (WARN_ON_ONCE(xics_on_xive()))
return; return;
/* Else poke the target with an IPI */ /* Else poke the target with an IPI */
...@@ -577,7 +577,7 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu) ...@@ -577,7 +577,7 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
{ {
if (!kvmppc_xics_enabled(vcpu)) if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD; return H_TOO_HARD;
if (xive_enabled()) { if (xics_on_xive()) {
if (is_rm()) if (is_rm())
return xive_rm_h_xirr(vcpu); return xive_rm_h_xirr(vcpu);
if (unlikely(!__xive_vm_h_xirr)) if (unlikely(!__xive_vm_h_xirr))
...@@ -592,7 +592,7 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu) ...@@ -592,7 +592,7 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
if (!kvmppc_xics_enabled(vcpu)) if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD; return H_TOO_HARD;
vcpu->arch.regs.gpr[5] = get_tb(); vcpu->arch.regs.gpr[5] = get_tb();
if (xive_enabled()) { if (xics_on_xive()) {
if (is_rm()) if (is_rm())
return xive_rm_h_xirr(vcpu); return xive_rm_h_xirr(vcpu);
if (unlikely(!__xive_vm_h_xirr)) if (unlikely(!__xive_vm_h_xirr))
...@@ -606,7 +606,7 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server) ...@@ -606,7 +606,7 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
{ {
if (!kvmppc_xics_enabled(vcpu)) if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD; return H_TOO_HARD;
if (xive_enabled()) { if (xics_on_xive()) {
if (is_rm()) if (is_rm())
return xive_rm_h_ipoll(vcpu, server); return xive_rm_h_ipoll(vcpu, server);
if (unlikely(!__xive_vm_h_ipoll)) if (unlikely(!__xive_vm_h_ipoll))
...@@ -621,7 +621,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, ...@@ -621,7 +621,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
{ {
if (!kvmppc_xics_enabled(vcpu)) if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD; return H_TOO_HARD;
if (xive_enabled()) { if (xics_on_xive()) {
if (is_rm()) if (is_rm())
return xive_rm_h_ipi(vcpu, server, mfrr); return xive_rm_h_ipi(vcpu, server, mfrr);
if (unlikely(!__xive_vm_h_ipi)) if (unlikely(!__xive_vm_h_ipi))
...@@ -635,7 +635,7 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) ...@@ -635,7 +635,7 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{ {
if (!kvmppc_xics_enabled(vcpu)) if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD; return H_TOO_HARD;
if (xive_enabled()) { if (xics_on_xive()) {
if (is_rm()) if (is_rm())
return xive_rm_h_cppr(vcpu, cppr); return xive_rm_h_cppr(vcpu, cppr);
if (unlikely(!__xive_vm_h_cppr)) if (unlikely(!__xive_vm_h_cppr))
...@@ -649,7 +649,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) ...@@ -649,7 +649,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{ {
if (!kvmppc_xics_enabled(vcpu)) if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD; return H_TOO_HARD;
if (xive_enabled()) { if (xics_on_xive()) {
if (is_rm()) if (is_rm())
return xive_rm_h_eoi(vcpu, xirr); return xive_rm_h_eoi(vcpu, xirr);
if (unlikely(!__xive_vm_h_eoi)) if (unlikely(!__xive_vm_h_eoi))
......
...@@ -144,6 +144,13 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, ...@@ -144,6 +144,13 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
return; return;
} }
if (xive_enabled() && kvmhv_on_pseries()) {
/* No XICS access or hypercalls available, too hard */
this_icp->rm_action |= XICS_RM_KICK_VCPU;
this_icp->rm_kick_target = vcpu;
return;
}
/* /*
* Check if the core is loaded, * Check if the core is loaded,
* if not, find an available host core to post to wake the VCPU, * if not, find an available host core to post to wake the VCPU,
......
...@@ -33,7 +33,7 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) ...@@ -33,7 +33,7 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
server = be32_to_cpu(args->args[1]); server = be32_to_cpu(args->args[1]);
priority = be32_to_cpu(args->args[2]); priority = be32_to_cpu(args->args[2]);
if (xive_enabled()) if (xics_on_xive())
rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority); rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority);
else else
rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
...@@ -56,7 +56,7 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) ...@@ -56,7 +56,7 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
irq = be32_to_cpu(args->args[0]); irq = be32_to_cpu(args->args[0]);
server = priority = 0; server = priority = 0;
if (xive_enabled()) if (xics_on_xive())
rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority); rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority);
else else
rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
...@@ -83,7 +83,7 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) ...@@ -83,7 +83,7 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
irq = be32_to_cpu(args->args[0]); irq = be32_to_cpu(args->args[0]);
if (xive_enabled()) if (xics_on_xive())
rc = kvmppc_xive_int_off(vcpu->kvm, irq); rc = kvmppc_xive_int_off(vcpu->kvm, irq);
else else
rc = kvmppc_xics_int_off(vcpu->kvm, irq); rc = kvmppc_xics_int_off(vcpu->kvm, irq);
...@@ -105,7 +105,7 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) ...@@ -105,7 +105,7 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
irq = be32_to_cpu(args->args[0]); irq = be32_to_cpu(args->args[0]);
if (xive_enabled()) if (xics_on_xive())
rc = kvmppc_xive_int_on(vcpu->kvm, irq); rc = kvmppc_xive_int_on(vcpu->kvm, irq);
else else
rc = kvmppc_xics_int_on(vcpu->kvm, irq); rc = kvmppc_xics_int_on(vcpu->kvm, irq);
......
...@@ -748,7 +748,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) ...@@ -748,7 +748,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
break; break;
case KVMPPC_IRQ_XICS: case KVMPPC_IRQ_XICS:
if (xive_enabled()) if (xics_on_xive())
kvmppc_xive_cleanup_vcpu(vcpu); kvmppc_xive_cleanup_vcpu(vcpu);
else else
kvmppc_xics_free_icp(vcpu); kvmppc_xics_free_icp(vcpu);
...@@ -1931,7 +1931,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, ...@@ -1931,7 +1931,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
r = -EPERM; r = -EPERM;
dev = kvm_device_from_filp(f.file); dev = kvm_device_from_filp(f.file);
if (dev) { if (dev) {
if (xive_enabled()) if (xics_on_xive())
r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]); r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]);
else else
r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment