Commit bf4159da authored by Benjamin Herrenschmidt, committed by Paul Mackerras

KVM: PPC: Book3S HV: Enable use of the new XIVE "single escalation" feature

That feature, provided by Power9 DD2.0 and later, when supported
by newer OPAL versions, allows us to sacrifice a queue (priority 7)
in favor of merging all the escalation interrupts of the queues
of a single VP into a single interrupt.

This reduces the number of host interrupts used up by KVM guests
especially when those guests use multiple priorities.

It will also enable a future change to control the masking of the
escalation interrupts more precisely to avoid spurious ones.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
parent c424c108
...@@ -1073,6 +1073,7 @@ enum { ...@@ -1073,6 +1073,7 @@ enum {
/* Flags for OPAL_XIVE_GET/SET_VP_INFO */ /* Flags for OPAL_XIVE_GET/SET_VP_INFO */
enum { enum {
OPAL_XIVE_VP_ENABLED = 0x00000001, OPAL_XIVE_VP_ENABLED = 0x00000001,
OPAL_XIVE_VP_SINGLE_ESCALATION = 0x00000002,
}; };
/* "Any chip" replacement for chip ID for allocation functions */ /* "Any chip" replacement for chip ID for allocation functions */
......
...@@ -111,9 +111,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio); ...@@ -111,9 +111,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
extern void xive_native_sync_source(u32 hw_irq); extern void xive_native_sync_source(u32 hw_irq);
extern bool is_xive_irq(struct irq_chip *chip); extern bool is_xive_irq(struct irq_chip *chip);
extern int xive_native_enable_vp(u32 vp_id); extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
extern int xive_native_disable_vp(u32 vp_id); extern int xive_native_disable_vp(u32 vp_id);
extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id); extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
extern bool xive_native_has_single_escalation(void);
#else #else
......
...@@ -112,19 +112,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) ...@@ -112,19 +112,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
return -EIO; return -EIO;
} }
/* if (xc->xive->single_escalation)
* Future improvement: start with them disabled name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
* and handle DD2 and later scheme of merged escalation vcpu->kvm->arch.lpid, xc->server_num);
* interrupts else
*/ name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d", vcpu->kvm->arch.lpid, xc->server_num, prio);
vcpu->kvm->arch.lpid, xc->server_num, prio);
if (!name) { if (!name) {
pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n", pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
prio, xc->server_num); prio, xc->server_num);
rc = -ENOMEM; rc = -ENOMEM;
goto error; goto error;
} }
pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
rc = request_irq(xc->esc_virq[prio], xive_esc_irq, rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
IRQF_NO_THREAD, name, vcpu); IRQF_NO_THREAD, name, vcpu);
if (rc) { if (rc) {
...@@ -191,12 +193,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio) ...@@ -191,12 +193,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
pr_devel("Provisioning prio... %d\n", prio); pr_devel("Provisioning prio... %d\n", prio);
/* Provision each VCPU and enable escalations */ /* Provision each VCPU and enable escalations if needed */
kvm_for_each_vcpu(i, vcpu, kvm) { kvm_for_each_vcpu(i, vcpu, kvm) {
if (!vcpu->arch.xive_vcpu) if (!vcpu->arch.xive_vcpu)
continue; continue;
rc = xive_provision_queue(vcpu, prio); rc = xive_provision_queue(vcpu, prio);
if (rc == 0) if (rc == 0 && !xive->single_escalation)
xive_attach_escalation(vcpu, prio); xive_attach_escalation(vcpu, prio);
if (rc) if (rc)
return rc; return rc;
...@@ -1081,6 +1083,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, ...@@ -1081,6 +1083,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
/* Allocate IPI */ /* Allocate IPI */
xc->vp_ipi = xive_native_alloc_irq(); xc->vp_ipi = xive_native_alloc_irq();
if (!xc->vp_ipi) { if (!xc->vp_ipi) {
pr_err("Failed to allocate xive irq for VCPU IPI\n");
r = -EIO; r = -EIO;
goto bail; goto bail;
} }
...@@ -1090,19 +1093,34 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, ...@@ -1090,19 +1093,34 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
if (r) if (r)
goto bail; goto bail;
/*
* Enable the VP first as the single escalation mode will
* affect escalation interrupts numbering
*/
r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
if (r) {
pr_err("Failed to enable VP in OPAL, err %d\n", r);
goto bail;
}
/* /*
* Initialize queues. Initially we set them all for no queueing * Initialize queues. Initially we set them all for no queueing
* and we enable escalation for queue 0 only which we'll use for * and we enable escalation for queue 0 only which we'll use for
* our mfrr change notifications. If the VCPU is hot-plugged, we * our mfrr change notifications. If the VCPU is hot-plugged, we
* do handle provisioning however. * do handle provisioning however based on the existing "map"
* of enabled queues.
*/ */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
struct xive_q *q = &xc->queues[i]; struct xive_q *q = &xc->queues[i];
/* Single escalation, no queue 7 */
if (i == 7 && xive->single_escalation)
break;
/* Is queue already enabled ? Provision it */ /* Is queue already enabled ? Provision it */
if (xive->qmap & (1 << i)) { if (xive->qmap & (1 << i)) {
r = xive_provision_queue(vcpu, i); r = xive_provision_queue(vcpu, i);
if (r == 0) if (r == 0 && !xive->single_escalation)
xive_attach_escalation(vcpu, i); xive_attach_escalation(vcpu, i);
if (r) if (r)
goto bail; goto bail;
...@@ -1122,11 +1140,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, ...@@ -1122,11 +1140,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
if (r) if (r)
goto bail; goto bail;
/* Enable the VP */
r = xive_native_enable_vp(xc->vp_id);
if (r)
goto bail;
/* Route the IPI */ /* Route the IPI */
r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI); r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
if (!r) if (!r)
...@@ -1473,6 +1486,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) ...@@ -1473,6 +1486,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n", pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n",
val, server, guest_prio); val, server, guest_prio);
/* /*
* If the source doesn't already have an IPI, allocate * If the source doesn't already have an IPI, allocate
* one and get the corresponding data * one and get the corresponding data
...@@ -1761,6 +1775,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) ...@@ -1761,6 +1775,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
if (xive->vp_base == XIVE_INVALID_VP) if (xive->vp_base == XIVE_INVALID_VP)
ret = -ENOMEM; ret = -ENOMEM;
xive->single_escalation = xive_native_has_single_escalation();
if (ret) { if (ret) {
kfree(xive); kfree(xive);
return ret; return ret;
......
...@@ -120,6 +120,8 @@ struct kvmppc_xive { ...@@ -120,6 +120,8 @@ struct kvmppc_xive {
u32 q_order; u32 q_order;
u32 q_page_order; u32 q_page_order;
/* Flags */
u8 single_escalation;
}; };
#define KVMPPC_XIVE_Q_COUNT 8 #define KVMPPC_XIVE_Q_COUNT 8
...@@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp ...@@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
* is as follow. * is as follow.
* *
* Guest request for 0...6 are honored. Guest request for anything * Guest request for 0...6 are honored. Guest request for anything
* higher results in a priority of 7 being applied. * higher results in a priority of 6 being applied.
*
* However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
* in order to match AIX expectations
* *
* Similar mapping is done for CPPR values * Similar mapping is done for CPPR values
*/ */
static inline u8 xive_prio_from_guest(u8 prio) static inline u8 xive_prio_from_guest(u8 prio)
{ {
if (prio == 0xff || prio < 8) if (prio == 0xff || prio < 6)
return prio; return prio;
return 7; return 6;
} }
static inline u8 xive_prio_to_guest(u8 prio) static inline u8 xive_prio_to_guest(u8 prio)
{ {
if (prio == 0xff || prio < 7) return prio;
return prio;
return 0xb;
} }
static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle) static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
......
...@@ -42,6 +42,7 @@ static u32 xive_provision_chip_count; ...@@ -42,6 +42,7 @@ static u32 xive_provision_chip_count;
static u32 xive_queue_shift; static u32 xive_queue_shift;
static u32 xive_pool_vps = XIVE_INVALID_VP; static u32 xive_pool_vps = XIVE_INVALID_VP;
static struct kmem_cache *xive_provision_cache; static struct kmem_cache *xive_provision_cache;
static bool xive_has_single_esc;
int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
{ {
...@@ -571,6 +572,10 @@ bool __init xive_native_init(void) ...@@ -571,6 +572,10 @@ bool __init xive_native_init(void)
break; break;
} }
/* Do we support single escalation */
if (of_get_property(np, "single-escalation-support", NULL) != NULL)
xive_has_single_esc = true;
/* Configure Thread Management areas for KVM */ /* Configure Thread Management areas for KVM */
for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
kvmppc_set_xive_tima(cpu, r.start, tima); kvmppc_set_xive_tima(cpu, r.start, tima);
...@@ -667,12 +672,15 @@ void xive_native_free_vp_block(u32 vp_base) ...@@ -667,12 +672,15 @@ void xive_native_free_vp_block(u32 vp_base)
} }
EXPORT_SYMBOL_GPL(xive_native_free_vp_block); EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
int xive_native_enable_vp(u32 vp_id) int xive_native_enable_vp(u32 vp_id, bool single_escalation)
{ {
s64 rc; s64 rc;
u64 flags = OPAL_XIVE_VP_ENABLED;
if (single_escalation)
flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
for (;;) { for (;;) {
rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0); rc = opal_xive_set_vp_info(vp_id, flags, 0);
if (rc != OPAL_BUSY) if (rc != OPAL_BUSY)
break; break;
msleep(1); msleep(1);
...@@ -710,3 +718,9 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id) ...@@ -710,3 +718,9 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(xive_native_get_vp_info); EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
/*
 * Report whether the XIVE controller supports the "single escalation"
 * VP mode, in which all per-queue escalation interrupts of a VP are
 * merged into one (at the cost of sacrificing queue/priority 7).
 * The flag is latched at init time from the "single-escalation-support"
 * device-tree property probed in xive_native_init().
 */
bool xive_native_has_single_escalation(void)
{
return xive_has_single_esc;
}
EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment