Commit 20b3d54e authored by Marc Zyngier

irqchip/gic-v3-its: Add device proxy for VPE management if !DirectLpi

When we don't have the DirectLPI feature, we must work around the
architecture shortcomings to be able to perform the required
maintenance (interrupt masking, clearing and injection).

For this, we create a fake device whose sole purpose is to
provide a way to issue commands as if we were dealing with LPIs
coming from that device (while they actually originate from
the ITS). This fake device doesn't have LPIs allocated to it,
but instead uses the VPE LPIs.

Of course, this could be a real bottleneck, and a naive
implementation would require 6 commands to issue an invalidation.

Instead, let's allocate at least one event per physical CPU
(rounded up to the next power of 2), and opportunistically
map the VPE doorbell to an event. This doorbell will be mapped
until we roll over and need to reallocate this slot.

This ensures that most of the time, we only need 2 commands
to issue an INV, INT or CLEAR, making the performance a lot
better, given that we always issue a CLEAR on entry, and
an INV on each side of a trapped WFI.
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
parent 93f94ea0
...@@ -136,6 +136,13 @@ struct its_device { ...@@ -136,6 +136,13 @@ struct its_device {
u32 device_id; u32 device_id;
}; };
/*
 * Bookkeeping for the proxy device that stands in for VPE doorbells
 * when the redistributors do not offer DirectLPI.
 */
static struct {
/* Held while doorbells are mapped/unmapped and proxied commands are issued */
raw_spinlock_t lock;
/* The fake ITS device; set up by its_init_vpe_domain() */
struct its_device *dev;
/* One entry per event: the VPE currently mapped to that event, or NULL */
struct its_vpe **vpes;
/* Index of the slot the next mapping request will take over */
int next_victim;
} vpe_proxy;
static LIST_HEAD(its_nodes); static LIST_HEAD(its_nodes);
static DEFINE_SPINLOCK(its_lock); static DEFINE_SPINLOCK(its_lock);
static struct rdists *gic_rdists; static struct rdists *gic_rdists;
...@@ -2090,6 +2097,16 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, ...@@ -2090,6 +2097,16 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev,
msi_info = msi_get_domain_info(domain); msi_info = msi_get_domain_info(domain);
its = msi_info->data; its = msi_info->data;
if (!gic_rdists->has_direct_lpi &&
vpe_proxy.dev &&
vpe_proxy.dev->its == its &&
dev_id == vpe_proxy.dev->device_id) {
/* Bad luck. Get yourself a better implementation */
WARN_ONCE(1, "DevId %x clashes with GICv4 VPE proxy device\n",
dev_id);
return -EINVAL;
}
its_dev = its_find_device(its, dev_id); its_dev = its_find_device(its, dev_id);
if (its_dev) { if (its_dev) {
/* /*
...@@ -2237,6 +2254,70 @@ static const struct irq_domain_ops its_domain_ops = { ...@@ -2237,6 +2254,70 @@ static const struct irq_domain_ops its_domain_ops = {
.deactivate = its_irq_domain_deactivate, .deactivate = its_irq_domain_deactivate,
}; };
/*
* This is insane.
*
* If a GICv4 doesn't implement Direct LPIs (which is extremely
* likely), the only way to perform an invalidate is to use a fake
* device to issue an INV command, implying that the LPI has first
* been mapped to some event on that device. Since this is not exactly
* cheap, we try to keep that mapping around as long as possible, and
* only issue an UNMAP if we're short on available slots.
*
* Broken by design(tm).
*/
/*
 * Drop the proxy-device mapping of @vpe's doorbell, if it has one.
 *
 * Callers in this file invoke this with vpe_proxy.lock held.
 */
static void its_vpe_db_proxy_unmap_locked(struct its_vpe *vpe)
{
	int slot = vpe->vpe_proxy_event;

	/* No event assigned: nothing to undo. */
	if (slot == -1)
		return;

	/* Discard the event on the proxy device and release its slot. */
	its_send_discard(vpe_proxy.dev, slot);
	vpe_proxy.vpes[slot] = NULL;

	/*
	 * Free slots are not tracked anywhere. If the current victim
	 * slot is occupied, point next_victim at the slot we just
	 * released so the next mapping can take it without evicting
	 * anybody. Whether this is always beneficial is unclear (it
	 * may cause a ripple effect), so this relies on VPEs not
	 * migrating too often.
	 */
	if (vpe_proxy.vpes[vpe_proxy.next_victim] != NULL)
		vpe_proxy.next_victim = slot;

	vpe->vpe_proxy_event = -1;
}
/*
 * Locked wrapper around its_vpe_db_proxy_unmap_locked().
 *
 * Does nothing when DirectLPI is available; otherwise performs the
 * unmap with vpe_proxy.lock held and interrupts disabled.
 */
static void its_vpe_db_proxy_unmap(struct its_vpe *vpe)
{
	unsigned long irqflags;

	if (gic_rdists->has_direct_lpi)
		return;

	raw_spin_lock_irqsave(&vpe_proxy.lock, irqflags);
	its_vpe_db_proxy_unmap_locked(vpe);
	raw_spin_unlock_irqrestore(&vpe_proxy.lock, irqflags);
}
/*
 * Make sure @vpe's doorbell LPI is mapped to an event on the proxy
 * device, evicting whichever VPE currently sits in the victim slot.
 *
 * Callers in this file invoke this with vpe_proxy.lock held.
 */
static void its_vpe_db_proxy_map_locked(struct its_vpe *vpe)
{
	struct its_vpe *evicted;
	int slot;

	/* An event is already assigned: keep using it. */
	if (vpe->vpe_proxy_event != -1)
		return;

	/* The victim slot is in use, so its current owner has to go. */
	evicted = vpe_proxy.vpes[vpe_proxy.next_victim];
	if (evicted)
		its_vpe_db_proxy_unmap_locked(evicted);

	/* Read the victim index only now, after any eviction. */
	slot = vpe_proxy.next_victim;

	/* Hand the slot to the new VPE and advance the victim index. */
	vpe_proxy.vpes[slot] = vpe;
	vpe->vpe_proxy_event = slot;
	vpe_proxy.next_victim = (slot + 1) % vpe_proxy.dev->nr_ites;

	/* Record the VPE's collection for this event, then map the doorbell. */
	vpe_proxy.dev->event_map.col_map[slot] = vpe->col_idx;
	its_send_mapti(vpe_proxy.dev, vpe->vpe_db_lpi, slot);
}
static int its_vpe_set_affinity(struct irq_data *d, static int its_vpe_set_affinity(struct irq_data *d,
const struct cpumask *mask_val, const struct cpumask *mask_val,
bool force) bool force)
...@@ -2246,9 +2327,11 @@ static int its_vpe_set_affinity(struct irq_data *d, ...@@ -2246,9 +2327,11 @@ static int its_vpe_set_affinity(struct irq_data *d,
/* /*
* Changing affinity is mega expensive, so let's be as lazy as * Changing affinity is mega expensive, so let's be as lazy as
* we can and only do it if we really have to. * we can and only do it if we really have to. Also, if mapped
* into the proxy device, we need to nuke that mapping.
*/ */
if (vpe->col_idx != cpu) { if (vpe->col_idx != cpu) {
its_vpe_db_proxy_unmap(vpe);
vpe->col_idx = cpu; vpe->col_idx = cpu;
its_send_vmovp(vpe); its_send_vmovp(vpe);
} }
...@@ -2343,15 +2426,33 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) ...@@ -2343,15 +2426,33 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
} }
} }
/*
 * Issue a per-event ITS command for @vpe's doorbell via the proxy device.
 *
 * @vpe: VPE whose doorbell the command targets
 * @cmd: command emitter, called with the proxy device and the event
 *       the doorbell is mapped to
 *
 * The doorbell is mapped on demand, and both the mapping and the
 * command happen under vpe_proxy.lock so the event cannot be
 * reassigned in between.
 */
static void its_vpe_send_cmd(struct its_vpe *vpe,
			     void (*cmd)(struct its_device *, u32))
{
	unsigned long irqflags;

	raw_spin_lock_irqsave(&vpe_proxy.lock, irqflags);

	its_vpe_db_proxy_map_locked(vpe);
	cmd(vpe_proxy.dev, vpe->vpe_proxy_event);

	raw_spin_unlock_irqrestore(&vpe_proxy.lock, irqflags);
}
static void its_vpe_send_inv(struct irq_data *d) static void its_vpe_send_inv(struct irq_data *d)
{ {
struct its_vpe *vpe = irq_data_get_irq_chip_data(d); struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
if (gic_rdists->has_direct_lpi) {
void __iomem *rdbase; void __iomem *rdbase;
rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_INVLPIR); gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_INVLPIR);
while (gic_read_lpir(rdbase + GICR_SYNCR) & 1) while (gic_read_lpir(rdbase + GICR_SYNCR) & 1)
cpu_relax(); cpu_relax();
} else {
its_vpe_send_cmd(vpe, its_send_inv);
}
} }
static void its_vpe_mask_irq(struct irq_data *d) static void its_vpe_mask_irq(struct irq_data *d)
...@@ -2417,12 +2518,14 @@ static int its_vpe_init(struct its_vpe *vpe) ...@@ -2417,12 +2518,14 @@ static int its_vpe_init(struct its_vpe *vpe)
vpe->vpe_id = vpe_id; vpe->vpe_id = vpe_id;
vpe->vpt_page = vpt_page; vpe->vpt_page = vpt_page;
vpe->vpe_proxy_event = -1;
return 0; return 0;
} }
/*
 * Release what a VPE holds: its proxy-device doorbell mapping (the
 * unmap helper does nothing when DirectLPI is available), then its
 * VPE ID and its pending table.
 */
static void its_vpe_teardown(struct its_vpe *vpe) static void its_vpe_teardown(struct its_vpe *vpe)
{ {
/* Unmap first; presumably so that no proxy slot keeps pointing at this VPE */
its_vpe_db_proxy_unmap(vpe);
its_vpe_id_free(vpe->vpe_id); its_vpe_id_free(vpe->vpe_id);
its_free_pending_table(vpe->vpt_page); its_free_pending_table(vpe->vpt_page);
} }
...@@ -2653,6 +2756,42 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) ...@@ -2653,6 +2756,42 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its)
static int its_init_vpe_domain(void) static int its_init_vpe_domain(void)
{ {
struct its_node *its;
u32 devid;
int entries;
if (gic_rdists->has_direct_lpi) {
pr_info("ITS: Using DirectLPI for VPE invalidation\n");
return 0;
}
/* Any ITS will do, even if not v4 */
its = list_first_entry(&its_nodes, struct its_node, entry);
entries = roundup_pow_of_two(nr_cpu_ids);
vpe_proxy.vpes = kzalloc(sizeof(*vpe_proxy.vpes) * entries,
GFP_KERNEL);
if (!vpe_proxy.vpes) {
pr_err("ITS: Can't allocate GICv4 proxy device array\n");
return -ENOMEM;
}
/* Use the last possible DevID */
devid = GENMASK(its->device_ids - 1, 0);
vpe_proxy.dev = its_create_device(its, devid, entries, false);
if (!vpe_proxy.dev) {
kfree(vpe_proxy.vpes);
pr_err("ITS: Can't allocate GICv4 proxy device\n");
return -ENOMEM;
}
BUG_ON(entries != vpe_proxy.dev->nr_ites);
raw_spin_lock_init(&vpe_proxy.lock);
vpe_proxy.next_victim = 0;
pr_info("ITS: Allocated DevID %x as GICv4 proxy device (%d slots)\n",
devid, vpe_proxy.dev->nr_ites);
return 0; return 0;
} }
......
...@@ -39,6 +39,8 @@ struct its_vpe { ...@@ -39,6 +39,8 @@ struct its_vpe {
/* Doorbell interrupt */ /* Doorbell interrupt */
int irq; int irq;
irq_hw_number_t vpe_db_lpi; irq_hw_number_t vpe_db_lpi;
/* VPE proxy mapping */
int vpe_proxy_event;
/* /*
* This collection ID is used to indirect the target * This collection ID is used to indirect the target
* redistributor for this VPE. The ID itself isn't involved in * redistributor for this VPE. The ID itself isn't involved in
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment