Commit 880cb3cd authored by Marc Zyngier's avatar Marc Zyngier

irqchip/gic-v3-its: Refactor LPI allocator

Our current LPI allocator relies on a bitmap, each bit representing
a chunk of 32 LPIs, meaning that each device gets allocated LPIs
in multiple of 32. It served us well so far, but new use cases now
require much more finer grain allocations, down the the individual
LPI.

Given the size of the IntID space (up to 32bit), it isn't practical
to continue using a bitmap, so let's use a different data structure
altogether.

We switch to a list, where each element represent a contiguous range
of LPIs. On allocation, we simply grab the first group big enough to
satisfy the allocation, and substract what we need from it. If the
group becomes empty, we just remove it. On freeing interrupts, we
insert a new group of interrupt in the list, sort it and fuse the
adjacent groups.

This makes freeing interrupt much more expensive than allocating
them (an unusual behaviour), but that's fine as long as we consider
that freeing interrupts is an extremely rare event.

We still allocate interrupts in blocks of 32 for the time being,
but subsequent patches will relax this.
Signed-off-by: default avatarMarc Zyngier <marc.zyngier@arm.com>
parent 9d3cce1e
...@@ -23,6 +23,8 @@ ...@@ -23,6 +23,8 @@
#include <linux/dma-iommu.h> #include <linux/dma-iommu.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/irqdomain.h> #include <linux/irqdomain.h>
#include <linux/list.h>
#include <linux/list_sort.h>
#include <linux/log2.h> #include <linux/log2.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/msi.h> #include <linux/msi.h>
...@@ -1421,112 +1423,177 @@ static struct irq_chip its_irq_chip = { ...@@ -1421,112 +1423,177 @@ static struct irq_chip its_irq_chip = {
.irq_set_vcpu_affinity = its_irq_set_vcpu_affinity, .irq_set_vcpu_affinity = its_irq_set_vcpu_affinity,
}; };
/* /*
* How we allocate LPIs: * How we allocate LPIs:
* *
* The GIC has id_bits bits for interrupt identifiers. From there, we * lpi_range_list contains ranges of LPIs that are to available to
* must subtract 8192 which are reserved for SGIs/PPIs/SPIs. Then, as * allocate from. To allocate LPIs, just pick the first range that
* we allocate LPIs by chunks of 32, we can shift the whole thing by 5 * fits the required allocation, and reduce it by the required
* bits to the right. * amount. Once empty, remove the range from the list.
*
* To free a range of LPIs, add a free range to the list, sort it and
* merge the result if the new range happens to be adjacent to an
* already free block.
* *
* This gives us (((1UL << id_bits) - 8192) >> 5) possible allocations. * The consequence of the above is that allocation is cost is low, but
* freeing is expensive. We assumes that freeing rarely occurs.
*/
/*
* Compatibility defines until we fully refactor the allocator
*/ */
#define IRQS_PER_CHUNK_SHIFT 5 #define IRQS_PER_CHUNK_SHIFT 5
#define IRQS_PER_CHUNK (1UL << IRQS_PER_CHUNK_SHIFT) #define IRQS_PER_CHUNK (1UL << IRQS_PER_CHUNK_SHIFT)
#define ITS_MAX_LPI_NRBITS 16 /* 64K LPIs */ #define ITS_MAX_LPI_NRBITS 16 /* 64K LPIs */
static unsigned long *lpi_bitmap; static DEFINE_MUTEX(lpi_range_lock);
static u32 lpi_chunks; static LIST_HEAD(lpi_range_list);
static DEFINE_SPINLOCK(lpi_lock);
static int its_lpi_to_chunk(int lpi) struct lpi_range {
struct list_head entry;
u32 base_id;
u32 span;
};
static struct lpi_range *mk_lpi_range(u32 base, u32 span)
{ {
return (lpi - 8192) >> IRQS_PER_CHUNK_SHIFT; struct lpi_range *range;
range = kzalloc(sizeof(*range), GFP_KERNEL);
if (range) {
INIT_LIST_HEAD(&range->entry);
range->base_id = base;
range->span = span;
}
return range;
} }
static int its_chunk_to_lpi(int chunk) static int lpi_range_cmp(void *priv, struct list_head *a, struct list_head *b)
{ {
return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192; struct lpi_range *ra, *rb;
ra = container_of(a, struct lpi_range, entry);
rb = container_of(b, struct lpi_range, entry);
return rb->base_id - ra->base_id;
} }
static int __init its_lpi_init(u32 id_bits) static void merge_lpi_ranges(void)
{ {
lpi_chunks = its_lpi_to_chunk(1UL << id_bits); struct lpi_range *range, *tmp;
lpi_bitmap = kcalloc(BITS_TO_LONGS(lpi_chunks), sizeof(long), list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
GFP_KERNEL); if (!list_is_last(&range->entry, &lpi_range_list) &&
if (!lpi_bitmap) { (tmp->base_id == (range->base_id + range->span))) {
lpi_chunks = 0; tmp->base_id = range->base_id;
return -ENOMEM; tmp->span += range->span;
list_del(&range->entry);
kfree(range);
}
} }
}
pr_info("ITS: Allocated %d chunks for LPIs\n", (int)lpi_chunks); static int alloc_lpi_range(u32 nr_lpis, u32 *base)
return 0; {
struct lpi_range *range, *tmp;
int err = -ENOSPC;
mutex_lock(&lpi_range_lock);
list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
if (range->span >= nr_lpis) {
*base = range->base_id;
range->base_id += nr_lpis;
range->span -= nr_lpis;
if (range->span == 0) {
list_del(&range->entry);
kfree(range);
}
err = 0;
break;
}
}
mutex_unlock(&lpi_range_lock);
pr_debug("ITS: alloc %u:%u\n", *base, nr_lpis);
return err;
} }
static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids) static int free_lpi_range(u32 base, u32 nr_lpis)
{ {
unsigned long *bitmap = NULL; struct lpi_range *new;
int chunk_id; int err = 0;
int nr_chunks;
int i;
nr_chunks = DIV_ROUND_UP(nr_irqs, IRQS_PER_CHUNK); mutex_lock(&lpi_range_lock);
spin_lock(&lpi_lock); new = mk_lpi_range(base, nr_lpis);
if (!new) {
err = -ENOMEM;
goto out;
}
list_add(&new->entry, &lpi_range_list);
list_sort(NULL, &lpi_range_list, lpi_range_cmp);
merge_lpi_ranges();
out:
mutex_unlock(&lpi_range_lock);
return err;
}
static int __init its_lpi_init(u32 id_bits)
{
u32 lpis = (1UL << id_bits) - 8192;
int err;
/*
* Initializing the allocator is just the same as freeing the
* full range of LPIs.
*/
err = free_lpi_range(8192, lpis);
pr_debug("ITS: Allocator initialized for %u LPIs\n", lpis);
return err;
}
static unsigned long *its_lpi_alloc_chunks(int nr_irqs, u32 *base, int *nr_ids)
{
unsigned long *bitmap = NULL;
int err = 0;
int nr_lpis;
nr_lpis = round_up(nr_irqs, IRQS_PER_CHUNK);
do { do {
chunk_id = bitmap_find_next_zero_area(lpi_bitmap, lpi_chunks, err = alloc_lpi_range(nr_lpis, base);
0, nr_chunks, 0); if (!err)
if (chunk_id < lpi_chunks)
break; break;
nr_chunks--; nr_lpis -= IRQS_PER_CHUNK;
} while (nr_chunks > 0); } while (nr_lpis > 0);
if (!nr_chunks) if (err)
goto out; goto out;
bitmap = kcalloc(BITS_TO_LONGS(nr_chunks * IRQS_PER_CHUNK), bitmap = kcalloc(BITS_TO_LONGS(nr_lpis), sizeof (long), GFP_ATOMIC);
sizeof(long),
GFP_ATOMIC);
if (!bitmap) if (!bitmap)
goto out; goto out;
for (i = 0; i < nr_chunks; i++) *nr_ids = nr_lpis;
set_bit(chunk_id + i, lpi_bitmap);
*base = its_chunk_to_lpi(chunk_id);
*nr_ids = nr_chunks * IRQS_PER_CHUNK;
out: out:
spin_unlock(&lpi_lock);
if (!bitmap) if (!bitmap)
*base = *nr_ids = 0; *base = *nr_ids = 0;
return bitmap; return bitmap;
} }
static void its_lpi_free_chunks(unsigned long *bitmap, int base, int nr_ids) static void its_lpi_free_chunks(unsigned long *bitmap, u32 base, u32 nr_ids)
{ {
int lpi; WARN_ON(free_lpi_range(base, nr_ids));
spin_lock(&lpi_lock);
for (lpi = base; lpi < (base + nr_ids); lpi += IRQS_PER_CHUNK) {
int chunk = its_lpi_to_chunk(lpi);
BUG_ON(chunk > lpi_chunks);
if (test_bit(chunk, lpi_bitmap)) {
clear_bit(chunk, lpi_bitmap);
} else {
pr_err("Bad LPI chunk %d\n", chunk);
}
}
spin_unlock(&lpi_lock);
kfree(bitmap); kfree(bitmap);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment