Commit d2f3c384 authored by Dennis Zhou (Facebook), committed by Tejun Heo

percpu: increase minimum percpu allocation size and align first regions

This patch increases the minimum allocation size of percpu memory to
4 bytes. This change will help minimize the metadata overhead
associated with the bitmap allocator. The assumption is that most
allocations will be of objects or structs larger than 2 bytes, with
integers or longs being used rather than shorts.

The first chunk regions are now aligned with the minimum allocation
size. The reserved region is expected to be set as a multiple of the
minimum allocation size. The static region is aligned up and the delta
is removed from the dynamic size. This works because the dynamic size is
increased to be page aligned. If the static size is not aligned to the
minimum allocation size, then there must be a gap that is added to the
dynamic size. The dynamic size will never be smaller than the set value.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
parent 0cecf50c
...@@ -21,6 +21,10 @@ ...@@ -21,6 +21,10 @@
/* minimum unit size, also is the maximum supported allocation size */ /* minimum unit size, also is the maximum supported allocation size */
#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
/* minimum allocation size and shift in bytes */
#define PCPU_MIN_ALLOC_SHIFT 2
#define PCPU_MIN_ALLOC_SIZE (1 << PCPU_MIN_ALLOC_SHIFT)
/* /*
* Percpu allocator can serve percpu allocations before slab is * Percpu allocator can serve percpu allocations before slab is
* initialized which allows slab to depend on the percpu allocator. * initialized which allows slab to depend on the percpu allocator.
......
...@@ -956,10 +956,10 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, ...@@ -956,10 +956,10 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
* We want the lowest bit of offset available for in-use/free * We want the lowest bit of offset available for in-use/free
* indicator, so force >= 16bit alignment and make size even. * indicator, so force >= 16bit alignment and make size even.
*/ */
if (unlikely(align < 2)) if (unlikely(align < PCPU_MIN_ALLOC_SIZE))
align = 2; align = PCPU_MIN_ALLOC_SIZE;
size = ALIGN(size, 2); size = ALIGN(size, PCPU_MIN_ALLOC_SIZE);
if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE || if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE ||
!is_power_of_2(align))) { !is_power_of_2(align))) {
...@@ -1653,6 +1653,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, ...@@ -1653,6 +1653,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
static int smap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata; static int smap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
static int dmap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata; static int dmap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
size_t size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; size_t size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
size_t static_size, dyn_size;
struct pcpu_chunk *chunk; struct pcpu_chunk *chunk;
unsigned long *group_offsets; unsigned long *group_offsets;
size_t *group_sizes; size_t *group_sizes;
...@@ -1686,6 +1687,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, ...@@ -1686,6 +1687,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE); PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
PCPU_SETUP_BUG_ON(!ai->dyn_size); PCPU_SETUP_BUG_ON(!ai->dyn_size);
PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->reserved_size, PCPU_MIN_ALLOC_SIZE));
PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0); PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
/* process group information and build config tables accordingly */ /* process group information and build config tables accordingly */
...@@ -1763,6 +1765,17 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, ...@@ -1763,6 +1765,17 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
for (i = 0; i < pcpu_nr_slots; i++) for (i = 0; i < pcpu_nr_slots; i++)
INIT_LIST_HEAD(&pcpu_slot[i]); INIT_LIST_HEAD(&pcpu_slot[i]);
/*
* The end of the static region needs to be aligned with the
* minimum allocation size as this offsets the reserved and
* dynamic region. The first chunk ends page aligned by
* expanding the dynamic region, therefore the dynamic region
* can be shrunk to compensate while still staying above the
* configured sizes.
*/
static_size = ALIGN(ai->static_size, PCPU_MIN_ALLOC_SIZE);
dyn_size = ai->dyn_size - (static_size - ai->static_size);
/* /*
* Initialize first chunk. * Initialize first chunk.
* If the reserved_size is non-zero, this initializes the reserved * If the reserved_size is non-zero, this initializes the reserved
...@@ -1771,8 +1784,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, ...@@ -1771,8 +1784,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
* pcpu_first_chunk, will always point to the chunk that serves * pcpu_first_chunk, will always point to the chunk that serves
* the dynamic region. * the dynamic region.
*/ */
tmp_addr = (unsigned long)base_addr + ai->static_size; tmp_addr = (unsigned long)base_addr + static_size;
map_size = ai->reserved_size ?: ai->dyn_size; map_size = ai->reserved_size ?: dyn_size;
chunk = pcpu_alloc_first_chunk(tmp_addr, map_size, smap, chunk = pcpu_alloc_first_chunk(tmp_addr, map_size, smap,
ARRAY_SIZE(smap)); ARRAY_SIZE(smap));
...@@ -1780,9 +1793,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, ...@@ -1780,9 +1793,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
if (ai->reserved_size) { if (ai->reserved_size) {
pcpu_reserved_chunk = chunk; pcpu_reserved_chunk = chunk;
tmp_addr = (unsigned long)base_addr + ai->static_size + tmp_addr = (unsigned long)base_addr + static_size +
ai->reserved_size; ai->reserved_size;
map_size = ai->dyn_size; map_size = dyn_size;
chunk = pcpu_alloc_first_chunk(tmp_addr, map_size, dmap, chunk = pcpu_alloc_first_chunk(tmp_addr, map_size, dmap,
ARRAY_SIZE(dmap)); ARRAY_SIZE(dmap));
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment