Commit 3aff5fac authored by Linus Torvalds

Merge branch 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu

Pull percpu updates from Dennis Zhou:

 - scan hint update which helps address performance issues with heavily
   fragmented blocks

 - lockdep fix when freeing an allocation causes balance work to be
   scheduled

* 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu:
  percpu: remove spurious lock dependency between percpu and sched
  percpu: use chunk scan_hint to skip some scanning
  percpu: convert chunk hints to be based on pcpu_block_md
  percpu: make pcpu_block_md generic
  percpu: use block scan_hint to only scan forward
  percpu: remember largest area skipped during allocation
  percpu: add block level scan_hint
  percpu: set PCPU_BITMAP_BLOCK_SIZE to PAGE_SIZE
  percpu: relegate chunks unusable when failing small allocations
  percpu: manage chunks based on contig_bits instead of free_bytes
  percpu: introduce helper to determine if two regions overlap
  percpu: do not search past bitmap when allocating an area
  percpu: update free path with correct new free region
parents 0aed4b28 198790d9
...@@ -26,16 +26,10 @@ ...@@ -26,16 +26,10 @@
#define PCPU_MIN_ALLOC_SHIFT 2 #define PCPU_MIN_ALLOC_SHIFT 2
#define PCPU_MIN_ALLOC_SIZE (1 << PCPU_MIN_ALLOC_SHIFT) #define PCPU_MIN_ALLOC_SIZE (1 << PCPU_MIN_ALLOC_SHIFT)
/* number of bits per page, used to trigger a scan if blocks are > PAGE_SIZE */
#define PCPU_BITS_PER_PAGE (PAGE_SIZE >> PCPU_MIN_ALLOC_SHIFT)
/* /*
* This determines the size of each metadata block. There are several subtle * The PCPU_BITMAP_BLOCK_SIZE must be the same size as PAGE_SIZE as the
* constraints around this constant. The reserved region must be a multiple of * updating of hints is used to manage the nr_empty_pop_pages in both
* PCPU_BITMAP_BLOCK_SIZE. Additionally, PCPU_BITMAP_BLOCK_SIZE must be a * the chunk and globally.
* multiple of PAGE_SIZE or PAGE_SIZE must be a multiple of
* PCPU_BITMAP_BLOCK_SIZE to align with the populated page map. The unit_size
* also has to be a multiple of PCPU_BITMAP_BLOCK_SIZE to ensure full blocks.
*/ */
#define PCPU_BITMAP_BLOCK_SIZE PAGE_SIZE #define PCPU_BITMAP_BLOCK_SIZE PAGE_SIZE
#define PCPU_BITMAP_BLOCK_BITS (PCPU_BITMAP_BLOCK_SIZE >> \ #define PCPU_BITMAP_BLOCK_BITS (PCPU_BITMAP_BLOCK_SIZE >> \
......
...@@ -9,8 +9,17 @@ ...@@ -9,8 +9,17 @@
* pcpu_block_md is the metadata block struct. * pcpu_block_md is the metadata block struct.
* Each chunk's bitmap is split into a number of full blocks. * Each chunk's bitmap is split into a number of full blocks.
* All units are in terms of bits. * All units are in terms of bits.
*
* The scan hint is the largest known contiguous area before the contig hint.
* It is not necessarily the actual largest contig hint though. There is an
* invariant that the scan_hint_start > contig_hint_start iff
* scan_hint == contig_hint. This is necessary because when scanning forward,
* we don't know if a new contig hint would be better than the current one.
*/ */
struct pcpu_block_md { struct pcpu_block_md {
int scan_hint; /* scan hint for block */
int scan_hint_start; /* block relative starting
position of the scan hint */
int contig_hint; /* contig hint for block */ int contig_hint; /* contig hint for block */
int contig_hint_start; /* block relative starting int contig_hint_start; /* block relative starting
position of the contig hint */ position of the contig hint */
...@@ -19,6 +28,7 @@ struct pcpu_block_md { ...@@ -19,6 +28,7 @@ struct pcpu_block_md {
int right_free; /* size of free space along int right_free; /* size of free space along
the right side of the block */ the right side of the block */
int first_free; /* block position of first free */ int first_free; /* block position of first free */
int nr_bits; /* total bits responsible for */
}; };
struct pcpu_chunk { struct pcpu_chunk {
...@@ -29,9 +39,7 @@ struct pcpu_chunk { ...@@ -29,9 +39,7 @@ struct pcpu_chunk {
struct list_head list; /* linked to pcpu_slot lists */ struct list_head list; /* linked to pcpu_slot lists */
int free_bytes; /* free bytes in the chunk */ int free_bytes; /* free bytes in the chunk */
int contig_bits; /* max contiguous size hint */ struct pcpu_block_md chunk_md;
int contig_bits_start; /* contig_bits starting
offset */
void *base_addr; /* base address of this chunk */ void *base_addr; /* base address of this chunk */
unsigned long *alloc_map; /* allocation map */ unsigned long *alloc_map; /* allocation map */
...@@ -39,7 +47,6 @@ struct pcpu_chunk { ...@@ -39,7 +47,6 @@ struct pcpu_chunk {
struct pcpu_block_md *md_blocks; /* metadata blocks */ struct pcpu_block_md *md_blocks; /* metadata blocks */
void *data; /* chunk data */ void *data; /* chunk data */
int first_bit; /* no free below this */
bool immutable; /* no [de]population allowed */ bool immutable; /* no [de]population allowed */
int start_offset; /* the overlap with the previous int start_offset; /* the overlap with the previous
region to have a page aligned region to have a page aligned
......
...@@ -70,7 +70,7 @@ static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) ...@@ -70,7 +70,7 @@ static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
chunk->base_addr = page_address(pages); chunk->base_addr = page_address(pages);
spin_lock_irqsave(&pcpu_lock, flags); spin_lock_irqsave(&pcpu_lock, flags);
pcpu_chunk_populated(chunk, 0, nr_pages, false); pcpu_chunk_populated(chunk, 0, nr_pages);
spin_unlock_irqrestore(&pcpu_lock, flags); spin_unlock_irqrestore(&pcpu_lock, flags);
pcpu_stats_chunk_alloc(); pcpu_stats_chunk_alloc();
......
...@@ -53,6 +53,7 @@ static int find_max_nr_alloc(void) ...@@ -53,6 +53,7 @@ static int find_max_nr_alloc(void)
static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk, static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
int *buffer) int *buffer)
{ {
struct pcpu_block_md *chunk_md = &chunk->chunk_md;
int i, last_alloc, as_len, start, end; int i, last_alloc, as_len, start, end;
int *alloc_sizes, *p; int *alloc_sizes, *p;
/* statistics */ /* statistics */
...@@ -121,9 +122,9 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk, ...@@ -121,9 +122,9 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
P("nr_alloc", chunk->nr_alloc); P("nr_alloc", chunk->nr_alloc);
P("max_alloc_size", chunk->max_alloc_size); P("max_alloc_size", chunk->max_alloc_size);
P("empty_pop_pages", chunk->nr_empty_pop_pages); P("empty_pop_pages", chunk->nr_empty_pop_pages);
P("first_bit", chunk->first_bit); P("first_bit", chunk_md->first_free);
P("free_bytes", chunk->free_bytes); P("free_bytes", chunk->free_bytes);
P("contig_bytes", chunk->contig_bits * PCPU_MIN_ALLOC_SIZE); P("contig_bytes", chunk_md->contig_hint * PCPU_MIN_ALLOC_SIZE);
P("sum_frag", sum_frag); P("sum_frag", sum_frag);
P("max_frag", max_frag); P("max_frag", max_frag);
P("cur_min_alloc", cur_min_alloc); P("cur_min_alloc", cur_min_alloc);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment