Commit 3aff5fac authored by Linus Torvalds

Merge branch 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu

Pull percpu updates from Dennis Zhou:

 - scan hint update which helps address performance issues with heavily
   fragmented blocks

 - lockdep fix when freeing an allocation causes balance work to be
   scheduled

* 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu:
  percpu: remove spurious lock dependency between percpu and sched
  percpu: use chunk scan_hint to skip some scanning
  percpu: convert chunk hints to be based on pcpu_block_md
  percpu: make pcpu_block_md generic
  percpu: use block scan_hint to only scan forward
  percpu: remember largest area skipped during allocation
  percpu: add block level scan_hint
  percpu: set PCPU_BITMAP_BLOCK_SIZE to PAGE_SIZE
  percpu: relegate chunks unusable when failing small allocations
  percpu: manage chunks based on contig_bits instead of free_bytes
  percpu: introduce helper to determine if two regions overlap
  percpu: do not search past bitmap when allocating an area
  percpu: update free path with correct new free region
parents 0aed4b28 198790d9
......@@ -26,16 +26,10 @@
#define PCPU_MIN_ALLOC_SHIFT 2
#define PCPU_MIN_ALLOC_SIZE (1 << PCPU_MIN_ALLOC_SHIFT)
/* number of bits per page, used to trigger a scan if blocks are > PAGE_SIZE */
#define PCPU_BITS_PER_PAGE (PAGE_SIZE >> PCPU_MIN_ALLOC_SHIFT)
/*
* This determines the size of each metadata block. There are several subtle
* constraints around this constant. The reserved region must be a multiple of
* PCPU_BITMAP_BLOCK_SIZE. Additionally, PCPU_BITMAP_BLOCK_SIZE must be a
* multiple of PAGE_SIZE or PAGE_SIZE must be a multiple of
* PCPU_BITMAP_BLOCK_SIZE to align with the populated page map. The unit_size
* also has to be a multiple of PCPU_BITMAP_BLOCK_SIZE to ensure full blocks.
* The PCPU_BITMAP_BLOCK_SIZE must be the same size as PAGE_SIZE as the
* updating of hints is used to manage the nr_empty_pop_pages in both
* the chunk and globally.
*/
#define PCPU_BITMAP_BLOCK_SIZE PAGE_SIZE
#define PCPU_BITMAP_BLOCK_BITS (PCPU_BITMAP_BLOCK_SIZE >> \
......
......@@ -9,8 +9,17 @@
* pcpu_block_md is the metadata block struct.
* Each chunk's bitmap is split into a number of full blocks.
* All units are in terms of bits.
*
* The scan hint is the largest known contiguous area before the contig hint.
* It is not necessarily the actual largest contig hint though. There is an
* invariant that the scan_hint_start > contig_hint_start iff
* scan_hint == contig_hint. This is necessary because when scanning forward,
* we don't know if a new contig hint would be better than the current one.
*/
struct pcpu_block_md {
int scan_hint; /* scan hint for block */
int scan_hint_start; /* block relative starting
position of the scan hint */
int contig_hint; /* contig hint for block */
int contig_hint_start; /* block relative starting
position of the contig hint */
......@@ -19,6 +28,7 @@ struct pcpu_block_md {
int right_free; /* size of free space along
the right side of the block */
int first_free; /* block position of first free */
int nr_bits; /* total bits responsible for */
};
struct pcpu_chunk {
......@@ -29,9 +39,7 @@ struct pcpu_chunk {
struct list_head list; /* linked to pcpu_slot lists */
int free_bytes; /* free bytes in the chunk */
int contig_bits; /* max contiguous size hint */
int contig_bits_start; /* contig_bits starting
offset */
struct pcpu_block_md chunk_md;
void *base_addr; /* base address of this chunk */
unsigned long *alloc_map; /* allocation map */
......@@ -39,7 +47,6 @@ struct pcpu_chunk {
struct pcpu_block_md *md_blocks; /* metadata blocks */
void *data; /* chunk data */
int first_bit; /* no free below this */
bool immutable; /* no [de]population allowed */
int start_offset; /* the overlap with the previous
region to have a page aligned
......
......@@ -70,7 +70,7 @@ static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
chunk->base_addr = page_address(pages);
spin_lock_irqsave(&pcpu_lock, flags);
pcpu_chunk_populated(chunk, 0, nr_pages, false);
pcpu_chunk_populated(chunk, 0, nr_pages);
spin_unlock_irqrestore(&pcpu_lock, flags);
pcpu_stats_chunk_alloc();
......
......@@ -53,6 +53,7 @@ static int find_max_nr_alloc(void)
static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
int *buffer)
{
struct pcpu_block_md *chunk_md = &chunk->chunk_md;
int i, last_alloc, as_len, start, end;
int *alloc_sizes, *p;
/* statistics */
......@@ -121,9 +122,9 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
P("nr_alloc", chunk->nr_alloc);
P("max_alloc_size", chunk->max_alloc_size);
P("empty_pop_pages", chunk->nr_empty_pop_pages);
P("first_bit", chunk->first_bit);
P("first_bit", chunk_md->first_free);
P("free_bytes", chunk->free_bytes);
P("contig_bytes", chunk->contig_bits * PCPU_MIN_ALLOC_SIZE);
P("contig_bytes", chunk_md->contig_hint * PCPU_MIN_ALLOC_SIZE);
P("sum_frag", sum_frag);
P("max_frag", max_frag);
P("cur_min_alloc", cur_min_alloc);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment