Commit fe92ebf3 authored by Josh Aas's avatar Josh Aas Committed by Linus Torvalds

[PATCH] improve speed of freeing bootmem

Attached is a patch that greatly improves the speed of freeing boot memory.
 On ia64 machines with 2GB or more memory (I didn't test with less, but I
can't imagine there being a problem), the speed improvement is about 75%
for the function free_all_bootmem_core.  This translates to savings on the
order of 1 minute / TB of memory during boot time.  That number comes from
testing on a machine with 512GB, and extrapolating based on profiling of an
unpatched 4TB machine.  For 4 and 8 TB machines, the time spent in this
function is about 1 minutes/TB, which is painful especially given that
there is no indication of what is going on put to the console (this issue
to possibly be addressed later).

The basic idea is to free higher order pages instead of going through every
single one.  Also, some unnecessary atomic operations are done away with
and replaced with non-atomic equivalents, and prefetching is done where it
helps the most.  For a more in-depth discusion of this patch, please see
the linux-ia64 archives (topic is "free bootmem feedback patch").

The patch is originally Tony Luck's, and I added some further optimizations
(non-atomic ops improvements and prefetching).
Signed-off-by: default avatarTony Luck <tony.luck@intel.com>
Signed-off-by: default avatarJosh Aas <josha@sgi.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 13c61952
...@@ -233,6 +233,7 @@ extern unsigned long __read_page_state(unsigned offset); ...@@ -233,6 +233,7 @@ extern unsigned long __read_page_state(unsigned offset);
#define PageReserved(page) test_bit(PG_reserved, &(page)->flags) #define PageReserved(page) test_bit(PG_reserved, &(page)->flags)
#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) #define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags)
#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) #define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags)
#define __ClearPageReserved(page) __clear_bit(PG_reserved, &(page)->flags)
#define SetPagePrivate(page) set_bit(PG_private, &(page)->flags) #define SetPagePrivate(page) set_bit(PG_private, &(page)->flags)
#define ClearPagePrivate(page) clear_bit(PG_private, &(page)->flags) #define ClearPagePrivate(page) clear_bit(PG_private, &(page)->flags)
......
...@@ -259,6 +259,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) ...@@ -259,6 +259,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
unsigned long i, count, total = 0; unsigned long i, count, total = 0;
unsigned long idx; unsigned long idx;
unsigned long *map; unsigned long *map;
int gofast = 0;
BUG_ON(!bdata->node_bootmem_map); BUG_ON(!bdata->node_bootmem_map);
...@@ -267,14 +268,32 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) ...@@ -267,14 +268,32 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
page = virt_to_page(phys_to_virt(bdata->node_boot_start)); page = virt_to_page(phys_to_virt(bdata->node_boot_start));
idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
map = bdata->node_bootmem_map; map = bdata->node_bootmem_map;
/* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */
if (bdata->node_boot_start == 0 ||
ffs(bdata->node_boot_start) - PAGE_SHIFT > ffs(BITS_PER_LONG))
gofast = 1;
for (i = 0; i < idx; ) { for (i = 0; i < idx; ) {
unsigned long v = ~map[i / BITS_PER_LONG]; unsigned long v = ~map[i / BITS_PER_LONG];
if (v) { if (gofast && v == ~0UL) {
int j;
count += BITS_PER_LONG;
__ClearPageReserved(page);
set_page_count(page, 1);
for (j = 1; j < BITS_PER_LONG; j++) {
if (j + 16 < BITS_PER_LONG)
prefetchw(page + j + 16);
__ClearPageReserved(page + j);
}
__free_pages(page, ffs(BITS_PER_LONG)-1);
i += BITS_PER_LONG;
page += BITS_PER_LONG;
} else if (v) {
unsigned long m; unsigned long m;
for (m = 1; m && i < idx; m<<=1, page++, i++) { for (m = 1; m && i < idx; m<<=1, page++, i++) {
if (v & m) { if (v & m) {
count++; count++;
ClearPageReserved(page); __ClearPageReserved(page);
set_page_count(page, 1); set_page_count(page, 1);
__free_page(page); __free_page(page);
} }
...@@ -294,7 +313,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) ...@@ -294,7 +313,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
count = 0; count = 0;
for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) { for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) {
count++; count++;
ClearPageReserved(page); __ClearPageReserved(page);
set_page_count(page, 1); set_page_count(page, 1);
__free_page(page); __free_page(page);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment