Commit 9705bea5 authored by Arun KS, committed by Linus Torvalds

mm: convert zone->managed_pages to atomic variable

totalram_pages, zone->managed_pages and totalhigh_pages updates are
protected by managed_page_count_lock, but readers never care about it.
Convert these variables to atomic to avoid readers potentially seeing a
store tear.

This patch converts zone->managed_pages.  Subsequent patches will convert
totalram_pages, totalhigh_pages and eventually managed_page_count_lock
will be removed.

Main motivation was that managed_page_count_lock handling was complicating
things.  It was discussed at length here:
https://lore.kernel.org/patchwork/patch/995739/#1181785 So it seems
better to remove the lock and convert the variables to atomic, with
preventing potential store-to-read tearing as a bonus.

Link: http://lkml.kernel.org/r/1542090790-21750-3-git-send-email-arunks@codeaurora.org
Signed-off-by: Arun KS <arunks@codeaurora.org>
Suggested-by: Michal Hocko <mhocko@suse.com>
Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Pavel Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 3d6357de
...@@ -853,7 +853,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, ...@@ -853,7 +853,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
*/ */
pgdat = NODE_DATA(numa_node_id); pgdat = NODE_DATA(numa_node_id);
for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
mem_in_bytes += pgdat->node_zones[zone_type].managed_pages; mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
mem_in_bytes <<= PAGE_SHIFT; mem_in_bytes <<= PAGE_SHIFT;
sub_type_hdr->length_low = lower_32_bits(mem_in_bytes); sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
......
...@@ -435,7 +435,7 @@ struct zone { ...@@ -435,7 +435,7 @@ struct zone {
* adjust_managed_page_count() should be used instead of directly * adjust_managed_page_count() should be used instead of directly
* touching zone->managed_pages and totalram_pages. * touching zone->managed_pages and totalram_pages.
*/ */
unsigned long managed_pages; atomic_long_t managed_pages;
unsigned long spanned_pages; unsigned long spanned_pages;
unsigned long present_pages; unsigned long present_pages;
...@@ -524,6 +524,11 @@ enum pgdat_flags { ...@@ -524,6 +524,11 @@ enum pgdat_flags {
PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */ PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */
}; };
static inline unsigned long zone_managed_pages(struct zone *zone)
{
return (unsigned long)atomic_long_read(&zone->managed_pages);
}
static inline unsigned long zone_end_pfn(const struct zone *zone) static inline unsigned long zone_end_pfn(const struct zone *zone)
{ {
return zone->zone_start_pfn + zone->spanned_pages; return zone->zone_start_pfn + zone->spanned_pages;
...@@ -820,7 +825,7 @@ static inline bool is_dev_zone(const struct zone *zone) ...@@ -820,7 +825,7 @@ static inline bool is_dev_zone(const struct zone *zone)
*/ */
static inline bool managed_zone(struct zone *zone) static inline bool managed_zone(struct zone *zone)
{ {
return zone->managed_pages; return zone_managed_pages(zone);
} }
/* Returns true if a zone has memory */ /* Returns true if a zone has memory */
......
...@@ -28,7 +28,7 @@ void show_mem(unsigned int filter, nodemask_t *nodemask) ...@@ -28,7 +28,7 @@ void show_mem(unsigned int filter, nodemask_t *nodemask)
continue; continue;
total += zone->present_pages; total += zone->present_pages;
reserved += zone->present_pages - zone->managed_pages; reserved += zone->present_pages - zone_managed_pages(zone);
if (is_highmem_idx(zoneid)) if (is_highmem_idx(zoneid))
highmem += zone->present_pages; highmem += zone->present_pages;
......
...@@ -1950,7 +1950,7 @@ void reset_node_managed_pages(pg_data_t *pgdat) ...@@ -1950,7 +1950,7 @@ void reset_node_managed_pages(pg_data_t *pgdat)
struct zone *z; struct zone *z;
for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
z->managed_pages = 0; atomic_long_set(&z->managed_pages, 0);
} }
void __init reset_all_zones_managed_pages(void) void __init reset_all_zones_managed_pages(void)
......
...@@ -1280,7 +1280,7 @@ static void __init __free_pages_boot_core(struct page *page, unsigned int order) ...@@ -1280,7 +1280,7 @@ static void __init __free_pages_boot_core(struct page *page, unsigned int order)
__ClearPageReserved(p); __ClearPageReserved(p);
set_page_count(p, 0); set_page_count(p, 0);
page_zone(page)->managed_pages += nr_pages; atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
set_page_refcounted(page); set_page_refcounted(page);
__free_pages(page, order); __free_pages(page, order);
} }
...@@ -2259,7 +2259,7 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone, ...@@ -2259,7 +2259,7 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
* Limit the number reserved to 1 pageblock or roughly 1% of a zone. * Limit the number reserved to 1 pageblock or roughly 1% of a zone.
* Check is race-prone but harmless. * Check is race-prone but harmless.
*/ */
max_managed = (zone->managed_pages / 100) + pageblock_nr_pages; max_managed = (zone_managed_pages(zone) / 100) + pageblock_nr_pages;
if (zone->nr_reserved_highatomic >= max_managed) if (zone->nr_reserved_highatomic >= max_managed)
return; return;
...@@ -4661,7 +4661,7 @@ static unsigned long nr_free_zone_pages(int offset) ...@@ -4661,7 +4661,7 @@ static unsigned long nr_free_zone_pages(int offset)
struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL); struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
for_each_zone_zonelist(zone, z, zonelist, offset) { for_each_zone_zonelist(zone, z, zonelist, offset) {
unsigned long size = zone->managed_pages; unsigned long size = zone_managed_pages(zone);
unsigned long high = high_wmark_pages(zone); unsigned long high = high_wmark_pages(zone);
if (size > high) if (size > high)
sum += size - high; sum += size - high;
...@@ -4768,7 +4768,7 @@ void si_meminfo_node(struct sysinfo *val, int nid) ...@@ -4768,7 +4768,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
pg_data_t *pgdat = NODE_DATA(nid); pg_data_t *pgdat = NODE_DATA(nid);
for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
managed_pages += pgdat->node_zones[zone_type].managed_pages; managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]);
val->totalram = managed_pages; val->totalram = managed_pages;
val->sharedram = node_page_state(pgdat, NR_SHMEM); val->sharedram = node_page_state(pgdat, NR_SHMEM);
val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES); val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
...@@ -4777,7 +4777,7 @@ void si_meminfo_node(struct sysinfo *val, int nid) ...@@ -4777,7 +4777,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
struct zone *zone = &pgdat->node_zones[zone_type]; struct zone *zone = &pgdat->node_zones[zone_type];
if (is_highmem(zone)) { if (is_highmem(zone)) {
managed_highpages += zone->managed_pages; managed_highpages += zone_managed_pages(zone);
free_highpages += zone_page_state(zone, NR_FREE_PAGES); free_highpages += zone_page_state(zone, NR_FREE_PAGES);
} }
} }
...@@ -4984,7 +4984,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) ...@@ -4984,7 +4984,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)), K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)), K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
K(zone->present_pages), K(zone->present_pages),
K(zone->managed_pages), K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)), K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB), zone_page_state(zone, NR_KERNEL_STACK_KB),
K(zone_page_state(zone, NR_PAGETABLE)), K(zone_page_state(zone, NR_PAGETABLE)),
...@@ -5656,7 +5656,7 @@ static int zone_batchsize(struct zone *zone) ...@@ -5656,7 +5656,7 @@ static int zone_batchsize(struct zone *zone)
* The per-cpu-pages pools are set to around 1000th of the * The per-cpu-pages pools are set to around 1000th of the
* size of the zone. * size of the zone.
*/ */
batch = zone->managed_pages / 1024; batch = zone_managed_pages(zone) / 1024;
/* But no more than a meg. */ /* But no more than a meg. */
if (batch * PAGE_SIZE > 1024 * 1024) if (batch * PAGE_SIZE > 1024 * 1024)
batch = (1024 * 1024) / PAGE_SIZE; batch = (1024 * 1024) / PAGE_SIZE;
...@@ -5766,7 +5766,7 @@ static void pageset_set_high_and_batch(struct zone *zone, ...@@ -5766,7 +5766,7 @@ static void pageset_set_high_and_batch(struct zone *zone,
{ {
if (percpu_pagelist_fraction) if (percpu_pagelist_fraction)
pageset_set_high(pcp, pageset_set_high(pcp,
(zone->managed_pages / (zone_managed_pages(zone) /
percpu_pagelist_fraction)); percpu_pagelist_fraction));
else else
pageset_set_batch(pcp, zone_batchsize(zone)); pageset_set_batch(pcp, zone_batchsize(zone));
...@@ -6323,7 +6323,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat) ...@@ -6323,7 +6323,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid, static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
unsigned long remaining_pages) unsigned long remaining_pages)
{ {
zone->managed_pages = remaining_pages; atomic_long_set(&zone->managed_pages, remaining_pages);
zone_set_nid(zone, nid); zone_set_nid(zone, nid);
zone->name = zone_names[idx]; zone->name = zone_names[idx];
zone->zone_pgdat = NODE_DATA(nid); zone->zone_pgdat = NODE_DATA(nid);
...@@ -7076,7 +7076,7 @@ early_param("movablecore", cmdline_parse_movablecore); ...@@ -7076,7 +7076,7 @@ early_param("movablecore", cmdline_parse_movablecore);
void adjust_managed_page_count(struct page *page, long count) void adjust_managed_page_count(struct page *page, long count)
{ {
spin_lock(&managed_page_count_lock); spin_lock(&managed_page_count_lock);
page_zone(page)->managed_pages += count; atomic_long_add(count, &page_zone(page)->managed_pages);
totalram_pages += count; totalram_pages += count;
#ifdef CONFIG_HIGHMEM #ifdef CONFIG_HIGHMEM
if (PageHighMem(page)) if (PageHighMem(page))
...@@ -7124,7 +7124,7 @@ void free_highmem_page(struct page *page) ...@@ -7124,7 +7124,7 @@ void free_highmem_page(struct page *page)
{ {
__free_reserved_page(page); __free_reserved_page(page);
totalram_pages++; totalram_pages++;
page_zone(page)->managed_pages++; atomic_long_inc(&page_zone(page)->managed_pages);
totalhigh_pages++; totalhigh_pages++;
} }
#endif #endif
...@@ -7257,7 +7257,7 @@ static void calculate_totalreserve_pages(void) ...@@ -7257,7 +7257,7 @@ static void calculate_totalreserve_pages(void)
for (i = 0; i < MAX_NR_ZONES; i++) { for (i = 0; i < MAX_NR_ZONES; i++) {
struct zone *zone = pgdat->node_zones + i; struct zone *zone = pgdat->node_zones + i;
long max = 0; long max = 0;
unsigned long managed_pages = zone->managed_pages; unsigned long managed_pages = zone_managed_pages(zone);
/* Find valid and maximum lowmem_reserve in the zone */ /* Find valid and maximum lowmem_reserve in the zone */
for (j = i; j < MAX_NR_ZONES; j++) { for (j = i; j < MAX_NR_ZONES; j++) {
...@@ -7293,7 +7293,7 @@ static void setup_per_zone_lowmem_reserve(void) ...@@ -7293,7 +7293,7 @@ static void setup_per_zone_lowmem_reserve(void)
for_each_online_pgdat(pgdat) { for_each_online_pgdat(pgdat) {
for (j = 0; j < MAX_NR_ZONES; j++) { for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j; struct zone *zone = pgdat->node_zones + j;
unsigned long managed_pages = zone->managed_pages; unsigned long managed_pages = zone_managed_pages(zone);
zone->lowmem_reserve[j] = 0; zone->lowmem_reserve[j] = 0;
...@@ -7311,7 +7311,7 @@ static void setup_per_zone_lowmem_reserve(void) ...@@ -7311,7 +7311,7 @@ static void setup_per_zone_lowmem_reserve(void)
lower_zone->lowmem_reserve[j] = lower_zone->lowmem_reserve[j] =
managed_pages / sysctl_lowmem_reserve_ratio[idx]; managed_pages / sysctl_lowmem_reserve_ratio[idx];
} }
managed_pages += lower_zone->managed_pages; managed_pages += zone_managed_pages(lower_zone);
} }
} }
} }
...@@ -7330,14 +7330,14 @@ static void __setup_per_zone_wmarks(void) ...@@ -7330,14 +7330,14 @@ static void __setup_per_zone_wmarks(void)
/* Calculate total number of !ZONE_HIGHMEM pages */ /* Calculate total number of !ZONE_HIGHMEM pages */
for_each_zone(zone) { for_each_zone(zone) {
if (!is_highmem(zone)) if (!is_highmem(zone))
lowmem_pages += zone->managed_pages; lowmem_pages += zone_managed_pages(zone);
} }
for_each_zone(zone) { for_each_zone(zone) {
u64 tmp; u64 tmp;
spin_lock_irqsave(&zone->lock, flags); spin_lock_irqsave(&zone->lock, flags);
tmp = (u64)pages_min * zone->managed_pages; tmp = (u64)pages_min * zone_managed_pages(zone);
do_div(tmp, lowmem_pages); do_div(tmp, lowmem_pages);
if (is_highmem(zone)) { if (is_highmem(zone)) {
/* /*
...@@ -7351,7 +7351,7 @@ static void __setup_per_zone_wmarks(void) ...@@ -7351,7 +7351,7 @@ static void __setup_per_zone_wmarks(void)
*/ */
unsigned long min_pages; unsigned long min_pages;
min_pages = zone->managed_pages / 1024; min_pages = zone_managed_pages(zone) / 1024;
min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL); min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
zone->watermark[WMARK_MIN] = min_pages; zone->watermark[WMARK_MIN] = min_pages;
} else { } else {
...@@ -7368,7 +7368,7 @@ static void __setup_per_zone_wmarks(void) ...@@ -7368,7 +7368,7 @@ static void __setup_per_zone_wmarks(void)
* ensure a minimum size on small systems. * ensure a minimum size on small systems.
*/ */
tmp = max_t(u64, tmp >> 2, tmp = max_t(u64, tmp >> 2,
mult_frac(zone->managed_pages, mult_frac(zone_managed_pages(zone),
watermark_scale_factor, 10000)); watermark_scale_factor, 10000));
zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp; zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp;
...@@ -7498,7 +7498,7 @@ static void setup_min_unmapped_ratio(void) ...@@ -7498,7 +7498,7 @@ static void setup_min_unmapped_ratio(void)
pgdat->min_unmapped_pages = 0; pgdat->min_unmapped_pages = 0;
for_each_zone(zone) for_each_zone(zone)
zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages * zone->zone_pgdat->min_unmapped_pages += (zone_managed_pages(zone) *
sysctl_min_unmapped_ratio) / 100; sysctl_min_unmapped_ratio) / 100;
} }
...@@ -7526,7 +7526,7 @@ static void setup_min_slab_ratio(void) ...@@ -7526,7 +7526,7 @@ static void setup_min_slab_ratio(void)
pgdat->min_slab_pages = 0; pgdat->min_slab_pages = 0;
for_each_zone(zone) for_each_zone(zone)
zone->zone_pgdat->min_slab_pages += (zone->managed_pages * zone->zone_pgdat->min_slab_pages += (zone_managed_pages(zone) *
sysctl_min_slab_ratio) / 100; sysctl_min_slab_ratio) / 100;
} }
......
...@@ -227,7 +227,7 @@ int calculate_normal_threshold(struct zone *zone) ...@@ -227,7 +227,7 @@ int calculate_normal_threshold(struct zone *zone)
* 125 1024 10 16-32 GB 9 * 125 1024 10 16-32 GB 9
*/ */
mem = zone->managed_pages >> (27 - PAGE_SHIFT); mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);
threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem)); threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
...@@ -1569,7 +1569,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, ...@@ -1569,7 +1569,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
high_wmark_pages(zone), high_wmark_pages(zone),
zone->spanned_pages, zone->spanned_pages,
zone->present_pages, zone->present_pages,
zone->managed_pages); zone_managed_pages(zone));
seq_printf(m, seq_printf(m,
"\n protection: (%ld", "\n protection: (%ld",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment