Commit f19298b9 authored by Mel Gorman, committed by Linus Torvalds

mm/vmstat: convert NUMA statistics to basic NUMA counters

NUMA statistics are maintained on the zone level for hits, misses, foreign
etc but nothing relies on them being perfectly accurate for functional
correctness.  The counters are used by userspace to get a general overview
of a workload's NUMA behaviour but the page allocator incurs a high cost to
maintain perfect accuracy similar to what is required for a vmstat like
NR_FREE_PAGES.  There even is a sysctl vm.numa_stat to allow userspace to
turn off the collection of NUMA statistics like NUMA_HIT.

This patch converts NUMA_HIT and friends to be NUMA events with similar
accuracy to VM events.  There is a possibility that slight errors will be
introduced but the overall trend as seen by userspace will be similar.
The counters are no longer updated from vmstat_refresh context as it is
unnecessary overhead for counters that may never be read by userspace.
Note that counters could be maintained at the node level to save space but
it would have a user-visible impact due to /proc/zoneinfo.

[lkp@intel.com: Fix misplaced closing brace for !CONFIG_NUMA]

Link: https://lkml.kernel.org/r/20210512095458.30632-4-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent dbbee9d5
...@@ -482,6 +482,7 @@ static DEVICE_ATTR(meminfo, 0444, node_read_meminfo, NULL); ...@@ -482,6 +482,7 @@ static DEVICE_ATTR(meminfo, 0444, node_read_meminfo, NULL);
static ssize_t node_read_numastat(struct device *dev, static ssize_t node_read_numastat(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
fold_vm_numa_events();
return sysfs_emit(buf, return sysfs_emit(buf,
"numa_hit %lu\n" "numa_hit %lu\n"
"numa_miss %lu\n" "numa_miss %lu\n"
...@@ -489,12 +490,12 @@ static ssize_t node_read_numastat(struct device *dev, ...@@ -489,12 +490,12 @@ static ssize_t node_read_numastat(struct device *dev,
"interleave_hit %lu\n" "interleave_hit %lu\n"
"local_node %lu\n" "local_node %lu\n"
"other_node %lu\n", "other_node %lu\n",
sum_zone_numa_state(dev->id, NUMA_HIT), sum_zone_numa_event_state(dev->id, NUMA_HIT),
sum_zone_numa_state(dev->id, NUMA_MISS), sum_zone_numa_event_state(dev->id, NUMA_MISS),
sum_zone_numa_state(dev->id, NUMA_FOREIGN), sum_zone_numa_event_state(dev->id, NUMA_FOREIGN),
sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT), sum_zone_numa_event_state(dev->id, NUMA_INTERLEAVE_HIT),
sum_zone_numa_state(dev->id, NUMA_LOCAL), sum_zone_numa_event_state(dev->id, NUMA_LOCAL),
sum_zone_numa_state(dev->id, NUMA_OTHER)); sum_zone_numa_event_state(dev->id, NUMA_OTHER));
} }
static DEVICE_ATTR(numastat, 0444, node_read_numastat, NULL); static DEVICE_ATTR(numastat, 0444, node_read_numastat, NULL);
...@@ -512,10 +513,11 @@ static ssize_t node_read_vmstat(struct device *dev, ...@@ -512,10 +513,11 @@ static ssize_t node_read_vmstat(struct device *dev,
sum_zone_node_page_state(nid, i)); sum_zone_node_page_state(nid, i));
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) fold_vm_numa_events();
for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
len += sysfs_emit_at(buf, len, "%s %lu\n", len += sysfs_emit_at(buf, len, "%s %lu\n",
numa_stat_name(i), numa_stat_name(i),
sum_zone_numa_state(nid, i)); sum_zone_numa_event_state(nid, i));
#endif #endif
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
......
...@@ -135,10 +135,10 @@ enum numa_stat_item { ...@@ -135,10 +135,10 @@ enum numa_stat_item {
NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */
NUMA_LOCAL, /* allocation from local node */ NUMA_LOCAL, /* allocation from local node */
NUMA_OTHER, /* allocation from other node */ NUMA_OTHER, /* allocation from other node */
NR_VM_NUMA_STAT_ITEMS NR_VM_NUMA_EVENT_ITEMS
}; };
#else #else
#define NR_VM_NUMA_STAT_ITEMS 0 #define NR_VM_NUMA_EVENT_ITEMS 0
#endif #endif
enum zone_stat_item { enum zone_stat_item {
...@@ -357,7 +357,12 @@ struct per_cpu_zonestat { ...@@ -357,7 +357,12 @@ struct per_cpu_zonestat {
s8 stat_threshold; s8 stat_threshold;
#endif #endif
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS]; /*
* Low priority inaccurate counters that are only folded
* on demand. Use a large type to avoid the overhead of
* folding during refresh_cpu_vm_stats.
*/
unsigned long vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
#endif #endif
}; };
...@@ -623,7 +628,7 @@ struct zone { ...@@ -623,7 +628,7 @@ struct zone {
ZONE_PADDING(_pad3_) ZONE_PADDING(_pad3_)
/* Zone statistics */ /* Zone statistics */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS]; atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
} ____cacheline_internodealigned_in_smp; } ____cacheline_internodealigned_in_smp;
enum pgdat_flags { enum pgdat_flags {
......
...@@ -138,34 +138,27 @@ static inline void vm_events_fold_cpu(int cpu) ...@@ -138,34 +138,27 @@ static inline void vm_events_fold_cpu(int cpu)
* Zone and node-based page accounting with per cpu differentials. * Zone and node-based page accounting with per cpu differentials.
*/ */
extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS]; extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS];
extern atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS]; extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS];
extern atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
static inline void zone_numa_state_add(long x, struct zone *zone, static inline void zone_numa_event_add(long x, struct zone *zone,
enum numa_stat_item item) enum numa_stat_item item)
{ {
atomic_long_add(x, &zone->vm_numa_stat[item]); atomic_long_add(x, &zone->vm_numa_event[item]);
atomic_long_add(x, &vm_numa_stat[item]); atomic_long_add(x, &vm_numa_event[item]);
} }
static inline unsigned long global_numa_state(enum numa_stat_item item) static inline unsigned long zone_numa_event_state(struct zone *zone,
enum numa_stat_item item)
{ {
long x = atomic_long_read(&vm_numa_stat[item]); return atomic_long_read(&zone->vm_numa_event[item]);
return x;
} }
static inline unsigned long zone_numa_state_snapshot(struct zone *zone, static inline unsigned long
enum numa_stat_item item) global_numa_event_state(enum numa_stat_item item)
{ {
long x = atomic_long_read(&zone->vm_numa_stat[item]); return atomic_long_read(&vm_numa_event[item]);
int cpu;
for_each_online_cpu(cpu)
x += per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_stat_diff[item];
return x;
} }
#endif /* CONFIG_NUMA */ #endif /* CONFIG_NUMA */
...@@ -245,18 +238,22 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, ...@@ -245,18 +238,22 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
} }
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item); extern void __count_numa_event(struct zone *zone, enum numa_stat_item item);
extern unsigned long sum_zone_node_page_state(int node, extern unsigned long sum_zone_node_page_state(int node,
enum zone_stat_item item); enum zone_stat_item item);
extern unsigned long sum_zone_numa_state(int node, enum numa_stat_item item); extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item);
extern unsigned long node_page_state(struct pglist_data *pgdat, extern unsigned long node_page_state(struct pglist_data *pgdat,
enum node_stat_item item); enum node_stat_item item);
extern unsigned long node_page_state_pages(struct pglist_data *pgdat, extern unsigned long node_page_state_pages(struct pglist_data *pgdat,
enum node_stat_item item); enum node_stat_item item);
extern void fold_vm_numa_events(void);
#else #else
#define sum_zone_node_page_state(node, item) global_zone_page_state(item) #define sum_zone_node_page_state(node, item) global_zone_page_state(item)
#define node_page_state(node, item) global_node_page_state(item) #define node_page_state(node, item) global_node_page_state(item)
#define node_page_state_pages(node, item) global_node_page_state_pages(item) #define node_page_state_pages(node, item) global_node_page_state_pages(item)
static inline void fold_vm_numa_events(void)
{
}
#endif /* CONFIG_NUMA */ #endif /* CONFIG_NUMA */
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
...@@ -428,7 +425,7 @@ static inline const char *numa_stat_name(enum numa_stat_item item) ...@@ -428,7 +425,7 @@ static inline const char *numa_stat_name(enum numa_stat_item item)
static inline const char *node_stat_name(enum node_stat_item item) static inline const char *node_stat_name(enum node_stat_item item)
{ {
return vmstat_text[NR_VM_ZONE_STAT_ITEMS + return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
NR_VM_NUMA_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS +
item]; item];
} }
...@@ -440,7 +437,7 @@ static inline const char *lru_list_name(enum lru_list lru) ...@@ -440,7 +437,7 @@ static inline const char *lru_list_name(enum lru_list lru)
static inline const char *writeback_stat_name(enum writeback_stat_item item) static inline const char *writeback_stat_name(enum writeback_stat_item item)
{ {
return vmstat_text[NR_VM_ZONE_STAT_ITEMS + return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
NR_VM_NUMA_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS +
NR_VM_NODE_STAT_ITEMS + NR_VM_NODE_STAT_ITEMS +
item]; item];
} }
...@@ -449,7 +446,7 @@ static inline const char *writeback_stat_name(enum writeback_stat_item item) ...@@ -449,7 +446,7 @@ static inline const char *writeback_stat_name(enum writeback_stat_item item)
static inline const char *vm_event_name(enum vm_event_item item) static inline const char *vm_event_name(enum vm_event_item item)
{ {
return vmstat_text[NR_VM_ZONE_STAT_ITEMS + return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
NR_VM_NUMA_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS +
NR_VM_NODE_STAT_ITEMS + NR_VM_NODE_STAT_ITEMS +
NR_VM_WRITEBACK_STAT_ITEMS + NR_VM_WRITEBACK_STAT_ITEMS +
item]; item];
......
...@@ -2150,7 +2150,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, ...@@ -2150,7 +2150,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
return page; return page;
if (page && page_to_nid(page) == nid) { if (page && page_to_nid(page) == nid) {
preempt_disable(); preempt_disable();
__inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT); __count_numa_event(page_zone(page), NUMA_INTERLEAVE_HIT);
preempt_enable(); preempt_enable();
} }
return page; return page;
......
...@@ -3480,12 +3480,12 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z) ...@@ -3480,12 +3480,12 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
local_stat = NUMA_OTHER; local_stat = NUMA_OTHER;
if (zone_to_nid(z) == zone_to_nid(preferred_zone)) if (zone_to_nid(z) == zone_to_nid(preferred_zone))
__inc_numa_state(z, NUMA_HIT); __count_numa_event(z, NUMA_HIT);
else { else {
__inc_numa_state(z, NUMA_MISS); __count_numa_event(z, NUMA_MISS);
__inc_numa_state(preferred_zone, NUMA_FOREIGN); __count_numa_event(preferred_zone, NUMA_FOREIGN);
} }
__inc_numa_state(z, local_stat); __count_numa_event(z, local_stat);
#endif #endif
} }
...@@ -6785,8 +6785,8 @@ void __init setup_per_cpu_pageset(void) ...@@ -6785,8 +6785,8 @@ void __init setup_per_cpu_pageset(void)
*/ */
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
struct per_cpu_zonestat *pzstats = &per_cpu(boot_zonestats, cpu); struct per_cpu_zonestat *pzstats = &per_cpu(boot_zonestats, cpu);
memset(pzstats->vm_numa_stat_diff, 0, memset(pzstats->vm_numa_event, 0,
sizeof(pzstats->vm_numa_stat_diff)); sizeof(pzstats->vm_numa_event));
} }
#endif #endif
......
...@@ -31,8 +31,6 @@ ...@@ -31,8 +31,6 @@
#include "internal.h" #include "internal.h"
#define NUMA_STATS_THRESHOLD (U16_MAX - 2)
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
int sysctl_vm_numa_stat = ENABLE_NUMA_STAT; int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
...@@ -41,11 +39,12 @@ static void zero_zone_numa_counters(struct zone *zone) ...@@ -41,11 +39,12 @@ static void zero_zone_numa_counters(struct zone *zone)
{ {
int item, cpu; int item, cpu;
for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) { for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) {
atomic_long_set(&zone->vm_numa_stat[item], 0); atomic_long_set(&zone->vm_numa_event[item], 0);
for_each_online_cpu(cpu) for_each_online_cpu(cpu) {
per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_stat_diff[item] per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item]
= 0; = 0;
}
} }
} }
...@@ -63,8 +62,8 @@ static void zero_global_numa_counters(void) ...@@ -63,8 +62,8 @@ static void zero_global_numa_counters(void)
{ {
int item; int item;
for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
atomic_long_set(&vm_numa_stat[item], 0); atomic_long_set(&vm_numa_event[item], 0);
} }
static void invalid_numa_statistics(void) static void invalid_numa_statistics(void)
...@@ -161,10 +160,9 @@ void vm_events_fold_cpu(int cpu) ...@@ -161,10 +160,9 @@ void vm_events_fold_cpu(int cpu)
* vm_stat contains the global counters * vm_stat contains the global counters
*/ */
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp; atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp; atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_zone_stat); EXPORT_SYMBOL(vm_zone_stat);
EXPORT_SYMBOL(vm_numa_stat);
EXPORT_SYMBOL(vm_node_stat); EXPORT_SYMBOL(vm_node_stat);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
...@@ -706,8 +704,7 @@ EXPORT_SYMBOL(dec_node_page_state); ...@@ -706,8 +704,7 @@ EXPORT_SYMBOL(dec_node_page_state);
* Fold a differential into the global counters. * Fold a differential into the global counters.
* Returns the number of counters updated. * Returns the number of counters updated.
*/ */
#ifdef CONFIG_NUMA static int fold_diff(int *zone_diff, int *node_diff)
static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
{ {
int i; int i;
int changes = 0; int changes = 0;
...@@ -718,12 +715,6 @@ static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff) ...@@ -718,12 +715,6 @@ static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
changes++; changes++;
} }
for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
if (numa_diff[i]) {
atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
changes++;
}
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
if (node_diff[i]) { if (node_diff[i]) {
atomic_long_add(node_diff[i], &vm_node_stat[i]); atomic_long_add(node_diff[i], &vm_node_stat[i]);
...@@ -731,26 +722,34 @@ static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff) ...@@ -731,26 +722,34 @@ static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
} }
return changes; return changes;
} }
#else
static int fold_diff(int *zone_diff, int *node_diff) #ifdef CONFIG_NUMA
static void fold_vm_zone_numa_events(struct zone *zone)
{ {
int i; unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, };
int changes = 0; int cpu;
enum numa_stat_item item;
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) for_each_online_cpu(cpu) {
if (zone_diff[i]) { struct per_cpu_zonestat *pzstats;
atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
changes++;
}
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
if (node_diff[i]) { for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
atomic_long_add(node_diff[i], &vm_node_stat[i]); zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0);
changes++;
} }
return changes;
for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
zone_numa_event_add(zone_numa_events[item], zone, item);
} }
#endif /* CONFIG_NUMA */
void fold_vm_numa_events(void)
{
struct zone *zone;
for_each_populated_zone(zone)
fold_vm_zone_numa_events(zone);
}
#endif
/* /*
* Update the zone counters for the current cpu. * Update the zone counters for the current cpu.
...@@ -774,9 +773,6 @@ static int refresh_cpu_vm_stats(bool do_pagesets) ...@@ -774,9 +773,6 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
struct zone *zone; struct zone *zone;
int i; int i;
int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
#ifdef CONFIG_NUMA
int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
#endif
int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
int changes = 0; int changes = 0;
...@@ -801,17 +797,6 @@ static int refresh_cpu_vm_stats(bool do_pagesets) ...@@ -801,17 +797,6 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
} }
} }
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
int v;
v = this_cpu_xchg(pzstats->vm_numa_stat_diff[i], 0);
if (v) {
atomic_long_add(v, &zone->vm_numa_stat[i]);
global_numa_diff[i] += v;
__this_cpu_write(pcp->expire, 3);
}
}
if (do_pagesets) { if (do_pagesets) {
cond_resched(); cond_resched();
...@@ -859,12 +844,7 @@ static int refresh_cpu_vm_stats(bool do_pagesets) ...@@ -859,12 +844,7 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
} }
} }
#ifdef CONFIG_NUMA
changes += fold_diff(global_zone_diff, global_numa_diff,
global_node_diff);
#else
changes += fold_diff(global_zone_diff, global_node_diff); changes += fold_diff(global_zone_diff, global_node_diff);
#endif
return changes; return changes;
} }
...@@ -879,9 +859,6 @@ void cpu_vm_stats_fold(int cpu) ...@@ -879,9 +859,6 @@ void cpu_vm_stats_fold(int cpu)
struct zone *zone; struct zone *zone;
int i; int i;
int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
#ifdef CONFIG_NUMA
int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
#endif
int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
for_each_populated_zone(zone) { for_each_populated_zone(zone) {
...@@ -889,7 +866,7 @@ void cpu_vm_stats_fold(int cpu) ...@@ -889,7 +866,7 @@ void cpu_vm_stats_fold(int cpu)
pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
if (pzstats->vm_stat_diff[i]) { if (pzstats->vm_stat_diff[i]) {
int v; int v;
...@@ -898,17 +875,17 @@ void cpu_vm_stats_fold(int cpu) ...@@ -898,17 +875,17 @@ void cpu_vm_stats_fold(int cpu)
atomic_long_add(v, &zone->vm_stat[i]); atomic_long_add(v, &zone->vm_stat[i]);
global_zone_diff[i] += v; global_zone_diff[i] += v;
} }
}
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
if (pzstats->vm_numa_stat_diff[i]) { if (pzstats->vm_numa_event[i]) {
int v; unsigned long v;
v = pzstats->vm_numa_stat_diff[i]; v = pzstats->vm_numa_event[i];
pzstats->vm_numa_stat_diff[i] = 0; pzstats->vm_numa_event[i] = 0;
atomic_long_add(v, &zone->vm_numa_stat[i]); zone_numa_event_add(v, zone, i);
global_numa_diff[i] += v;
} }
}
#endif #endif
} }
...@@ -928,11 +905,7 @@ void cpu_vm_stats_fold(int cpu) ...@@ -928,11 +905,7 @@ void cpu_vm_stats_fold(int cpu)
} }
} }
#ifdef CONFIG_NUMA
fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
#else
fold_diff(global_zone_diff, global_node_diff); fold_diff(global_zone_diff, global_node_diff);
#endif
} }
/* /*
...@@ -941,43 +914,37 @@ void cpu_vm_stats_fold(int cpu) ...@@ -941,43 +914,37 @@ void cpu_vm_stats_fold(int cpu)
*/ */
void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
{ {
unsigned long v;
int i; int i;
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
if (pzstats->vm_stat_diff[i]) { if (pzstats->vm_stat_diff[i]) {
int v = pzstats->vm_stat_diff[i]; v = pzstats->vm_stat_diff[i];
pzstats->vm_stat_diff[i] = 0; pzstats->vm_stat_diff[i] = 0;
atomic_long_add(v, &zone->vm_stat[i]); zone_page_state_add(v, zone, i);
atomic_long_add(v, &vm_zone_stat[i]);
} }
}
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
if (pzstats->vm_numa_stat_diff[i]) { if (pzstats->vm_numa_event[i]) {
int v = pzstats->vm_numa_stat_diff[i]; v = pzstats->vm_numa_event[i];
pzstats->vm_numa_event[i] = 0;
pzstats->vm_numa_stat_diff[i] = 0; zone_numa_event_add(v, zone, i);
atomic_long_add(v, &zone->vm_numa_stat[i]);
atomic_long_add(v, &vm_numa_stat[i]);
} }
}
#endif #endif
} }
#endif #endif
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
void __inc_numa_state(struct zone *zone, /* See __count_vm_event comment on why raw_cpu_inc is used. */
void __count_numa_event(struct zone *zone,
enum numa_stat_item item) enum numa_stat_item item)
{ {
struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
u16 __percpu *p = pzstats->vm_numa_stat_diff + item;
u16 v;
v = __this_cpu_inc_return(*p);
if (unlikely(v > NUMA_STATS_THRESHOLD)) { raw_cpu_inc(pzstats->vm_numa_event[item]);
zone_numa_state_add(v, zone, item);
__this_cpu_write(*p, 0);
}
} }
/* /*
...@@ -998,19 +965,16 @@ unsigned long sum_zone_node_page_state(int node, ...@@ -998,19 +965,16 @@ unsigned long sum_zone_node_page_state(int node,
return count; return count;
} }
/* /* Determine the per node value of a numa stat item. */
* Determine the per node value of a numa stat item. To avoid deviation, unsigned long sum_zone_numa_event_state(int node,
* the per cpu stat number in vm_numa_stat_diff[] is also included.
*/
unsigned long sum_zone_numa_state(int node,
enum numa_stat_item item) enum numa_stat_item item)
{ {
struct zone *zones = NODE_DATA(node)->node_zones; struct zone *zones = NODE_DATA(node)->node_zones;
int i;
unsigned long count = 0; unsigned long count = 0;
int i;
for (i = 0; i < MAX_NR_ZONES; i++) for (i = 0; i < MAX_NR_ZONES; i++)
count += zone_numa_state_snapshot(zones + i, item); count += zone_numa_event_state(zones + i, item);
return count; return count;
} }
...@@ -1689,9 +1653,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, ...@@ -1689,9 +1653,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
zone_page_state(zone, i)); zone_page_state(zone, i));
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
seq_printf(m, "\n %-12s %lu", numa_stat_name(i), seq_printf(m, "\n %-12s %lu", numa_stat_name(i),
zone_numa_state_snapshot(zone, i)); zone_numa_event_state(zone, i));
#endif #endif
seq_printf(m, "\n pagesets"); seq_printf(m, "\n pagesets");
...@@ -1745,7 +1709,7 @@ static const struct seq_operations zoneinfo_op = { ...@@ -1745,7 +1709,7 @@ static const struct seq_operations zoneinfo_op = {
}; };
#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \ #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
NR_VM_NUMA_STAT_ITEMS + \ NR_VM_NUMA_EVENT_ITEMS + \
NR_VM_NODE_STAT_ITEMS + \ NR_VM_NODE_STAT_ITEMS + \
NR_VM_WRITEBACK_STAT_ITEMS + \ NR_VM_WRITEBACK_STAT_ITEMS + \
(IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \ (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
...@@ -1760,6 +1724,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) ...@@ -1760,6 +1724,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
return NULL; return NULL;
BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS); BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
fold_vm_numa_events();
v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL); v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
m->private = v; m->private = v;
if (!v) if (!v)
...@@ -1769,9 +1734,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) ...@@ -1769,9 +1734,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
v += NR_VM_ZONE_STAT_ITEMS; v += NR_VM_ZONE_STAT_ITEMS;
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
v[i] = global_numa_state(i); v[i] = global_numa_event_state(i);
v += NR_VM_NUMA_STAT_ITEMS; v += NR_VM_NUMA_EVENT_ITEMS;
#endif #endif
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
...@@ -1941,11 +1906,7 @@ static bool need_update(int cpu) ...@@ -1941,11 +1906,7 @@ static bool need_update(int cpu)
if (memchr_inv(pzstats->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS * if (memchr_inv(pzstats->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
sizeof(pzstats->vm_stat_diff[0]))) sizeof(pzstats->vm_stat_diff[0])))
return true; return true;
#ifdef CONFIG_NUMA
if (memchr_inv(pzstats->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
sizeof(pzstats->vm_numa_stat_diff[0])))
return true;
#endif
if (last_pgdat == zone->zone_pgdat) if (last_pgdat == zone->zone_pgdat)
continue; continue;
last_pgdat = zone->zone_pgdat; last_pgdat = zone->zone_pgdat;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment