Commit 7a159cc9 authored by Johannes Weiner, committed by Linus Torvalds

memcg: use native word page statistics counters

The statistic counters are in units of pages; there is no reason to make
them 64-bit wide on 32-bit machines.

Make them native words.  Since they are signed, this leaves 31 bits on
32-bit machines, which can represent roughly 8TB assuming a page size of
4k.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Greg Thelen <gthelen@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent e9f8974f
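
The 8TB figure in the log follows directly from the counter width: a signed
native word on a 32-bit machine leaves 31 usable bits, and 2^31 pages of 4KiB
each are 2^43 bytes, i.e. 8TiB. A minimal C sketch of the same arithmetic (an
illustration, not part of the patch; it assumes the common 4KiB page size):

#include <stdio.h>

int main(void)
{
	/* 2^31 pages, each 2^12 bytes, assuming 4KiB pages */
	unsigned long long bytes = (1ULL << 31) * 4096;

	printf("%llu bytes = %llu TiB\n", bytes, bytes >> 40); /* 8 TiB */
	return 0;
}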
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -73,15 +73,6 @@ static int really_do_swap_account __initdata = 0;
 #define do_swap_account		(0)
 #endif
 
-/*
- * Per memcg event counter is incremented at every pagein/pageout. This counter
- * is used for trigger some periodic events. This is straightforward and better
- * than using jiffies etc. to handle periodic memcg event.
- *
- * These values will be used as !((event) & ((1 <<(thresh)) - 1))
- */
-#define THRESHOLDS_EVENTS_THRESH (7) /* once in 128 */
-#define SOFTLIMIT_EVENTS_THRESH (10) /* once in 1024 */
 
 /*
  * Statistics for memory cgroup.
@@ -105,10 +96,24 @@ enum mem_cgroup_events_index {
 	MEM_CGROUP_EVENTS_COUNT,	/* # of pages paged in/out */
 	MEM_CGROUP_EVENTS_NSTATS,
 };
 
+/*
+ * Per memcg event counter is incremented at every pagein/pageout. With THP,
+ * it will be incremented by the number of pages. This counter is used to
+ * trigger some periodic events. This is straightforward and better than
+ * using jiffies etc. to handle periodic memcg events.
+ */
+enum mem_cgroup_events_target {
+	MEM_CGROUP_TARGET_THRESH,
+	MEM_CGROUP_TARGET_SOFTLIMIT,
+	MEM_CGROUP_NTARGETS,
+};
+#define THRESHOLDS_EVENTS_TARGET (128)
+#define SOFTLIMIT_EVENTS_TARGET (1024)
+
 struct mem_cgroup_stat_cpu {
-	s64 count[MEM_CGROUP_STAT_NSTATS];
+	long count[MEM_CGROUP_STAT_NSTATS];
 	unsigned long events[MEM_CGROUP_EVENTS_NSTATS];
+	unsigned long targets[MEM_CGROUP_NTARGETS];
 };
 
 /*
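
The new comment explains why the old power-of-two mask check had to go: with
THP, a single charge advances the event counter by the number of base pages in
the huge page, so the counter can stride right past an exact multiple of 128
and the mask test never fires. A small standalone sketch of the difference (an
illustration only; the 512-page step assumes 2MB huge pages on 4KiB base
pages):

#include <stdio.h>

int main(void)
{
	unsigned long val = 1;    /* one 4KiB page already charged */
	unsigned long next = 128; /* first threshold target */
	int mask_hits = 0, target_hits = 0;
	int i;

	for (i = 0; i < 8; i++) {
		val += 512;                       /* one THP charge */
		if (!(val & ((1 << 7) - 1)))      /* old: exact multiple of 128 */
			mask_hits++;
		if ((long)next - (long)val < 0) { /* new: counter passed target */
			target_hits++;
			next = val + 128;         /* cf. __mem_cgroup_target_update() */
		}
	}
	printf("mask hits: %d, target hits: %d\n", mask_hits, target_hits);
	return 0;
}

This prints "mask hits: 0, target hits: 8": the mask scheme goes silent once
the step size exceeds its period, while the target scheme fires once per
crossing regardless of step size.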
@@ -546,11 +551,11 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
  * common workload, threshold and synchronization as vmstat[] should be
  * implemented.
  */
-static s64 mem_cgroup_read_stat(struct mem_cgroup *mem,
+static long mem_cgroup_read_stat(struct mem_cgroup *mem,
 		enum mem_cgroup_stat_index idx)
 {
+	long val = 0;
 	int cpu;
-	s64 val = 0;
 
 	get_online_cpus();
 	for_each_online_cpu(cpu)
@@ -564,9 +569,9 @@ static s64 mem_cgroup_read_stat(struct mem_cgroup *mem,
 	return val;
 }
 
-static s64 mem_cgroup_local_usage(struct mem_cgroup *mem)
+static long mem_cgroup_local_usage(struct mem_cgroup *mem)
 {
-	s64 ret;
+	long ret;
 
 	ret = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS);
 	ret += mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);
@@ -634,13 +639,34 @@ static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem,
 	return total;
 }
 
-static bool __memcg_event_check(struct mem_cgroup *mem, int event_mask_shift)
+static bool __memcg_event_check(struct mem_cgroup *mem, int target)
+{
+	unsigned long val, next;
+
+	val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]);
+	next = this_cpu_read(mem->stat->targets[target]);
+	/* from time_after() in jiffies.h */
+	return ((long)next - (long)val < 0);
+}
+
+static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target)
 {
-	unsigned long val;
+	unsigned long val, next;
 
 	val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]);
 
-	return !(val & ((1 << event_mask_shift) - 1));
+	switch (target) {
+	case MEM_CGROUP_TARGET_THRESH:
+		next = val + THRESHOLDS_EVENTS_TARGET;
+		break;
+	case MEM_CGROUP_TARGET_SOFTLIMIT:
+		next = val + SOFTLIMIT_EVENTS_TARGET;
+		break;
+	default:
+		return;
+	}
+
+	this_cpu_write(mem->stat->targets[target], next);
 }
 
 /*
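
The "/* from time_after() in jiffies.h */" line above is worth a note: doing
the comparison as a signed difference keeps __memcg_event_check() correct even
after the unsigned event counter wraps around, where a plain val > next would
give the wrong answer. A standalone illustration (not part of the patch):

#include <stdio.h>

int main(void)
{
	unsigned long next = (unsigned long)-100; /* just below wraparound */
	unsigned long val = next + 200;           /* wrapped past the target */

	printf("plain:  %d\n", val > next);                 /* 0: wrong */
	printf("signed: %d\n", (long)next - (long)val < 0); /* 1: right */
	return 0;
}

The idiom is the same one the kernel uses for jiffies comparisons and stays
correct as long as the two values are within LONG_MAX of each other.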
@@ -650,10 +676,15 @@ static bool __memcg_event_check(struct mem_cgroup *mem, int event_mask_shift)
 static void memcg_check_events(struct mem_cgroup *mem, struct page *page)
 {
 	/* threshold event is triggered in finer grain than soft limit */
-	if (unlikely(__memcg_event_check(mem, THRESHOLDS_EVENTS_THRESH))) {
+	if (unlikely(__memcg_event_check(mem, MEM_CGROUP_TARGET_THRESH))) {
 		mem_cgroup_threshold(mem);
-		if (unlikely(__memcg_event_check(mem, SOFTLIMIT_EVENTS_THRESH)))
-			mem_cgroup_update_tree(mem, page);
+		__mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH);
+		if (unlikely(__memcg_event_check(mem,
+						MEM_CGROUP_TARGET_SOFTLIMIT))) {
+			mem_cgroup_update_tree(mem, page);
+			__mem_cgroup_target_update(mem,
+						MEM_CGROUP_TARGET_SOFTLIMIT);
+		}
 	}
 }
@@ -1787,7 +1818,7 @@ static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *mem, int cpu)
 
 	spin_lock(&mem->pcp_counter_lock);
 	for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) {
-		s64 x = per_cpu(mem->stat->count[i], cpu);
+		long x = per_cpu(mem->stat->count[i], cpu);
 
 		per_cpu(mem->stat->count[i], cpu) = 0;
 		mem->nocpu_base.count[i] += x;
@@ -3499,13 +3530,13 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
 }
 
-static u64 mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
+static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *mem,
 		enum mem_cgroup_stat_index idx)
 {
 	struct mem_cgroup *iter;
-	s64 val = 0;
+	long val = 0;
 
-	/* each per cpu's value can be minus.Then, use s64 */
+	/* Per-cpu values can be negative, use a signed accumulator */
 	for_each_mem_cgroup_tree(iter, mem)
 		val += mem_cgroup_read_stat(iter, idx);
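
The rewritten comment is the reason the accumulator must stay signed even
though total usage is never negative: a page can be charged on one CPU and
uncharged on another, driving an individual per-CPU counter below zero while
the sum across CPUs remains correct. A toy illustration (not from the patch):

#include <stdio.h>

int main(void)
{
	long cpu[2] = { 0, 0 };

	cpu[0] += 1; /* page charged on CPU 0 */
	cpu[1] -= 1; /* same page uncharged on CPU 1 */

	printf("cpu0=%ld cpu1=%ld total=%ld\n",
	       cpu[0], cpu[1], cpu[0] + cpu[1]);
	return 0;
}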
@@ -3525,12 +3556,11 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
 			return res_counter_read_u64(&mem->memsw, RES_USAGE);
 	}
 
-	val = mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE);
-	val += mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS);
+	val = mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_CACHE);
+	val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_RSS);
 
 	if (swap)
-		val += mem_cgroup_get_recursive_idx_stat(mem,
-				MEM_CGROUP_STAT_SWAPOUT);
+		val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
 
 	return val << PAGE_SHIFT;
 }
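
The final val << PAGE_SHIFT converts the accumulated page count into bytes for
the user-visible usage value; with the common 4KiB page size PAGE_SHIFT is 12,
so the shift is a multiply by 4096:

	3 pages << 12  ==  3 * 4096  ==  12288 bytes

This is also why the u64 return type survives the patch: the byte value can
exceed 32 bits even when the page count itself fits in a native word.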