Commit 410f8e82 authored by Shakeel Butt's avatar Shakeel Butt Committed by Andrew Morton

memcg: extract memcg_vmstats from struct mem_cgroup

Patch series "memcg: reduce memory overhead of memory cgroups".

Currently a lot of memory is wasted to maintain the vmevents for memory
cgroups as we have multiple arrays of size NR_VM_EVENT_ITEMS which can be
as large as 110.  However memcg code uses small portion of those entries. 
This patch series eliminate this overhead by removing the unneeded vmevent
entries from memory cgroup data structures.


This patch (of 3):

This is a preparatory patch to reduce the memory overhead of memory
cgroup. The struct memcg_vmstats is the largest object embedded into the
struct mem_cgroup. This patch extracts struct memcg_vmstats from struct
mem_cgroup to ease the following patches in reducing the size of struct
memcg_vmstats.

Link: https://lkml.kernel.org/r/20220907043537.3457014-1-shakeelb@google.com
Link: https://lkml.kernel.org/r/20220907043537.3457014-2-shakeelb@google.comSigned-off-by: default avatarShakeel Butt <shakeelb@google.com>
Acked-by: default avatarRoman Gushchin <roman.gushchin@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent fc5dfebc
......@@ -80,29 +80,8 @@ enum mem_cgroup_events_target {
MEM_CGROUP_NTARGETS,
};
struct memcg_vmstats_percpu {
/* Local (CPU and cgroup) page state & events */
long state[MEMCG_NR_STAT];
unsigned long events[NR_VM_EVENT_ITEMS];
/* Delta calculation for lockless upward propagation */
long state_prev[MEMCG_NR_STAT];
unsigned long events_prev[NR_VM_EVENT_ITEMS];
/* Cgroup1: threshold notifications & softlimit tree updates */
unsigned long nr_page_events;
unsigned long targets[MEM_CGROUP_NTARGETS];
};
struct memcg_vmstats {
/* Aggregated (CPU and subtree) page state & events */
long state[MEMCG_NR_STAT];
unsigned long events[NR_VM_EVENT_ITEMS];
/* Pending child counts during tree propagation */
long state_pending[MEMCG_NR_STAT];
unsigned long events_pending[NR_VM_EVENT_ITEMS];
};
struct memcg_vmstats_percpu;
struct memcg_vmstats;
struct mem_cgroup_reclaim_iter {
struct mem_cgroup *position;
......@@ -298,7 +277,7 @@ struct mem_cgroup {
CACHELINE_PADDING(_pad1_);
/* memory.stat */
struct memcg_vmstats vmstats;
struct memcg_vmstats *vmstats;
/* memory.events */
atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
......@@ -1001,15 +980,7 @@ static inline void mod_memcg_page_state(struct page *page,
rcu_read_unlock();
}
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
{
long x = READ_ONCE(memcg->vmstats.state[idx]);
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
#endif
return x;
}
unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx);
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum node_stat_item idx)
......
......@@ -669,6 +669,40 @@ static void flush_memcg_stats_dwork(struct work_struct *w)
queue_delayed_work(system_unbound_wq, &stats_flush_dwork, FLUSH_TIME);
}
struct memcg_vmstats_percpu {
/* Local (CPU and cgroup) page state & events */
long state[MEMCG_NR_STAT];
unsigned long events[NR_VM_EVENT_ITEMS];
/* Delta calculation for lockless upward propagation */
long state_prev[MEMCG_NR_STAT];
unsigned long events_prev[NR_VM_EVENT_ITEMS];
/* Cgroup1: threshold notifications & softlimit tree updates */
unsigned long nr_page_events;
unsigned long targets[MEM_CGROUP_NTARGETS];
};
struct memcg_vmstats {
/* Aggregated (CPU and subtree) page state & events */
long state[MEMCG_NR_STAT];
unsigned long events[NR_VM_EVENT_ITEMS];
/* Pending child counts during tree propagation */
long state_pending[MEMCG_NR_STAT];
unsigned long events_pending[NR_VM_EVENT_ITEMS];
};
unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
{
long x = READ_ONCE(memcg->vmstats->state[idx]);
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
#endif
return x;
}
/**
* __mod_memcg_state - update cgroup memory statistics
* @memcg: the memory cgroup
......@@ -827,7 +861,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
{
return READ_ONCE(memcg->vmstats.events[event]);
return READ_ONCE(memcg->vmstats->events[event]);
}
static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
......@@ -5170,6 +5204,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
kfree(memcg->vmstats);
free_percpu(memcg->vmstats_percpu);
kfree(memcg);
}
......@@ -5199,6 +5234,10 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
goto fail;
}
memcg->vmstats = kzalloc(sizeof(struct memcg_vmstats), GFP_KERNEL);
if (!memcg->vmstats)
goto fail;
memcg->vmstats_percpu = alloc_percpu_gfp(struct memcg_vmstats_percpu,
GFP_KERNEL_ACCOUNT);
if (!memcg->vmstats_percpu)
......@@ -5418,9 +5457,9 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
* below us. We're in a per-cpu loop here and this is
* a global counter, so the first cycle will get them.
*/
delta = memcg->vmstats.state_pending[i];
delta = memcg->vmstats->state_pending[i];
if (delta)
memcg->vmstats.state_pending[i] = 0;
memcg->vmstats->state_pending[i] = 0;
/* Add CPU changes on this level since the last flush */
v = READ_ONCE(statc->state[i]);
......@@ -5433,15 +5472,15 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
continue;
/* Aggregate counts on this level and propagate upwards */
memcg->vmstats.state[i] += delta;
memcg->vmstats->state[i] += delta;
if (parent)
parent->vmstats.state_pending[i] += delta;
parent->vmstats->state_pending[i] += delta;
}
for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
delta = memcg->vmstats.events_pending[i];
delta = memcg->vmstats->events_pending[i];
if (delta)
memcg->vmstats.events_pending[i] = 0;
memcg->vmstats->events_pending[i] = 0;
v = READ_ONCE(statc->events[i]);
if (v != statc->events_prev[i]) {
......@@ -5452,9 +5491,9 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
if (!delta)
continue;
memcg->vmstats.events[i] += delta;
memcg->vmstats->events[i] += delta;
if (parent)
parent->vmstats.events_pending[i] += delta;
parent->vmstats->events_pending[i] += delta;
}
for_each_node_state(nid, N_MEMORY) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment