Commit 836809ec authored by David Hildenbrand, committed by Linus Torvalds

mm/memory_hotplug: track present pages in memory groups

Let's track all present pages in each memory group.  In particular, track
memory present in ZONE_MOVABLE and memory present in one of the kernel
zones (which really is only ZONE_NORMAL right now, as memory groups only
apply to hotplugged memory) separately within a memory group, to prepare
for making smart auto-online decisions for individual memory blocks within
a memory group based on group statistics.
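
In other words, every page that transitions to or from "present" is
attributed to exactly one of two new per-group counters, keyed off the
zone it lives in.  A minimal sketch of the accounting rule (condensed
from adjust_present_page_count() in the diff below; the helper name is
illustrative, not part of the patch):

	static void memory_group_account(struct memory_group *group,
					 struct zone *zone, long nr_pages)
	{
		/* nr_pages is negative when offlining. */
		if (!group)
			return;	/* memory that belongs to no group */
		if (zone_idx(zone) == ZONE_MOVABLE)
			group->present_movable_pages += nr_pages;
		else
			group->present_kernel_pages += nr_pages;
	}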

Link: https://lkml.kernel.org/r/20210806124715.17090-5-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hui Zhu <teawater@gmail.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Marek Kedzierski <mkedzier@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 028fc57a
drivers/base/memory.c

@@ -198,7 +198,7 @@ static int memory_block_online(struct memory_block *mem)
 	}
 
 	ret = online_pages(start_pfn + nr_vmemmap_pages,
-			   nr_pages - nr_vmemmap_pages, zone);
+			   nr_pages - nr_vmemmap_pages, zone, mem->group);
 	if (ret) {
 		if (nr_vmemmap_pages)
 			mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
@@ -210,7 +210,7 @@ static int memory_block_online(struct memory_block *mem)
 	 * now already properly populated.
 	 */
 	if (nr_vmemmap_pages)
-		adjust_present_page_count(pfn_to_page(start_pfn),
+		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
 					  nr_vmemmap_pages);
 
 	return ret;
@@ -228,16 +228,16 @@ static int memory_block_offline(struct memory_block *mem)
 	 * can properly be torn down in offline_pages().
 	 */
 	if (nr_vmemmap_pages)
-		adjust_present_page_count(pfn_to_page(start_pfn),
+		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
 					  -nr_vmemmap_pages);
 
 	ret = offline_pages(start_pfn + nr_vmemmap_pages,
-			    nr_pages - nr_vmemmap_pages);
+			    nr_pages - nr_vmemmap_pages, mem->group);
 	if (ret) {
 		/* offline_pages() failed. Account back. */
 		if (nr_vmemmap_pages)
 			adjust_present_page_count(pfn_to_page(start_pfn),
-						  nr_vmemmap_pages);
+						  mem->group, nr_vmemmap_pages);
 		return ret;
 	}
...
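Note how both accounting paths of a memory block are now attributed to the
same group: the vmemmap pages (which never pass through
online_pages()/offline_pages()) are charged via adjust_present_page_count()
directly, while the remaining pages are charged inside
online_pages()/offline_pages().  The online path above, condensed
(illustrative only; error handling trimmed):

	ret = online_pages(start_pfn + nr_vmemmap_pages,
			   nr_pages - nr_vmemmap_pages, zone, mem->group);
	if (!ret && nr_vmemmap_pages)
		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
					  nr_vmemmap_pages);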
include/linux/memory.h

@@ -27,6 +27,10 @@
  * struct memory_group - a logical group of memory blocks
  * @nid: The node id for all memory blocks inside the memory group.
  * @blocks: List of all memory blocks belonging to this memory group.
+ * @present_kernel_pages: Present (online) memory outside ZONE_MOVABLE of this
+ *			  memory group.
+ * @present_movable_pages: Present (online) memory in ZONE_MOVABLE of this
+ *			   memory group.
  * @is_dynamic: The memory group type: static vs. dynamic
  * @s.max_pages: Valid with &memory_group.is_dynamic == false. The maximum
  *		 number of pages we'll have in this static memory group.
@@ -48,6 +52,8 @@
 struct memory_group {
 	int nid;
 	struct list_head memory_blocks;
+	unsigned long present_kernel_pages;
+	unsigned long present_movable_pages;
 	bool is_dynamic;
 	union {
 		struct {
...
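Since both counters are updated on every online/offline transition, a
group's total present memory is simply their sum.  A hypothetical consumer
(this helper is not part of the patch):

	static unsigned long memory_group_present_pages(struct memory_group *group)
	{
		/* Invariant: kernel + movable = all present (online)
		 * pages of this memory group. */
		return group->present_kernel_pages +
		       group->present_movable_pages;
	}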
include/linux/memory_hotplug.h

@@ -12,6 +12,7 @@ struct zone;
 struct pglist_data;
 struct mem_section;
 struct memory_block;
+struct memory_group;
 struct resource;
 struct vmem_altmap;
 
@@ -100,13 +101,15 @@ static inline void zone_seqlock_init(struct zone *zone)
 extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
 extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
 extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
-extern void adjust_present_page_count(struct page *page, long nr_pages);
+extern void adjust_present_page_count(struct page *page,
+				      struct memory_group *group,
+				      long nr_pages);
 /* VM interface that may be used by firmware interface */
 extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
 				     struct zone *zone);
 extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
 extern int online_pages(unsigned long pfn, unsigned long nr_pages,
-			struct zone *zone);
+			struct zone *zone, struct memory_group *group);
 extern struct zone *test_pages_in_a_zone(unsigned long start_pfn,
 					 unsigned long end_pfn);
 extern void __offline_isolated_pages(unsigned long start_pfn,
@@ -296,7 +299,8 @@ static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 extern void try_offline_node(int nid);
-extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
+extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+			 struct memory_group *group);
 extern int remove_memory(u64 start, u64 size);
 extern void __remove_memory(u64 start, u64 size);
 extern int offline_and_remove_memory(u64 start, u64 size);
@@ -304,7 +308,8 @@ extern int offline_and_remove_memory(u64 start, u64 size);
 #else
 static inline void try_offline_node(int nid) {}
 
-static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
+static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+				struct memory_group *group)
 {
 	return -EINVAL;
 }
...
mm/memory_hotplug.c

@@ -915,9 +915,11 @@ struct zone *zone_for_pfn_range(int online_type, int nid,
  * This function should only be called by memory_block_{online,offline},
  * and {online,offline}_pages.
  */
-void adjust_present_page_count(struct page *page, long nr_pages)
+void adjust_present_page_count(struct page *page, struct memory_group *group,
+			       long nr_pages)
 {
 	struct zone *zone = page_zone(page);
+	const bool movable = zone_idx(zone) == ZONE_MOVABLE;
 
 	/*
 	 * We only support onlining/offlining/adding/removing of complete
@@ -927,6 +929,11 @@ void adjust_present_page_count(struct page *page, long nr_pages)
 		zone->present_early_pages += nr_pages;
 	zone->present_pages += nr_pages;
 	zone->zone_pgdat->node_present_pages += nr_pages;
+
+	if (group && movable)
+		group->present_movable_pages += nr_pages;
+	else if (group && !movable)
+		group->present_kernel_pages += nr_pages;
 }
 
 int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
@@ -972,7 +979,8 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
 	kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
 }
 
-int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone)
+int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
+		       struct zone *zone, struct memory_group *group)
 {
 	unsigned long flags;
 	int need_zonelists_rebuild = 0;
@@ -1025,7 +1033,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *z
 	}
 
 	online_pages_range(pfn, nr_pages);
-	adjust_present_page_count(pfn_to_page(pfn), nr_pages);
+	adjust_present_page_count(pfn_to_page(pfn), group, nr_pages);
 
 	node_states_set_node(nid, &arg);
 	if (need_zonelists_rebuild)
@@ -1769,7 +1777,8 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
 	return 0;
 }
 
-int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
+int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+			struct memory_group *group)
 {
 	const unsigned long end_pfn = start_pfn + nr_pages;
 	unsigned long pfn, system_ram_pages = 0;
@@ -1905,7 +1914,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 
 	/* removal success */
 	adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
-	adjust_present_page_count(pfn_to_page(start_pfn), -nr_pages);
+	adjust_present_page_count(pfn_to_page(start_pfn), group, -nr_pages);
 
 	/* reinitialise watermarks and update pcp limits */
 	init_per_zone_wmark_min();
...
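To illustrate where this is heading: with per-group statistics in place,
auto-onlining can decide per memory block whether ZONE_MOVABLE is still
safe for the group.  A hedged sketch of such a policy, not part of this
patch; the function name and the ratio check are purely illustrative:

	static bool group_can_online_movable(struct memory_group *group,
					     unsigned long nr_pages,
					     unsigned long max_ratio)
	{
		unsigned long kernel = group->present_kernel_pages;
		unsigned long movable = group->present_movable_pages + nr_pages;

		/* Require kernel-zone memory in the group first, then cap
		 * movable memory at max_ratio times the kernel memory. */
		return kernel && movable <= kernel * max_ratio;
	}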