Commit b958d4d0, authored by Muchun Song, committed by Andrew Morton

mm: hugetlb: simplify per-node sysfs creation and removal

Patch series "simplify handling of per-node sysfs creation and removal",
v4.


This patch (of 2):

The following commit offloaded per-node sysfs creation and removal to a
kworker but did not say why that is needed.  It also said "I don't know
that this is absolutely required", so it seems the author was not sure
either.  Since it only complicates the code, this patch reverts those
changes to simplify the code.

  39da08cb ("hugetlb: offload per node attribute registrations")

We could use a memory hotplug notifier to do per-node sysfs creation and
removal instead of inserting those operations into node registration and
unregistration.  That reduces the code coupling between node.c and
hugetlb.c, and it also simplifies the code.

Link: https://lkml.kernel.org/r/20220914072603.60293-1-songmuchun@bytedance.com
Link: https://lkml.kernel.org/r/20220914072603.60293-2-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent aaa31e05
...@@ -587,64 +587,9 @@ static const struct attribute_group *node_dev_groups[] = { ...@@ -587,64 +587,9 @@ static const struct attribute_group *node_dev_groups[] = {
NULL NULL
}; };
#ifdef CONFIG_HUGETLBFS
/*
* hugetlbfs per node attributes registration interface:
* When/if hugetlb[fs] subsystem initializes [sometime after this module],
* it will register its per node attributes for all online nodes with
* memory. It will also call register_hugetlbfs_with_node(), below, to
* register its attribute registration functions with this node driver.
* Once these hooks have been initialized, the node driver will call into
* the hugetlb module to [un]register attributes for hot-plugged nodes.
*/
/*
 * Hook pointers filled in by register_hugetlbfs_with_node().  As file-scope
 * statics they start out NULL, which the wrappers below treat as "no hook".
 */
static node_registration_func_t __hugetlb_register_node;
static node_registration_func_t __hugetlb_unregister_node;
/*
 * Invoke the installed hugetlb "register" hook for @node.
 *
 * The hook is called only when one has been installed and the node is in
 * the N_MEMORY state.
 *
 * Returns true if the hook was called, false otherwise (no hook installed,
 * or the node is not in N_MEMORY).
 */
static inline bool hugetlb_register_node(struct node *node)
{
if (__hugetlb_register_node &&
node_state(node->dev.id, N_MEMORY)) {
__hugetlb_register_node(node);
return true;
}
return false;
}
/*
 * Invoke the installed hugetlb "unregister" hook for @node, if there is one.
 * Unlike the register wrapper, no N_MEMORY check is made here.
 */
static inline void hugetlb_unregister_node(struct node *node)
{
if (__hugetlb_unregister_node)
__hugetlb_unregister_node(node);
}
/*
 * Record the hugetlb callbacks that hugetlb_register_node() and
 * hugetlb_unregister_node() will call.
 * @doregister: stored as the register hook
 * @unregister: stored as the unregister hook
 *
 * The pointers are stored as given; no NULL check or locking is done here.
 */
void register_hugetlbfs_with_node(node_registration_func_t doregister,
node_registration_func_t unregister)
{
__hugetlb_register_node = doregister;
__hugetlb_unregister_node = unregister;
}
#else
/*
 * !CONFIG_HUGETLBFS: no-op stubs.
 * NOTE(review): the register stub returns void, while the CONFIG_HUGETLBFS
 * variant returns bool; callers that test the return value must therefore be
 * built only when CONFIG_HUGETLBFS is set.
 */
static inline void hugetlb_register_node(struct node *node) {}
static inline void hugetlb_unregister_node(struct node *node) {}
#endif
static void node_device_release(struct device *dev) static void node_device_release(struct device *dev)
{ {
struct node *node = to_node(dev); kfree(to_node(dev));
#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS)
/*
* We schedule the work only when a memory section is
* onlined/offlined on this node. When we come here,
* all the memory on this node has been offlined,
* so we won't enqueue new work to this work.
*
* The work is using node->node_work, so we should
* flush work before freeing the memory.
*/
flush_work(&node->node_work);
#endif
kfree(node);
} }
/* /*
...@@ -665,11 +610,9 @@ static int register_node(struct node *node, int num) ...@@ -665,11 +610,9 @@ static int register_node(struct node *node, int num)
if (error) if (error)
put_device(&node->dev); put_device(&node->dev);
else { else
hugetlb_register_node(node);
compaction_register_node(node); compaction_register_node(node);
}
return error; return error;
} }
...@@ -683,7 +626,6 @@ static int register_node(struct node *node, int num) ...@@ -683,7 +626,6 @@ static int register_node(struct node *node, int num)
void unregister_node(struct node *node) void unregister_node(struct node *node)
{ {
compaction_unregister_node(node); compaction_unregister_node(node);
hugetlb_unregister_node(node); /* no-op, if memoryless node */
node_remove_accesses(node); node_remove_accesses(node);
node_remove_caches(node); node_remove_caches(node);
device_unregister(&node->dev); device_unregister(&node->dev);
...@@ -905,74 +847,8 @@ void register_memory_blocks_under_node(int nid, unsigned long start_pfn, ...@@ -905,74 +847,8 @@ void register_memory_blocks_under_node(int nid, unsigned long start_pfn,
(void *)&nid, func); (void *)&nid, func);
return; return;
} }
#ifdef CONFIG_HUGETLBFS
/*
 * Handle per node hstate attribute [un]registration on transitions
 * to/from memoryless state.
 *
 * @work: the node_work member embedded in a struct node; the owning node
 *        is recovered with container_of().
 */
static void node_hugetlb_work(struct work_struct *work)
{
struct node *node = container_of(work, struct node, node_work);
/*
 * We only get here when a node transitions to/from memoryless state.
 * We can detect which transition occurred by examining whether the
 * node has memory now. hugetlb_register_node() already checks this,
 * so we try to register the attributes. If that fails, then the
 * node has transitioned to memoryless, so try to unregister the
 * attributes.
 */
if (!hugetlb_register_node(node))
hugetlb_unregister_node(node);
}
/*
 * Bind node_hugetlb_work() to the node_work item of the node device for @nid.
 * node_devices[nid] is dereferenced without a NULL check, so the node device
 * must already exist when this is called.
 */
static void init_node_hugetlb_work(int nid)
{
INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work);
}
/*
 * Memory hotplug notifier callback for the node driver.
 * @self:   the notifier block (unused)
 * @action: MEM_* event code
 * @arg:    pointer to the struct memory_notify describing the event
 *
 * On MEM_ONLINE and MEM_OFFLINE, schedules the affected node's node_work
 * when status_change_nid names a node (i.e. is not NUMA_NO_NODE).  All
 * other actions are ignored.
 *
 * Always returns NOTIFY_OK.
 */
static int node_memory_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
struct memory_notify *mnb = arg;
int nid = mnb->status_change_nid;
switch (action) {
case MEM_ONLINE:
case MEM_OFFLINE:
/*
 * offload per node hstate [un]registration to a work thread
 * when transitioning to/from memoryless state.
 */
if (nid != NUMA_NO_NODE)
schedule_work(&node_devices[nid]->node_work);
break;
case MEM_GOING_ONLINE:
case MEM_GOING_OFFLINE:
case MEM_CANCEL_ONLINE:
case MEM_CANCEL_OFFLINE:
default:
break;
}
return NOTIFY_OK;
}
#endif /* CONFIG_HUGETLBFS */
#endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* CONFIG_MEMORY_HOTPLUG */
#if !defined(CONFIG_MEMORY_HOTPLUG) || !defined(CONFIG_HUGETLBFS)
/*
 * Stubs used when CONFIG_MEMORY_HOTPLUG or CONFIG_HUGETLBFS is not set:
 * the notifier callback does nothing and reports NOTIFY_OK, and there is
 * no work item to initialize.
 */
static inline int node_memory_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
return NOTIFY_OK;
}
static void init_node_hugetlb_work(int nid) { }
#endif
int __register_one_node(int nid) int __register_one_node(int nid)
{ {
int error; int error;
...@@ -991,8 +867,6 @@ int __register_one_node(int nid) ...@@ -991,8 +867,6 @@ int __register_one_node(int nid)
} }
INIT_LIST_HEAD(&node_devices[nid]->access_list); INIT_LIST_HEAD(&node_devices[nid]->access_list);
/* initialize work queue for memory hot plug */
init_node_hugetlb_work(nid);
node_init_caches(nid); node_init_caches(nid);
return error; return error;
...@@ -1063,13 +937,8 @@ static const struct attribute_group *cpu_root_attr_groups[] = { ...@@ -1063,13 +937,8 @@ static const struct attribute_group *cpu_root_attr_groups[] = {
NULL, NULL,
}; };
#define NODE_CALLBACK_PRI 2 /* lower than SLAB */
void __init node_dev_init(void) void __init node_dev_init(void)
{ {
static struct notifier_block node_memory_callback_nb = {
.notifier_call = node_memory_callback,
.priority = NODE_CALLBACK_PRI,
};
int ret, i; int ret, i;
BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES); BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
...@@ -1079,8 +948,6 @@ void __init node_dev_init(void) ...@@ -1079,8 +948,6 @@ void __init node_dev_init(void)
if (ret) if (ret)
panic("%s() failed to register subsystem: %d\n", __func__, ret); panic("%s() failed to register subsystem: %d\n", __func__, ret);
register_hotmemory_notifier(&node_memory_callback_nb);
/* /*
* Create all node devices, which will properly link the node * Create all node devices, which will properly link the node
* to applicable memory block devices and already created cpu devices. * to applicable memory block devices and already created cpu devices.
......
...@@ -2,15 +2,15 @@ ...@@ -2,15 +2,15 @@
/* /*
* include/linux/node.h - generic node definition * include/linux/node.h - generic node definition
* *
* This is mainly for topological representation. We define the * This is mainly for topological representation. We define the
* basic 'struct node' here, which can be embedded in per-arch * basic 'struct node' here, which can be embedded in per-arch
* definitions of processors. * definitions of processors.
* *
* Basic handling of the devices is done in drivers/base/node.c * Basic handling of the devices is done in drivers/base/node.c
* and system devices are handled in drivers/base/sys.c. * and system devices are handled in drivers/base/sys.c.
* *
* Nodes are exported via driverfs in the class/node/devices/ * Nodes are exported via driverfs in the class/node/devices/
* directory. * directory.
*/ */
#ifndef _LINUX_NODE_H_ #ifndef _LINUX_NODE_H_
#define _LINUX_NODE_H_ #define _LINUX_NODE_H_
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
#include <linux/device.h> #include <linux/device.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/workqueue.h>
/** /**
* struct node_hmem_attrs - heterogeneous memory performance attributes * struct node_hmem_attrs - heterogeneous memory performance attributes
...@@ -84,10 +83,6 @@ static inline void node_set_perf_attrs(unsigned int nid, ...@@ -84,10 +83,6 @@ static inline void node_set_perf_attrs(unsigned int nid,
struct node { struct node {
struct device dev; struct device dev;
struct list_head access_list; struct list_head access_list;
#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS)
struct work_struct node_work;
#endif
#ifdef CONFIG_HMEM_REPORTING #ifdef CONFIG_HMEM_REPORTING
struct list_head cache_attrs; struct list_head cache_attrs;
struct device *cache_dev; struct device *cache_dev;
...@@ -96,7 +91,6 @@ struct node { ...@@ -96,7 +91,6 @@ struct node {
struct memory_block; struct memory_block;
extern struct node *node_devices[]; extern struct node *node_devices[];
typedef void (*node_registration_func_t)(struct node *);
#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA) #if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA)
void register_memory_blocks_under_node(int nid, unsigned long start_pfn, void register_memory_blocks_under_node(int nid, unsigned long start_pfn,
...@@ -144,11 +138,6 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk); ...@@ -144,11 +138,6 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
extern int register_memory_node_under_compute_node(unsigned int mem_nid, extern int register_memory_node_under_compute_node(unsigned int mem_nid,
unsigned int cpu_nid, unsigned int cpu_nid,
unsigned access); unsigned access);
#ifdef CONFIG_HUGETLBFS
extern void register_hugetlbfs_with_node(node_registration_func_t doregister,
node_registration_func_t unregister);
#endif
#else #else
static inline void node_dev_init(void) static inline void node_dev_init(void)
{ {
...@@ -176,11 +165,6 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) ...@@ -176,11 +165,6 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk) static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
{ {
} }
/* No-op stub of register_hugetlbfs_with_node(); both arguments are ignored. */
static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
node_registration_func_t unreg)
{
}
#endif #endif
#define to_node(device) container_of(device, struct node, dev) #define to_node(device) container_of(device, struct node, dev)
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <linux/migrate.h> #include <linux/migrate.h>
#include <linux/nospec.h> #include <linux/nospec.h>
#include <linux/delayacct.h> #include <linux/delayacct.h>
#include <linux/memory.h>
#include <asm/page.h> #include <asm/page.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
...@@ -4000,6 +4001,23 @@ static void hugetlb_register_node(struct node *node) ...@@ -4000,6 +4001,23 @@ static void hugetlb_register_node(struct node *node)
} }
} }
/*
 * Memory hotplug notifier callback that registers or unregisters a node
 * with hugetlb as memory hotplug events arrive.
 * @self:   the notifier block (unused)
 * @action: MEM_* event code
 * @arg:    pointer to the struct memory_notify describing the event
 *
 * Events whose status_change_nid is NUMA_NO_NODE are skipped and
 * NOTIFY_DONE is returned.  Otherwise:
 *   - MEM_GOING_ONLINE calls hugetlb_register_node() for the node;
 *   - MEM_CANCEL_ONLINE and MEM_OFFLINE call hugetlb_unregister_node();
 *   - any other action is left untouched.
 * NOTIFY_OK is returned in all of these cases.
 */
static int __meminit hugetlb_memory_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
struct memory_notify *mnb = arg;
int nid = mnb->status_change_nid;
if (nid == NUMA_NO_NODE)
return NOTIFY_DONE;
if (action == MEM_GOING_ONLINE)
hugetlb_register_node(node_devices[nid]);
else if (action == MEM_CANCEL_ONLINE || action == MEM_OFFLINE)
hugetlb_unregister_node(node_devices[nid]);
return NOTIFY_OK;
}
/* /*
* hugetlb init time: register hstate attributes for all registered node * hugetlb init time: register hstate attributes for all registered node
* devices of nodes that have memory. All on-line nodes should have * devices of nodes that have memory. All on-line nodes should have
...@@ -4009,18 +4027,11 @@ static void __init hugetlb_register_all_nodes(void) ...@@ -4009,18 +4027,11 @@ static void __init hugetlb_register_all_nodes(void)
{ {
int nid; int nid;
for_each_node_state(nid, N_MEMORY) { get_online_mems();
struct node *node = node_devices[nid]; hotplug_memory_notifier(hugetlb_memory_callback, 0);
if (node->dev.id == nid) for_each_node_state(nid, N_MEMORY)
hugetlb_register_node(node); hugetlb_register_node(node_devices[nid]);
} put_online_mems();
/*
* Let the node device driver know we're here so it can
* [un]register hstate attributes on node hotplug.
*/
register_hugetlbfs_with_node(hugetlb_register_node,
hugetlb_unregister_node);
} }
#else /* !CONFIG_NUMA */ #else /* !CONFIG_NUMA */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment