Commit 9a305230, authored by Lee Schermerhorn and committed by Linus Torvalds

hugetlb: add per node hstate attributes

Add the per huge page size control/query attributes to the per node
sysdevs:

/sys/devices/system/node/node<ID>/hugepages/hugepages-<size>/
	nr_hugepages       - r/w
	free_hugepages     - r/o
	surplus_hugepages  - r/o

The patch attempts to re-use/share as much of the existing global hstate
attribute initialization and handling, and the "nodes_allowed" constraint
processing as possible.

Calling set_max_huge_pages() with no node indicates a change to global
hstate parameters.  In this case, any non-default task mempolicy will be
used to generate the nodes_allowed mask.  A valid node id indicates an
update to that node's hstate parameters, and the count argument specifies
the target count for the specified node.  From this info, we compute the
target global count for the hstate and construct a nodes_allowed node mask
containing only the specified node.

Setting the node specific nr_hugepages via the per node attribute
effectively ignores any task mempolicy or cpuset constraints.

With this patch:

(me):ls /sys/devices/system/node/node0/hugepages/hugepages-2048kB
./  ../  free_hugepages  nr_hugepages  surplus_hugepages

Starting from:
Node 0 HugePages_Total:     0
Node 0 HugePages_Free:      0
Node 0 HugePages_Surp:      0
Node 1 HugePages_Total:     0
Node 1 HugePages_Free:      0
Node 1 HugePages_Surp:      0
Node 2 HugePages_Total:     0
Node 2 HugePages_Free:      0
Node 2 HugePages_Surp:      0
Node 3 HugePages_Total:     0
Node 3 HugePages_Free:      0
Node 3 HugePages_Surp:      0
vm.nr_hugepages = 0

Allocate 16 persistent huge pages on node 2:
(me):echo 16 >/sys/devices/system/node/node2/hugepages/hugepages-2048kB/nr_hugepages

[Note that this is equivalent to:
	numactl -m 2 hugeadm --pool-pages-min 2M:+16
]

Yields:
Node 0 HugePages_Total:     0
Node 0 HugePages_Free:      0
Node 0 HugePages_Surp:      0
Node 1 HugePages_Total:     0
Node 1 HugePages_Free:      0
Node 1 HugePages_Surp:      0
Node 2 HugePages_Total:    16
Node 2 HugePages_Free:     16
Node 2 HugePages_Surp:      0
Node 3 HugePages_Total:     0
Node 3 HugePages_Free:      0
Node 3 HugePages_Surp:      0
vm.nr_hugepages = 16

Global controls work as expected--reduce pool to 8 persistent huge pages:
(me):echo 8 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages

Node 0 HugePages_Total:     0
Node 0 HugePages_Free:      0
Node 0 HugePages_Surp:      0
Node 1 HugePages_Total:     0
Node 1 HugePages_Free:      0
Node 1 HugePages_Surp:      0
Node 2 HugePages_Total:     8
Node 2 HugePages_Free:      8
Node 2 HugePages_Surp:      0
Node 3 HugePages_Total:     0
Node 3 HugePages_Free:      0
Node 3 HugePages_Surp:      0
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Andi Kleen <andi@firstfloor.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 4e25b257
...@@ -173,6 +173,43 @@ static ssize_t node_read_distance(struct sys_device * dev, ...@@ -173,6 +173,43 @@ static ssize_t node_read_distance(struct sys_device * dev,
} }
static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL); static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL);
#ifdef CONFIG_HUGETLBFS
/*
 * hugetlbfs per node attributes registration interface:
 * When/if the hugetlb[fs] subsystem initializes [sometime after this module],
 * it will register its per node attributes for all nodes online at that
 * time.  It will also call register_hugetlbfs_with_node(), below, to
 * register its attribute registration functions with this node driver.
 * Once these hooks have been initialized, the node driver will call into
 * the hugetlb module to [un]register attributes for hot-plugged nodes.
 *
 * Both hooks are NULL until register_hugetlbfs_with_node() installs them;
 * the hugetlb_[un]register_node() wrappers skip the call while a hook is
 * NULL.
 */
static node_registration_func_t __hugetlb_register_node;
static node_registration_func_t __hugetlb_unregister_node;
/*
 * Pass @node to the hugetlb registration hook.  Does nothing while no hook
 * has been installed via register_hugetlbfs_with_node().
 */
static inline void hugetlb_register_node(struct node *node)
{
	if (!__hugetlb_register_node)
		return;

	__hugetlb_register_node(node);
}
/*
 * Pass @node to the hugetlb unregistration hook.  Does nothing while no hook
 * has been installed via register_hugetlbfs_with_node().
 */
static inline void hugetlb_unregister_node(struct node *node)
{
	if (!__hugetlb_unregister_node)
		return;

	__hugetlb_unregister_node(node);
}
/*
 * register_hugetlbfs_with_node - install the hugetlb per node attribute hooks.
 * @doregister: hook the node driver calls to add hstate attributes to a node
 * @unregister: hook the node driver calls to remove them again
 *
 * A NULL hook turns the corresponding hugetlb_[un]register_node() wrapper
 * into a no-op.
 */
void register_hugetlbfs_with_node(node_registration_func_t doregister,
				  node_registration_func_t unregister)
{
	__hugetlb_register_node = doregister;
	__hugetlb_unregister_node = unregister;
}
#else
/* CONFIG_HUGETLBFS is not set: the per node hstate attribute calls are no-ops. */
static inline void hugetlb_register_node(struct node *node)
{
}

static inline void hugetlb_unregister_node(struct node *node)
{
}
#endif
/* /*
* register_node - Setup a sysfs device for a node. * register_node - Setup a sysfs device for a node.
...@@ -196,6 +233,7 @@ int register_node(struct node *node, int num, struct node *parent) ...@@ -196,6 +233,7 @@ int register_node(struct node *node, int num, struct node *parent)
sysdev_create_file(&node->sysdev, &attr_distance); sysdev_create_file(&node->sysdev, &attr_distance);
scan_unevictable_register_node(node); scan_unevictable_register_node(node);
hugetlb_register_node(node);
} }
return error; return error;
} }
...@@ -216,6 +254,7 @@ void unregister_node(struct node *node) ...@@ -216,6 +254,7 @@ void unregister_node(struct node *node)
sysdev_remove_file(&node->sysdev, &attr_distance); sysdev_remove_file(&node->sysdev, &attr_distance);
scan_unevictable_unregister_node(node); scan_unevictable_unregister_node(node);
hugetlb_unregister_node(node);
sysdev_unregister(&node->sysdev); sysdev_unregister(&node->sysdev);
} }
......
...@@ -28,6 +28,7 @@ struct node { ...@@ -28,6 +28,7 @@ struct node {
struct memory_block; struct memory_block;
extern struct node node_devices[]; extern struct node node_devices[];
/* Callback taking a node; the type of the hugetlb [un]register hooks. */
typedef void (*node_registration_func_t)(struct node *);
extern int register_node(struct node *, int, struct node *); extern int register_node(struct node *, int, struct node *);
extern void unregister_node(struct node *node); extern void unregister_node(struct node *node);
...@@ -39,6 +40,11 @@ extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); ...@@ -39,6 +40,11 @@ extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
extern int register_mem_sect_under_node(struct memory_block *mem_blk, extern int register_mem_sect_under_node(struct memory_block *mem_blk,
int nid); int nid);
extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk); extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk);
#ifdef CONFIG_HUGETLBFS
/* Install (or, when called with NULL hooks, clear) the hugetlb per node attribute hooks. */
extern void register_hugetlbfs_with_node(node_registration_func_t doregister,
node_registration_func_t unregister);
#endif
#else #else
static inline int register_one_node(int nid) static inline int register_one_node(int nid)
{ {
...@@ -65,6 +71,11 @@ static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) ...@@ -65,6 +71,11 @@ static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
{ {
return 0; return 0;
} }
/* Stub for this configuration: both hooks are ignored and nothing is installed. */
static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
node_registration_func_t unreg)
{
}
#endif #endif
#define to_node(sys_device) container_of(sys_device, struct node, sysdev) #define to_node(sys_device) container_of(sys_device, struct node, sysdev)
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <asm/io.h> #include <asm/io.h>
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/node.h>
#include "internal.h" #include "internal.h"
const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
...@@ -1320,39 +1321,71 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count, ...@@ -1320,39 +1321,71 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
static struct kobject *hugepages_kobj; static struct kobject *hugepages_kobj;
static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE]; static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
static struct hstate *kobj_to_hstate(struct kobject *kobj) static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp);
static struct hstate *kobj_to_hstate(struct kobject *kobj, int *nidp)
{ {
int i; int i;
for (i = 0; i < HUGE_MAX_HSTATE; i++) for (i = 0; i < HUGE_MAX_HSTATE; i++)
if (hstate_kobjs[i] == kobj) if (hstate_kobjs[i] == kobj) {
if (nidp)
*nidp = NUMA_NO_NODE;
return &hstates[i]; return &hstates[i];
BUG(); }
return NULL;
return kobj_to_node_hstate(kobj, nidp);
} }
static ssize_t nr_hugepages_show_common(struct kobject *kobj, static ssize_t nr_hugepages_show_common(struct kobject *kobj,
struct kobj_attribute *attr, char *buf) struct kobj_attribute *attr, char *buf)
{ {
struct hstate *h = kobj_to_hstate(kobj); struct hstate *h;
return sprintf(buf, "%lu\n", h->nr_huge_pages); unsigned long nr_huge_pages;
int nid;
h = kobj_to_hstate(kobj, &nid);
if (nid == NUMA_NO_NODE)
nr_huge_pages = h->nr_huge_pages;
else
nr_huge_pages = h->nr_huge_pages_node[nid];
return sprintf(buf, "%lu\n", nr_huge_pages);
} }
static ssize_t nr_hugepages_store_common(bool obey_mempolicy, static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
struct kobject *kobj, struct kobj_attribute *attr, struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t len) const char *buf, size_t len)
{ {
int err; int err;
int nid;
unsigned long count; unsigned long count;
struct hstate *h = kobj_to_hstate(kobj); struct hstate *h;
NODEMASK_ALLOC(nodemask_t, nodes_allowed); NODEMASK_ALLOC(nodemask_t, nodes_allowed);
err = strict_strtoul(buf, 10, &count); err = strict_strtoul(buf, 10, &count);
if (err) if (err)
return 0; return 0;
if (!(obey_mempolicy && init_nodemask_of_mempolicy(nodes_allowed))) { h = kobj_to_hstate(kobj, &nid);
NODEMASK_FREE(nodes_allowed); if (nid == NUMA_NO_NODE) {
nodes_allowed = &node_online_map; /*
} * global hstate attribute
*/
if (!(obey_mempolicy &&
init_nodemask_of_mempolicy(nodes_allowed))) {
NODEMASK_FREE(nodes_allowed);
nodes_allowed = &node_states[N_HIGH_MEMORY];
}
} else if (nodes_allowed) {
/*
* per node hstate attribute: adjust count to global,
* but restrict alloc/free to the specified node.
*/
count += h->nr_huge_pages - h->nr_huge_pages_node[nid];
init_nodemask_of_node(nodes_allowed, nid);
} else
nodes_allowed = &node_states[N_HIGH_MEMORY];
h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed); h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
if (nodes_allowed != &node_online_map) if (nodes_allowed != &node_online_map)
...@@ -1398,7 +1431,7 @@ HSTATE_ATTR(nr_hugepages_mempolicy); ...@@ -1398,7 +1431,7 @@ HSTATE_ATTR(nr_hugepages_mempolicy);
static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj, static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf) struct kobj_attribute *attr, char *buf)
{ {
struct hstate *h = kobj_to_hstate(kobj); struct hstate *h = kobj_to_hstate(kobj, NULL);
return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages); return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);
} }
static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj, static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
...@@ -1406,7 +1439,7 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj, ...@@ -1406,7 +1439,7 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
{ {
int err; int err;
unsigned long input; unsigned long input;
struct hstate *h = kobj_to_hstate(kobj); struct hstate *h = kobj_to_hstate(kobj, NULL);
err = strict_strtoul(buf, 10, &input); err = strict_strtoul(buf, 10, &input);
if (err) if (err)
...@@ -1423,15 +1456,24 @@ HSTATE_ATTR(nr_overcommit_hugepages); ...@@ -1423,15 +1456,24 @@ HSTATE_ATTR(nr_overcommit_hugepages);
static ssize_t free_hugepages_show(struct kobject *kobj, static ssize_t free_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf) struct kobj_attribute *attr, char *buf)
{ {
struct hstate *h = kobj_to_hstate(kobj); struct hstate *h;
return sprintf(buf, "%lu\n", h->free_huge_pages); unsigned long free_huge_pages;
int nid;
h = kobj_to_hstate(kobj, &nid);
if (nid == NUMA_NO_NODE)
free_huge_pages = h->free_huge_pages;
else
free_huge_pages = h->free_huge_pages_node[nid];
return sprintf(buf, "%lu\n", free_huge_pages);
} }
HSTATE_ATTR_RO(free_hugepages); HSTATE_ATTR_RO(free_hugepages);
static ssize_t resv_hugepages_show(struct kobject *kobj, static ssize_t resv_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf) struct kobj_attribute *attr, char *buf)
{ {
struct hstate *h = kobj_to_hstate(kobj); struct hstate *h = kobj_to_hstate(kobj, NULL);
return sprintf(buf, "%lu\n", h->resv_huge_pages); return sprintf(buf, "%lu\n", h->resv_huge_pages);
} }
HSTATE_ATTR_RO(resv_hugepages); HSTATE_ATTR_RO(resv_hugepages);
...@@ -1439,8 +1481,17 @@ HSTATE_ATTR_RO(resv_hugepages); ...@@ -1439,8 +1481,17 @@ HSTATE_ATTR_RO(resv_hugepages);
static ssize_t surplus_hugepages_show(struct kobject *kobj, static ssize_t surplus_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf) struct kobj_attribute *attr, char *buf)
{ {
struct hstate *h = kobj_to_hstate(kobj); struct hstate *h;
return sprintf(buf, "%lu\n", h->surplus_huge_pages); unsigned long surplus_huge_pages;
int nid;
h = kobj_to_hstate(kobj, &nid);
if (nid == NUMA_NO_NODE)
surplus_huge_pages = h->surplus_huge_pages;
else
surplus_huge_pages = h->surplus_huge_pages_node[nid];
return sprintf(buf, "%lu\n", surplus_huge_pages);
} }
HSTATE_ATTR_RO(surplus_hugepages); HSTATE_ATTR_RO(surplus_hugepages);
...@@ -1460,19 +1511,21 @@ static struct attribute_group hstate_attr_group = { ...@@ -1460,19 +1511,21 @@ static struct attribute_group hstate_attr_group = {
.attrs = hstate_attrs, .attrs = hstate_attrs,
}; };
static int __init hugetlb_sysfs_add_hstate(struct hstate *h) static int __init hugetlb_sysfs_add_hstate(struct hstate *h,
struct kobject *parent,
struct kobject **hstate_kobjs,
struct attribute_group *hstate_attr_group)
{ {
int retval; int retval;
int hi = h - hstates;
hstate_kobjs[h - hstates] = kobject_create_and_add(h->name, hstate_kobjs[hi] = kobject_create_and_add(h->name, parent);
hugepages_kobj); if (!hstate_kobjs[hi])
if (!hstate_kobjs[h - hstates])
return -ENOMEM; return -ENOMEM;
retval = sysfs_create_group(hstate_kobjs[h - hstates], retval = sysfs_create_group(hstate_kobjs[hi], hstate_attr_group);
&hstate_attr_group);
if (retval) if (retval)
kobject_put(hstate_kobjs[h - hstates]); kobject_put(hstate_kobjs[hi]);
return retval; return retval;
} }
...@@ -1487,17 +1540,184 @@ static void __init hugetlb_sysfs_init(void) ...@@ -1487,17 +1540,184 @@ static void __init hugetlb_sysfs_init(void)
return; return;
for_each_hstate(h) { for_each_hstate(h) {
err = hugetlb_sysfs_add_hstate(h); err = hugetlb_sysfs_add_hstate(h, hugepages_kobj,
hstate_kobjs, &hstate_attr_group);
if (err) if (err)
printk(KERN_ERR "Hugetlb: Unable to add hstate %s", printk(KERN_ERR "Hugetlb: Unable to add hstate %s",
h->name); h->name);
} }
} }
#ifdef CONFIG_NUMA
/*
 * node_hstate/s - associate per node hstate attributes, via their kobjects,
 * with node sysdevs in node_devices[] using a parallel array.  The array
 * index of a node sysdev or _hstate == node id.
 * This is here to avoid any static dependency of the node sysdev driver, in
 * the base kernel, on the hugetlb module.
 */
struct node_hstate {
	/* parent "hugepages" kobject created under the node sysdev */
	struct kobject		*hugepages_kobj;
	/* one kobject per hstate, indexed like hstates[] */
	struct kobject		*hstate_kobjs[HUGE_MAX_HSTATE];
};

/*
 * The element type is private to this file, so give the table internal
 * linkage rather than exporting an untyped global symbol.
 */
static struct node_hstate node_hstates[MAX_NUMNODES];
/*
 * A subset of global hstate attributes for node sysdevs: the three counters
 * whose show/store handlers look up a node id from the kobject.
 * The list is NULL terminated.
 */
static struct attribute *per_node_hstate_attrs[] = {
&nr_hugepages_attr.attr,
&free_hugepages_attr.attr,
&surplus_hugepages_attr.attr,
NULL,
};
/* Attribute group passed to hugetlb_sysfs_add_hstate() for per node hstates. */
static struct attribute_group per_node_hstate_attr_group = {
.attrs = per_node_hstate_attrs,
};
/*
* kobj_to_node_hstate - lookup global hstate for node sysdev hstate attr kobj.
* Returns node id via non-NULL nidp.
*/
static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
{
int nid;
for (nid = 0; nid < nr_node_ids; nid++) {
struct node_hstate *nhs = &node_hstates[nid];
int i;
for (i = 0; i < HUGE_MAX_HSTATE; i++)
if (nhs->hstate_kobjs[i] == kobj) {
if (nidp)
*nidp = nid;
return &hstates[i];
}
}
BUG();
return NULL;
}
/*
* Unregister hstate attributes from a single node sysdev.
* No-op if no hstate attributes attached.
*/
void hugetlb_unregister_node(struct node *node)
{
struct hstate *h;
struct node_hstate *nhs = &node_hstates[node->sysdev.id];
if (!nhs->hugepages_kobj)
return;
for_each_hstate(h)
if (nhs->hstate_kobjs[h - hstates]) {
kobject_put(nhs->hstate_kobjs[h - hstates]);
nhs->hstate_kobjs[h - hstates] = NULL;
}
kobject_put(nhs->hugepages_kobj);
nhs->hugepages_kobj = NULL;
}
/*
 * hugetlb module exit: detach from the node sysdev driver and strip the
 * hstate attributes from every node that has them.
 */
static void hugetlb_unregister_all_nodes(void)
{
	int node;

	/* Clear the hooks first so the node driver stops calling back into us. */
	register_hugetlbfs_with_node(NULL, NULL);

	/* hugetlb_unregister_node() ignores nodes without attributes. */
	for (node = 0; node < nr_node_ids; node++) {
		hugetlb_unregister_node(&node_devices[node]);
	}
}
/*
 * Attach the hstate attributes to one node sysdev.
 * No-op if the node already has a "hugepages" kobject.  If any hstate cannot
 * be added, an error is logged and everything registered so far for this
 * node is torn down again.
 *
 * NOTE(review): hugetlb_sysfs_add_hstate() is marked __init, yet this
 * function is also installed as the node hotplug hook -- confirm it cannot
 * run after init memory has been freed.
 * NOTE(review): when sysfs_create_group() fails, the helper puts the hstate
 * kobject without clearing its slot, so the hugetlb_unregister_node() call
 * below appears to put it a second time -- confirm.
 */
void hugetlb_register_node(struct node *node)
{
	struct node_hstate *nhs = &node_hstates[node->sysdev.id];
	struct hstate *h;

	if (nhs->hugepages_kobj)
		return;		/* already allocated */

	nhs->hugepages_kobj = kobject_create_and_add("hugepages",
						     &node->sysdev.kobj);
	if (!nhs->hugepages_kobj)
		return;

	for_each_hstate(h) {
		int err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
						   nhs->hstate_kobjs,
						   &per_node_hstate_attr_group);

		if (!err)
			continue;

		printk(KERN_ERR "Hugetlb: Unable to add hstate %s"
				" for node %d\n",
				h->name, node->sysdev.id);
		hugetlb_unregister_node(node);
		break;
	}
}
/*
 * hugetlb init time: register hstate attributes for all registered
 * node sysdevs.  All on-line nodes should have registered their
 * associated sysdev by the time the hugetlb module initializes.
 */
static void hugetlb_register_all_nodes(void)
{
	int nid;

	for (nid = 0; nid < nr_node_ids; nid++) {
		struct node *node = &node_devices[nid];

		/*
		 * Skip slots whose sysdev id does not match the index;
		 * presumably those nodes never registered a sysdev.
		 */
		if (node->sysdev.id != nid)
			continue;

		hugetlb_register_node(node);
	}

	/*
	 * From here on the node sysdev driver [un]registers hstate
	 * attributes itself when nodes are hot-plugged.
	 */
	register_hugetlbfs_with_node(hugetlb_register_node,
				     hugetlb_unregister_node);
}
#else /* !CONFIG_NUMA */
/*
 * !CONFIG_NUMA: no per node hstate kobjects are ever created, so being asked
 * to look one up is a BUG().  The trailing statements only give the function
 * defined results: node id -1 through a non-NULL @nidp, and a NULL hstate.
 */
static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
{
	BUG();

	if (nidp != NULL)
		*nidp = -1;

	return NULL;
}
/* !CONFIG_NUMA: there are no node sysdev attributes to add or remove. */
static void hugetlb_unregister_all_nodes(void)
{
}

static void hugetlb_register_all_nodes(void)
{
}
#endif
static void __exit hugetlb_exit(void) static void __exit hugetlb_exit(void)
{ {
struct hstate *h; struct hstate *h;
hugetlb_unregister_all_nodes();
for_each_hstate(h) { for_each_hstate(h) {
kobject_put(hstate_kobjs[h - hstates]); kobject_put(hstate_kobjs[h - hstates]);
} }
...@@ -1532,6 +1752,8 @@ static int __init hugetlb_init(void) ...@@ -1532,6 +1752,8 @@ static int __init hugetlb_init(void)
hugetlb_sysfs_init(); hugetlb_sysfs_init();
hugetlb_register_all_nodes();
return 0; return 0;
} }
module_init(hugetlb_init); module_init(hugetlb_init);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment