Commit 37012946 authored by David Rientjes, committed by Linus Torvalds

mempolicy: create mempolicy_operations structure

Create a mempolicy_operations structure that currently points to two
functions[*] for the various modes:

	int (*create)(struct mempolicy *, const nodemask_t *);
	void (*rebind)(struct mempolicy *, const nodemask_t *);

This splits the implementation for the various modes out of two large
functions, mpol_new() and mpol_rebind_policy().  Eventually it may be
beneficial to add additional functions to accommodate the existing switch()
statements in mm/mempolicy.c.

 [*] The ->create() function for MPOL_DEFAULT is currently NULL since no
     struct mempolicy is dynamically allocated.
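
 [Editor's illustration, not part of the patch: the sketch below shows the
  same mode-indexed ops-table pattern in miniature, as plain userspace C.
  Every name in it (struct policy, policy_ops, the MODE_* values, and the
  helper functions) is a simplified stand-in invented for this example,
  not the kernel's API.]

	#include <stdio.h>

	enum mode { MODE_DEFAULT, MODE_INTERLEAVE, MODE_MAX };

	struct policy {
		enum mode mode;
		unsigned long nodes;	/* stand-in for nodemask_t */
	};

	static int create_interleave(struct policy *p, unsigned long nodes)
	{
		if (!nodes)
			return -1;	/* reject an empty nodemask */
		p->nodes = nodes;
		return 0;
	}

	static void rebind_default(struct policy *p, unsigned long nodes)
	{
		/* nothing to rebind for the default policy */
	}

	static void rebind_interleave(struct policy *p, unsigned long nodes)
	{
		p->nodes &= nodes;	/* clip to the newly allowed nodes */
	}

	static const struct policy_ops {
		int (*create)(struct policy *p, unsigned long nodes);
		void (*rebind)(struct policy *p, unsigned long nodes);
	} ops[MODE_MAX] = {
		[MODE_DEFAULT]    = { .rebind = rebind_default },
		[MODE_INTERLEAVE] = { .create = create_interleave,
				      .rebind = rebind_interleave },
	};

	int main(void)
	{
		struct policy p = { .mode = MODE_INTERLEAVE };

		if (ops[p.mode].create(&p, 0xf0) < 0)	/* dispatch, no switch() */
			return 1;
		ops[p.mode].rebind(&p, 0x30);		/* rebind via the table */
		printf("nodes after rebind: %#lx\n", p.nodes);
		return 0;
	}

 As in the patch, the default mode's ->create is left NULL because nothing
 is allocated for it, and callers index the table by mode rather than
 switching on it.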

[Lee.Schermerhorn@hp.com: fix regression in the package mempolicy regression tests]
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 1d0d2680
@@ -63,7 +63,6 @@
    grows down?
    make bind policy root only? It can trigger oom much faster and the
    kernel is not always grateful with that.
-   could replace all the switch()es with a mempolicy_ops structure.
 */

 #include <linux/mempolicy.h>
@@ -110,8 +109,13 @@ struct mempolicy default_policy = {
 	.policy = MPOL_DEFAULT,
 };

+static const struct mempolicy_operations {
+	int (*create)(struct mempolicy *pol, const nodemask_t *nodes);
+	void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes);
+} mpol_ops[MPOL_MAX];
+
 /* Check that the nodemask contains at least one populated zone */
-static int is_valid_nodemask(nodemask_t *nodemask)
+static int is_valid_nodemask(const nodemask_t *nodemask)
 {
 	int nd, k;
@@ -144,23 +148,60 @@ static void mpol_relative_nodemask(nodemask_t *ret, const nodemask_t *orig,
 	nodes_onto(*ret, tmp, *rel);
 }

+static int mpol_new_interleave(struct mempolicy *pol, const nodemask_t *nodes)
+{
+	if (nodes_empty(*nodes))
+		return -EINVAL;
+	pol->v.nodes = *nodes;
+	return 0;
+}
+
+static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
+{
+	if (!nodes)
+		pol->v.preferred_node = -1;	/* local allocation */
+	else if (nodes_empty(*nodes))
+		return -EINVAL;			/* no allowed nodes */
+	else
+		pol->v.preferred_node = first_node(*nodes);
+	return 0;
+}
+
+static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
+{
+	if (!is_valid_nodemask(nodes))
+		return -EINVAL;
+	pol->v.nodes = *nodes;
+	return 0;
+}
+
 /* Create a new policy */
 static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
 				  nodemask_t *nodes)
 {
 	struct mempolicy *policy;
 	nodemask_t cpuset_context_nmask;
+	int localalloc = 0;
+	int ret;

 	pr_debug("setting mode %d flags %d nodes[0] %lx\n",
 		 mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);

 	if (mode == MPOL_DEFAULT)
-		return (nodes && nodes_weight(*nodes)) ? ERR_PTR(-EINVAL) :
-							 NULL;
+		return NULL;
+	if (!nodes || nodes_empty(*nodes)) {
+		if (mode != MPOL_PREFERRED)
+			return ERR_PTR(-EINVAL);
+		localalloc = 1;	/* special case: no mode flags */
+	}
 	policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
 	if (!policy)
 		return ERR_PTR(-ENOMEM);
 	atomic_set(&policy->refcnt, 1);
+	policy->policy = mode;
+
+	if (!localalloc) {
+		policy->flags = flags;
 		cpuset_update_task_memory_state();
 		if (flags & MPOL_F_RELATIVE_NODES)
 			mpol_relative_nodemask(&cpuset_context_nmask, nodes,
@@ -168,70 +209,41 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
 		else
 			nodes_and(cpuset_context_nmask, *nodes,
 				  cpuset_current_mems_allowed);
-	switch (mode) {
-	case MPOL_INTERLEAVE:
-		if (nodes_empty(*nodes) || nodes_empty(cpuset_context_nmask))
-			goto free;
-		policy->v.nodes = cpuset_context_nmask;
-		break;
-	case MPOL_PREFERRED:
-		policy->v.preferred_node = first_node(cpuset_context_nmask);
-		if (policy->v.preferred_node >= MAX_NUMNODES)
-			goto free;
-		break;
-	case MPOL_BIND:
-		if (!is_valid_nodemask(&cpuset_context_nmask))
-			goto free;
-		policy->v.nodes = cpuset_context_nmask;
-		break;
-	default:
-		BUG();
-	}
-	policy->policy = mode;
-	policy->flags = flags;
 	if (mpol_store_user_nodemask(policy))
 		policy->w.user_nodemask = *nodes;
 	else
-		policy->w.cpuset_mems_allowed = cpuset_mems_allowed(current);
-	return policy;
+		policy->w.cpuset_mems_allowed =
+					cpuset_mems_allowed(current);
+	}

-free:
-	kmem_cache_free(policy_cache, policy);
-	return ERR_PTR(-EINVAL);
+	ret = mpol_ops[mode].create(policy,
+				localalloc ? NULL : &cpuset_context_nmask);
+	if (ret < 0) {
+		kmem_cache_free(policy_cache, policy);
+		return ERR_PTR(ret);
+	}
+	return policy;
 }

-/* Migrate a policy to a different set of nodes */
-static void mpol_rebind_policy(struct mempolicy *pol,
-					const nodemask_t *newmask)
+static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes)
 {
-	nodemask_t tmp;
-	int static_nodes;
-	int relative_nodes;
-
-	if (!pol)
-		return;
-	static_nodes = pol->flags & MPOL_F_STATIC_NODES;
-	relative_nodes = pol->flags & MPOL_F_RELATIVE_NODES;
-	if (!mpol_store_user_nodemask(pol) &&
-	    nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
-		return;
-
-	switch (pol->policy) {
-	case MPOL_DEFAULT:
-		break;
-	case MPOL_BIND:
-		/* Fall through */
-	case MPOL_INTERLEAVE:
-		if (static_nodes)
-			nodes_and(tmp, pol->w.user_nodemask, *newmask);
-		else if (relative_nodes)
-			mpol_relative_nodemask(&tmp, &pol->w.user_nodemask,
-					       newmask);
-		else {
-			nodes_remap(tmp, pol->v.nodes,
-				    pol->w.cpuset_mems_allowed, *newmask);
-			pol->w.cpuset_mems_allowed = *newmask;
-		}
-
-		pol->v.nodes = tmp;
-		if (!node_isset(current->il_next, tmp)) {
-			current->il_next = next_node(current->il_next, tmp);
+}
+
+static void mpol_rebind_nodemask(struct mempolicy *pol,
+				 const nodemask_t *nodes)
+{
+	nodemask_t tmp;
+
+	if (pol->flags & MPOL_F_STATIC_NODES)
+		nodes_and(tmp, pol->w.user_nodemask, *nodes);
+	else if (pol->flags & MPOL_F_RELATIVE_NODES)
+		mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
+	else {
+		nodes_remap(tmp, pol->v.nodes, pol->w.cpuset_mems_allowed,
+			    *nodes);
+		pol->w.cpuset_mems_allowed = *nodes;
+	}
+
+	pol->v.nodes = tmp;
+	if (!node_isset(current->il_next, tmp)) {
+		current->il_next = next_node(current->il_next, tmp);
@@ -240,31 +252,49 @@ static void mpol_rebind_policy(struct mempolicy *pol,
 		if (current->il_next >= MAX_NUMNODES)
 			current->il_next = numa_node_id();
 	}
-		break;
-	case MPOL_PREFERRED:
-		if (static_nodes) {
-			int node = first_node(pol->w.user_nodemask);
-
-			if (node_isset(node, *newmask))
-				pol->v.preferred_node = node;
-			else
-				pol->v.preferred_node = -1;
-		} else if (relative_nodes) {
-			mpol_relative_nodemask(&tmp, &pol->w.user_nodemask,
-					       newmask);
-			pol->v.preferred_node = first_node(tmp);
-		} else {
-			pol->v.preferred_node = node_remap(pol->v.preferred_node,
-					pol->w.cpuset_mems_allowed, *newmask);
-			pol->w.cpuset_mems_allowed = *newmask;
-		}
-		break;
-	default:
-		BUG();
-		break;
-	}
+}
+
+static void mpol_rebind_preferred(struct mempolicy *pol,
+				  const nodemask_t *nodes)
+{
+	nodemask_t tmp;
+
+	/*
+	 * check 'STATIC_NODES first, as preferred_node == -1 may be
+	 * a temporary, "fallback" state for this policy.
+	 */
+	if (pol->flags & MPOL_F_STATIC_NODES) {
+		int node = first_node(pol->w.user_nodemask);
+
+		if (node_isset(node, *nodes))
+			pol->v.preferred_node = node;
+		else
+			pol->v.preferred_node = -1;
+	} else if (pol->v.preferred_node == -1) {
+		return;	/* no remap required for explicit local alloc */
+	} else if (pol->flags & MPOL_F_RELATIVE_NODES) {
+		mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
+		pol->v.preferred_node = first_node(tmp);
+	} else {
+		pol->v.preferred_node = node_remap(pol->v.preferred_node,
+						   pol->w.cpuset_mems_allowed,
+						   *nodes);
+		pol->w.cpuset_mems_allowed = *nodes;
+	}
+}
+
+/* Migrate a policy to a different set of nodes */
+static void mpol_rebind_policy(struct mempolicy *pol,
+			       const nodemask_t *newmask)
+{
+	if (!pol)
+		return;
+	if (!mpol_store_user_nodemask(pol) &&
+	    nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
+		return;
+	mpol_ops[pol->policy].rebind(pol, newmask);
+}

 /*
  * Wrapper for mpol_rebind_policy() that just requires task
  * pointer, and updates task mempolicy.
@@ -291,6 +321,24 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 	up_write(&mm->mmap_sem);
 }

+static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
+	[MPOL_DEFAULT] = {
+		.rebind = mpol_rebind_default,
+	},
+	[MPOL_INTERLEAVE] = {
+		.create = mpol_new_interleave,
+		.rebind = mpol_rebind_nodemask,
+	},
+	[MPOL_PREFERRED] = {
+		.create = mpol_new_preferred,
+		.rebind = mpol_rebind_preferred,
+	},
+	[MPOL_BIND] = {
+		.create = mpol_new_bind,
+		.rebind = mpol_rebind_nodemask,
+	},
+};
+
 static void gather_stats(struct page *, void *, int pte_dirty);
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
 						unsigned long flags);
@@ -1848,7 +1896,6 @@ void numa_default_policy(void)
 /*
  * Display pages allocated per node and memory policy via /proc.
  */
-
 static const char * const policy_types[] =
 	{ "default", "prefer", "bind", "interleave" };