Commit 957f822a authored by David Rientjes, committed by Linus Torvalds

mm, numa: reclaim from all nodes within reclaim distance

RECLAIM_DISTANCE represents the distance between nodes at which it is
deemed too costly to allocate from; it's preferred to try to reclaim from
a local zone before falling back to allocating on a remote node with such
a distance.

To do this, zone_reclaim_mode is set if the distance between any two
nodes on the system is greater than this distance.  This, however, ends
up causing the page allocator to reclaim from every zone regardless of
its affinity.

What we really want is to reclaim only from zones that are closer than
RECLAIM_DISTANCE.  This patch adds a nodemask to each node that
represents the set of nodes that are within this distance.  During the
zone iteration, if the bit for a zone's node is set for the local node,
then reclaim is attempted; otherwise, the zone is skipped.

[akpm@linux-foundation.org: fix CONFIG_NUMA=n build]
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Minchan Kim <minchan@kernel.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent a0c5e813
...@@ -709,6 +709,7 @@ typedef struct pglist_data { ...@@ -709,6 +709,7 @@ typedef struct pglist_data {
unsigned long node_spanned_pages; /* total size of physical page unsigned long node_spanned_pages; /* total size of physical page
range, including holes */ range, including holes */
int node_id; int node_id;
nodemask_t reclaim_nodes; /* Nodes allowed to reclaim from */
wait_queue_head_t kswapd_wait; wait_queue_head_t kswapd_wait;
wait_queue_head_t pfmemalloc_wait; wait_queue_head_t pfmemalloc_wait;
struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */ struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */
......
...@@ -1799,6 +1799,22 @@ static void zlc_clear_zones_full(struct zonelist *zonelist) ...@@ -1799,6 +1799,22 @@ static void zlc_clear_zones_full(struct zonelist *zonelist)
bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
} }
static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
{
return node_isset(local_zone->node, zone->zone_pgdat->reclaim_nodes);
}
/*
 * Populate the reclaim_nodes mask of node @nid and decide whether zone
 * reclaim needs to be enabled.
 *
 * Every online node whose distance from @nid is at most RECLAIM_DISTANCE is
 * added to NODE_DATA(nid)->reclaim_nodes.  zone_reclaim_mode is switched on
 * only when some online node lies farther away than RECLAIM_DISTANCE, which
 * matches the check this patch removes from build_zonelists().  Setting it
 * in the "close enough" branch would enable zone reclaim as soon as any
 * node is within reclaim distance, regardless of whether a remote node
 * that is too costly to allocate from exists.
 */
static void __paginginit init_zone_allows_reclaim(int nid)
{
	int i;

	for_each_online_node(i) {
		if (node_distance(nid, i) <= RECLAIM_DISTANCE)
			node_set(i, NODE_DATA(nid)->reclaim_nodes);
		else
			zone_reclaim_mode = 1;
	}
}
#else /* CONFIG_NUMA */ #else /* CONFIG_NUMA */
static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
...@@ -1819,6 +1835,15 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z) ...@@ -1819,6 +1835,15 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
static void zlc_clear_zones_full(struct zonelist *zonelist) static void zlc_clear_zones_full(struct zonelist *zonelist)
{ {
} }
/*
 * !CONFIG_NUMA variant: no per-node reclaim mask is consulted, so reclaim
 * is always reported as allowed.  Both arguments are ignored.
 */
static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
{
	return true;
}
/*
 * !CONFIG_NUMA variant: intentionally empty, so callers can invoke it
 * unconditionally.  @nid is ignored.
 */
static inline void init_zone_allows_reclaim(int nid)
{
}
#endif /* CONFIG_NUMA */ #endif /* CONFIG_NUMA */
/* /*
...@@ -1903,7 +1928,8 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, ...@@ -1903,7 +1928,8 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
did_zlc_setup = 1; did_zlc_setup = 1;
} }
if (zone_reclaim_mode == 0) if (zone_reclaim_mode == 0 ||
!zone_allows_reclaim(preferred_zone, zone))
goto this_zone_full; goto this_zone_full;
/* /*
...@@ -3364,21 +3390,13 @@ static void build_zonelists(pg_data_t *pgdat) ...@@ -3364,21 +3390,13 @@ static void build_zonelists(pg_data_t *pgdat)
j = 0; j = 0;
while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
int distance = node_distance(local_node, node);
/*
* If another node is sufficiently far away then it is better
* to reclaim pages in a zone before going off node.
*/
if (distance > RECLAIM_DISTANCE)
zone_reclaim_mode = 1;
/* /*
* We don't want to pressure a particular node. * We don't want to pressure a particular node.
* So adding penalty to the first node in same * So adding penalty to the first node in same
* distance group to make it round-robin. * distance group to make it round-robin.
*/ */
if (distance != node_distance(local_node, prev_node)) if (node_distance(local_node, node) !=
node_distance(local_node, prev_node))
node_load[node] = load; node_load[node] = load;
prev_node = node; prev_node = node;
...@@ -4552,6 +4570,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, ...@@ -4552,6 +4570,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
pgdat->node_id = nid; pgdat->node_id = nid;
pgdat->node_start_pfn = node_start_pfn; pgdat->node_start_pfn = node_start_pfn;
init_zone_allows_reclaim(nid);
calculate_node_totalpages(pgdat, zones_size, zholes_size); calculate_node_totalpages(pgdat, zones_size, zholes_size);
alloc_node_mem_map(pgdat); alloc_node_mem_map(pgdat);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment