[PATCH] ia64: add support for NUMA machines with CPU-only (memory-less) nodes

This patch works around a limitation in the current NUMA code, which doesn't support CPU-only (memory-less) nodes. With this patch CPU-only nodes are moved to a node with memory which is at the closest relative distance per the SLIT information. Any node reassignments will result in the compression of the nodes and renumbering the nid values where appropriate.

[PATCH] ia64: add support for NUMA machines with CPU-only (memory-less) nodes
This patch works around a limitation in the current NUMA code, which doesn't support CPU-only (memory-less) nodes. With this patch CPU-only nodes are moved to a node with memory which is at the closest relative distance per the SLIT information. Any node reassignments will result in the compression of the nodes and renumbering the nid values where appropriate.
7f2ca985 · Robert Picco · David Mosberger · 304f3d56 · 7f2ca985
Commit 7f2ca985 authored Feb 23, 2004 by Robert Picco Committed by David Mosberger Feb 23, 2004
Show whitespace changes
Inline Side-by-side

Showing with 122 additions and 0 deletions

arch/ia64/mm/discontig.c arch/ia64/mm/discontig.c +122 -0

No files found.
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -40,6 +40,125 @@ struct early_node_data {

 static struct early_node_data mem_data[NR_NODES] __initdata;

+/**
+ * reassign_cpu_only_nodes - called from find_memory to move CPU-only nodes to a memory node
+ *
+ * This function will move nodes with only CPUs (no memory)
+ * to a node with memory which is at the minimum numa_slit distance.
+ * Any reassigments will result in the compression of the nodes
+ * and renumbering the nid values where appropriate.
+ * The static declarations below are to avoid large stack size which
+ * makes the code not re-entrant.
+ */
+static void __init reassign_cpu_only_nodes(void)
+{
+	struct node_memblk_s *p;
+	int i, j, k, nnode, nid, cpu, cpunid;
+	u8 cslit, slit;
+	static DECLARE_BITMAP(nodes_with_mem, NR_NODES) __initdata;
+	static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata;
+	static int node_flip[NR_NODES] __initdata;
+
+	for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
+		if (!test_bit(p->nid, (void *) nodes_with_mem)) {
+			set_bit(p->nid, (void *) nodes_with_mem);
+			nnode++;
+		}
+
+	/*
+	 * All nids with memory.
+	 */
+	if (nnode == numnodes)
+		return;
+
+	/*
+	 * Change nids and attempt to migrate CPU-only nodes
+	 * to the best numa_slit (closest neighbor) possible.
+	 * For reassigned CPU nodes a nid can't be arrived at
+	 * until after this loop because the target nid's new
+	 * identity might not have been established yet. So
+	 * new nid values are fabricated above numnodes and
+	 * mapped back later to their true value.
+	 */
+	for (nid = 0, i = 0; i < numnodes; i++)  {
+		if (test_bit(i, (void *) nodes_with_mem)) {
+			/*
+			 * Save original nid value for numa_slit
+			 * fixup and node_cpuid reassignments.
+			 */
+			node_flip[nid] = i;
+
+			if (i == nid) {
+				nid++;
+				continue;
+			}
+
+			for (p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
+				if (p->nid == i)
+					p->nid = nid;
+
+			cpunid = nid;
+			nid++;
+		} else
+			cpunid = numnodes;
+
+		for (cpu = 0; cpu < NR_CPUS; cpu++)
+			if (node_cpuid[cpu].nid == i) {
+				/* For nodes not being reassigned just fix the cpu's nid. */
+				if (cpunid < numnodes) {
+					node_cpuid[cpu].nid = cpunid;
+					continue;
+				}
+
+				/*
+				 * For nodes being reassigned, find best node by
+				 * numa_slit information and then make a temporary
+				 * nid value based on current nid and numnodes.
+				 */
+				for (slit = 0xff, k = numnodes + numnodes, j = 0; j < numnodes; j++)
+					if (i == j)
+						continue;
+					else if (test_bit(j, (void *) nodes_with_mem)) {
+						cslit = numa_slit[i * numnodes + j];
+						if (cslit < slit) {
+							k = numnodes + j;
+							slit = cslit;
+						}
+					}
+
+				node_cpuid[cpu].nid = k;
+			}
+	}
+
+	/*
+	 * Fixup temporary nid values for CPU-only nodes.
+	 */
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		if (node_cpuid[cpu].nid == (numnodes + numnodes))
+			node_cpuid[cpu].nid = nnode - 1;
+		else
+			for (i = 0; i < nnode; i++)
+				if (node_flip[i] == (node_cpuid[cpu].nid - numnodes)) {
+					node_cpuid[cpu].nid = i;
+					break;
+				}
+
+	/*
+	 * Fix numa_slit by compressing from larger
+	 * nid array to reduced nid array.
+	 */
+	for (i = 0; i < nnode; i++)
+		for (j = 0; j < nnode; j++)
+			numa_slit_fix[i * nnode + j] =
+				numa_slit[node_flip[i] * numnodes + node_flip[j]];
+
+	memcpy(numa_slit, numa_slit_fix, sizeof (numa_slit));
+
+	numnodes = nnode;
+
+	return;
+}
+
 /*
 * To prevent cache aliasing effects, align per-node structures so that they
 * start at addresses that are strided by node number.
@@ -301,6 +420,9 @@ void __init find_memory(void)
 	min_low_pfn = -1;
 	max_low_pfn = 0;

+	if (numnodes > 1)
+		reassign_cpu_only_nodes();
+
 	/* These actually end up getting called by call_pernode_memory() */
 	efi_memmap_walk(filter_rsvd_memory, build_node_maps);
 	efi_memmap_walk(filter_rsvd_memory, find_pernode_space);