[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] [64/66] x86_64: Handle empty PXMs that only contain hotplug memory



The node setup code would try to allocate the node metadata
in the node itself, but that fails if there is no memory
in there. 

This can happen with memory hotplug when the hotplug area
defines a so-far empty node.

Now use bootmem to try to allocate the mem_map in other nodes.

And if that fails, don't panic; just ignore the node.

To make this work I added a new __alloc_bootmem_nopanic 
function that does what its name implies.

TBD: should try to use nearby nodes here. Currently we just use
any node. It's hard to do better because bootmem doesn't have
proper fallback lists yet.

Signed-off-by: Andi Kleen <ak@xxxxxxx>

---
 arch/x86_64/mm/numa.c   |   41 ++++++++++++++++++++++++++++++++---------
 arch/x86_64/mm/srat.c   |    6 ++++++
 include/linux/bootmem.h |    1 +
 mm/bootmem.c            |    9 ++++++++-
 4 files changed, 47 insertions(+), 10 deletions(-)

Index: linux/arch/x86_64/mm/numa.c
===================================================================
--- linux.orig/arch/x86_64/mm/numa.c
+++ linux/arch/x86_64/mm/numa.c
@@ -100,11 +100,30 @@ int early_pfn_to_nid(unsigned long pfn)
 }
 #endif
 
+static void * __init 
+early_node_mem(int nodeid, unsigned long start, unsigned long end,
+	      unsigned long size)
+{
+	unsigned long mem = find_e820_area(start, end, size);
+	void *ptr;
+	if (mem != -1L)
+		return __va(mem);
+	ptr = __alloc_bootmem_nopanic(size, 
+				SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS));
+	if (ptr == 0) {
+		printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
+			size, nodeid);
+		return NULL;
+	}
+	return ptr;
+}
+
 /* Initialize bootmem allocator for a node */
 void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 { 
 	unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start; 
 	unsigned long nodedata_phys;
+	void *bootmap;
 	const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
 
 	start = round_up(start, ZONE_ALIGN); 
@@ -114,13 +133,11 @@ void __init setup_node_bootmem(int nodei
 	start_pfn = start >> PAGE_SHIFT;
 	end_pfn = end >> PAGE_SHIFT;
 
-	nodedata_phys = find_e820_area(start, end, pgdat_size); 
-	if (nodedata_phys == -1L) 
-		panic("Cannot find memory pgdat in node %d\n", nodeid);
-
-	Dprintk("nodedata_phys %lx\n", nodedata_phys); 
+	node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size);
+	if (node_data[nodeid] == NULL)
+		return;
+	nodedata_phys = __pa(node_data[nodeid]);
 
-	node_data[nodeid] = phys_to_virt(nodedata_phys);
 	memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
 	NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
 	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
@@ -129,9 +146,15 @@ void __init setup_node_bootmem(int nodei
 	/* Find a place for the bootmem map */
 	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 
 	bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
-	bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<<PAGE_SHIFT);
-	if (bootmap_start == -1L) 
-		panic("Not enough continuous space for bootmap on node %d", nodeid); 
+	bootmap = early_node_mem(nodeid, bootmap_start, end, 
+					bootmap_pages<<PAGE_SHIFT);
+	if (bootmap == NULL)  {
+		if (nodedata_phys < start || nodedata_phys >= end)
+			free_bootmem((unsigned long)node_data[nodeid],pgdat_size);
+		node_data[nodeid] = NULL;
+		return;
+	}
+	bootmap_start = __pa(bootmap);
 	Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages); 
 	
 	bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
Index: linux/arch/x86_64/mm/srat.c
===================================================================
--- linux.orig/arch/x86_64/mm/srat.c
+++ linux/arch/x86_64/mm/srat.c
@@ -363,6 +363,12 @@ int __init acpi_scan_nodes(unsigned long
 	/* Finally register nodes */
 	for_each_node_mask(i, nodes_parsed)
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+	/* Try again in case setup_node_bootmem missed one due
+	   to missing bootmem */
+	for_each_node_mask(i, nodes_parsed)
+		if (!node_online(i))
+			setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+
 	for (i = 0; i < NR_CPUS; i++) { 
 		if (cpu_to_node[i] == NUMA_NO_NODE)
 			continue;
Index: linux/include/linux/bootmem.h
===================================================================
--- linux.orig/include/linux/bootmem.h
+++ linux/include/linux/bootmem.h
@@ -44,6 +44,7 @@ extern unsigned long __init bootmem_boot
 extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend);
 extern void __init free_bootmem (unsigned long addr, unsigned long size);
 extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
+extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal);
 extern void * __init __alloc_bootmem_low(unsigned long size,
 					 unsigned long align,
 					 unsigned long goal);
Index: linux/mm/bootmem.c
===================================================================
--- linux.orig/mm/bootmem.c
+++ linux/mm/bootmem.c
@@ -381,7 +381,7 @@ unsigned long __init free_all_bootmem (v
 	return(free_all_bootmem_core(NODE_DATA(0)));
 }
 
-void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal)
 {
 	pg_data_t *pgdat = pgdat_list;
 	void *ptr;
@@ -390,7 +390,14 @@ void * __init __alloc_bootmem(unsigned l
 		if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
 						 align, goal, 0)))
 			return(ptr);
+	return NULL;
+}
 
+void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+{
+	void *mem = __alloc_bootmem_nopanic(size,align,goal);
+	if (mem)
+		return mem;
 	/*
 	 * Whoops, we cannot satisfy the allocation request.
 	 */
-
To unsubscribe from this list: send the line "unsubscribe linux-x86_64" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Linux ia64]     [Linux Kernel]     [DCCP]     [Linux ARM]     [Linux]     [Photo]     [Yosemite News]     [Linux SCSI]     [Linux Hams]

Add to Google Powered by Linux