[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Google
  Web www.spinics.net

x86 remap allocator in kernel 3.0



Hi folks,

I've just discovered that the crash utility fails to initialize the vm 
subsystem properly on our latest SLES 32-bit kernels. It turns out that our 
kernels are compiled with CONFIG_DISCONTIGMEM=y, which causes pgdat structs to 
be allocated by the remap allocator (cf. arch/x86/mm/numa_32.c and also the 
code in setup_node_data).

If you don't know what the remap allocator is (like I didn't before I hit the 
bug), it's a very special early-boot allocator which remaps physical pages 
from low memory to high memory, giving them virtual addresses from the 
identity mapping. Looks a bit like this:

                        physical addr
                       +------------+
                       |            |
                       +------------+
                  +--> |  KVA RAM   |
                  |    +------------+
                  |    |            |
                  |    \/\/\/\/\/\/\/
                  |    /\/\/\/\/\/\/\
                  |    |            |
  virtual addr    |    |  highmem   |
 +------------+   |    |------------|
 |            | -----> |            |
 +------------+   |    +------------+
 |  remap va  | --+    |   KVA PG   | (unused)
 +------------+        +------------+
 |            |        |            |
 |            | -----> | RAM bottom |
 +------------+        +------------+

This breaks a very basic assumption that crash makes about low-memory virtual 
addresses: that they translate to physical addresses by a fixed offset.

The attached patch fixes the issue for me, but may not be the cleanest method 
to handle these mappings.

Ken'ichi Ohmichi, please note that makedumpfile is also affected by this 
deficiency. On my test system, it will fail to produce any output if I set 
dump level to anything greater than zero:

makedumpfile -c -d 31 -x vmlinux-3.0.13-0.5-pae.debug vmcore kdump.31
readmem: Can't convert a physical address(34a012b4) to offset.
readmem: type_addr: 0, addr:f4a012b4, size:4
get_mm_discontigmem: Can't get node_start_pfn.

makedumpfile Failed.

However, fixing this for makedumpfile is harder, and it will most likely 
require a few more lines in VMCOREINFO, because debug symbols may not be 
available at dump time, and I can't see any alternative method to locate the 
remapped regions.

Regards,
Petr Tesarik
SUSE Linux
From: Petr Tesarik <ptesarik@xxxxxxx>
Subject: [x86] Add correct handling of regions allocated with the remap allocator
References: bnc#738742
Patch-mainline: no

For NUMA x86, the pgdat is remapped into the node's physical memory. Since
that physical memory may not be reachable through the identity mapping, a
small part of the identity-mapped virtual range is remapped to point at it.

This special case has never been handled properly by crash.

Signed-off-by: Petr Tesarik <ptesarik@xxxxxxx>

---
 defs.h |    6 ++++++
 x86.c  |   58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 60 insertions(+), 4 deletions(-)

--- a/defs.h
+++ b/defs.h
@@ -1843,6 +1843,12 @@ struct node_table {
 	ulong present;
 	ulonglong start_paddr;
 	ulong start_mapnr;
+#ifdef X86
+	ulong remap_start_vaddr;
+	ulong remap_end_vaddr;
+	ulong remap_start_pfn;
+	int remap_init;
+#endif
 };
 
 struct meminfo;
--- a/x86.c
+++ b/x86.c
@@ -2994,6 +2994,55 @@ no_kpage:
 	return FALSE;
 }
 
+/* Load node n's remap window (node_remap_* kernel symbols) into *node. */
+static void
+initialize_remap(struct node_table *node, int n)
+{
+	ulong start_vaddr, end_vaddr, start_pfn;
+	struct syment *sp;
+	/* Set the flag first so a kernel lacking the symbols is probed once. */
+	node->remap_init = TRUE;
+	if (! (sp = symbol_search("node_remap_start_vaddr")) )
+		return;
+	readmem(sp->value + n * sizeof(void*), KVADDR, &start_vaddr,
+		sizeof(ulong), "node_remap_start_vaddr", FAULT_ON_ERROR);
+
+	if (! (sp = symbol_search("node_remap_end_vaddr")) )
+		return;
+	readmem(sp->value + n * sizeof(void*), KVADDR, &end_vaddr,
+		sizeof(ulong), "node_remap_end_vaddr", FAULT_ON_ERROR);
+
+	if (! (sp = symbol_search("node_remap_start_pfn")) )
+		return;
+	readmem(sp->value + n * sizeof(ulong), KVADDR, &start_pfn,
+		sizeof(ulong), "node_remap_start_pfn", FAULT_ON_ERROR);
+	/* Reached only when all three symbols exist; store the window. */
+	node->remap_start_vaddr = start_vaddr;
+	node->remap_end_vaddr = end_vaddr;
+	node->remap_start_pfn = start_pfn;
+}
+
+/* Map kvaddr via the per-node remap windows; TRUE with *paddr set on a hit. */
+static int
+x86_kvtop_remap(ulong kvaddr, physaddr_t *paddr)
+{
+	struct node_table *nt;
+	int nid;
+	if (!vt->node_table)
+		return FALSE;
+	for (nid = 0; nid < vt->numnodes; nid++) {
+		nt = vt->node_table + nid;
+		if (!nt->remap_init)	/* fetch this node's window on first use */
+			initialize_remap(nt, nid);
+		if (kvaddr < nt->remap_start_vaddr ||
+		    kvaddr >= nt->remap_end_vaddr)
+			continue;	/* outside [start, end) of this node */
+		*paddr = PTOB(nt->remap_start_pfn) +
+			kvaddr - nt->remap_start_vaddr;
+		return TRUE;
+	}
+	return FALSE;
+}
 
 static int
 x86_kvtop_PAE(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbose)
@@ -3023,12 +3072,13 @@ x86_kvtop_PAE(struct task_context *tc, u
 		else
 			pgd = (ulonglong *)symbol_value("idle_pg_table");
 	} else {
-		if (!vt->vmalloc_start) {
+		if (x86_kvtop_remap(kvaddr, paddr)) {
+			if (!verbose)
+				return TRUE;
+		} else if (!vt->vmalloc_start) {
 			*paddr = VTOP(kvaddr);
 			return TRUE;
-		}
-
-		if (!IS_VMALLOC_ADDR(kvaddr)) { 
+		} else if (!IS_VMALLOC_ADDR(kvaddr)) { 
 			*paddr = VTOP(kvaddr);
 			if (!verbose)
 				return TRUE;
--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/crash-utility

[Home]     [Fedora Legacy List]     [Fedora Maintainers]     [Fedora Desktop]     [Red Hat 9 Bible]     [Fedora Bible]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [Yosemite Photos]     [KDE Users]     [Fedora Tools]

Add to Google

Powered by Linux