summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/mm/numa_64.c171
1 files changed, 71 insertions, 100 deletions
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index dc9516587cf5..bd086ebc0ffc 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -541,8 +541,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
#ifdef CONFIG_NUMA_EMU
/* Numa emulation */
-static struct bootnode physnodes[MAX_NUMNODES] __initdata;
-
static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
static char *emu_cmdline __initdata;
@@ -551,6 +549,16 @@ void __init numa_emu_cmdline(char *str)
emu_cmdline = str;
}
+static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
+{
+ int i;
+
+ for (i = 0; i < mi->nr_blks; i++)
+ if (mi->blk[i].nid == nid)
+ return i;
+ return -ENOENT;
+}
+
int __init find_node_by_addr(unsigned long addr)
{
const struct numa_meminfo *mi = &numa_meminfo;
@@ -568,63 +576,6 @@ int __init find_node_by_addr(unsigned long addr)
return NUMA_NO_NODE;
}
-static int __init setup_physnodes(unsigned long start, unsigned long end)
-{
- const struct numa_meminfo *mi = &numa_meminfo;
- int ret = 0;
- int i;
-
- memset(physnodes, 0, sizeof(physnodes));
-
- for (i = 0; i < mi->nr_blks; i++) {
- int nid = mi->blk[i].nid;
-
- if (physnodes[nid].start == physnodes[nid].end) {
- physnodes[nid].start = mi->blk[i].start;
- physnodes[nid].end = mi->blk[i].end;
- } else {
- physnodes[nid].start = min(physnodes[nid].start,
- mi->blk[i].start);
- physnodes[nid].end = max(physnodes[nid].end,
- mi->blk[i].end);
- }
- }
-
- /*
- * Basic sanity checking on the physical node map: there may be errors
- * if the SRAT or AMD code incorrectly reported the topology or the mem=
- * kernel parameter is used.
- */
- for (i = 0; i < MAX_NUMNODES; i++) {
- if (physnodes[i].start == physnodes[i].end)
- continue;
- if (physnodes[i].start > end) {
- physnodes[i].end = physnodes[i].start;
- continue;
- }
- if (physnodes[i].end < start) {
- physnodes[i].start = physnodes[i].end;
- continue;
- }
- if (physnodes[i].start < start)
- physnodes[i].start = start;
- if (physnodes[i].end > end)
- physnodes[i].end = end;
- ret++;
- }
-
- /*
- * If no physical topology was detected, a single node is faked to cover
- * the entire address space.
- */
- if (!ret) {
- physnodes[ret].start = start;
- physnodes[ret].end = end;
- ret = 1;
- }
- return ret;
-}
-
static void __init fake_physnodes(int acpi, int amd,
const struct numa_meminfo *ei)
{
@@ -663,9 +614,11 @@ static void __init fake_physnodes(int acpi, int amd,
* something went wrong, 0 otherwise.
*/
static int __init emu_setup_memblk(struct numa_meminfo *ei,
- int nid, int physnid, u64 start, u64 end)
+ struct numa_meminfo *pi,
+ int nid, int phys_blk, u64 size)
{
struct numa_memblk *eb = &ei->blk[ei->nr_blks];
+ struct numa_memblk *pb = &pi->blk[phys_blk];
if (ei->nr_blks >= NR_NODE_MEMBLKS) {
pr_err("NUMA: Too many emulated memblks, failing emulation\n");
@@ -673,12 +626,18 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
}
ei->nr_blks++;
- eb->start = start;
- eb->end = end;
+ eb->start = pb->start;
+ eb->end = pb->start + size;
eb->nid = nid;
if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
- emu_nid_to_phys[nid] = physnid;
+ emu_nid_to_phys[nid] = pb->nid;
+
+ pb->start += size;
+ if (pb->start >= pb->end) {
+ WARN_ON_ONCE(pb->start > pb->end);
+ numa_remove_memblk_from(phys_blk, pi);
+ }
printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
eb->start, eb->end, (eb->end - eb->start) >> 20);
@@ -690,6 +649,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
* to max_addr. The return value is the number of nodes allocated.
*/
static int __init split_nodes_interleave(struct numa_meminfo *ei,
+ struct numa_meminfo *pi,
u64 addr, u64 max_addr, int nr_nodes)
{
nodemask_t physnode_mask = NODE_MASK_NONE;
@@ -721,9 +681,8 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
return -1;
}
- for (i = 0; i < MAX_NUMNODES; i++)
- if (physnodes[i].start != physnodes[i].end)
- node_set(i, physnode_mask);
+ for (i = 0; i < pi->nr_blks; i++)
+ node_set(pi->blk[i].nid, physnode_mask);
/*
* Continue to fill physical nodes with fake nodes until there is no
@@ -731,8 +690,18 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
*/
while (nodes_weight(physnode_mask)) {
for_each_node_mask(i, physnode_mask) {
- u64 end = physnodes[i].start + size;
u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
+ u64 start, limit, end;
+ int phys_blk;
+
+ phys_blk = emu_find_memblk_by_nid(i, pi);
+ if (phys_blk < 0) {
+ node_clear(i, physnode_mask);
+ continue;
+ }
+ start = pi->blk[phys_blk].start;
+ limit = pi->blk[phys_blk].end;
+ end = start + size;
if (nid < big)
end += FAKE_NODE_MIN_SIZE;
@@ -741,11 +710,11 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
* Continue to add memory to this fake node if its
* non-reserved memory is less than the per-node size.
*/
- while (end - physnodes[i].start -
- memblock_x86_hole_size(physnodes[i].start, end) < size) {
+ while (end - start -
+ memblock_x86_hole_size(start, end) < size) {
end += FAKE_NODE_MIN_SIZE;
- if (end > physnodes[i].end) {
- end = physnodes[i].end;
+ if (end > limit) {
+ end = limit;
break;
}
}
@@ -764,19 +733,15 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
* next node, this one must extend to the end of the
* physical node.
*/
- if (physnodes[i].end - end -
- memblock_x86_hole_size(end, physnodes[i].end) < size)
- end = physnodes[i].end;
+ if (limit - end -
+ memblock_x86_hole_size(end, limit) < size)
+ end = limit;
- ret = emu_setup_memblk(ei, nid++ % nr_nodes, i,
- physnodes[i].start,
- min(end, physnodes[i].end));
+ ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
+ phys_blk,
+ min(end, limit) - start);
if (ret < 0)
return ret;
-
- physnodes[i].start = min(end, physnodes[i].end);
- if (physnodes[i].start == physnodes[i].end)
- node_clear(i, physnode_mask);
}
}
return 0;
@@ -805,6 +770,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
* `addr' to `max_addr'. The return value is the number of nodes allocated.
*/
static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
+ struct numa_meminfo *pi,
u64 addr, u64 max_addr, u64 size)
{
nodemask_t physnode_mask = NODE_MASK_NONE;
@@ -833,9 +799,9 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
}
size &= FAKE_NODE_MIN_HASH_MASK;
- for (i = 0; i < MAX_NUMNODES; i++)
- if (physnodes[i].start != physnodes[i].end)
- node_set(i, physnode_mask);
+ for (i = 0; i < pi->nr_blks; i++)
+ node_set(pi->blk[i].nid, physnode_mask);
+
/*
* Fill physical nodes with fake nodes of size until there is no memory
* left on any of them.
@@ -843,10 +809,18 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
while (nodes_weight(physnode_mask)) {
for_each_node_mask(i, physnode_mask) {
u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
- u64 end;
+ u64 start, limit, end;
+ int phys_blk;
- end = find_end_of_node(physnodes[i].start,
- physnodes[i].end, size);
+ phys_blk = emu_find_memblk_by_nid(i, pi);
+ if (phys_blk < 0) {
+ node_clear(i, physnode_mask);
+ continue;
+ }
+ start = pi->blk[phys_blk].start;
+ limit = pi->blk[phys_blk].end;
+
+ end = find_end_of_node(start, limit, size);
/*
* If there won't be at least FAKE_NODE_MIN_SIZE of
* non-reserved memory in ZONE_DMA32 for the next node,
@@ -861,19 +835,15 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
* next node, this one must extend to the end of the
* physical node.
*/
- if (physnodes[i].end - end -
- memblock_x86_hole_size(end, physnodes[i].end) < size)
- end = physnodes[i].end;
+ if (limit - end -
+ memblock_x86_hole_size(end, limit) < size)
+ end = limit;
- ret = emu_setup_memblk(ei, nid++ % MAX_NUMNODES, i,
- physnodes[i].start,
- min(end, physnodes[i].end));
+ ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
+ phys_blk,
+ min(end, limit) - start);
if (ret < 0)
return ret;
-
- physnodes[i].start = min(end, physnodes[i].end);
- if (physnodes[i].start == physnodes[i].end)
- node_clear(i, physnode_mask);
}
}
return 0;
@@ -886,10 +856,12 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
static bool __init numa_emulation(int acpi, int amd)
{
static struct numa_meminfo ei __initdata;
+ static struct numa_meminfo pi __initdata;
const u64 max_addr = max_pfn << PAGE_SHIFT;
int i, ret;
memset(&ei, 0, sizeof(ei));
+ pi = numa_meminfo;
for (i = 0; i < MAX_NUMNODES; i++)
emu_nid_to_phys[i] = NUMA_NO_NODE;
@@ -903,12 +875,12 @@ static bool __init numa_emulation(int acpi, int amd)
u64 size;
size = memparse(emu_cmdline, &emu_cmdline);
- ret = split_nodes_size_interleave(&ei, 0, max_addr, size);
+ ret = split_nodes_size_interleave(&ei, &pi, 0, max_addr, size);
} else {
unsigned long n;
n = simple_strtoul(emu_cmdline, NULL, 0);
- ret = split_nodes_interleave(&ei, 0, max_addr, n);
+ ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
}
if (ret < 0)
@@ -980,7 +952,6 @@ void __init initmem_init(void)
if (numa_cleanup_meminfo(&numa_meminfo) < 0)
continue;
#ifdef CONFIG_NUMA_EMU
- setup_physnodes(0, max_pfn << PAGE_SHIFT);
/*
* If requested, try emulation. If emulation is not used,
* build identity emu_nid_to_phys[] for numa_add_cpu()