aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSteve Capper <steve.capper@linaro.org>2013-05-28 16:03:00 +0100
committerSteve Capper <steve.capper@linaro.org>2013-05-29 16:36:06 +0100
commitc7e4be6d3803c689cb4c90231547a20a1016e876 (patch)
tree8e6fe04745d34700260aa0ccc1cbc708ec593efa
parent67ec9ed9f542f50951e899ed6c1d7bab29511157 (diff)
downloadlinux-aarch64-numa.tar.gz
ARM64: mm: NUMA Supportaarch64-numa
This patch adds support for NUMA. At the moment, the number of nodes has to be specified on the commandline. One can also, optionally, specify the memory size of each node. (Otherwise the memory range is split roughly equally between nodes). CPUs can be striped across nodes (cpu number modulo the number of nodes), or assigned to a node based on their topology_physical_package_id. Signed-off-by: Steve Capper <steve.capper@linaro.org>
-rw-r--r--arch/arm64/Kconfig26
-rw-r--r--arch/arm64/include/asm/mmzone.h32
-rw-r--r--arch/arm64/include/asm/numa.h28
-rw-r--r--arch/arm64/kernel/setup.c5
-rw-r--r--arch/arm64/mm/Makefile2
-rw-r--r--arch/arm64/mm/init.c4
-rw-r--r--arch/arm64/mm/mmu.c2
-rw-r--r--arch/arm64/mm/numa.c214
8 files changed, 313 insertions(+), 0 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4024fdde5ce..77d74c1fb79 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -183,6 +183,32 @@ config HW_PERF_EVENTS
source "mm/Kconfig"
+config NUMA
+ bool "NUMA Support"
+ help
+ Say Y to compile the kernel to support NUMA (Non-Uniform Memory
+ Access). At the moment, one has to specify the number of nodes using
+ the commandline:
+ numa=fake=x,[size0],[size1],...,[sizeN-1],[usetopology]
+ where x is the number of nodes, and sizeY is the size of node Y in
+ bytes (one can suffix m or g for megabytes or gigabytes). If no sizes
+ are specified, the memory is distributed roughly evenly between nodes.
+ If "usetopology" is specified, the "topology_physical_package_id" is
+ used to assign CPUs to nodes.
+
+config NODES_SHIFT
+ int "Maximum NUMA Nodes (as a power of 2)" if NUMA
+ range 1 10
+ default "1"
+ depends on NEED_MULTIPLE_NODES
+ ---help---
+ Specify the maximum number of NUMA Nodes available on the target
+ system. Increases memory reserved to accommodate various tables.
+
+config USE_PERCPU_NUMA_NODE_ID
+ def_bool y
+ depends on NUMA
+
endmenu
menu "Boot options"
diff --git a/arch/arm64/include/asm/mmzone.h b/arch/arm64/include/asm/mmzone.h
new file mode 100644
index 00000000000..d0bf32d7c52
--- /dev/null
+++ b/arch/arm64/include/asm/mmzone.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2013 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_MMZONE_H
+#define __ASM_MMZONE_H
+
+/* Per-node pg_data_t pointers; allocated and filled in arm64_numa_init(). */
+#define NODE_DATA(nid) (node_data[nid])
+extern struct pglist_data *node_data[];
+extern cpumask_var_t *node_to_cpumask_map;
+
+/* -1 is NUMA_NO_NODE: fall back to all CPUs rather than indexing the map. */
+#define cpumask_of_node(node) ((node) == -1 ? \
+ cpu_all_mask : \
+ node_to_cpumask_map[node])
+
+/* Flat (single-level) topology: every node is its own parent. */
+#define parent_node(node) (node)
+
+
+
+#endif /* __ASM_MMZONE_H */
diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
new file mode 100644
index 00000000000..a9c2c53b945
--- /dev/null
+++ b/arch/arm64/include/asm/numa.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2013 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_NUMA_H
+#define __ASM_NUMA_H
+
+#ifdef CONFIG_NUMA
+/* Set up fake NUMA nodes and carve up memblock memory between them. */
+void __init arm64_numa_init(void);
+#else
+/*
+ * !CONFIG_NUMA stub. Use (void): in C an empty parameter list means
+ * "unspecified arguments", which does not match the prototype above
+ * and is flagged by checkpatch.
+ */
+static inline void arm64_numa_init(void)
+{
+}
+#endif
+
+#endif /* __ASM_NUMA_H */
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index add6ea61684..cd277d83f99 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -296,6 +296,11 @@ static int __init topology_init(void)
{
int i;
+#ifdef CONFIG_NUMA
+ for_each_online_node(i)
+ register_one_node(i);
+#endif
+
for_each_possible_cpu(i) {
struct cpu *cpu = &per_cpu(cpu_data, i);
cpu->hotpluggable = 1;
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 3140a2abcdc..56eae3ff9d4 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -2,3 +2,5 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
cache.o copypage.o flush.o \
ioremap.o mmap.o pgd.o mmu.o \
context.o tlb.o proc.o
+
+obj-$(CONFIG_NUMA) += numa.o
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 8376bed686b..9265a419eed 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -81,7 +81,9 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
max_zone_pfns[ZONE_DMA32] = max(min, min(max, MAX_DMA32_PFN));
#endif
+#ifndef CONFIG_NUMA
memblock_set_node(__pfn_to_phys(min), __pfn_to_phys(max), 0);
+#endif
free_area_init_nodes(max_zone_pfns);
}
@@ -262,7 +264,9 @@ void __init mem_init(void)
arm64_swiotlb_init();
+#ifndef CONFIG_NEED_MULTIPLE_NODES
max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
+#endif
#ifndef CONFIG_SPARSEMEM_VMEMMAP
/* this will put all unused low memory onto the freelists */
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index eeecc9c8ed6..abc96e46299 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -33,6 +33,7 @@
#include <asm/sizes.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
+#include <asm/numa.h>
#include "mm.h"
@@ -325,6 +326,7 @@ void __init paging_init(void)
init_mem_pgprot();
map_mem();
+ arm64_numa_init();
/*
* Finally flush the caches and tlb to ensure that we're in a
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
new file mode 100644
index 00000000000..daf2e56cbd2
--- /dev/null
+++ b/arch/arm64/mm/numa.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright (C) 2013 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/memblock.h>
+#include <linux/mmzone.h>
+#include <linux/cpu.h>
+
+#include <asm/numa.h>
+
+/* Per-node pg_data_t, allocated from memblock in arm64_numa_init(). */
+struct pglist_data *node_data[MAX_NUMNODES];
+EXPORT_SYMBOL(node_data);
+
+/* Array of numa_node_count cpumasks, also allocated in arm64_numa_init(). */
+cpumask_var_t *node_to_cpumask_map;
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+/* Set by "numa=fake=...,usetopology": group CPUs by physical package id. */
+static unsigned int numa_use_topology;
+/* Number of fake nodes; default 1, overridden by the numa= early param. */
+static unsigned int numa_node_count = 1;
+
+/* Cursor into the per-node size list of the numa= option; NULL when absent
+ * or once parsing fails. Consumed by numa_split_memblocks(). */
+static char *memcmdline __initdata;
+
+/*
+ * Add a CPU to a NUMA node.
+ * Default assignment policy is the cpu number modulo the number of nodes.
+ *
+ * We can also group CPUs via the topology_physical_package_id.
+ * (if the user adds "usetopology" to the command line).
+ * When we add CPU 0 (the boot CPU), it is always to node 0, as we don't have
+ * the topology information at that time.
+ * Subsequent CPUs get added based on the topology_physical_package_id.
+ * To stop CPU0 being added to the same node as CPUs on a different cluster,
+ * we subtract the topology_physical_package_id of node 0.
+ */
+static void __cpuinit add_cpu_to_node(int cpu)
+{
+ unsigned int node;
+ unsigned int n0 = topology_physical_package_id(0);
+ unsigned int nc = topology_physical_package_id(cpu);
+
+ if (numa_use_topology)
+ /*
+ * Cluster-relative assignment: CPU0 anchors node 0, other CPUs
+ * land on (nc - n0) mod numa_node_count; numa_node_count is
+ * added first to keep the dividend from going "negative".
+ * NOTE(review): all operands are unsigned, so if n0 exceeds
+ * nc + numa_node_count this wraps and the modulo result is
+ * arbitrary — confirm package ids are small enough in practice.
+ */
+ node = cpu ? (numa_node_count + nc - n0) % numa_node_count : 0;
+ else
+ /* Default policy: stripe CPUs round-robin across the nodes. */
+ node = cpu % numa_node_count;
+
+ cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
+ set_cpu_numa_node(cpu, node);
+ pr_info("NUMA: Adding CPU %d to node %d\n", cpu, node);
+}
+
+/*
+ * Hotplug callback: place each CPU on a node as it comes online.
+ * Only CPU_ONLINE is handled; a CPU going offline is not removed from
+ * its node's cpumask here.
+ */
+static int __cpuinit numa_add_cpu(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ if (action == CPU_ONLINE) {
+ int cpu = (long)hcpu;
+ add_cpu_to_node(cpu);
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata numa_node_nb = {
+ .notifier_call = numa_add_cpu,
+ .priority = 1, /* Must run before sched domains notifier. */
+};
+
+ /*
+ * Split the available memory between the NUMA nodes.
+ *
+ * By default, the memory is distributed roughly evenly between nodes.
+ *
+ * One can also specify requested node sizes on the command line, if
+ * "memcmdline" is not NULL, we try to parse it as a size.
+ *
+ * We traverse memory blocks rather than the pfn addressable range to allow for
+ * sparse memory configurations and memory holes.
+ */
+static void __init numa_split_memblocks(void)
+{
+ int node;
+ unsigned long pfnsrem = 0, pfnsblock, pfncurr, pfnend = 0;
+ unsigned long pfn_starts[MAX_NUMNODES];
+ struct memblock_region *reg;
+
+ /* Total page count; pfnend is left at the end of the last region. */
+ for_each_memblock(memory, reg) {
+ pfnend = memblock_region_memory_end_pfn(reg);
+ pfnsrem += pfnend - memblock_region_memory_base_pfn(reg);
+ }
+
+ /* Start walking from the first memblock region. */
+ reg = memblock.memory.regions;
+ pfncurr = memblock_region_memory_base_pfn(reg);
+ pfnsblock = memblock_region_memory_end_pfn(reg) - pfncurr;
+ pfn_starts[0] = pfncurr;
+
+ /* Choose a start pfn for every node except the last one. */
+ for(node = 0; node < numa_node_count - 1; node++) {
+ /* Default share: even split of what remains. */
+ unsigned long pfnsnode = pfnsrem / (numa_node_count - node);
+
+ if (memcmdline) {
+ unsigned long nsize = __phys_to_pfn(
+ memparse(memcmdline, &memcmdline));
+
+ if (*memcmdline == ',')
+ ++memcmdline;
+
+ /*
+ * A requested size must be non-zero and strictly
+ * smaller than the remaining memory (the last node
+ * always gets a non-empty remainder); otherwise the
+ * rest of the size list is ignored.
+ */
+ if ((nsize > 0) && (nsize < pfnsrem))
+ pfnsnode = nsize;
+ else
+ memcmdline = NULL;
+ }
+
+ /* Consume pfnsnode pages, spanning region gaps as needed. */
+ while(pfnsnode > 0) {
+ unsigned long pfnsset = min(pfnsnode, pfnsblock);
+
+ pfncurr += pfnsset;
+
+ pfnsblock -= pfnsset;
+ pfnsrem -= pfnsset;
+ pfnsnode -= pfnsset;
+
+ /*
+ * Region exhausted: advance to the next one. The
+ * nsize < pfnsrem check above keeps pfnsrem > 0 here,
+ * so a further region should exist when this runs.
+ */
+ if (pfnsblock == 0) {
+ reg++;
+ pfncurr = memblock_region_memory_base_pfn(reg);
+ pfnsblock = memblock_region_memory_end_pfn(reg)
+ - pfncurr;
+ }
+ }
+
+ pfn_starts[node + 1] = pfncurr;
+ }
+
+ /* Assign each [start, next start) span to its node... */
+ for (node = 0; node < numa_node_count - 1; node++)
+ memblock_set_node(__pfn_to_phys(pfn_starts[node]),
+ __pfn_to_phys(pfn_starts[node + 1]
+ - pfn_starts[node]),
+ node);
+
+ /* ...and the last node gets everything up to the final pfn. */
+ memblock_set_node(__pfn_to_phys(pfn_starts[node]),
+ __pfn_to_phys(pfnend - pfn_starts[node]), node);
+}
+
+/*
+ * Bring up the fake NUMA topology: allocate and online the per-node
+ * pg_data_t structures and the node-to-cpumask map, split memory between
+ * the nodes, and register the hotplug notifier that places CPUs on nodes.
+ * The boot CPU is added here; it is already online so it will not get a
+ * CPU_ONLINE notification. Use (void) to match the header prototype.
+ */
+void __init arm64_numa_init(void)
+{
+ int node;
+ size_t size;
+
+ for (node = 0; node < numa_node_count; node++) {
+ /*
+ * NOTE(review): memblock_alloc() returns 0 on failure and is
+ * not checked here (nor below) — __va(0) would then be
+ * zeroed. Early-boot allocation failure is fatal anyway, but
+ * consider a panic() on failure.
+ */
+ phys_addr_t pa = memblock_alloc(sizeof(pg_data_t),
+ L1_CACHE_BYTES);
+ NODE_DATA(node) = __va(pa);
+ memset(NODE_DATA(node), 0, sizeof(pg_data_t));
+ node_set_online(node);
+ }
+
+ size = sizeof(cpumask_var_t) * numa_node_count;
+ node_to_cpumask_map = __va(memblock_alloc(size, L1_CACHE_BYTES));
+ memset(node_to_cpumask_map, 0, size);
+
+ numa_split_memblocks();
+ register_cpu_notifier(&numa_node_nb);
+
+ add_cpu_to_node(0);
+}
+
+/*
+ * Parse the "numa=" early parameter:
+ * numa=fake=<n>[,size0,...,sizeN-1][,usetopology]
+ * Sets numa_node_count, points memcmdline at the optional size list
+ * (consumed later by numa_split_memblocks()), and enables topology-based
+ * CPU placement when "usetopology" appears anywhere after "fake=".
+ * Returns 0 on success or if "fake=" is absent, -EINVAL on bad input.
+ */
+static int __init early_numa(char *p)
+{
+ if (!p)
+ return 0;
+
+ p = strstr(p, "fake=");
+ if (p) {
+ int num_nodes = 0;
+ int optres;
+
+ p += strlen("fake=");
+ /* get_option(): 0 = no int parsed, 3 = range ("-"), both bad. */
+ optres = get_option(&p, &num_nodes);
+ if ((optres == 0) || (optres == 3))
+ return -EINVAL;
+
+ if ((num_nodes > 0) && (num_nodes <= MAX_NUMNODES)) {
+ pr_info("NUMA: setting up fake NUMA with %d nodes.\n",
+ num_nodes);
+ numa_node_count = num_nodes;
+ } else {
+ pr_info("NUMA: can't set up %d nodes for NUMA.\n"
+ "(MAX_NUMNODES = %d)\n",
+ num_nodes, MAX_NUMNODES);
+ return -EINVAL;
+ }
+
+ /* 2 = int followed by ',': a per-node size list follows. */
+ if (optres == 2)
+ memcmdline = p;
+
+ if (strstr(p, "usetopology")) {
+ numa_use_topology = 1;
+ pr_info("NUMA: using CPU topology to assign nodes.\n");
+ } else
+ pr_info("NUMA: NOT using CPU topology.\n");
+ }
+
+ return 0;
+}
+
+early_param("numa", early_numa);