aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGraeme Gregory <graeme.gregory@linaro.org>2016-05-17 12:41:13 +0100
committerGraeme Gregory <graeme.gregory@linaro.org>2016-05-17 12:41:13 +0100
commitaa4f2e35a233a1bf805e0a09e080f62901b86e90 (patch)
tree87ae5361926e68957a6cfb2d386f9c8c19920676
parente524f676b12e8288d74add07db3957184c0f45de (diff)
parent7e915347e666b7fe00d51a188152891956007b18 (diff)
downloadleg-kernel-aa4f2e35a233a1bf805e0a09e080f62901b86e90.tar.gz
Merge branch 'topic-numa' into leg-kernel
-rw-r--r--Documentation/devicetree/bindings/numa.txt275
-rw-r--r--arch/arm64/Kconfig27
-rw-r--r--arch/arm64/include/asm/acpi.h9
-rw-r--r--arch/arm64/include/asm/mmu.h1
-rw-r--r--arch/arm64/include/asm/mmzone.h12
-rw-r--r--arch/arm64/include/asm/numa.h47
-rw-r--r--arch/arm64/include/asm/pgtable.h15
-rw-r--r--arch/arm64/include/asm/topology.h10
-rw-r--r--arch/arm64/kernel/Makefile1
-rw-r--r--arch/arm64/kernel/acpi_numa.c149
-rw-r--r--arch/arm64/kernel/pci.c10
-rw-r--r--arch/arm64/kernel/setup.c17
-rw-r--r--arch/arm64/kernel/smp.c6
-rw-r--r--arch/arm64/mm/Makefile1
-rw-r--r--arch/arm64/mm/init.c35
-rw-r--r--arch/arm64/mm/mm.h1
-rw-r--r--arch/arm64/mm/mmu.c2
-rw-r--r--arch/arm64/mm/numa.c402
-rw-r--r--arch/ia64/include/asm/acpi.h3
-rw-r--r--arch/ia64/kernel/acpi.c2
-rw-r--r--arch/ia64/kernel/setup.c1
-rw-r--r--arch/x86/include/asm/acpi.h1
-rw-r--r--arch/x86/mm/numa.c2
-rw-r--r--arch/x86/mm/srat.c116
-rw-r--r--drivers/acpi/Kconfig4
-rw-r--r--drivers/acpi/numa.c224
-rw-r--r--drivers/firmware/efi/arm-init.c8
-rw-r--r--drivers/firmware/efi/libstub/fdt.c24
-rw-r--r--drivers/of/Kconfig3
-rw-r--r--drivers/of/Makefile1
-rw-r--r--drivers/of/of_numa.c211
-rw-r--r--include/acpi/acpi_numa.h4
-rw-r--r--include/linux/acpi.h18
-rw-r--r--include/linux/of.h9
34 files changed, 1442 insertions, 209 deletions
diff --git a/Documentation/devicetree/bindings/numa.txt b/Documentation/devicetree/bindings/numa.txt
new file mode 100644
index 000000000000..21b35053ca5a
--- /dev/null
+++ b/Documentation/devicetree/bindings/numa.txt
@@ -0,0 +1,275 @@
+==============================================================================
+NUMA binding description.
+==============================================================================
+
+==============================================================================
+1 - Introduction
+==============================================================================
+
+Systems employing a Non Uniform Memory Access (NUMA) architecture contain
+collections of hardware resources including processors, memory, and I/O buses,
+that comprise what is commonly known as a NUMA node.
+Processor accesses to memory within the local NUMA node is generally faster
+than processor accesses to memory outside of the local NUMA node.
+DT defines interfaces that allow the platform to convey NUMA node
+topology information to OS.
+
+==============================================================================
+2 - numa-node-id
+==============================================================================
+
+For the purpose of identification, each NUMA node is associated with a unique
+token known as a node id. For the purpose of this binding
+a node id is a 32-bit integer.
+
+A device node is associated with a NUMA node by the presence of a
+numa-node-id property which contains the node id of the device.
+
+Example:
+ /* numa node 0 */
+ numa-node-id = <0>;
+
+ /* numa node 1 */
+ numa-node-id = <1>;
+
+==============================================================================
+3 - distance-map
+==============================================================================
+
+The optional device tree node distance-map describes the relative
+distance (memory latency) between all numa nodes.
+
+- compatible : Should at least contain "numa-distance-map-v1".
+
+- distance-matrix
+ This property defines a matrix to describe the relative distances
+ between all numa nodes.
+ It is represented as a list of node pairs and their relative distance.
+
+ Note:
+ 1. Each entry represents distance from first node to second node.
+ The distances are equal in either direction.
+ 2. The distance from a node to self (local distance) is represented
+ with value 10 and all internode distance should be represented with
+ a value greater than 10.
+ 3. distance-matrix should have entries in lexicographical ascending
+ order of nodes.
+ 4. There must be only one device node distance-map which must
+ reside in the root node.
+ 5. If the distance-map node is not present, a default
+ distance-matrix is used.
+
+Example:
+ 4 nodes connected in mesh/ring topology as below,
+
+ 0_______20______1
+ | |
+ | |
+ 20 20
+ | |
+ | |
+ |_______________|
+ 3 20 2
+
+ if relative distance for each hop is 20,
+ then internode distance would be,
+ 0 -> 1 = 20
+ 1 -> 2 = 20
+ 2 -> 3 = 20
+ 3 -> 0 = 20
+ 0 -> 2 = 40
+ 1 -> 3 = 40
+
+ and dt presentation for this distance matrix is,
+
+ distance-map {
+ compatible = "numa-distance-map-v1";
+ distance-matrix = <0 0 10>,
+ <0 1 20>,
+ <0 2 40>,
+ <0 3 20>,
+ <1 0 20>,
+ <1 1 10>,
+ <1 2 20>,
+ <1 3 40>,
+ <2 0 40>,
+ <2 1 20>,
+ <2 2 10>,
+ <2 3 20>,
+ <3 0 20>,
+ <3 1 40>,
+ <3 2 20>,
+ <3 3 10>;
+ };
+
+==============================================================================
+4 - Example dts
+==============================================================================
+
+Dual socket system consists of 2 boards connected through ccn bus and
+each board having one socket/soc of 8 cpus, memory and pci bus.
+
+ memory@c00000 {
+ device_type = "memory";
+ reg = <0x0 0xc00000 0x0 0x80000000>;
+ /* node 0 */
+ numa-node-id = <0>;
+ };
+
+ memory@10000000000 {
+ device_type = "memory";
+ reg = <0x100 0x0 0x0 0x80000000>;
+ /* node 1 */
+ numa-node-id = <1>;
+ };
+
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ /* node 0 */
+ numa-node-id = <0>;
+ };
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x1>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x2>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x3>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@4 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x4>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@5 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x5>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@6 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x6>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@7 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x7>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@8 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x8>;
+ enable-method = "psci";
+ /* node 1 */
+ numa-node-id = <1>;
+ };
+ cpu@9 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x9>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@a {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xa>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@b {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xb>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@c {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xc>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@d {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xd>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@e {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xe>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@f {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xf>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ };
+
+ pcie0: pcie0@848000000000 {
+ compatible = "arm,armv8";
+ device_type = "pci";
+ bus-range = <0 255>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ reg = <0x8480 0x00000000 0 0x10000000>; /* Configuration space */
+ ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000 0x70 0x00000000>;
+ /* node 0 */
+ numa-node-id = <0>;
+ };
+
+ pcie1: pcie1@948000000000 {
+ compatible = "arm,armv8";
+ device_type = "pci";
+ bus-range = <0 255>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ reg = <0x9480 0x00000000 0 0x10000000>; /* Configuration space */
+ ranges = <0x03000000 0x9010 0x00000000 0x9010 0x00000000 0x70 0x00000000>;
+ /* node 1 */
+ numa-node-id = <1>;
+ };
+
+ distance-map {
+ compatible = "numa-distance-map-v1";
+ distance-matrix = <0 0 10>,
+ <0 1 20>,
+ <1 1 10>;
+ };
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 42ebcff9e754..0189dd5dc770 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -14,6 +14,7 @@ config ARM64
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
select ARCH_WANT_FRAME_POINTERS
@@ -79,6 +80,7 @@ config ARM64
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_MEMBLOCK
+ select HAVE_MEMBLOCK_NODE_MAP if NUMA
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
@@ -101,6 +103,7 @@ config ARM64
select SYSCTL_EXCEPTION_TRACE
select HAVE_CONTEXT_TRACKING
select HAVE_ARM_SMCCC
+ select OF_NUMA if NUMA && OF
help
ARM 64-bit (AArch64) Linux support.
@@ -549,6 +552,30 @@ config HOTPLUG_CPU
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu.
+# Common NUMA Features
+config NUMA
+ bool "Numa Memory Allocation and Scheduler Support"
+ depends on SMP
+ help
+ Enable NUMA (Non Uniform Memory Access) support.
+
+ The kernel will try to allocate memory used by a CPU on the
+ local memory of the CPU and add some more
+ NUMA awareness to the kernel.
+
+config NODES_SHIFT
+ int "Maximum NUMA Nodes (as a power of 2)"
+ range 1 10
+ default "2"
+ depends on NEED_MULTIPLE_NODES
+ help
+ Specify the maximum number of NUMA Nodes available on the target
+ system. Increases memory reserved to accommodate various tables.
+
+config USE_PERCPU_NUMA_NODE_ID
+ def_bool y
+ depends on NUMA
+
source kernel/Kconfig.preempt
source kernel/Kconfig.hz
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 4a6c959995cc..355a28fb7261 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -126,4 +126,13 @@ static inline void arch_apei_flush_tlb_one(unsigned long addr)
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
}
#endif /* CONFIG_ACPI_APEI */
+
+#ifdef CONFIG_ACPI_NUMA
+int arm64_acpi_numa_init(void);
+int acpi_numa_get_nid(unsigned int cpu, u64 hwid);
+#else
+static inline int arm64_acpi_numa_init(void) { return -ENOSYS; }
+static inline int acpi_numa_get_nid(unsigned int cpu, u64 hwid) { return NUMA_NO_NODE; }
+#endif /* CONFIG_ACPI_NUMA */
+
#endif /*_ASM_ACPI_H*/
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 990124a67eeb..97b1d8f26b9c 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -29,6 +29,7 @@ typedef struct {
#define ASID(mm) ((mm)->context.id.counter & 0xffff)
extern void paging_init(void);
+extern void bootmem_init(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
extern void init_mem_pgprot(void);
extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
diff --git a/arch/arm64/include/asm/mmzone.h b/arch/arm64/include/asm/mmzone.h
new file mode 100644
index 000000000000..a0de9e6ba73f
--- /dev/null
+++ b/arch/arm64/include/asm/mmzone.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_MMZONE_H
+#define __ASM_MMZONE_H
+
+#ifdef CONFIG_NUMA
+
+#include <asm/numa.h>
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid) (node_data[(nid)])
+
+#endif /* CONFIG_NUMA */
+#endif /* __ASM_MMZONE_H */
diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
new file mode 100644
index 000000000000..600887e491fd
--- /dev/null
+++ b/arch/arm64/include/asm/numa.h
@@ -0,0 +1,47 @@
+#ifndef __ASM_NUMA_H
+#define __ASM_NUMA_H
+
+#include <asm/topology.h>
+
+#ifdef CONFIG_NUMA
+
+#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2)
+
+/* currently, arm64 implements flat NUMA topology */
+#define parent_node(node) (node)
+
+int __node_distance(int from, int to);
+#define node_distance(a, b) __node_distance(a, b)
+
+extern nodemask_t numa_nodes_parsed __initdata;
+
+/* Mappings between node number and cpus on that node. */
+extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+void numa_clear_node(unsigned int cpu);
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+const struct cpumask *cpumask_of_node(int node);
+#else
+/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+ return node_to_cpumask_map[node];
+}
+#endif
+
+void __init arm64_numa_init(void);
+int __init numa_add_memblk(int nodeid, u64 start, u64 end);
+void __init numa_set_distance(int from, int to, int distance);
+void __init numa_free_distance(void);
+void __init early_map_cpu_to_node(unsigned int cpu, int nid);
+void numa_store_cpu_info(unsigned int cpu);
+
+#else /* CONFIG_NUMA */
+
+static inline void numa_store_cpu_info(unsigned int cpu) { }
+static inline void arm64_numa_init(void) { }
+static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { }
+
+#endif /* CONFIG_NUMA */
+
+#endif /* __ASM_NUMA_H */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 989fef16d461..89b8f205a14c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -272,6 +272,21 @@ static inline pgprot_t mk_sect_prot(pgprot_t prot)
return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT);
}
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * See the comment in include/asm-generic/pgtable.h
+ */
+static inline int pte_protnone(pte_t pte)
+{
+ return (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) == PTE_PROT_NONE;
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+ return pte_protnone(pmd_pte(pmd));
+}
+#endif
+
/*
* THP definitions.
*/
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index a3e9d6fdbf21..8b57339823e9 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -22,6 +22,16 @@ void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid);
const struct cpumask *cpu_coregroup_mask(int cpu);
+#ifdef CONFIG_NUMA
+
+struct pci_bus;
+int pcibus_to_node(struct pci_bus *bus);
+#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
+ cpu_all_mask : \
+ cpumask_of_node(pcibus_to_node(bus)))
+
+#endif /* CONFIG_NUMA */
+
#include <asm-generic/topology.h>
#endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 3793003e16a2..69569c6fa97f 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -42,6 +42,7 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o
arm64-obj-$(CONFIG_PCI) += pci.o
arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o
arm64-obj-$(CONFIG_ACPI) += acpi.o
+arm64-obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o
arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
new file mode 100644
index 000000000000..94a6b35fab92
--- /dev/null
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -0,0 +1,149 @@
+/*
+ * ACPI 5.1 based NUMA setup for ARM64
+ * Lots of code was borrowed from arch/x86/mm/srat.c
+ *
+ * Copyright 2004 Andi Kleen, SuSE Labs.
+ * Copyright (C) 2013-2016, Linaro Ltd.
+ * Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *
+ * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
+ *
+ * Called from acpi_numa_init while reading the SRAT and SLIT tables.
+ * Assumes all memory regions belonging to a single proximity domain
+ * are in one chunk. Holes between them will be included in the node.
+ */
+
+#define pr_fmt(fmt) "ACPI: NUMA: " fmt
+
+#include <linux/acpi.h>
+#include <linux/bitmap.h>
+#include <linux/bootmem.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/memblock.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/topology.h>
+
+#include <acpi/processor.h>
+#include <asm/numa.h>
+
+static int cpus_in_srat;
+
+struct __node_cpu_hwid {
+ u32 node_id; /* logical node containing this CPU */
+ u64 cpu_hwid; /* MPIDR for this CPU */
+};
+
+static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = {
+[0 ... NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} };
+
+int acpi_numa_get_nid(unsigned int cpu, u64 hwid)
+{
+ int i;
+
+ for (i = 0; i < cpus_in_srat; i++) {
+ if (hwid == early_node_cpu_hwid[i].cpu_hwid)
+ return early_node_cpu_hwid[i].node_id;
+ }
+
+ return NUMA_NO_NODE;
+}
+
+static int __init get_mpidr_in_madt(int acpi_id, u64 *mpidr)
+{
+ unsigned long madt_end, entry;
+ struct acpi_table_madt *madt;
+ acpi_size tbl_size;
+
+ if (ACPI_FAILURE(acpi_get_table_with_size(ACPI_SIG_MADT, 0,
+ (struct acpi_table_header **)&madt, &tbl_size)))
+ return -ENODEV;
+
+ entry = (unsigned long)madt;
+ madt_end = entry + madt->header.length;
+
+ /* Parse all entries looking for a match. */
+ entry += sizeof(struct acpi_table_madt);
+ while (entry + sizeof(struct acpi_subtable_header) < madt_end) {
+ struct acpi_subtable_header *header =
+ (struct acpi_subtable_header *)entry;
+
+ if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) {
+ struct acpi_madt_generic_interrupt *gicc =
+ container_of(header,
+ struct acpi_madt_generic_interrupt, header);
+
+ if ((gicc->flags & ACPI_MADT_ENABLED) &&
+ (gicc->uid == acpi_id)) {
+ *mpidr = gicc->arm_mpidr;
+ early_acpi_os_unmap_memory(madt, tbl_size);
+ return 0;
+ }
+ }
+ entry += header->length;
+ }
+
+ early_acpi_os_unmap_memory(madt, tbl_size);
+ return -ENODEV;
+}
+
+/* Callback for Proximity Domain -> ACPI processor UID mapping */
+void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
+{
+ int pxm, node;
+ u64 mpidr;
+
+ if (srat_disabled())
+ return;
+
+ if (pa->header.length < sizeof(struct acpi_srat_gicc_affinity)) {
+ pr_err("SRAT: Invalid SRAT header length: %d\n",
+ pa->header.length);
+ bad_srat();
+ return;
+ }
+
+ if (!(pa->flags & ACPI_SRAT_GICC_ENABLED))
+ return;
+
+ if (cpus_in_srat >= NR_CPUS) {
+ pr_warn_once("SRAT: cpu_to_node_map[%d] is too small, may not be able to use all cpus\n",
+ NR_CPUS);
+ return;
+ }
+
+ pxm = pa->proximity_domain;
+ node = acpi_map_pxm_to_node(pxm);
+
+ if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
+ pr_err("SRAT: Too many proximity domains %d\n", pxm);
+ bad_srat();
+ return;
+ }
+
+ if (get_mpidr_in_madt(pa->acpi_processor_uid, &mpidr)) {
+ pr_err("SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n",
+ pxm, pa->acpi_processor_uid);
+ bad_srat();
+ return;
+ }
+
+ early_node_cpu_hwid[cpus_in_srat].node_id = node;
+ early_node_cpu_hwid[cpus_in_srat].cpu_hwid = mpidr;
+ node_set(node, numa_nodes_parsed);
+ cpus_in_srat++;
+ pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d\n",
+ pxm, mpidr, node);
+}
+
+int __init arm64_acpi_numa_init(void)
+{
+ int ret;
+
+ ret = acpi_numa_init();
+ if (ret)
+ return ret;
+
+ return srat_disabled() ? -EINVAL : 0;
+}
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index e484c9177456..ea120a847806 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -74,3 +74,13 @@ void pcibios_remove_bus(struct pci_bus *bus)
{
acpi_pci_remove_bus(bus);
}
+
+#ifdef CONFIG_NUMA
+
+int pcibus_to_node(struct pci_bus *bus)
+{
+ return dev_to_node(&bus->dev);
+}
+EXPORT_SYMBOL(pcibus_to_node);
+
+#endif
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 9dc67769b6a4..0ad4b7770c05 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -53,6 +53,7 @@
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/kasan.h>
+#include <asm/numa.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
@@ -327,6 +328,12 @@ void __init setup_arch(char **cmdline_p)
acpi_boot_table_init();
paging_init();
+
+ if (acpi_disabled)
+ unflatten_device_tree();
+
+ bootmem_init();
+
relocate_initrd();
kasan_init();
@@ -335,12 +342,11 @@ void __init setup_arch(char **cmdline_p)
early_ioremap_reset();
- if (acpi_disabled) {
- unflatten_device_tree();
+ if (acpi_disabled)
psci_dt_init();
- } else {
+ else
psci_acpi_init();
- }
+
xen_early_init();
cpu_read_bootcpu_ops();
@@ -379,6 +385,9 @@ static int __init topology_init(void)
{
int i;
+ for_each_online_node(i)
+ register_one_node(i);
+
for_each_possible_cpu(i) {
struct cpu *cpu = &per_cpu(cpu_data.cpu, i);
cpu->hotpluggable = 1;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b2d5f4ee9a1c..6c7ef8f25a90 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -45,6 +45,7 @@
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
#include <asm/mmu_context.h>
+#include <asm/numa.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
@@ -166,6 +167,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
static void smp_store_cpu_info(unsigned int cpuid)
{
store_cpu_topology(cpuid);
+ numa_store_cpu_info(cpuid);
}
/*
@@ -522,6 +524,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
*/
acpi_set_mailbox_entry(cpu_count, processor);
+ early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count, hwid));
+
cpu_count++;
}
@@ -595,6 +599,8 @@ static void __init of_parse_and_init_cpus(void)
pr_debug("cpu logical map 0x%llx\n", hwid);
cpu_logical_map(cpu_count) = hwid;
+
+ early_map_cpu_to_node(cpu_count, of_node_to_nid(dn));
next:
cpu_count++;
}
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 57f57fde5722..54bb209cae8e 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -4,6 +4,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
context.o proc.o pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_ARM64_PTDUMP) += dump.o
+obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_KASAN) += kasan_init.o
KASAN_SANITIZE_kasan_init.o := n
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index ea989d83ea9b..b1ff1511f472 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -40,6 +40,7 @@
#include <asm/kasan.h>
#include <asm/kernel-pgtable.h>
#include <asm/memory.h>
+#include <asm/numa.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
@@ -86,6 +87,21 @@ static phys_addr_t __init max_zone_dma_phys(void)
return min(offset + (1ULL << 32), memblock_end_of_DRAM());
}
+#ifdef CONFIG_NUMA
+
+static void __init zone_sizes_init(unsigned long min, unsigned long max)
+{
+ unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
+
+ if (IS_ENABLED(CONFIG_ZONE_DMA))
+ max_zone_pfns[ZONE_DMA] = PFN_DOWN(max_zone_dma_phys());
+ max_zone_pfns[ZONE_NORMAL] = max;
+
+ free_area_init_nodes(max_zone_pfns);
+}
+
+#else
+
static void __init zone_sizes_init(unsigned long min, unsigned long max)
{
struct memblock_region *reg;
@@ -126,6 +142,8 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
free_area_init_node(0, zone_size, min, zhole_size);
}
+#endif /* CONFIG_NUMA */
+
#ifdef CONFIG_HAVE_ARCH_PFN_VALID
int pfn_valid(unsigned long pfn)
{
@@ -142,10 +160,15 @@ static void __init arm64_memory_present(void)
static void __init arm64_memory_present(void)
{
struct memblock_region *reg;
+ int nid = 0;
- for_each_memblock(memory, reg)
- memory_present(0, memblock_region_memory_base_pfn(reg),
- memblock_region_memory_end_pfn(reg));
+ for_each_memblock(memory, reg) {
+#ifdef CONFIG_NUMA
+ nid = reg->nid;
+#endif
+ memory_present(nid, memblock_region_memory_base_pfn(reg),
+ memblock_region_memory_end_pfn(reg));
+ }
}
#endif
@@ -245,7 +268,6 @@ void __init arm64_memblock_init(void)
dma_contiguous_reserve(arm64_dma_phys_limit);
memblock_allow_resize();
- memblock_dump_all();
}
void __init bootmem_init(void)
@@ -257,6 +279,9 @@ void __init bootmem_init(void)
early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT);
+ max_pfn = max_low_pfn = max;
+
+ arm64_numa_init();
/*
* Sparsemem tries to allocate bootmem in memory_present(), so must be
* done after the fixed reservations.
@@ -267,7 +292,7 @@ void __init bootmem_init(void)
zone_sizes_init(min, max);
high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
- max_pfn = max_low_pfn = max;
+ memblock_dump_all();
}
#ifndef CONFIG_SPARSEMEM_VMEMMAP
diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
index ef47d99b5cbc..71fe98985455 100644
--- a/arch/arm64/mm/mm.h
+++ b/arch/arm64/mm/mm.h
@@ -1,3 +1,2 @@
-extern void __init bootmem_init(void);
void fixup_init(void);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f3e5c74233f3..267903b59ced 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -564,8 +564,6 @@ void __init paging_init(void)
*/
memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE,
SWAPPER_DIR_SIZE - PAGE_SIZE);
-
- bootmem_init();
}
/*
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
new file mode 100644
index 000000000000..c7fe3ec70774
--- /dev/null
+++ b/arch/arm64/mm/numa.c
@@ -0,0 +1,402 @@
+/*
+ * NUMA support, based on the x86 implementation.
+ *
+ * Copyright (C) 2015 Cavium Inc.
+ * Author: Ganapatrao Kulkarni <gkulkarni@cavium.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/module.h>
+#include <linux/of.h>
+
+struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_data);
+nodemask_t numa_nodes_parsed __initdata;
+static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
+
+static int numa_distance_cnt;
+static u8 *numa_distance;
+static bool numa_off;
+
+static __init int numa_parse_early_param(char *opt)
+{
+ if (!opt)
+ return -EINVAL;
+ if (!strncmp(opt, "off", 3)) {
+ pr_info("%s\n", "NUMA turned off");
+ numa_off = true;
+ }
+ return 0;
+}
+early_param("numa", numa_parse_early_param);
+
+cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+
+/*
+ * Returns a pointer to the bitmask of CPUs on Node 'node'.
+ */
+const struct cpumask *cpumask_of_node(int node)
+{
+ if (WARN_ON(node >= nr_node_ids))
+ return cpu_none_mask;
+
+ if (WARN_ON(node_to_cpumask_map[node] == NULL))
+ return cpu_online_mask;
+
+ return node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(cpumask_of_node);
+
+#endif
+
+static void map_cpu_to_node(unsigned int cpu, int nid)
+{
+ set_cpu_numa_node(cpu, nid);
+ if (nid >= 0)
+ cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+}
+
+void numa_clear_node(unsigned int cpu)
+{
+ int nid = cpu_to_node(cpu);
+
+ if (nid >= 0)
+ cpumask_clear_cpu(cpu, node_to_cpumask_map[nid]);
+ set_cpu_numa_node(cpu, NUMA_NO_NODE);
+}
+
+/*
+ * Allocate node_to_cpumask_map based on number of available nodes
+ * Requires node_possible_map to be valid.
+ *
+ * Note: cpumask_of_node() is not valid until after this is done.
+ * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
+ */
+static void __init setup_node_to_cpumask_map(void)
+{
+ unsigned int cpu;
+ int node;
+
+ /* setup nr_node_ids if not done yet */
+ if (nr_node_ids == MAX_NUMNODES)
+ setup_nr_node_ids();
+
+ /* allocate and clear the mapping */
+ for (node = 0; node < nr_node_ids; node++) {
+ alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
+ cpumask_clear(node_to_cpumask_map[node]);
+ }
+
+ for_each_possible_cpu(cpu)
+ set_cpu_numa_node(cpu, NUMA_NO_NODE);
+
+ /* cpumask_of_node() will now work */
+ pr_debug("NUMA: Node to cpumask map for %d nodes\n", nr_node_ids);
+}
+
+/*
+ * Set the cpu to node and mem mapping
+ */
+void numa_store_cpu_info(unsigned int cpu)
+{
+ map_cpu_to_node(cpu, numa_off ? 0 : cpu_to_node_map[cpu]);
+}
+
+void __init early_map_cpu_to_node(unsigned int cpu, int nid)
+{
+ /* fallback to node 0 */
+ if (nid < 0 || nid >= MAX_NUMNODES)
+ nid = 0;
+
+ cpu_to_node_map[cpu] = nid;
+}
+
+/**
+ * numa_add_memblk - Set node id to memblk
+ * @nid: NUMA node ID of the new memblk
+ * @start: Start address of the new memblk
+ * @end: End address of the new memblk
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int __init numa_add_memblk(int nid, u64 start, u64 end)
+{
+ int ret;
+
+ ret = memblock_set_node(start, (end - start), &memblock.memory, nid);
+ if (ret < 0) {
+ pr_err("NUMA: memblock [0x%llx - 0x%llx] failed to add on node %d\n",
+ start, (end - 1), nid);
+ return ret;
+ }
+
+ node_set(nid, numa_nodes_parsed);
+ pr_info("NUMA: Adding memblock [0x%llx - 0x%llx] on node %d\n",
+ start, (end - 1), nid);
+ return ret;
+}
+
+/**
+ * Initialize NODE_DATA for a node on the local memory
+ */
+static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
+{
+ const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
+ u64 nd_pa;
+ void *nd;
+ int tnid;
+
+ pr_info("NUMA: Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
+ nid, start_pfn << PAGE_SHIFT,
+ (end_pfn << PAGE_SHIFT) - 1);
+
+ nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+ nd = __va(nd_pa);
+
+ /* report and initialize */
+ pr_info("NUMA: NODE_DATA [mem %#010Lx-%#010Lx]\n",
+ nd_pa, nd_pa + nd_size - 1);
+ tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+ if (tnid != nid)
+ pr_info("NUMA: NODE_DATA(%d) on node %d\n", nid, tnid);
+
+ node_data[nid] = nd;
+ memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+ NODE_DATA(nid)->node_id = nid;
+ NODE_DATA(nid)->node_start_pfn = start_pfn;
+ NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+}
+
+/**
+ * numa_free_distance
+ *
+ * The current table is freed.
+ */
+void __init numa_free_distance(void)
+{
+ size_t size;
+
+ if (!numa_distance)
+ return;
+
+ size = numa_distance_cnt * numa_distance_cnt *
+ sizeof(numa_distance[0]);
+
+ memblock_free(__pa(numa_distance), size);
+ numa_distance_cnt = 0;
+ numa_distance = NULL;
+}
+
+/**
+ *
+ * Create a new NUMA distance table.
+ *
+ */
+static int __init numa_alloc_distance(void)
+{
+ size_t size;
+ u64 phys;
+ int i, j;
+
+ size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]);
+ phys = memblock_find_in_range(0, PFN_PHYS(max_pfn),
+ size, PAGE_SIZE);
+ if (WARN_ON(!phys))
+ return -ENOMEM;
+
+ memblock_reserve(phys, size);
+
+ numa_distance = __va(phys);
+ numa_distance_cnt = nr_node_ids;
+
+ /* fill with the default distances */
+ for (i = 0; i < numa_distance_cnt; i++)
+ for (j = 0; j < numa_distance_cnt; j++)
+ numa_distance[i * numa_distance_cnt + j] = i == j ?
+ LOCAL_DISTANCE : REMOTE_DISTANCE;
+
+ pr_debug("NUMA: Initialized distance table, cnt=%d\n",
+ numa_distance_cnt);
+
+ return 0;
+}
+
+/**
+ * numa_set_distance - Set inter node NUMA distance from node to node.
+ * @from: the 'from' node to set distance
+ * @to: the 'to' node to set distance
+ * @distance: NUMA distance
+ *
+ * Set the distance from node @from to @to to @distance.
+ * If distance table doesn't exist, a warning is printed.
+ *
+ * If @from or @to is higher than the highest known node or lower than zero
+ * or @distance doesn't make sense, the call is ignored.
+ *
+ */
+void __init numa_set_distance(int from, int to, int distance)
+{
+ if (!numa_distance) {
+ pr_warn_once("NUMA: Warning: distance table not allocated yet\n");
+ return;
+ }
+
+ if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
+ from < 0 || to < 0) {
+ pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
+ from, to, distance);
+ return;
+ }
+
+ if ((u8)distance != distance ||
+ (from == to && distance != LOCAL_DISTANCE)) {
+ pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+ from, to, distance);
+ return;
+ }
+
+ numa_distance[from * numa_distance_cnt + to] = distance;
+}
+
+/**
+ * Return NUMA distance @from to @to
+ */
+int __node_distance(int from, int to)
+{
+ if (from >= numa_distance_cnt || to >= numa_distance_cnt)
+ return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+ return numa_distance[from * numa_distance_cnt + to];
+}
+EXPORT_SYMBOL(__node_distance);
+
+static int __init numa_register_nodes(void)
+{
+ int nid;
+ struct memblock_region *mblk;
+
+ /* Check that valid nid is set to memblks */
+ for_each_memblock(memory, mblk)
+ if (mblk->nid == NUMA_NO_NODE || mblk->nid >= MAX_NUMNODES) {
+ pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
+ mblk->nid, mblk->base,
+ mblk->base + mblk->size - 1);
+ return -EINVAL;
+ }
+
+ /* Finally register nodes. */
+ for_each_node_mask(nid, numa_nodes_parsed) {
+ unsigned long start_pfn, end_pfn;
+
+ get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+ setup_node_data(nid, start_pfn, end_pfn);
+ node_set_online(nid);
+ }
+
+ /* Setup online nodes to actual nodes*/
+ node_possible_map = numa_nodes_parsed;
+
+ return 0;
+}
+
+static int __init numa_init(int (*init_func)(void))
+{
+ int ret;
+
+ nodes_clear(numa_nodes_parsed);
+ nodes_clear(node_possible_map);
+ nodes_clear(node_online_map);
+ numa_free_distance();
+
+ ret = numa_alloc_distance();
+ if (ret < 0)
+ return ret;
+
+ ret = init_func();
+ if (ret < 0)
+ return ret;
+
+ if (nodes_empty(numa_nodes_parsed))
+ return -EINVAL;
+
+ ret = numa_register_nodes();
+ if (ret < 0)
+ return ret;
+
+ setup_node_to_cpumask_map();
+
+ /* init boot processor */
+ cpu_to_node_map[0] = 0;
+ map_cpu_to_node(0, 0);
+
+ return 0;
+}
+
+/**
+ * dummy_numa_init - Fallback dummy NUMA init
+ *
+ * Used if there's no underlying NUMA architecture, NUMA initialization
+ * fails, or NUMA is disabled on the command line.
+ *
+ * Must online at least one node (node 0) and add memory blocks that cover all
+ * allowed memory. It is unlikely that this function fails.
+ */
+static int __init dummy_numa_init(void)
+{
+ int ret;
+ struct memblock_region *mblk;
+
+ if (numa_off)
+ pr_info("NUMA disabled\n"); /* Forced off on command line. */
+ else
+ pr_info("No NUMA configuration found\n");
+ pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n",
+ 0LLU, PFN_PHYS(max_pfn) - 1);
+
+ for_each_memblock(memory, mblk) {
+ ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
+ if (!ret)
+ continue;
+
+ pr_err("NUMA init failed\n");
+ return ret;
+ }
+
+ numa_off = true;
+ return 0;
+}
+
+/**
+ * arm64_numa_init - Initialize NUMA
+ *
+ * Try each configured NUMA initialization method until one succeeds. The
+ * last fallback is dummy single node config encomapssing whole memory.
+ */
+void __init arm64_numa_init(void)
+{
+ if (!numa_off) {
+ if (!acpi_disabled && !numa_init(arm64_acpi_numa_init))
+ return;
+ if (acpi_disabled && !numa_init(of_numa_init))
+ return;
+ }
+
+ numa_init(dummy_numa_init);
+}
diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
index aa0fdf125aba..a3d0211970e9 100644
--- a/arch/ia64/include/asm/acpi.h
+++ b/arch/ia64/include/asm/acpi.h
@@ -140,6 +140,9 @@ static inline void per_cpu_scan_finalize(int min_cpus, int reserve_cpus)
}
}
}
+
+extern void acpi_numa_fixup(void);
+
#endif /* CONFIG_ACPI_NUMA */
#endif /*__KERNEL__*/
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index b1698bc042c8..92b7bc956795 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -524,7 +524,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
return 0;
}
-void __init acpi_numa_arch_fixup(void)
+void __init acpi_numa_fixup(void)
{
int i, j, node_from, node_to;
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 2029a38a72ae..afddb3e80a29 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -552,6 +552,7 @@ setup_arch (char **cmdline_p)
early_acpi_boot_init();
# ifdef CONFIG_ACPI_NUMA
acpi_numa_init();
+ acpi_numa_fixup();
# ifdef CONFIG_ACPI_HOTPLUG_CPU
prefill_possible_map();
# endif
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 94c18ebfd68c..65f1e95cade9 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -145,7 +145,6 @@ static inline void disable_acpi(void) { }
#define ARCH_HAS_POWER_INIT 1
#ifdef CONFIG_ACPI_NUMA
-extern int acpi_numa;
extern int x86_acpi_numa_init(void);
#endif /* CONFIG_ACPI_NUMA */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index f70c1ff46125..57673f5cb120 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -1,4 +1,5 @@
/* Common code for 32 and 64-bit NUMA */
+#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
@@ -15,7 +16,6 @@
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/dma.h>
-#include <asm/acpi.h>
#include <asm/amd_nb.h>
#include "numa_internal.h"
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index b5f821881465..b1ecff460a46 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -15,8 +15,6 @@
#include <linux/bitmap.h>
#include <linux/module.h>
#include <linux/topology.h>
-#include <linux/bootmem.h>
-#include <linux/memblock.h>
#include <linux/mm.h>
#include <asm/proto.h>
#include <asm/numa.h>
@@ -24,51 +22,6 @@
#include <asm/apic.h>
#include <asm/uv/uv.h>
-int acpi_numa __initdata;
-
-static __init int setup_node(int pxm)
-{
- return acpi_map_pxm_to_node(pxm);
-}
-
-static __init void bad_srat(void)
-{
- printk(KERN_ERR "SRAT: SRAT not used.\n");
- acpi_numa = -1;
-}
-
-static __init inline int srat_disabled(void)
-{
- return acpi_numa < 0;
-}
-
-/*
- * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for
- * I/O localities since SRAT does not list them. I/O localities are
- * not supported at this point.
- */
-void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
-{
- int i, j;
-
- for (i = 0; i < slit->locality_count; i++) {
- const int from_node = pxm_to_node(i);
-
- if (from_node == NUMA_NO_NODE)
- continue;
-
- for (j = 0; j < slit->locality_count; j++) {
- const int to_node = pxm_to_node(j);
-
- if (to_node == NUMA_NO_NODE)
- continue;
-
- numa_set_distance(from_node, to_node,
- slit->entry[slit->locality_count * i + j]);
- }
- }
-}
-
/* Callback for Proximity Domain -> x2APIC mapping */
void __init
acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
@@ -91,7 +44,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
pxm, apic_id);
return;
}
- node = setup_node(pxm);
+ node = acpi_map_pxm_to_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
bad_srat();
@@ -104,7 +57,6 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
}
set_apicid_to_node(apic_id, node);
node_set(node, numa_nodes_parsed);
- acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
pxm, apic_id, node);
}
@@ -127,7 +79,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
pxm = pa->proximity_domain_lo;
if (acpi_srat_revision >= 2)
pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
- node = setup_node(pxm);
+ node = acpi_map_pxm_to_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
bad_srat();
@@ -146,74 +98,10 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
set_apicid_to_node(apic_id, node);
node_set(node, numa_nodes_parsed);
- acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
pxm, apic_id, node);
}
-#ifdef CONFIG_MEMORY_HOTPLUG
-static inline int save_add_info(void) {return 1;}
-#else
-static inline int save_add_info(void) {return 0;}
-#endif
-
-/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
-int __init
-acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
-{
- u64 start, end;
- u32 hotpluggable;
- int node, pxm;
-
- if (srat_disabled())
- goto out_err;
- if (ma->header.length != sizeof(struct acpi_srat_mem_affinity))
- goto out_err_bad_srat;
- if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
- goto out_err;
- hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
- if (hotpluggable && !save_add_info())
- goto out_err;
-
- start = ma->base_address;
- end = start + ma->length;
- pxm = ma->proximity_domain;
- if (acpi_srat_revision <= 1)
- pxm &= 0xff;
-
- node = setup_node(pxm);
- if (node < 0) {
- printk(KERN_ERR "SRAT: Too many proximity domains.\n");
- goto out_err_bad_srat;
- }
-
- if (numa_add_memblk(node, start, end) < 0)
- goto out_err_bad_srat;
-
- node_set(node, numa_nodes_parsed);
-
- pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
- node, pxm,
- (unsigned long long) start, (unsigned long long) end - 1,
- hotpluggable ? " hotplug" : "",
- ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");
-
- /* Mark hotplug range in memblock. */
- if (hotpluggable && memblock_mark_hotplug(start, ma->length))
- pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
- (unsigned long long)start, (unsigned long long)end - 1);
-
- max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
-
- return 0;
-out_err_bad_srat:
- bad_srat();
-out_err:
- return -1;
-}
-
-void __init acpi_numa_arch_fixup(void) {}
-
int __init x86_acpi_numa_init(void)
{
int ret;
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index b6320a100f89..64d8fce05adb 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -297,8 +297,8 @@ config ACPI_THERMAL
config ACPI_NUMA
bool "NUMA support"
depends on NUMA
- depends on (X86 || IA64)
- default y if IA64_GENERIC || IA64_SGI_SN2
+ depends on (X86 || IA64 || ARM64)
+ default y if IA64_GENERIC || IA64_SGI_SN2 || ARM64
config ACPI_CUSTOM_DSDT_FILE
string "Custom DSDT Table file to include"
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 72b6e9ef0ae9..f6696ac45e41 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -18,22 +18,21 @@
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
*/
+
+#define pr_fmt(fmt) "ACPI: " fmt
+
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/numa.h>
#include <linux/nodemask.h>
#include <linux/topology.h>
-#define PREFIX "ACPI: "
-
-#define ACPI_NUMA 0x80000000
-#define _COMPONENT ACPI_NUMA
-ACPI_MODULE_NAME("numa");
-
static nodemask_t nodes_found_map = NODE_MASK_NONE;
/* maps to convert between proximity domain and logical node ID */
@@ -43,6 +42,7 @@ static int node_to_pxm_map[MAX_NUMNODES]
= { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
unsigned char acpi_srat_revision __initdata;
+int acpi_numa __initdata;
int pxm_to_node(int pxm)
{
@@ -128,68 +128,63 @@ EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
static void __init
acpi_table_print_srat_entry(struct acpi_subtable_header *header)
{
-
- ACPI_FUNCTION_NAME("acpi_table_print_srat_entry");
-
- if (!header)
- return;
-
switch (header->type) {
-
case ACPI_SRAT_TYPE_CPU_AFFINITY:
-#ifdef ACPI_DEBUG_OUTPUT
{
struct acpi_srat_cpu_affinity *p =
(struct acpi_srat_cpu_affinity *)header;
- ACPI_DEBUG_PRINT((ACPI_DB_INFO,
- "SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n",
- p->apic_id, p->local_sapic_eid,
- p->proximity_domain_lo,
- (p->flags & ACPI_SRAT_CPU_ENABLED)?
- "enabled" : "disabled"));
+ pr_debug("SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n",
+ p->apic_id, p->local_sapic_eid,
+ p->proximity_domain_lo,
+ (p->flags & ACPI_SRAT_CPU_ENABLED) ?
+ "enabled" : "disabled");
}
-#endif /* ACPI_DEBUG_OUTPUT */
break;
case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
-#ifdef ACPI_DEBUG_OUTPUT
{
struct acpi_srat_mem_affinity *p =
(struct acpi_srat_mem_affinity *)header;
- ACPI_DEBUG_PRINT((ACPI_DB_INFO,
- "SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n",
- (unsigned long)p->base_address,
- (unsigned long)p->length,
- p->proximity_domain,
- (p->flags & ACPI_SRAT_MEM_ENABLED)?
- "enabled" : "disabled",
- (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)?
- " hot-pluggable" : "",
- (p->flags & ACPI_SRAT_MEM_NON_VOLATILE)?
- " non-volatile" : ""));
+ pr_debug("SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n",
+ (unsigned long)p->base_address,
+ (unsigned long)p->length,
+ p->proximity_domain,
+ (p->flags & ACPI_SRAT_MEM_ENABLED) ?
+ "enabled" : "disabled",
+ (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
+ " hot-pluggable" : "",
+ (p->flags & ACPI_SRAT_MEM_NON_VOLATILE) ?
+ " non-volatile" : "");
}
-#endif /* ACPI_DEBUG_OUTPUT */
break;
case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
-#ifdef ACPI_DEBUG_OUTPUT
{
struct acpi_srat_x2apic_cpu_affinity *p =
(struct acpi_srat_x2apic_cpu_affinity *)header;
- ACPI_DEBUG_PRINT((ACPI_DB_INFO,
- "SRAT Processor (x2apicid[0x%08x]) in"
- " proximity domain %d %s\n",
- p->apic_id,
- p->proximity_domain,
- (p->flags & ACPI_SRAT_CPU_ENABLED) ?
- "enabled" : "disabled"));
+ pr_debug("SRAT Processor (x2apicid[0x%08x]) in proximity domain %d %s\n",
+ p->apic_id,
+ p->proximity_domain,
+ (p->flags & ACPI_SRAT_CPU_ENABLED) ?
+ "enabled" : "disabled");
}
-#endif /* ACPI_DEBUG_OUTPUT */
break;
+
+ case ACPI_SRAT_TYPE_GICC_AFFINITY:
+ {
+ struct acpi_srat_gicc_affinity *p =
+ (struct acpi_srat_gicc_affinity *)header;
+ pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n",
+ p->acpi_processor_uid,
+ p->proximity_domain,
+ (p->flags & ACPI_SRAT_GICC_ENABLED) ?
+ "enabled" : "disabled");
+ }
+ break;
+
default:
- printk(KERN_WARNING PREFIX
- "Found unsupported SRAT entry (type = 0x%x)\n",
- header->type);
+ pr_warn("Found unsupported SRAT entry (type = 0x%x)\n",
+ header->type);
break;
}
}
@@ -217,12 +212,117 @@ static int __init slit_valid(struct acpi_table_slit *slit)
return 1;
}
+void __init bad_srat(void)
+{
+ pr_err("SRAT: SRAT not used.\n");
+ acpi_numa = -1;
+}
+
+int __init srat_disabled(void)
+{
+ return acpi_numa < 0;
+}
+
+#if defined(CONFIG_X86) || defined (CONFIG_ARM64)
+/*
+ * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for
+ * I/O localities since SRAT does not list them. I/O localities are
+ * not supported at this point.
+ */
+void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
+{
+ int i, j;
+
+ for (i = 0; i < slit->locality_count; i++) {
+ const int from_node = pxm_to_node(i);
+
+ if (from_node == NUMA_NO_NODE)
+ continue;
+
+ for (j = 0; j < slit->locality_count; j++) {
+ const int to_node = pxm_to_node(j);
+
+ if (to_node == NUMA_NO_NODE)
+ continue;
+
+ numa_set_distance(from_node, to_node,
+ slit->entry[slit->locality_count * i + j]);
+ }
+ }
+}
+
+/*
+ * Default callback for parsing of the Proximity Domain <-> Memory
+ * Area mappings
+ */
+int __init
+acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
+{
+ u64 start, end;
+ u32 hotpluggable;
+ int node, pxm;
+
+ if (srat_disabled())
+ goto out_err;
+ if (ma->header.length < sizeof(struct acpi_srat_mem_affinity)) {
+ pr_err("SRAT: Unexpected header length: %d\n",
+ ma->header.length);
+ goto out_err_bad_srat;
+ }
+ if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
+ goto out_err;
+ hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
+ if (hotpluggable && !IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
+ goto out_err;
+
+ start = ma->base_address;
+ end = start + ma->length;
+ pxm = ma->proximity_domain;
+ if (acpi_srat_revision <= 1)
+ pxm &= 0xff;
+
+ node = acpi_map_pxm_to_node(pxm);
+ if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
+ pr_err("SRAT: Too many proximity domains.\n");
+ goto out_err_bad_srat;
+ }
+
+ if (numa_add_memblk(node, start, end) < 0) {
+ pr_err("SRAT: Failed to add memblk to node %u [mem %#010Lx-%#010Lx]\n",
+ node, (unsigned long long) start,
+ (unsigned long long) end - 1);
+ goto out_err_bad_srat;
+ }
+
+ node_set(node, numa_nodes_parsed);
+
+ pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
+ node, pxm,
+ (unsigned long long) start, (unsigned long long) end - 1,
+ hotpluggable ? " hotplug" : "",
+ ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");
+
+ /* Mark hotplug range in memblock. */
+ if (hotpluggable && memblock_mark_hotplug(start, ma->length))
+ pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
+ (unsigned long long)start, (unsigned long long)end - 1);
+
+ max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
+
+ return 0;
+out_err_bad_srat:
+ bad_srat();
+out_err:
+ return -EINVAL;
+}
+#endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */
+
static int __init acpi_parse_slit(struct acpi_table_header *table)
{
struct acpi_table_slit *slit = (struct acpi_table_slit *)table;
if (!slit_valid(slit)) {
- printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n");
+ pr_info("SLIT table looks invalid. Not used.\n");
return -EINVAL;
}
acpi_numa_slit_init(slit);
@@ -233,12 +333,9 @@ static int __init acpi_parse_slit(struct acpi_table_header *table)
void __init __weak
acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
{
- printk(KERN_WARNING PREFIX
- "Found unsupported x2apic [0x%08x] SRAT entry\n", pa->apic_id);
- return;
+ pr_warn("Found unsupported x2apic [0x%08x] SRAT entry\n", pa->apic_id);
}
-
static int __init
acpi_parse_x2apic_affinity(struct acpi_subtable_header *header,
const unsigned long end)
@@ -275,6 +372,24 @@ acpi_parse_processor_affinity(struct acpi_subtable_header *header,
return 0;
}
+static int __init
+acpi_parse_gicc_affinity(struct acpi_subtable_header *header,
+ const unsigned long end)
+{
+ struct acpi_srat_gicc_affinity *processor_affinity;
+
+ processor_affinity = (struct acpi_srat_gicc_affinity *)header;
+ if (!processor_affinity)
+ return -EINVAL;
+
+ acpi_table_print_srat_entry(header);
+
+ /* let architecture-dependent part to do it */
+ acpi_numa_gicc_affinity_init(processor_affinity);
+
+ return 0;
+}
+
static int __initdata parsed_numa_memblks;
static int __init
@@ -319,6 +434,9 @@ int __init acpi_numa_init(void)
{
int cnt = 0;
+ if (acpi_disabled)
+ return -EINVAL;
+
/*
* Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
* SRAT cpu entries could have different order with that in MADT.
@@ -331,6 +449,8 @@ int __init acpi_numa_init(void)
acpi_parse_x2apic_affinity, 0);
acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
acpi_parse_processor_affinity, 0);
+ acpi_table_parse_srat(ACPI_SRAT_TYPE_GICC_AFFINITY,
+ acpi_parse_gicc_affinity, 0);
cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
acpi_parse_memory_affinity,
NR_NODE_MEMBLKS);
@@ -339,8 +459,6 @@ int __init acpi_numa_init(void)
/* SLIT: System Locality Information Table */
acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
- acpi_numa_arch_fixup();
-
if (cnt < 0)
return cnt;
else if (!parsed_numa_memblks)
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index 8714f8c271ba..434dd6065935 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -143,6 +143,14 @@ static __init void reserve_regions(void)
if (efi_enabled(EFI_DBG))
pr_info("Processing EFI memory map:\n");
+ /*
+ * Discard memblocks discovered so far: if there are any at this
+ * point, they originate from memory nodes in the DT, and UEFI
+ * uses its own memory map instead.
+ */
+ memblock_dump_all();
+ memblock_remove(0, ULLONG_MAX);
+
for_each_efi_memory_desc(&memmap, md) {
paddr = md->phys_addr;
npages = md->num_pages;
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index 6dba78aef337..e58abfa953cc 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -24,7 +24,7 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
unsigned long map_size, unsigned long desc_size,
u32 desc_ver)
{
- int node, prev, num_rsv;
+ int node, num_rsv;
int status;
u32 fdt_val32;
u64 fdt_val64;
@@ -54,28 +54,6 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
goto fdt_set_fail;
/*
- * Delete any memory nodes present. We must delete nodes which
- * early_init_dt_scan_memory may try to use.
- */
- prev = 0;
- for (;;) {
- const char *type;
- int len;
-
- node = fdt_next_node(fdt, prev, NULL);
- if (node < 0)
- break;
-
- type = fdt_getprop(fdt, node, "device_type", &len);
- if (type && strncmp(type, "memory", len) == 0) {
- fdt_del_node(fdt, node);
- continue;
- }
-
- prev = node;
- }
-
- /*
* Delete all memory reserve map entries. When booting via UEFI,
* kernel will use the UEFI memory map to find reserved regions.
*/
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index e2a48415d969..b3bec3aaa45d 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -112,4 +112,7 @@ config OF_OVERLAY
While this option is selected automatically when needed, you can
enable it manually to improve device tree unit test coverage.
+config OF_NUMA
+ bool
+
endif # OF
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 156c072b3117..bee3fa96b981 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -14,5 +14,6 @@ obj-$(CONFIG_OF_MTD) += of_mtd.o
obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o
obj-$(CONFIG_OF_RESOLVE) += resolver.o
obj-$(CONFIG_OF_OVERLAY) += overlay.o
+obj-$(CONFIG_OF_NUMA) += of_numa.o
obj-$(CONFIG_OF_UNITTEST) += unittest-data/
diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c
new file mode 100644
index 000000000000..ed5a097f0801
--- /dev/null
+++ b/drivers/of/of_numa.c
@@ -0,0 +1,211 @@
+/*
+ * OF NUMA Parsing support.
+ *
+ * Copyright (C) 2015 - 2016 Cavium Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/nodemask.h>
+
+#include <asm/numa.h>
+
+/* define default numa node to 0 */
+#define DEFAULT_NODE 0
+
+/*
+ * Even though we connect cpus to numa domains later in SMP
+ * init, we need to know the node ids now for all cpus.
+*/
+static void __init of_numa_parse_cpu_nodes(void)
+{
+ u32 nid;
+ int r;
+ struct device_node *cpus;
+ struct device_node *np = NULL;
+
+ cpus = of_find_node_by_path("/cpus");
+ if (!cpus)
+ return;
+
+ for_each_child_of_node(cpus, np) {
+ /* Skip things that are not CPUs */
+ if (of_node_cmp(np->type, "cpu") != 0)
+ continue;
+
+ r = of_property_read_u32(np, "numa-node-id", &nid);
+ if (r)
+ continue;
+
+ pr_debug("NUMA: CPU on %u\n", nid);
+ if (nid >= MAX_NUMNODES)
+ pr_warn("NUMA: Node id %u exceeds maximum value\n",
+ nid);
+ else
+ node_set(nid, numa_nodes_parsed);
+ }
+}
+
+static int __init of_numa_parse_memory_nodes(void)
+{
+ struct device_node *np = NULL;
+ struct resource rsrc;
+ u32 nid;
+ int r = 0;
+
+ for (;;) {
+ np = of_find_node_by_type(np, "memory");
+ if (!np)
+ break;
+
+ r = of_property_read_u32(np, "numa-node-id", &nid);
+ if (r == -EINVAL)
+ /*
+ * property doesn't exist if -EINVAL, continue
+ * looking for more memory nodes with
+ * "numa-node-id" property
+ */
+ continue;
+ else if (r)
+ /* some other error */
+ break;
+
+ r = of_address_to_resource(np, 0, &rsrc);
+ if (r) {
+ pr_err("NUMA: bad reg property in memory node\n");
+ break;
+ }
+
+ pr_debug("NUMA: base = %llx len = %llx, node = %u\n",
+ rsrc.start, rsrc.end - rsrc.start + 1, nid);
+
+
+ r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1);
+ if (r)
+ break;
+ }
+ of_node_put(np);
+
+ return r;
+}
+
+static int __init of_numa_parse_distance_map_v1(struct device_node *map)
+{
+ const __be32 *matrix;
+ int entry_count;
+ int i;
+
+ pr_info("NUMA: parsing numa-distance-map-v1\n");
+
+ matrix = of_get_property(map, "distance-matrix", NULL);
+ if (!matrix) {
+ pr_err("NUMA: No distance-matrix property in distance-map\n");
+ return -EINVAL;
+ }
+
+ entry_count = of_property_count_u32_elems(map, "distance-matrix");
+ if (entry_count <= 0) {
+ pr_err("NUMA: Invalid distance-matrix\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i + 2 < entry_count; i += 3) {
+ u32 nodea, nodeb, distance;
+
+ nodea = of_read_number(matrix, 1);
+ matrix++;
+ nodeb = of_read_number(matrix, 1);
+ matrix++;
+ distance = of_read_number(matrix, 1);
+ matrix++;
+
+ numa_set_distance(nodea, nodeb, distance);
+ pr_debug("NUMA: distance[node%d -> node%d] = %d\n",
+ nodea, nodeb, distance);
+
+ /* Set default distance of node B->A same as A->B */
+ if (nodeb > nodea)
+ numa_set_distance(nodeb, nodea, distance);
+ }
+
+ return 0;
+}
+
+static int __init of_numa_parse_distance_map(void)
+{
+ int ret = 0;
+ struct device_node *np;
+
+ np = of_find_compatible_node(NULL, NULL,
+ "numa-distance-map-v1");
+ if (np)
+ ret = of_numa_parse_distance_map_v1(np);
+
+ of_node_put(np);
+ return ret;
+}
+
+int of_node_to_nid(struct device_node *device)
+{
+ struct device_node *np;
+ u32 nid;
+ int r = -ENODATA;
+
+ np = of_node_get(device);
+
+ while (np) {
+ struct device_node *parent;
+
+ r = of_property_read_u32(np, "numa-node-id", &nid);
+ /*
+ * -EINVAL indicates the property was not found, and
+ * we walk up the tree trying to find a parent with a
+ * "numa-node-id". Any other type of error indicates
+ * a bad device tree and we give up.
+ */
+ if (r != -EINVAL)
+ break;
+
+ parent = of_get_parent(np);
+ of_node_put(np);
+ np = parent;
+ }
+ if (np && r)
+ pr_warn("NUMA: Invalid \"numa-node-id\" property in node %s\n",
+ np->name);
+ of_node_put(np);
+
+ if (!r) {
+ if (nid >= MAX_NUMNODES)
+ pr_warn("NUMA: Node id %u exceeds maximum value\n",
+ nid);
+ else
+ return nid;
+ }
+
+ return NUMA_NO_NODE;
+}
+EXPORT_SYMBOL(of_node_to_nid);
+
+int __init of_numa_init(void)
+{
+ int r;
+
+ of_numa_parse_cpu_nodes();
+ r = of_numa_parse_memory_nodes();
+ if (r)
+ return r;
+ return of_numa_parse_distance_map();
+}
diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h
index 94a37cd7fbda..d4b72944ccda 100644
--- a/include/acpi/acpi_numa.h
+++ b/include/acpi/acpi_numa.h
@@ -15,6 +15,10 @@ extern int pxm_to_node(int);
extern int node_to_pxm(int);
extern int acpi_map_pxm_to_node(int);
extern unsigned char acpi_srat_revision;
+extern int acpi_numa __initdata;
+
+extern void bad_srat(void);
+extern int srat_disabled(void);
#endif /* CONFIG_ACPI_NUMA */
#endif /* __ACP_NUMA_H */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index a1a5ba1c8f99..e7d04c241635 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -239,12 +239,26 @@ int acpi_table_parse_madt(enum acpi_madt_type id,
int acpi_parse_mcfg (struct acpi_table_header *header);
void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
-/* the following four functions are architecture-dependent */
+/* the following numa functions are architecture-dependent */
void acpi_numa_slit_init (struct acpi_table_slit *slit);
+
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa);
+#else
+static inline void
+acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) { }
+#endif
+
void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa);
+
+#ifdef CONFIG_ARM64
+void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa);
+#else
+static inline void
+acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { }
+#endif
+
int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma);
-void acpi_numa_arch_fixup(void);
#ifndef PHYS_CPUID_INVALID
typedef u32 phys_cpuid_t;
diff --git a/include/linux/of.h b/include/linux/of.h
index 31758036787c..77ddace575e8 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -685,6 +685,15 @@ static inline int of_node_to_nid(struct device_node *device)
}
#endif
+#ifdef CONFIG_OF_NUMA
+extern int of_numa_init(void);
+#else
+static inline int of_numa_init(void)
+{
+ return -ENOSYS;
+}
+#endif
+
static inline struct device_node *of_find_matching_node(
struct device_node *from,
const struct of_device_id *matches)