aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>2012-09-25 17:26:51 +0100
committerAndrey Konovalov <andrey.konovalov@linaro.org>2013-05-25 13:19:25 +0400
commit15fe54be9c48c23eec14cd71f20938311b44483d (patch)
tree6ad8c24d293c50d423de8868913f45137de1c2d0
parentb0e9dfa34c1dd974fa8945140b3b718988cab8ae (diff)
downloadvexpress-lsk-15fe54be9c48c23eec14cd71f20938311b44483d.tar.gz
ARM: perf: set cpu affinity to support multiple PMUs
In a system with multiple heterogeneous CPU PMUs and each PMUs can handle events on a subset of CPUs, probably belonging a the same cluster. This patch introduces a cpumask to track which CPUs each PMU supports. It also updates armpmu_event_init to reject cpu-specific events being initialised for unsupported CPUs. Since process-specific events can be initialised for all the CPU PMUs,armpmu_start/stop/add are modified to prevent from being added on unsupported CPUs. Signed-off-by: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
-rw-r--r--Documentation/devicetree/bindings/arm/pmu.txt3
-rw-r--r--arch/arm/include/asm/pmu.h1
-rw-r--r--arch/arm/kernel/perf_event.c16
-rw-r--r--arch/arm/kernel/perf_event_cpu.c36
4 files changed, 49 insertions, 7 deletions
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
index 343781b9f24..4ce82d045a6 100644
--- a/Documentation/devicetree/bindings/arm/pmu.txt
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -16,6 +16,9 @@ Required properties:
"arm,arm1176-pmu"
"arm,arm1136-pmu"
- interrupts : 1 combined interrupt or 1 per core.
+- cluster : a phandle to the cluster to which it belongs
+ If there are more than one cluster with same CPU type
+ then there should be separate PMU nodes per cluster.
Example:
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index f24edad26c7..3713e57a467 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -65,6 +65,7 @@ struct pmu_hw_events {
struct arm_pmu {
struct pmu pmu;
cpumask_t active_irqs;
+ cpumask_t valid_cpus;
char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
void (*enable)(struct perf_event *event);
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 491573373f6..d847c622a7b 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -12,6 +12,7 @@
*/
#define pr_fmt(fmt) "hw perfevents: " fmt
+#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
@@ -161,6 +162,8 @@ armpmu_stop(struct perf_event *event, int flags)
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
/*
* ARM pmu always has to update the counter, so ignore
* PERF_EF_UPDATE, see comments in armpmu_start().
@@ -177,6 +180,8 @@ static void armpmu_start(struct perf_event *event, int flags)
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
/*
* ARM pmu always has to reprogram the period, so ignore
* PERF_EF_RELOAD, see the comment below.
@@ -204,6 +209,9 @@ armpmu_del(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
+
armpmu_stop(event, PERF_EF_UPDATE);
hw_events->events[idx] = NULL;
clear_bit(idx, hw_events->used_mask);
@@ -220,6 +228,10 @@ armpmu_add(struct perf_event *event, int flags)
int idx;
int err = 0;
+ /* An event following a process won't be stopped earlier */
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return 0;
+
perf_pmu_disable(event->pmu);
/* If we don't have a space for the counter then finish early. */
@@ -419,6 +431,10 @@ static int armpmu_event_init(struct perf_event *event)
int err = 0;
atomic_t *active_events = &armpmu->active_events;
+ if (event->cpu != -1 &&
+ !cpumask_test_cpu(event->cpu, &armpmu->valid_cpus))
+ return -ENOENT;
+
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index f62d6cb6524..725e225c84b 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -136,7 +136,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
{
int cpu;
- for_each_possible_cpu(cpu) {
+ for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) {
struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
events->events = per_cpu(hw_events, cpu);
events->used_mask = per_cpu(used_mask, cpu);
@@ -149,7 +149,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
/* Ensure the PMU has sane values out of reset. */
if (cpu_pmu->reset)
- on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+ on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1);
}
/*
@@ -249,6 +249,9 @@ static int probe_current_pmu(struct arm_pmu *pmu)
}
}
+ /* assume PMU support all the CPUs in this case */
+ cpumask_setall(&pmu->valid_cpus);
+
put_cpu();
return ret;
}
@@ -256,10 +259,9 @@ static int probe_current_pmu(struct arm_pmu *pmu)
static int cpu_pmu_device_probe(struct platform_device *pdev)
{
const struct of_device_id *of_id;
- int (*init_fn)(struct arm_pmu *);
struct device_node *node = pdev->dev.of_node;
struct arm_pmu *pmu;
- int ret = -ENODEV;
+ int ret = 0;
int cpu;
pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
@@ -269,8 +271,28 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
}
if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
- init_fn = of_id->data;
- ret = init_fn(pmu);
+ smp_call_func_t init_fn = (smp_call_func_t)of_id->data;
+ struct device_node *ncluster;
+ int cluster = -1;
+ cpumask_t sibling_mask;
+
+ ncluster = of_parse_phandle(node, "cluster", 0);
+ if (ncluster) {
+ int len;
+ const u32 *hwid;
+ hwid = of_get_property(ncluster, "reg", &len);
+ if (hwid && len == 4)
+ cluster = be32_to_cpup(hwid);
+ }
+ /* set sibling mask to all cpu mask if socket is not specified */
+ if (cluster == -1 ||
+ cluster_to_logical_mask(cluster, &sibling_mask))
+ cpumask_setall(&sibling_mask);
+
+ smp_call_function_any(&sibling_mask, init_fn, pmu, 1);
+
+ /* now set the valid_cpus after init */
+ cpumask_copy(&pmu->valid_cpus, &sibling_mask);
} else {
ret = probe_current_pmu(pmu);
}
@@ -280,7 +302,7 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
goto out_free;
}
- for_each_possible_cpu(cpu)
+ for_each_cpu_mask(cpu, pmu->valid_cpus)
per_cpu(cpu_pmu, cpu) = pmu;
pmu->plat_device = pdev;